"""
Classes for working with (downloading, importing) the online census
data.
"""

import os
import urllib
import zipfile

import FileUtils


class State:
    """
    A state contains zero or more counties and cities. Each state has
    its own ID, as well as its own directory on the server.

    Example:

    http://www2.census.gov/geo/tiger/TIGER2009/24_MARYLAND

    """

    TIGER_ROOT = 'http://www2.census.gov/geo/tiger/TIGER2009'

    def __init__(self, initial_id=None, initial_name=None):
        """
        Store the state's ID and name, and start out with an empty
        list of counties. Counties are added later via add_county().
        """
        self.id = initial_id
        self.name = initial_name
        self.counties = []


    def tiger_data_url(self):
        """
        Return the URL of this state's directory on the server:
        TIGER_ROOT, a slash, the two-digit state ID, an underscore,
        and the upper-cased state name with spaces replaced by
        underscores.
        """
        # State IDs are two digits wide in the directory names (see
        # the example in the class docstring), so an ID given as a
        # single-digit integer must be zero-padded. IDs that are
        # already two characters wide pass through unchanged.
        padded_id = str(self.id).zfill(2)
        directory = padded_id + '_' + self.name.upper().replace(' ', '_')
        return self.TIGER_ROOT + '/' + directory


    def lines_data_path(self):
        """
        Return the relative directory in which this state's lines
        data is kept: 'data/census2000/<name>/lines', where <name> is
        the lower-cased state name with spaces replaced by
        underscores.
        """
        dirname = self.name.lower().replace(' ', '_')
        return 'data/census2000/' + dirname + '/lines'


    def add_county(self, county_id, county_name, override_name=False):
        """
        We would like each county to have a pointer to its containing
        state. This so we can compute the file URL, directory, and so
        forth from within the county.

        A new County is constructed from the arguments (with this
        state as its parent) and appended to self.counties.
        """
        self.counties.append(County(county_id, county_name, self, override_name))

        
class County:
    """
    A county represents either a county or city. It doesn't make
    sense, but 'county-level' data is given for certain cities which
    don't technically belong to any county.
    """

    def __init__(self, initial_id=None,
                 initial_name=None,
                 initial_state=None,
                 override_name=False):
        """
        If this is a city, we should override our name with
        e.g. 'Baltimore city' so that full_name() doesn't transform
        'Baltimore' into 'Baltimore County'.

        The initial_state is the object whose id, tiger_data_url()
        and lines_data_path() are used to build this county's
        identifiers and paths.
        """
        self.id = initial_id
        self.name = initial_name
        self.state = initial_state
        self.override_name = override_name


    def state_county_id(self):
        """
        Return the state ID followed by the county ID, as a string.
        The state part is zero-padded to two digits and the county
        part to three, e.g. state 1 / county 1 gives '01001'.
        """
        # Without the padding, a single-digit integer state ID would
        # yield a four-character identifier, which then propagates
        # into zipfile_name(), shapefile_path() and tiger_data_url().
        state_part = str(self.state.id).zfill(2)
        county_part = "%03d" % self.id
        return state_part + county_part


    def full_name(self):
        """
        Some of the counties (e.g. Baltimore City, Washington D.C.),
        need to have their names overridden since they aren't
        technically counties, but are treated as such by the Census.
        """
        if self.override_name:
            # "Override name" basically means, "use the name I passed
            # you and don't add the word 'County' on to it."
            return self.name

        return self.name + ' County'


    def tiger_data_url(self):
        """
        Return the URL of this county's zip file: the state's URL,
        then a '<state_county_id>_<full_name>' directory (spaces in
        the name replaced by underscores), then zipfile_name().
        """
        directory = self.state_county_id() + '_' + self.full_name().replace(' ', '_')
        return '/'.join([self.state.tiger_data_url(), directory, self.zipfile_name()])


    def zipfile_name(self):
        """
        Return the name of the 'edges' zip file for this county.
        """
        return 'tl_2009_' + self.state_county_id() + '_edges.zip'


    def shapefile_path(self):
        """
        Return the path of this county's 'edges' shapefile, which
        lives directly under the state's lines data path.
        """
        filename = 'tl_2009_' + self.state_county_id() + '_edges.shp'
        return self.state.lines_data_path() + '/' + filename


def download_lines(states):
    """
    Download the TIGER/Line 'all lines' files for each county in states.

    For every county whose shapefile does not already exist, the
    county's zip file is downloaded into the current directory,
    extracted into the state's lines data path, and then removed.
    This is a best-effort operation: a failure for one county is
    reported and the loop moves on to the next one.
    """

    for state in states:
        lines_dir = state.lines_data_path()

        # First, create the lines data path if it doesn't exist.
        FileUtils.mkdir_p(lines_dir, 0o755)

        # Now loop through the counties, and download/unzip the lines
        # data if necessary.
        for county in state.counties:
            if os.path.exists(county.shapefile_path()):
                # We already have this county's data.
                continue

            url = county.tiger_data_url()
            tmpfile = county.zipfile_name()
            print("Grabbing data for %s (%s)." % (county.full_name(), state.name))
            print("Downloading %s to %s..." % (url, tmpfile))

            try:
                # This can fail for a bunch of reasons...
                urllib.urlretrieve(url, tmpfile)
                print("Unzipping %s to %s..." % (tmpfile, lines_dir))
                z = zipfile.ZipFile(tmpfile)
                try:
                    z.extractall(lines_dir)
                finally:
                    # Don't leak the open file handle, even if the
                    # extraction blows up.
                    z.close()
            except Exception as e:
                # We don't want one bad county to stop the whole run,
                # but we do want to know that it happened. Catching
                # Exception (rather than everything) still lets
                # Ctrl-C and sys.exit() through.
                print("Failed to fetch %s: %s" % (url, e))
            finally:
                # But we always clean up after ourselves.
                print("Removing %s..." % tmpfile)
                FileUtils.rm_f(tmpfile)
                print("Done.\n")