""" Classes for working with (downloading, importing) the online census data. """ import os import urllib import zipfile import FileUtils class State: """ A state contains zero or more counties and cities. Each state has its own ID, as well as its own directory on the server. Example: SF1: http://www2.census.gov/census_2000/datasets/Summary_File_1/Maryland TIGER: http://www2.census.gov/geo/tiger/TIGER2009/24_MARYLAND """ SF1_ROOT = 'http://www2.census.gov/census_2000/datasets/Summary_File_1' TIGER_ROOT = 'http://www2.census.gov/geo/tiger/TIGER2009' def __init__(self, initial_id=None, initial_name=None, abbreviation = None): self.id = initial_id self.abbreviation = abbreviation self.name = initial_name self.counties = [] def sf1_data_root(self): sdr = self.SF1_ROOT + '/' sdr += self.name.replace(' ', '_') return sdr def tiger_data_root(self): tdr = self.TIGER_ROOT + '/' tdr += str(self.id) + '_' + self.name.upper().replace(' ', '_') return tdr def sf1_data_url(self): sdu = self.sf1_data_root() + '/' sdu += self.sf1_zipfile_name() return sdu def blocks_data_url(self): bdu = self.tiger_data_root() + '/' bdu += self.blocks_zipfile_name() return bdu def sf1_data_path(self): sdp = 'data/census2000/' sdp += self.name.lower().replace(' ', '_') sdp += '/sf1' return sdp def blocks_data_path(self): bdp = 'data/census2000/' bdp += self.name.lower().replace(' ', '_') bdp += '/blocks' return bdp def lines_data_path(self): ldp = 'data/census2000/' ldp += self.name.lower().replace(' ', '_') ldp += '/lines' return ldp def sf1_zipfile_name(self): return self.abbreviation.lower() + 'geo_uf1.zip' def blocks_zipfile_name(self): return 'tl_2009_' + str(self.id) + '_tabblock00.zip' def sf1_geo_file_path(self): sgfp = self.sf1_data_path() + '/' sgfp += self.abbreviation.lower() + 'geo.uf1' return sgfp def blocks_shapefile_path(self): bsp = self.blocks_data_path() + '/' bsp += 'tl_2009_' + str(self.id) + '_tabblock00.shp' return bsp def add_county(self, county_id, county_name, override_name=False): """ We would like each county to have a pointer to its containing state. This so we can compute the file URL, directory, and so forth from within the county. """ self.counties.append(County(county_id, county_name, self, override_name)) class County: """ A county represents either a county or city. It doesn't make sense, but 'county-level' data is given for certain cities which don't technically belong to any county. """ def __init__(self, initial_id=None, initial_name=None, initial_state=None, override_name=False): """ If this is a city, we should override our name with e.g. 'Baltimore city' so that full_name() doesn't transform 'Baltimore' in to 'Baltmore County'. """ self.id = initial_id self.name = initial_name self.state = initial_state self.override_name = override_name def state_county_id(self): return str(self.state.id) + ("%03d" % self.id) def full_name(self): """ Some of the counties (e.g. Baltimore City, Washington D.C.), need to have their names overridden since they aren't technically counties, but are treated as such by the Census. """ if (self.override_name == False): return self.name + ' County' else: # "Override name" basically means, "use the name I passed # you and don't add the word 'County' on to it." 
return self.name def lines_data_url(self): tdp = self.state.tiger_data_root() + '/' tdp += self.state_county_id() tdp += '_' + self.full_name().replace(' ', '_') + '/' tdp += self.lines_zipfile_name() return tdp def lines_zipfile_name(self): return 'tl_2009_' + self.state_county_id() + '_edges.zip' def lines_shapefile_path(self): sfp = self.state.lines_data_path() + '/' sfp += 'tl_2009_' + self.state_county_id() + '_edges.shp' return sfp def download_sf1(states): """ Download the Summary File 1 geo file for each state. """ for state in states: # First, create the blocks data path if it doesn't exist. FileUtils.mkdir_p(state.sf1_data_path(), 0755) if not os.path.exists(state.sf1_geo_file_path()): url = state.sf1_data_url() tmpfile = state.sf1_zipfile_name() print "Grabbing SF1 data for %s." % state.name print "Downloading %s to %s..." % (url, tmpfile) try: # This can fail for a bunch of reasons... urllib.urlretrieve(url, tmpfile) print "Unzipping %s to %s..." % (tmpfile, state.sf1_data_path()) z = zipfile.ZipFile(tmpfile) z.extractall(state.sf1_data_path()) except: # That we don't care about. pass finally: # But we always clean up after ourselves. print "Removing %s..." % tmpfile FileUtils.rm_f(tmpfile) print "Done.\n" def download_blocks(states): """ Download the TIGER/Line block files for each state. """ for state in states: # First, create the blocks data path if it doesn't exist. FileUtils.mkdir_p(state.blocks_data_path(), 0755) if not os.path.exists(state.blocks_shapefile_path()): url = state.blocks_data_url() tmpfile = state.blocks_zipfile_name() print "Grabbing TIGER blocks data for %s." % state.name print "Downloading %s to %s..." % (url, tmpfile) try: # This can fail for a bunch of reasons... urllib.urlretrieve(url, tmpfile) print "Unzipping %s to %s..." % (tmpfile, state.blocks_data_path()) z = zipfile.ZipFile(tmpfile) z.extractall(state.blocks_data_path()) except: # That we don't care about. pass finally: # But we always clean up after ourselves. print "Removing %s..." % tmpfile FileUtils.rm_f(tmpfile) print "Done.\n" def download_lines(states): """ Download the TIGER/Line 'all lines' files for each county in states. """ for state in states: # First, create the lines data path if it doesn't exist. FileUtils.mkdir_p(state.lines_data_path(), 0755) # Now loop through the counties, and download/unzip the lines # data if necessary. for county in state.counties: if not os.path.exists(county.lines_shapefile_path()): url = county.lines_data_url() tmpfile = county.lines_zipfile_name() print "Grabbing TIGER lines data for %s (%s)." % (county.full_name(), state.name) print "Downloading %s to %s..." % (url, tmpfile) try: # This can fail for a bunch of reasons... urllib.urlretrieve(url, tmpfile) print "Unzipping %s to %s..." % (tmpfile, state.lines_data_path()) z = zipfile.ZipFile(tmpfile) z.extractall(state.lines_data_path()) except: # That we don't care about. pass finally: # But we always clean up after ourselves. print "Removing %s..." % tmpfile FileUtils.rm_f(tmpfile) print "Done.\n"
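

# The block below is an illustrative usage sketch, not part of the module's
# original interface: it shows how State, County, and the download_* helpers
# are meant to fit together, using Maryland (state FIPS 24, abbreviation MD)
# as the example. The county IDs are Maryland's county FIPS codes
# (031 = Montgomery County, 510 = Baltimore city); they are passed as plain
# integers because state_county_id() zero-pads them with "%03d".
if __name__ == '__main__':
    maryland = State(24, 'Maryland', 'MD')

    # A regular county: full_name() appends ' County' automatically.
    maryland.add_county(31, 'Montgomery')

    # An independent city: override the name so full_name() leaves it alone
    # instead of producing 'Baltimore city County'.
    maryland.add_county(510, 'Baltimore city', override_name=True)

    # Fetch everything this module knows how to download. Each call skips
    # files that already exist on disk, so re-running is cheap.
    download_sf1([maryland])
    download_blocks([maryland])
    download_lines([maryland])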