""" Classes for working with (downloading, importing) the online census data. """ import os import urllib import zipfile import FileUtils class State: """ A state contains zero or more counties and cities. Each state has its own ID, as well as its own directory on the server. Example: SF1: http://www2.census.gov/census_2000/datasets/Summary_File_1/Maryland TIGER: http://www2.census.gov/geo/tiger/TIGER2009/24_MARYLAND """ SF1_ROOT = 'http://www2.census.gov/census_2000/datasets/Summary_File_1' TIGER_ROOT = 'http://www2.census.gov/geo/tiger/TIGER2009' def __init__(self, initial_id=None, initial_name=None, abbreviation = None): self.id = initial_id self.abbreviation = abbreviation self.name = initial_name self.counties = [] def sf1_data_root(self): sdr = self.SF1_ROOT + '/' sdr += self.name.replace(' ', '_') return sdr def tiger_data_root(self): tdr = self.TIGER_ROOT + '/' tdr += str(self.id) + '_' + self.name.upper().replace(' ', '_') return tdr def sf1_data_url(self): sdu = self.sf1_data_root() + '/' sdu += self.sf1_zipfile_name() return sdu def blocks_data_url(self): bdu = self.tiger_data_root() + '/' bdu += self.blocks_zipfile_name() return bdu def sf1_data_path(self): sdp = 'data/census2000/' sdp += self.name.lower().replace(' ', '_') sdp += '/sf1' return sdp def blocks_data_path(self): bdp = 'data/census2000/' bdp += self.name.lower().replace(' ', '_') bdp += '/blocks' return bdp def lines_data_path(self): ldp = 'data/census2000/' ldp += self.name.lower().replace(' ', '_') ldp += '/lines' return ldp def sf1_zipfile_name(self): return self.abbreviation.lower() + 'geo_uf1.zip' def blocks_zipfile_name(self): return 'tl_2009_' + str(self.id) + '_tabblock00.zip' def sf1_geo_file_path(self): sgfp = self.sf1_data_path() + '/' sgfp += self.abbreviation.lower() + 'geo.uf1' return sgfp def blocks_shapefile_path(self): bsp = self.blocks_data_path() + '/' bsp += 'tl_2009_' + str(self.id) + '_tabblock00.shp' return bsp def add_county(self, county_id, county_name, override_name=False): """ We would like each county to have a pointer to its containing state. This so we can compute the file URL, directory, and so forth from within the county. """ self.counties.append(County(county_id, county_name, self, override_name)) class County: """ A county represents either a county or city. It doesn't make sense, but 'county-level' data is given for certain cities which don't technically belong to any county. """ def __init__(self, initial_id=None, initial_name=None, initial_state=None, override_name=False): """ If this is a city, we should override our name with e.g. 'Baltimore city' so that full_name() doesn't transform 'Baltimore' in to 'Baltmore County'. """ self.id = initial_id self.name = initial_name self.state = initial_state self.override_name = override_name def state_county_id(self): return str(self.state.id) + ("%03d" % self.id) def full_name(self): """ Some of the counties (e.g. Baltimore City, Washington D.C.), need to have their names overridden since they aren't technically counties, but are treated as such by the Census. """ if (self.override_name == False): return self.name + ' County' else: # "Override name" basically means, "use the name I passed # you and don't add the word 'County' on to it." 
return self.name def lines_data_url(self): tdp = self.state.tiger_data_root() + '/' tdp += self.state_county_id() tdp += '_' + self.full_name().replace(' ', '_') + '/' tdp += self.lines_zipfile_name() return tdp def lines_zipfile_name(self): return 'tl_2009_' + self.state_county_id() + '_edges.zip' def lines_shapefile_path(self): sfp = self.state.lines_data_path() + '/' sfp += 'tl_2009_' + self.state_county_id() + '_edges.shp' return sfp def download_sf1(states): """ Download the Summary File 1 geo file for each state. """ for state in states: # First, create the blocks data path if it doesn't exist. FileUtils.mkdir_p(state.sf1_data_path(), 0755) if not os.path.exists(state.sf1_geo_file_path()): url = state.sf1_data_url() tmpfile = state.sf1_zipfile_name() print "Grabbing SF1 data for %s." % state.name print "Downloading %s to %s..." % (url, tmpfile) try: # This can fail for a bunch of reasons... urllib.urlretrieve(url, tmpfile) print "Unzipping %s to %s..." % (tmpfile, state.sf1_data_path()) z = zipfile.ZipFile(tmpfile) z.extractall(state.sf1_data_path()) except: # That we don't care about. pass finally: # But we always clean up after ourselves. print "Removing %s..." % tmpfile FileUtils.rm_f(tmpfile) print "Done.\n" def download_blocks(states): """ Download the TIGER/Line block files for each state. """ for state in states: # First, create the blocks data path if it doesn't exist. FileUtils.mkdir_p(state.blocks_data_path(), 0755) if not os.path.exists(state.blocks_shapefile_path()): url = state.blocks_data_url() tmpfile = state.blocks_zipfile_name() print "Grabbing TIGER blocks data for %s." % state.name print "Downloading %s to %s..." % (url, tmpfile) try: # This can fail for a bunch of reasons... urllib.urlretrieve(url, tmpfile) print "Unzipping %s to %s..." % (tmpfile, state.blocks_data_path()) z = zipfile.ZipFile(tmpfile) z.extractall(state.blocks_data_path()) except: # That we don't care about. pass finally: # But we always clean up after ourselves. print "Removing %s..." % tmpfile FileUtils.rm_f(tmpfile) print "Done.\n" def download_lines(states): """ Download the TIGER/Line 'all lines' files for each county in states. """ for state in states: # First, create the lines data path if it doesn't exist. FileUtils.mkdir_p(state.lines_data_path(), 0755) # Now loop through the counties, and download/unzip the lines # data if necessary. for county in state.counties: if not os.path.exists(county.lines_shapefile_path()): url = county.lines_data_url() tmpfile = county.lines_zipfile_name() print "Grabbing TIGER lines data for %s (%s)." % (county.full_name(), state.name) print "Downloading %s to %s..." % (url, tmpfile) try: # This can fail for a bunch of reasons... urllib.urlretrieve(url, tmpfile) print "Unzipping %s to %s..." % (tmpfile, state.lines_data_path()) z = zipfile.ZipFile(tmpfile) z.extractall(state.lines_data_path()) except: # That we don't care about. pass finally: # But we always clean up after ourselves. print "Removing %s..." % tmpfile FileUtils.rm_f(tmpfile) print "Done.\n"
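

# The block below is an illustrative usage sketch, not part of the module's
# original interface: it shows how State, County, and the download_* helpers
# are meant to fit together, using Maryland (state FIPS 24, abbreviation MD)
# as the example. The county IDs are Maryland's county FIPS codes
# (031 = Montgomery County, 510 = Baltimore city); they are passed as plain
# integers because state_county_id() zero-pads them with "%03d".
if __name__ == '__main__':
    maryland = State(24, 'Maryland', 'MD')

    # A regular county: full_name() appends ' County' automatically.
    maryland.add_county(31, 'Montgomery')

    # An independent city: override the name so full_name() leaves it alone
    # instead of producing 'Baltimore city County'.
    maryland.add_county(510, 'Baltimore city', override_name=True)

    # Fetch everything this module knows how to download. Each call skips
    # files that already exist on disk, so re-running is cheap.
    download_sf1([maryland])
    download_blocks([maryland])
    download_lines([maryland])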