X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FData.py;h=be5b1d23d26398d452fe8579cf2f09ddf0e03584;hb=3a1235a834118bb52c5d92fce9c7182c04a44e0b;hp=a8d962d1c52518fccfb881f2c1ff46dc8d099ba2;hpb=a6cd5954135c79ba6057a933f38b961e4e55c2d2;p=dead%2Fcensus-tools.git diff --git a/src/Data.py b/src/Data.py index a8d962d..be5b1d2 100644 --- a/src/Data.py +++ b/src/Data.py @@ -17,22 +17,57 @@ class State: Example: - http://www2.census.gov/geo/tiger/TIGER2009/24_MARYLAND + SF1: http://www2.census.gov/census_2000/datasets/Summary_File_1/Maryland + TIGER: http://www2.census.gov/geo/tiger/TIGER2009/24_MARYLAND """ + SF1_ROOT = 'http://www2.census.gov/census_2000/datasets/Summary_File_1' TIGER_ROOT = 'http://www2.census.gov/geo/tiger/TIGER2009' - def __init__(self, initial_id=None, initial_name=None): + def __init__(self, initial_id=None, initial_name=None, abbreviation = None): self.id = initial_id + self.abbreviation = abbreviation self.name = initial_name self.counties = [] - def tiger_data_url(self): - tdu = self.TIGER_ROOT + '/' - tdu += str(self.id) + '_' + self.name.upper().replace(' ', '_') - return tdu + def sf1_data_root(self): + sdr = self.SF1_ROOT + '/' + sdr += self.name.replace(' ', '_') + return sdr + + + def tiger_data_root(self): + tdr = self.TIGER_ROOT + '/' + tdr += str(self.id) + '_' + self.name.upper().replace(' ', '_') + return tdr + + + def sf1_data_url(self): + sdu = self.sf1_data_root() + '/' + sdu += self.sf1_zipfile_name() + return sdu + + + def blocks_data_url(self): + bdu = self.tiger_data_root() + '/' + bdu += self.blocks_zipfile_name() + return bdu + + + def sf1_data_path(self): + sdp = 'data/census2000/' + sdp += self.name.lower().replace(' ', '_') + sdp += '/sf1' + return sdp + + + def blocks_data_path(self): + bdp = 'data/census2000/' + bdp += self.name.lower().replace(' ', '_') + bdp += '/blocks' + return bdp def lines_data_path(self): @@ -42,16 +77,39 @@ class State: return ldp + def sf1_zipfile_name(self): + return self.abbreviation.lower() + 'geo_uf1.zip' + + + def blocks_zipfile_name(self): + return 'tl_2009_' + str(self.id) + '_tabblock00.zip' + + + def sf1_geo_file_path(self): + sgfp = self.sf1_data_path() + '/' + sgfp += self.abbreviation.lower() + 'geo.uf1' + return sgfp + + + def blocks_shapefile_path(self): + bsp = self.blocks_data_path() + '/' + bsp += 'tl_2009_' + str(self.id) + '_tabblock00.shp' + return bsp + + def add_county(self, county_id, county_name, override_name=False): """ We would like each county to have a pointer to its containing state. This so we can compute the file URL, directory, and so forth from within the county. """ - self.counties.append(County(county_id, county_name, self, override_name)) + self.counties.append(County(county_id, + county_name, + self, + override_name)) + + - - class County: """ A county represents either a county or city. It doesn't make @@ -91,25 +149,90 @@ class County: # you and don't add the word 'County' on to it." return self.name - - def tiger_data_url(self): - tdp = self.state.tiger_data_url() + '/' + + def lines_data_url(self): + tdp = self.state.tiger_data_root() + '/' tdp += self.state_county_id() tdp += '_' + self.full_name().replace(' ', '_') + '/' - tdp += self.zipfile_name() + tdp += self.lines_zipfile_name() return tdp - def zipfile_name(self): + def lines_zipfile_name(self): return 'tl_2009_' + self.state_county_id() + '_edges.zip' - - def shapefile_path(self): + + def lines_shapefile_path(self): sfp = self.state.lines_data_path() + '/' sfp += 'tl_2009_' + self.state_county_id() + '_edges.shp' return sfp + +def download_sf1(states): + """ + Download the Summary File 1 geo file for each state. + """ + + for state in states: + # First, create the blocks data path if it doesn't exist. + FileUtils.mkdir_p(state.sf1_data_path(), 0755) + + if not os.path.exists(state.sf1_geo_file_path()): + url = state.sf1_data_url() + tmpfile = state.sf1_zipfile_name() + print "Grabbing SF1 data for %s." % state.name + print "Downloading %s to %s..." % (url, tmpfile) + + try: + # This can fail for a bunch of reasons... + urllib.urlretrieve(url, tmpfile) + print "Unzipping %s to %s..." % (tmpfile, state.sf1_data_path()) + z = zipfile.ZipFile(tmpfile) + z.extractall(state.sf1_data_path()) + except: + # That we don't care about. + pass + finally: + # But we always clean up after ourselves. + print "Removing %s..." % tmpfile + FileUtils.rm_f(tmpfile) + print "Done.\n" + + + +def download_blocks(states): + """ + Download the TIGER/Line block files for each state. + """ + + for state in states: + # First, create the blocks data path if it doesn't exist. + FileUtils.mkdir_p(state.blocks_data_path(), 0755) + + if not os.path.exists(state.blocks_shapefile_path()): + url = state.blocks_data_url() + tmpfile = state.blocks_zipfile_name() + print "Grabbing TIGER blocks data for %s." % state.name + print "Downloading %s to %s..." % (url, tmpfile) + + try: + # This can fail for a bunch of reasons... + urllib.urlretrieve(url, tmpfile) + print "Unzipping %s to %s..." % (tmpfile, state.blocks_data_path()) + z = zipfile.ZipFile(tmpfile) + z.extractall(state.blocks_data_path()) + except: + # That we don't care about. + pass + finally: + # But we always clean up after ourselves. + print "Removing %s..." % tmpfile + FileUtils.rm_f(tmpfile) + print "Done.\n" + + + def download_lines(states): """ Download the TIGER/Line 'all lines' files for each county in states. @@ -122,10 +245,10 @@ def download_lines(states): # Now loop through the counties, and download/unzip the lines # data if necessary. for county in state.counties: - if not os.path.exists(county.shapefile_path()): - url = county.tiger_data_url() - tmpfile = county.zipfile_name() - print "Grabbing data for %s (%s)." % (county.full_name(), state.name) + if not os.path.exists(county.lines_shapefile_path()): + url = county.lines_data_url() + tmpfile = county.lines_zipfile_name() + print "Grabbing TIGER lines data for %s (%s)." % (county.full_name(), state.name) print "Downloading %s to %s..." % (url, tmpfile) try: @@ -138,7 +261,7 @@ def download_lines(states): # That we don't care about. pass finally: - # But we always clean up after ourselves. + # But we always clean up after ourselves. print "Removing %s..." % tmpfile FileUtils.rm_f(tmpfile) print "Done.\n"