From f4d0245a7e3e026779ef7baa997b8793737f327e Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Wed, 11 Nov 2009 10:57:20 -0500 Subject: [PATCH] Modified the Data module and download script to download the Summary File 1 data. --- bin/download_data | 15 +++++----- src/Data.py | 72 ++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 76 insertions(+), 11 deletions(-) diff --git a/bin/download_data b/bin/download_data index 37741cd..81bffc4 100755 --- a/bin/download_data +++ b/bin/download_data @@ -17,13 +17,13 @@ from Data import State, County # Create the State objects. -de = State(10, 'Delaware') -dc = State(11, 'District of Columbia') -md = State(24, 'Maryland') -ny = State(36, 'New York') -pa = State(42, 'Pennsylvania') -va = State(51, 'Virginia') -wv = State(54, 'West Virginia') +de = State(10, 'Delaware', 'DE') +dc = State(11, 'District of Columbia', 'DC') +md = State(24, 'Maryland', 'MD') +ny = State(36, 'New York', 'NY') +pa = State(42, 'Pennsylvania', 'PA') +va = State(51, 'Virginia', 'VA') +wv = State(54, 'West Virginia', 'WV') # Now we add all of the counties belonging to each state, starting @@ -404,5 +404,6 @@ wv.add_county(109, 'Wyoming') # Now that the counties have been added, create a list of states to # pass to the download_lines function. 
states = [de, dc, md, ny, pa, va, wv] +Data.download_sf1(states) Data.download_blocks(states) Data.download_lines(states) diff --git a/src/Data.py b/src/Data.py index e08f466..d953124 100644 --- a/src/Data.py +++ b/src/Data.py @@ -17,30 +17,52 @@ class State: Example: - http://www2.census.gov/geo/tiger/TIGER2009/24_MARYLAND + SF1: http://www2.census.gov/census_2000/datasets/Summary_File_1/Maryland + TIGER: http://www2.census.gov/geo/tiger/TIGER2009/24_MARYLAND """ + SF1_ROOT = 'http://www2.census.gov/census_2000/datasets/Summary_File_1' TIGER_ROOT = 'http://www2.census.gov/geo/tiger/TIGER2009' - def __init__(self, initial_id=None, initial_name=None): + def __init__(self, initial_id=None, initial_name=None, abbreviation = None): self.id = initial_id + self.abbreviation = abbreviation self.name = initial_name self.counties = [] + def sf1_data_root(self): + sdr = self.SF1_ROOT + '/' + sdr += self.name.replace(' ', '_') + return sdr + + def tiger_data_root(self): tdr = self.TIGER_ROOT + '/' tdr += str(self.id) + '_' + self.name.upper().replace(' ', '_') return tdr + def sf1_data_url(self): + sdu = self.sf1_data_root() + '/' + sdu += self.sf1_zipfile_name() + return sdu + + def blocks_data_url(self): bdu = self.tiger_data_root() + '/' bdu += self.blocks_zipfile_name() return bdu + def sf1_data_path(self): + sdp = 'data/census2000/' + sdp += self.name.lower().replace(' ', '_') + sdp += '/sf1' + return sdp + + def blocks_data_path(self): bdp = 'data/census2000/' bdp += self.name.lower().replace(' ', '_') @@ -55,10 +77,20 @@ class State: return ldp + def sf1_zipfile_name(self): + return self.abbreviation.lower() + 'geo_uf1.zip' + + def blocks_zipfile_name(self): return 'tl_2009_' + str(self.id) + '_tabblock00.zip' + def sf1_geo_file_path(self): + sgfp = self.sf1_data_path() + '/' + sgfp += self.abbreviation.lower() + 'geo.uf1' + return sgfp + + def blocks_shapefile_path(self): bsp = self.blocks_data_path() + '/' bsp += 'tl_2009_' + str(self.id) + '_tabblock00.shp' @@ 
-137,6 +169,38 @@ class County: +def download_sf1(states): + """ + Download the Summary File 1 geo file for each state. + """ + + for state in states: + # First, create the SF1 data path if it doesn't exist. + FileUtils.mkdir_p(state.sf1_data_path(), 0755) + + if not os.path.exists(state.sf1_geo_file_path()): + url = state.sf1_data_url() + tmpfile = state.sf1_zipfile_name() + print "Grabbing SF1 data for %s." % state.name + print "Downloading %s to %s..." % (url, tmpfile) + + try: + # This can fail for a bunch of reasons... + urllib.urlretrieve(url, tmpfile) + print "Unzipping %s to %s..." % (tmpfile, state.sf1_data_path()) + z = zipfile.ZipFile(tmpfile) + z.extractall(state.sf1_data_path()) + except: + # That we don't care about. + pass + finally: + # But we always clean up after ourselves. + print "Removing %s..." % tmpfile + FileUtils.rm_f(tmpfile) + print "Done.\n" + + + def download_blocks(states): """ Download the TIGER/Line block files for each state. @@ -149,7 +213,7 @@ def download_blocks(states): if not os.path.exists(state.blocks_shapefile_path()): url = state.blocks_data_url() tmpfile = state.blocks_zipfile_name() - print "Grabbing data for %s." % state.name + print "Grabbing TIGER blocks data for %s." % state.name print "Downloading %s to %s..." % (url, tmpfile) try: @@ -184,7 +248,7 @@ def download_lines(states): if not os.path.exists(county.lines_shapefile_path()): url = county.tiger_data_url() tmpfile = county.lines_zipfile_name() - print "Grabbing data for %s (%s)." % (county.full_name(), state.name) + print "Grabbing TIGER lines data for %s (%s)." % (county.full_name(), state.name) print "Downloading %s to %s..." % (url, tmpfile) try: -- 2.44.2