From 5b2b3a2bb2c4620143c0a3a8ca20f88babe96f7b Mon Sep 17 00:00:00 2001
From: Michael Orlitzky
Date: Tue, 10 Nov 2009 10:06:40 -0500
Subject: [PATCH] Modified the download_data script to download the TIGER
 blocks. Removed the blocks download targets from the makefile.

---
 bin/download_data |  1 +
 makefile          | 73 ++-----------------------------------
 src/Data.py       | 93 ++++++++++++++++++++++++++++++++++++++---------
 3 files changed, 80 insertions(+), 87 deletions(-)

diff --git a/bin/download_data b/bin/download_data
index 6830f5f..37741cd 100755
--- a/bin/download_data
+++ b/bin/download_data
@@ -404,4 +404,5 @@ wv.add_county(109, 'Wyoming')
 # Now that the counties have been added, create a list of states to
 # pass to the download_lines function.
 states = [de, dc, md, ny, pa, va, wv]
+Data.download_blocks(states)
 Data.download_lines(states)
diff --git a/makefile b/makefile
index 11a9c5c..b47b5f2 100644
--- a/makefile
+++ b/makefile
@@ -3,22 +3,6 @@ DB_USER=postgres
 TIGER_SRID=4269
 SHAPELY_URL=http://pypi.python.org/packages/source/S/Shapely/Shapely-1.0.14.tar.gz
 
-# Root folder for the shapefiles.
-TIGER_ROOT=http://www2.census.gov/geo/tiger/TIGER2009
-
-# State-specific folders.
-DC_ROOT=$(TIGER_ROOT)/11_DISTRICT_OF_COLUMBIA
-MD_ROOT=$(TIGER_ROOT)/24_MARYLAND
-VA_ROOT=$(TIGER_ROOT)/51_VIRGINIA
-PA_ROOT=$(TIGER_ROOT)/42_PENNSYLVANIA
-NY_ROOT=$(TIGER_ROOT)/36_NEW_YORK
-
-# URLs for the TIGER/Line block-level shapefiles.
-DC_BLOCKS_URL=$(DC_ROOT)/tl_2009_11_tabblock00.zip
-MD_BLOCKS_URL=$(MD_ROOT)/tl_2009_24_tabblock00.zip
-VA_BLOCKS_URL=$(VA_ROOT)/tl_2009_51_tabblock00.zip
-PA_BLOCKS_URL=$(PA_ROOT)/tl_2009_42_tabblock00.zip
-NY_BLOCKS_URL=$(NY_ROOT)/tl_2009_36_tabblock00.zip
 
 # Starting with PostGIS 1.4.0, these paths are calculated at install
 # time using the pg_config utility. Rather than try to guess where
@@ -57,58 +41,7 @@ clean:
 	find ./ -name '*.pyc' -print0 | xargs -0 rm -f
 
 
-# Download the shapefiles from Tiger if they don't already exist.
-data: tiger_blocks tiger_lines
-
-tiger_blocks: dc_blocks md_blocks va_blocks pa_blocks ny_blocks
-
-dc_blocks:
-	mkdir -p data/census2000/district_of_columbia/block
-	if [ ! -f data/census2000/district_of_columbia/block/tl_2009_11_tabblock00.shp ]; \
-	then \
-	  wget -O dcblocks.zip $(DC_BLOCKS_URL); \
-	  unzip dcblocks.zip -d ./data/census2000/district_of_columbia/block; \
-	  rm dcblocks.zip; \
-	fi;
-
-md_blocks:
-	mkdir -p data/census2000/maryland/block
-	if [ ! -f data/census2000/maryland/block/tl_2009_24_tabblock00.shp ]; \
-	then \
-	  wget -O mdblocks.zip $(MD_BLOCKS_URL); \
-	  unzip mdblocks.zip -d ./data/census2000/maryland/block; \
-	  rm mdblocks.zip; \
-	fi;
-
-va_blocks:
-	mkdir -p data/census2000/virginia/block
-	if [ ! -f data/census2000/virginia/block/tl_2009_51_tabblock00.shp ]; \
-	then \
-	  wget -O vablocks.zip $(VA_BLOCKS_URL); \
-	  unzip vablocks.zip -d ./data/census2000/virginia/block; \
-	  rm vablocks.zip; \
-	fi;
-
-pa_blocks:
-	mkdir -p data/census2000/pennsylvania/block
-	if [ ! -f data/census2000/pennsylvania/block/tl_2009_42_tabblock00.shp ]; \
-	then \
-	  wget -O pablocks.zip $(PA_BLOCKS_URL); \
-	  unzip pablocks.zip -d ./data/census2000/pennsylvania/block; \
-	  rm pablocks.zip; \
-	fi;
-
-ny_blocks:
-	mkdir -p data/census2000/new_york/block
-	if [ ! -f data/census2000/new_york/block/tl_2009_36_tabblock00.shp ]; \
-	then \
-	  wget -O nyblocks.zip $(NY_BLOCKS_URL); \
-	  unzip nyblocks.zip -d ./data/census2000/new_york/block; \
-	  rm nyblocks.zip; \
-	fi;
-
-
-tiger_lines:
+data:
 	bin/download_data
 
 
@@ -129,7 +62,7 @@ db: data newdb tiger_blocks_table tiger_lines_table sf1_blocks_table
 	  -a \
 	  -s $(TIGER_SRID) \
 	  -D \
-	  $$state/block/*.shp \
+	  $$state/blocks/*.shp \
 	  tiger_blocks \
 	| psql -U $(DB_USER) -d $(DB_NAME); \
 	done;
@@ -199,7 +132,7 @@ tiger_blocks_table:
 	  -p \
 	  -I \
 	  -s $(TIGER_SRID) \
-	  data/census2000/maryland/block/tl_2009_24_tabblock00.shp \
+	  data/census2000/maryland/blocks/tl_2009_24_tabblock00.shp \
 	  tiger_blocks \
 	| psql -U postgres -d $(DB_NAME) \
 	> /dev/null
diff --git a/src/Data.py b/src/Data.py
index a8d962d..e08f466 100644
--- a/src/Data.py
+++ b/src/Data.py
@@ -29,10 +29,23 @@ class State:
         self.counties = []
 
 
-    def tiger_data_url(self):
-        tdu = self.TIGER_ROOT + '/'
-        tdu += str(self.id) + '_' + self.name.upper().replace(' ', '_')
-        return tdu
+    def tiger_data_root(self):
+        tdr = self.TIGER_ROOT + '/'
+        tdr += str(self.id) + '_' + self.name.upper().replace(' ', '_')
+        return tdr
+
+
+    def blocks_data_url(self):
+        bdu = self.tiger_data_root() + '/'
+        bdu += self.blocks_zipfile_name()
+        return bdu
+
+
+    def blocks_data_path(self):
+        bdp = 'data/census2000/'
+        bdp += self.name.lower().replace(' ', '_')
+        bdp += '/blocks'
+        return bdp
 
 
     def lines_data_path(self):
@@ -42,16 +55,29 @@ class State:
         return ldp
 
 
+    def blocks_zipfile_name(self):
+        return 'tl_2009_' + str(self.id) + '_tabblock00.zip'
+
+
+    def blocks_shapefile_path(self):
+        bsp = self.blocks_data_path() + '/'
+        bsp += 'tl_2009_' + str(self.id) + '_tabblock00.shp'
+        return bsp
+
+
     def add_county(self, county_id, county_name, override_name=False):
         """
        We would like each county to have a pointer to its containing
        state. This so we can compute the file URL, directory, and
        so forth from within the county.
        """
-        self.counties.append(County(county_id, county_name, self, override_name))
+        self.counties.append(County(county_id,
+                                    county_name,
+                                    self,
+                                    override_name))
+
+
 
-
-
 class County:
     """
     A county represents either a county or city. It doesn't make
@@ -91,25 +117,58 @@ class County:
         # you and don't add the word 'County' on to it."
         return self.name
 
-
-    def tiger_data_url(self):
-        tdp = self.state.tiger_data_url() + '/'
+
+    def lines_data_url(self):
+        tdp = self.state.tiger_data_root() + '/'
         tdp += self.state_county_id()
         tdp += '_' + self.full_name().replace(' ', '_') + '/'
-        tdp += self.zipfile_name()
+        tdp += self.lines_zipfile_name()
         return tdp
 
 
-    def zipfile_name(self):
+    def lines_zipfile_name(self):
         return 'tl_2009_' + self.state_county_id() + '_edges.zip'
 
-
-    def shapefile_path(self):
+
+    def lines_shapefile_path(self):
         sfp = self.state.lines_data_path() + '/'
         sfp += 'tl_2009_' + self.state_county_id() + '_edges.shp'
         return sfp
 
 
+
+def download_blocks(states):
+    """
+    Download the TIGER/Line block files for each state.
+    """
+
+    for state in states:
+        # First, create the blocks data path if it doesn't exist.
+        FileUtils.mkdir_p(state.blocks_data_path(), 0755)
+
+        if not os.path.exists(state.blocks_shapefile_path()):
+            url = state.blocks_data_url()
+            tmpfile = state.blocks_zipfile_name()
+            print "Grabbing data for %s." % state.name
+            print "Downloading %s to %s..." % (url, tmpfile)
+
+            try:
+                # This can fail for a bunch of reasons...
+                urllib.urlretrieve(url, tmpfile)
+                print "Unzipping %s to %s..." % (tmpfile, state.blocks_data_path())
+                z = zipfile.ZipFile(tmpfile)
+                z.extractall(state.blocks_data_path())
+            except:
+                # That we don't care about.
+                pass
+            finally:
+                # But we always clean up after ourselves.
+                print "Removing %s..." % tmpfile
+                FileUtils.rm_f(tmpfile)
+            print "Done.\n"
+
+
+
 def download_lines(states):
     """
     Download the TIGER/Line 'all lines' files for each county in states.
@@ -122,9 +181,9 @@ def download_lines(states):
         # Now loop through the counties, and download/unzip the lines
         # data if necessary.
         for county in state.counties:
-            if not os.path.exists(county.shapefile_path()):
+            if not os.path.exists(county.lines_shapefile_path()):
                 url = county.tiger_data_url()
-                tmpfile = county.zipfile_name()
+                tmpfile = county.lines_zipfile_name()
                 print "Grabbing data for %s (%s)." % (county.full_name(), state.name)
                 print "Downloading %s to %s..." % (url, tmpfile)
 
@@ -138,7 +197,7 @@ def download_lines(states):
                 # That we don't care about.
                 pass
             finally:
-                # But we always clean up after ourselves. 
+                # But we always clean up after ourselves.
                 print "Removing %s..." % tmpfile
                 FileUtils.rm_f(tmpfile)
             print "Done.\n"
-- 
2.44.2
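To exercise the new download path by hand, outside of bin/download_data, the fetch-and-extract pattern that download_blocks() implements needs only the Python 2 standard library. The sketch below is an illustration rather than project code: it hard-codes Maryland's 2009 block file (the MD_BLOCKS_URL this patch removes from the makefile) and the data/census2000/maryland/blocks directory that State.blocks_data_path() would compute for Maryland, and it substitutes os.makedirs/os.remove for the project's FileUtils helpers while omitting the broad exception handling.

    # Standalone sketch (assumptions: Python 2, Maryland only, no FileUtils).
    import os
    import urllib
    import zipfile

    TIGER_ROOT = 'http://www2.census.gov/geo/tiger/TIGER2009'
    url = TIGER_ROOT + '/24_MARYLAND/tl_2009_24_tabblock00.zip'
    dest = 'data/census2000/maryland/blocks'
    tmpfile = 'tl_2009_24_tabblock00.zip'

    if not os.path.isdir(dest):
        os.makedirs(dest)                   # stand-in for FileUtils.mkdir_p

    if not os.path.exists(os.path.join(dest, 'tl_2009_24_tabblock00.shp')):
        urllib.urlretrieve(url, tmpfile)    # download the zip into the cwd
        try:
            zipfile.ZipFile(tmpfile).extractall(dest)
        finally:
            os.remove(tmpfile)              # always clean up the temp zipfile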