TIGER_SRID=4269
SHAPELY_URL=http://pypi.python.org/packages/source/S/Shapely/Shapely-1.0.14.tar.gz
-# Root folder for the shapefiles.
-TIGER_ROOT=http://www2.census.gov/geo/tiger/TIGER2009
-
-# State-specific folders.
-DC_ROOT=$(TIGER_ROOT)/11_DISTRICT_OF_COLUMBIA
-MD_ROOT=$(TIGER_ROOT)/24_MARYLAND
-VA_ROOT=$(TIGER_ROOT)/51_VIRGINIA
-PA_ROOT=$(TIGER_ROOT)/42_PENNSYLVANIA
-NY_ROOT=$(TIGER_ROOT)/36_NEW_YORK
-
-# URLs for the TIGER/Line block-level shapefiles.
-DC_BLOCKS_URL=$(DC_ROOT)/tl_2009_11_tabblock00.zip
-MD_BLOCKS_URL=$(MD_ROOT)/tl_2009_24_tabblock00.zip
-VA_BLOCKS_URL=$(VA_ROOT)/tl_2009_51_tabblock00.zip
-PA_BLOCKS_URL=$(PA_ROOT)/tl_2009_42_tabblock00.zip
-NY_BLOCKS_URL=$(NY_ROOT)/tl_2009_36_tabblock00.zip
# Starting with PostGIS 1.4.0, these paths are calculated at install
# time using the pg_config utility. Rather than try to guess where
find ./ -name '*.pyc' -print0 | xargs -0 rm -f
-# Download the shapefiles from Tiger if they don't already exist.
-data: tiger_blocks tiger_lines
-
-tiger_blocks: dc_blocks md_blocks va_blocks pa_blocks ny_blocks
-
-dc_blocks:
- mkdir -p data/census2000/district_of_columbia/block
- if [ ! -f data/census2000/district_of_columbia/block/tl_2009_11_tabblock00.shp ]; \
- then \
- wget -O dcblocks.zip $(DC_BLOCKS_URL); \
- unzip dcblocks.zip -d ./data/census2000/district_of_columbia/block; \
- rm dcblocks.zip; \
- fi;
-
-md_blocks:
- mkdir -p data/census2000/maryland/block
- if [ ! -f data/census2000/maryland/block/tl_2009_24_tabblock00.shp ]; \
- then \
- wget -O mdblocks.zip $(MD_BLOCKS_URL); \
- unzip mdblocks.zip -d ./data/census2000/maryland/block; \
- rm mdblocks.zip; \
- fi;
-
-va_blocks:
- mkdir -p data/census2000/virginia/block
- if [ ! -f data/census2000/virginia/block/tl_2009_51_tabblock00.shp ]; \
- then \
- wget -O vablocks.zip $(VA_BLOCKS_URL); \
- unzip vablocks.zip -d ./data/census2000/virginia/block; \
- rm vablocks.zip; \
- fi;
-
-pa_blocks:
- mkdir -p data/census2000/pennsylvania/block
- if [ ! -f data/census2000/pennsylvania/block/tl_2009_42_tabblock00.shp ]; \
- then \
- wget -O pablocks.zip $(PA_BLOCKS_URL); \
- unzip pablocks.zip -d ./data/census2000/pennsylvania/block; \
- rm pablocks.zip; \
- fi;
-
-ny_blocks:
- mkdir -p data/census2000/new_york/block
- if [ ! -f data/census2000/new_york/block/tl_2009_36_tabblock00.shp ]; \
- then \
- wget -O nyblocks.zip $(NY_BLOCKS_URL); \
- unzip nyblocks.zip -d ./data/census2000/new_york/block; \
- rm nyblocks.zip; \
- fi;
-
-
-tiger_lines:
+# `data` is also the name of the directory the shapefiles are
+# downloaded into, so the target must be phony; otherwise make sees
+# the existing directory, reports the target as up to date, and never
+# runs the download script.
+.PHONY: data
+data:
bin/download_data
-a \
-s $(TIGER_SRID) \
-D \
- $$state/block/*.shp \
+ $$state/blocks/*.shp \
tiger_blocks \
| psql -U $(DB_USER) -d $(DB_NAME); \
done;
-p \
-I \
-s $(TIGER_SRID) \
- data/census2000/maryland/block/tl_2009_24_tabblock00.shp \
+ data/census2000/maryland/blocks/tl_2009_24_tabblock00.shp \
tiger_blocks \
| psql -U postgres -d $(DB_NAME) \
> /dev/null
self.counties = []
- def tiger_data_url(self):
- tdu = self.TIGER_ROOT + '/'
- tdu += str(self.id) + '_' + self.name.upper().replace(' ', '_')
- return tdu
+ def tiger_data_root(self):
+     """
+     Return the root TIGER URL for this state: TIGER_ROOT, a slash,
+     and then the state id joined by an underscore to the upper-cased
+     state name (with spaces turned into underscores).
+     """
+     return (self.TIGER_ROOT + '/' +
+             str(self.id) + '_' + self.name.upper().replace(' ', '_'))
+
+
+ def blocks_data_url(self):
+     """
+     Return the URL of this state's blocks zip file, i.e. the state's
+     TIGER data root followed by a slash and the blocks zipfile name.
+     """
+     return '/'.join([self.tiger_data_root(), self.blocks_zipfile_name()])
+
+
+ def blocks_data_path(self):
+     """
+     Return the relative directory holding this state's blocks data.
+     The state's folder is its name, lower-cased, with spaces
+     replaced by underscores.
+     """
+     state_dir = self.name.lower().replace(' ', '_')
+     return 'data/census2000/' + state_dir + '/blocks'
def lines_data_path(self):
return ldp
+ def blocks_zipfile_name(self):
+     """
+     Return the file name of this state's blocks zip file, which
+     embeds the state id.
+     """
+     return 'tl_2009_%s_tabblock00.zip' % str(self.id)
+
+
+ def blocks_shapefile_path(self):
+     """
+     Return the path of this state's blocks shapefile: the blocks
+     data path, a slash, and the .shp file name embedding the state id.
+     """
+     return '/'.join([self.blocks_data_path(),
+                      'tl_2009_' + str(self.id) + '_tabblock00.shp'])
+
+
def add_county(self, county_id, county_name, override_name=False):
"""
We would like each county to have a pointer to its containing
state. This so we can compute the file URL, directory, and so
forth from within the county.
"""
- self.counties.append(County(county_id, county_name, self, override_name))
+ self.counties.append(County(county_id,
+ county_name,
+ self,
+ override_name))
+
+
-
-
class County:
"""
A county represents either a county or city. It doesn't make
# you and don't add the word 'County' on to it."
return self.name
-
- def tiger_data_url(self):
- tdp = self.state.tiger_data_url() + '/'
+
+ def lines_data_url(self):
+ tdp = self.state.tiger_data_root() + '/'
tdp += self.state_county_id()
tdp += '_' + self.full_name().replace(' ', '_') + '/'
- tdp += self.zipfile_name()
+ tdp += self.lines_zipfile_name()
return tdp
- def zipfile_name(self):
+ def lines_zipfile_name(self):
return 'tl_2009_' + self.state_county_id() + '_edges.zip'
-
- def shapefile_path(self):
+
+ def lines_shapefile_path(self):
sfp = self.state.lines_data_path() + '/'
sfp += 'tl_2009_' + self.state_county_id() + '_edges.shp'
return sfp
+
+def download_blocks(states):
+    """
+    Download the TIGER/Line block files for each state.
+
+    For every state whose blocks shapefile is not already on disk,
+    the blocks zip file is downloaded into the current directory,
+    extracted into the state's blocks data path, and then removed.
+    A failed download or extraction is reported and skipped, so the
+    remaining states are still processed.
+    """
+
+    for state in states:
+        # First, create the blocks data path if it doesn't exist.
+        FileUtils.mkdir_p(state.blocks_data_path(), 0755)
+
+        if os.path.exists(state.blocks_shapefile_path()):
+            # The shapefile is already in place; nothing to fetch.
+            continue
+
+        url = state.blocks_data_url()
+        tmpfile = state.blocks_zipfile_name()
+        print "Grabbing data for %s." % state.name
+        print "Downloading %s to %s..." % (url, tmpfile)
+
+        try:
+            # This can fail for a bunch of reasons...
+            urllib.urlretrieve(url, tmpfile)
+            print "Unzipping %s to %s..." % (tmpfile, state.blocks_data_path())
+            z = zipfile.ZipFile(tmpfile)
+            try:
+                z.extractall(state.blocks_data_path())
+            finally:
+                # Release the file handle before the zip is removed.
+                z.close()
+        except Exception as e:
+            # Best effort: say what went wrong and move on to the next
+            # state. Catching Exception (rather than everything) lets
+            # Ctrl-C still interrupt the loop.
+            print "Failed to download blocks for %s: %s" % (state.name, e)
+        finally:
+            # But we always clean up after ourselves.
+            print "Removing %s..." % tmpfile
+            FileUtils.rm_f(tmpfile)
+            print "Done.\n"
+
+
+
def download_lines(states):
"""
Download the TIGER/Line 'all lines' files for each county in states.
# Now loop through the counties, and download/unzip the lines
# data if necessary.
for county in state.counties:
- if not os.path.exists(county.shapefile_path()):
+ if not os.path.exists(county.lines_shapefile_path()):
- url = county.tiger_data_url()
+ url = county.lines_data_url()
- tmpfile = county.zipfile_name()
+ tmpfile = county.lines_zipfile_name()
print "Grabbing data for %s (%s)." % (county.full_name(), state.name)
print "Downloading %s to %s..." % (url, tmpfile)
# That we don't care about.
pass
finally:
- # But we always clean up after ourselves.
+ # But we always clean up after ourselves.
print "Removing %s..." % tmpfile
FileUtils.rm_f(tmpfile)
print "Done.\n"