--- /dev/null
+"""
+Classes for working with (downloading, importing) the online census
+data.
+"""
+
+import os
+import urllib
+import zipfile
+
+import FileUtils
+
+
+class State:
+    """
+    A state contains zero or more counties and cities. Each state has
+    its own ID, as well as its own directory on the server.
+
+    Example:
+
+      http://www2.census.gov/geo/tiger/TIGER2009/24_MARYLAND
+
+    """
+
+    # Root URL under which every state's directory lives.
+    TIGER_ROOT = 'http://www2.census.gov/geo/tiger/TIGER2009'
+
+    def __init__(self, initial_id=None, initial_name=None):
+        """
+        Store the state's ID and name, and start with no counties.
+        Counties are attached afterwards via add_county().
+        """
+        self.id = initial_id
+        self.name = initial_name
+        self.counties = []
+
+
+    def _fips_code(self):
+        """
+        Return the state ID as it appears in server directory names.
+
+        Numeric IDs are zero-padded to two digits (1 -> '01') so that
+        single-digit states resolve to e.g. '01_ALABAMA' rather than
+        '1_ALABAMA'. Anything that can't be read as an integer is
+        passed through str() untouched.
+        """
+        try:
+            return '%02d' % int(self.id)
+        except (TypeError, ValueError):
+            return str(self.id)
+
+
+    def tiger_data_url(self):
+        """
+        Return the URL of this state's directory on the server: the
+        padded state ID, an underscore, and the upper-cased state name
+        with spaces replaced by underscores.
+        """
+        directory = self._fips_code() + '_' + self.name.upper().replace(' ', '_')
+        return self.TIGER_ROOT + '/' + directory
+
+
+    def lines_data_path(self):
+        """
+        Return the relative local directory in which this state's
+        lines data is kept, e.g. 'data/census2000/new_york/lines'.
+        """
+        # NOTE(review): the directory says 'census2000' although the
+        # data comes from TIGER2009 -- confirm that this is intended.
+        ldp = 'data/census2000/'
+        ldp += self.name.lower().replace(' ', '_')
+        ldp += '/lines'
+        return ldp
+
+
+    def add_county(self, county_id, county_name, override_name=False):
+        """
+        We would like each county to have a pointer to its containing
+        state. This so we can compute the file URL, directory, and so
+        forth from within the county.
+        """
+        self.counties.append(County(county_id, county_name, self, override_name))
+
+
+
+class County:
+    """
+    A county represents either a county or city. It doesn't make
+    sense, but 'county-level' data is given for certain cities which
+    don't technically belong to any county.
+    """
+
+    def __init__(self, initial_id=None,
+                 initial_name=None,
+                 initial_state=None,
+                 override_name=False):
+        """
+        If this is a city, we should override our name with
+        e.g. 'Baltimore city' so that full_name() doesn't transform
+        'Baltimore' in to 'Baltimore County'.
+        """
+        self.id = initial_id
+        self.name = initial_name
+        self.state = initial_state
+        self.override_name = override_name
+
+
+    def state_county_id(self):
+        """
+        Return the combined state + county ID used in the server's
+        directory and file names, e.g. '24005' or '01001'.
+
+        The state part is zero-padded to two digits and the county
+        part to three. Without the state padding, single-digit states
+        would produce a four-character ID that doesn't match anything
+        on the server.
+        """
+        state_id = self.state.id
+        try:
+            state_part = '%02d' % int(state_id)
+        except (TypeError, ValueError):
+            # Not numeric; use it as given rather than fail.
+            state_part = str(state_id)
+
+        return state_part + ("%03d" % self.id)
+
+
+    def full_name(self):
+        """
+        Some of the counties (e.g. Baltimore City, Washington D.C.),
+        need to have their names overridden since they aren't
+        technically counties, but are treated as such by the Census.
+        """
+        if self.override_name:
+            # "Override name" basically means, "use the name I passed
+            # you and don't add the word 'County' on to it."
+            return self.name
+
+        return self.name + ' County'
+
+
+    def tiger_data_url(self):
+        """
+        Return the URL of this county's zip file: the state's URL,
+        then a directory named after the combined ID and full name
+        (spaces become underscores), then the zip file itself.
+        """
+        tdp = self.state.tiger_data_url() + '/'
+        tdp += self.state_county_id()
+        tdp += '_' + self.full_name().replace(' ', '_') + '/'
+        tdp += self.zipfile_name()
+        return tdp
+
+
+    def zipfile_name(self):
+        """
+        Return the name of the zip file holding this county's edges.
+        """
+        return 'tl_2009_' + self.state_county_id() + '_edges.zip'
+
+
+    def shapefile_path(self):
+        """
+        Return the path of this county's edges shapefile within its
+        state's lines data directory.
+        """
+        sfp = self.state.lines_data_path() + '/'
+        sfp += 'tl_2009_' + self.state_county_id() + '_edges.shp'
+        return sfp
+
+
+def download_lines(states):
+    """
+    Download the TIGER/Line 'all lines' files for each county in states.
+
+    For every county whose shapefile isn't already present, the
+    county's zip file is downloaded under its bare file name (so,
+    relative to the current directory), extracted in to the state's
+    lines data path, and then removed again. A failure for one county
+    is reported and skipped; it doesn't stop the remaining counties.
+    """
+
+    for state in states:
+        lines_path = state.lines_data_path()
+
+        # First, create the lines data path if it doesn't exist.
+        # (0o755 is the same mode as the old-style literal 0755.)
+        FileUtils.mkdir_p(lines_path, 0o755)
+
+        # Now loop through the counties, and download/unzip the lines
+        # data if necessary.
+        for county in state.counties:
+            if os.path.exists(county.shapefile_path()):
+                # We already have this one.
+                continue
+
+            url = county.tiger_data_url()
+            tmpfile = county.zipfile_name()
+            print("Grabbing data for %s (%s)." % (county.full_name(), state.name))
+            print("Downloading %s to %s..." % (url, tmpfile))
+
+            try:
+                # This can fail for a bunch of reasons...
+                urllib.urlretrieve(url, tmpfile)
+                print("Unzipping %s to %s..." % (tmpfile, lines_path))
+                z = zipfile.ZipFile(tmpfile)
+                try:
+                    z.extractall(lines_path)
+                finally:
+                    # Don't leak the file handle if extraction fails.
+                    z.close()
+            except Exception as e:
+                # ...none of which should stop the other downloads,
+                # but we do say what went wrong. Catching Exception
+                # (rather than everything) lets Ctrl-C through.
+                print("Failed to fetch %s: %s" % (url, e))
+            finally:
+                # But we always clean up after ourselves.
+                print("Removing %s..." % tmpfile)
+                FileUtils.rm_f(tmpfile)
+                print("Done.\n")