]>
gitweb.michael.orlitzky.com - dead/census-tools.git/blob - src/Data.py
2 Classes for working with (downloading, importing) the online census
15 A state contains zero or more counties and cities. Each state has
16 its own ID, as well as its own directory on the server.
20 http://www2.census.gov/geo/tiger/TIGER2009/24_MARYLAND
24 TIGER_ROOT
= 'http://www2.census.gov/geo/tiger/TIGER2009'
26 def __init__(self
, initial_id
=None, initial_name
=None):
28 self
.name
= initial_name
32 def tiger_data_url(self
):
33 tdu
= self
.TIGER_ROOT
+ '/'
34 tdu
+= str(self
.id) + '_' + self
.name
.upper().replace(' ', '_')
38 def lines_data_path(self
):
39 ldp
= 'data/census2000/'
40 ldp
+= self
.name
.lower().replace(' ', '_')
def add_county(self, county_id, county_name, override_name=False):
    """
    Create a County and append it to this state's list of counties.

    The new county is handed a reference to this state (self), so
    that the file URL, directory, and so forth can be computed from
    within the county itself. The override_name flag is passed
    through to the County unchanged.
    """
    state_counties = self.counties
    state_counties.append(
        County(county_id, county_name, self, override_name)
    )
57 A county represents either a county or city. It doesn't make
58 sense, but 'county-level' data is given for certain cities which
59 don't technically belong to any county.
62 def __init__(self
, initial_id
=None,
67 If this is a city, we should override our name with
68 e.g. 'Baltimore city' so that full_name() doesn't transform
69 'Baltimore' into 'Baltimore County'.
72 self
.name
= initial_name
73 self
.state
= initial_state
74 self
.override_name
= override_name
def state_county_id(self):
    """
    Return the combined state/county identifier as a string.

    The result is the containing state's id converted with str(),
    followed by this county's id zero-padded to three digits.
    """
    # The state id is used as-is; only the county id is padded.
    state_part = str(self.state.id)
    county_part = "%03d" % self.id
    return state_part + county_part
83 Some of the counties (e.g. Baltimore City, Washington D.C.)
84 need to have their names overridden since they aren't
85 technically counties, but are treated as such by the Census.
87 if (self
.override_name
== False):
88 return self
.name
+ ' County'
90 # "Override name" basically means, "use the name I passed
91 # you and don't add the word 'County' on to it."
95 def tiger_data_url(self
):
96 tdp
= self
.state
.tiger_data_url() + '/'
97 tdp
+= self
.state_county_id()
98 tdp
+= '_' + self
.full_name().replace(' ', '_') + '/'
99 tdp
+= self
.zipfile_name()
def zipfile_name(self):
    """
    Return the file name of this county's 2009 'edges' zip archive.

    The name is built from the fixed 'tl_2009_' prefix, the value of
    state_county_id(), and the '_edges.zip' suffix.
    """
    county_code = self.state_county_id()
    return 'tl_2009_' + county_code + '_edges.zip'
107 def shapefile_path(self
):
108 sfp
= self
.state
.lines_data_path() + '/'
109 sfp
+= 'tl_2009_' + self
.state_county_id() + '_edges.shp'
113 def download_lines(states
):
115 Download the TIGER/Line 'all lines' files for each county in states.
119 # First, create the lines data path if it doesn't exist.
120 FileUtils
.mkdir_p(state
.lines_data_path(), 0755)
122 # Now loop through the counties, and download/unzip the lines
124 for county
in state
.counties
:
125 if not os
.path
.exists(county
.shapefile_path()):
126 url
= county
.tiger_data_url()
127 tmpfile
= county
.zipfile_name()
128 print "Grabbing data for %s (%s)." % (county
.full_name(), state
.name
)
129 print "Downloading %s to %s..." % (url
, tmpfile
)
132 # This can fail for a bunch of reasons...
133 urllib
.urlretrieve(url
, tmpfile
)
134 print "Unzipping %s to %s..." % (tmpfile
, state
.lines_data_path())
135 z
= zipfile
.ZipFile(tmpfile
)
136 z
.extractall(state
.lines_data_path())
138 # That we don't care about.
141 # But we always clean up after ourselves.
142 print "Removing %s..." % tmpfile
143 FileUtils
.rm_f(tmpfile
)