]>
gitweb.michael.orlitzky.com - dead/census-tools.git/blob - src/Data.py
2 Classes for working with (downloading, importing) the online census
15 A state contains zero or more counties and cities. Each state has
16 its own ID, as well as its own directory on the server.
20 http://www2.census.gov/geo/tiger/TIGER2009/24_MARYLAND
24 TIGER_ROOT
= 'http://www2.census.gov/geo/tiger/TIGER2009'
# Constructor. Both arguments are optional and default to None; the line
# shown below stores initial_name as self.name.
# NOTE(review): original lines 27 and 29-31 are missing from this extract.
# self.id and self.counties are read by other methods, so they are
# presumably initialised there -- confirm against the full file.
26 def __init__(self
, initial_id
=None, initial_name
=None):
28 self
.name
= initial_name
# Build the URL of this state's directory on the TIGER server:
# TIGER_ROOT + '/' + '<id>_<NAME>', where <id> is str(self.id) and <NAME>
# is self.name upper-cased with spaces replaced by underscores.
# NOTE(review): the statement that returns tdr (original line 35) is
# missing from this extract -- confirm against the full file.
32 def tiger_data_root(self
):
33 tdr
= self
.TIGER_ROOT
+ '/'
34 tdr
+= str(self
.id) + '_' + self
.name
.upper().replace(' ', '_')
# Build the URL of this state's blocks archive: tiger_data_root(), a '/',
# and then blocks_zipfile_name().
# NOTE(review): the statement that returns bdu (original line 41) is
# missing from this extract -- confirm against the full file.
38 def blocks_data_url(self
):
39 bdu
= self
.tiger_data_root() + '/'
40 bdu
+= self
.blocks_zipfile_name()
# Build the local (relative) directory for this state's blocks data. The
# visible part is 'data/census2000/' followed by self.name lower-cased with
# spaces replaced by underscores.
# NOTE(review): original lines 47-50 are missing from this extract; they may
# append a further path component before returning bdp -- confirm against
# the full file.
44 def blocks_data_path(self
):
45 bdp
= 'data/census2000/'
46 bdp
+= self
.name
.lower().replace(' ', '_')
# Build the local (relative) directory for this state's lines data. The
# visible part is 'data/census2000/' followed by self.name lower-cased with
# spaces replaced by underscores.
# NOTE(review): original lines 54-57 are missing from this extract; they may
# append a further path component before returning ldp -- confirm against
# the full file.
51 def lines_data_path(self
):
52 ldp
= 'data/census2000/'
53 ldp
+= self
.name
.lower().replace(' ', '_')
def blocks_zipfile_name(self):
    """
    Return the file name of this state's TIGER 2009 blocks archive:
    'tl_2009_', the state id rendered with str(), then
    '_tabblock00.zip'.
    """
    state_id = str(self.id)
    return ''.join(['tl_2009_', state_id, '_tabblock00.zip'])
# Build the path of this state's blocks shapefile: blocks_data_path(), a
# '/', and then 'tl_2009_<id>_tabblock00.shp' with <id> = str(self.id).
# NOTE(review): the statement that returns bsp (original line 65) is
# missing from this extract -- confirm against the full file.
62 def blocks_shapefile_path(self
):
63 bsp
= self
.blocks_data_path() + '/'
64 bsp
+= 'tl_2009_' + str(self
.id) + '_tabblock00.shp'
# Append a newly constructed County to self.counties.
# NOTE(review): the County(...) call is cut off after county_id in this
# extract (original lines 75 onwards are missing). Per the docstring the
# county is presumably also given a reference to this state -- confirm
# against the full file.
68 def add_county(self
, county_id
, county_name
, override_name
=False):
70 We would like each county to have a pointer to its containing
71 state. This is so we can compute the file URL, directory, and so
72 forth from within the county.
74 self
.counties
.append(County(county_id
,
83 A county represents either a county or city. It doesn't make
84 sense, but 'county-level' data is given for certain cities which
85 don't technically belong to any county.
# Constructor. The lines shown below store initial_name, initial_state and
# override_name on the instance.
# NOTE(review): the rest of the parameter list (original lines 89-91) and
# original lines 96-97 are missing from this extract. self.id is read by
# state_county_id(), so it is presumably assigned there -- confirm against
# the full file.
88 def __init__(self
, initial_id
=None,
93 If this is a city, we should override our name with
94 e.g. 'Baltimore city' so that full_name() doesn't transform
95 'Baltimore' into 'Baltimore County'.
98 self
.name
= initial_name
99 self
.state
= initial_state
100 self
.override_name
= override_name
def state_county_id(self):
    """
    Return the combined state/county identifier: the containing
    state's id rendered with str(), immediately followed by this
    county's id formatted as a zero-padded, three-digit decimal.
    """
    state_part = str(self.state.id)
    county_part = "%03d" % self.id
    return state_part + county_part
109 Some of the counties (e.g. Baltimore City, Washington D.C.)
110 need to have their names overridden since they aren't
111 technically counties, but are treated as such by the Census.
113 if (self
.override_name
== False):
114 return self
.name
+ ' County'
116 # "Override name" basically means, "use the name I passed
117 # you and don't add the word 'County' on to it."
# Build the URL of this county's lines archive. The pieces, in order:
# the containing state's tiger_data_root(), '/', state_county_id(), '_',
# full_name() with spaces replaced by underscores, '/', and finally
# lines_zipfile_name().
# NOTE(review): the statement that returns tdp (original line 126) is
# missing from this extract -- confirm against the full file.
121 def lines_data_url(self
):
122 tdp
= self
.state
.tiger_data_root() + '/'
123 tdp
+= self
.state_county_id()
124 tdp
+= '_' + self
.full_name().replace(' ', '_') + '/'
125 tdp
+= self
.lines_zipfile_name()
def lines_zipfile_name(self):
    """
    Return the file name of this county's TIGER 2009 lines archive:
    'tl_2009_', the value of state_county_id(), then '_edges.zip'.
    """
    combined_id = self.state_county_id()
    return ''.join(['tl_2009_', combined_id, '_edges.zip'])
# Build the path of this county's lines shapefile: the containing state's
# lines_data_path(), a '/', and then 'tl_2009_<id>_edges.shp' where <id>
# is state_county_id().
# NOTE(review): the statement that returns sfp (original line 136) is
# missing from this extract -- confirm against the full file.
133 def lines_shapefile_path(self
):
134 sfp
= self
.state
.lines_data_path() + '/'
135 sfp
+= 'tl_2009_' + self
.state_county_id() + '_edges.shp'
def download_blocks(states):
    """
    Download the TIGER/Line block files for each state.

    For every state in states, make sure its blocks data directory
    exists. Then, unless the state's blocks shapefile is already
    present, fetch the blocks archive to a temporary file named after
    the archive (a relative path), unpack it into the blocks data
    directory, and remove the temporary file again.

    Download and extraction errors are deliberately ignored so that
    one bad state does not stop the others from being processed.
    """

    for state in states:
        # First, create the blocks data path if it doesn't exist.
        FileUtils.mkdir_p(state.blocks_data_path(), 0o755)

        if not os.path.exists(state.blocks_shapefile_path()):
            url = state.blocks_data_url()
            tmpfile = state.blocks_zipfile_name()
            print("Grabbing data for %s." % state.name)
            print("Downloading %s to %s..." % (url, tmpfile))

            try:
                # This can fail for a bunch of reasons...
                urllib.urlretrieve(url, tmpfile)
                print("Unzipping %s to %s..."
                      % (tmpfile, state.blocks_data_path()))
                z = zipfile.ZipFile(tmpfile)
                try:
                    z.extractall(state.blocks_data_path())
                finally:
                    # Release the archive handle before the file is
                    # removed below; an open handle can make the
                    # removal fail on some platforms.
                    z.close()
            except Exception:
                # That we don't care about. (Exception rather than a
                # bare except, so that Ctrl-C and SystemExit still
                # stop the program.)
                pass
            finally:
                # But we always clean up after ourselves.
                print("Removing %s..." % tmpfile)
                FileUtils.rm_f(tmpfile)
def download_lines(states):
    """
    Download the TIGER/Line 'all lines' files for each county in states.

    For every state in states, make sure its lines data directory
    exists. Then, for each county of the state whose lines shapefile
    is not already present, fetch the county's lines archive to a
    temporary file named after the archive (a relative path), unpack
    it into the state's lines data directory, and remove the temporary
    file again.

    Download and extraction errors are deliberately ignored so that
    one bad county does not stop the others from being processed.
    """

    for state in states:
        # First, create the lines data path if it doesn't exist.
        FileUtils.mkdir_p(state.lines_data_path(), 0o755)

        # Now loop through the counties, and download/unzip the lines
        # files if necessary.
        for county in state.counties:
            if not os.path.exists(county.lines_shapefile_path()):
                # County exposes lines_data_url(); the previous call to
                # county.tiger_data_url() named a method that County
                # does not define.
                url = county.lines_data_url()
                tmpfile = county.lines_zipfile_name()
                print("Grabbing data for %s (%s)."
                      % (county.full_name(), state.name))
                print("Downloading %s to %s..." % (url, tmpfile))

                try:
                    # This can fail for a bunch of reasons...
                    urllib.urlretrieve(url, tmpfile)
                    print("Unzipping %s to %s..."
                          % (tmpfile, state.lines_data_path()))
                    z = zipfile.ZipFile(tmpfile)
                    try:
                        z.extractall(state.lines_data_path())
                    finally:
                        # Release the archive handle before the file is
                        # removed below; an open handle can make the
                        # removal fail on some platforms.
                        z.close()
                except Exception:
                    # That we don't care about. (Exception rather than
                    # a bare except, so that Ctrl-C and SystemExit
                    # still stop the program.)
                    pass
                finally:
                    # But we always clean up after ourselves.
                    print("Removing %s..." % tmpfile)
                    FileUtils.rm_f(tmpfile)