]>
gitweb.michael.orlitzky.com - dead/census-tools.git/blob - src/Data.py
2 Classes for working with (downloading, importing) the online census
15 A state contains zero or more counties and cities. Each state has
16 its own ID, as well as its own directory on the server.
20 SF1: http://www2.census.gov/census_2000/datasets/Summary_File_1/Maryland
21 TIGER: http://www2.census.gov/geo/tiger/TIGER2009/24_MARYLAND
# Root URLs on the Census server for the two datasets we mirror:
# SF1 (Summary File 1, demographic data) and TIGER/Line 2009
# (geographic shapefiles). Per-state directories are built beneath
# these roots.
SF1_ROOT = 'http://www2.census.gov/census_2000/datasets/Summary_File_1'
TIGER_ROOT = 'http://www2.census.gov/geo/tiger/TIGER2009'
def __init__(self, initial_id=None, initial_name=None, abbreviation=None):
    """
    Create a State.

    initial_id   -- the state's numeric (FIPS) identifier, used to
                    build TIGER directory and file names.
    initial_name -- the state's name, used to build data paths/URLs.
    abbreviation -- the state's two-letter abbreviation, used in the
                    SF1 file names.
    """
    # NOTE(review): the id/counties initializations were missing from
    # the visible text and were reconstructed -- both attributes are
    # read by other methods (tiger_data_root, add_county). Confirm.
    self.id = initial_id
    self.abbreviation = abbreviation
    self.name = initial_name
    # Populated later via add_county().
    self.counties = []
def sf1_data_root(self):
    """
    Return the URL of the server directory containing this state's
    Summary File 1 data, e.g.
    http://www2.census.gov/census_2000/datasets/Summary_File_1/Maryland
    """
    sdr = self.SF1_ROOT + '/'
    # Directory names on the server use underscores in place of spaces.
    sdr += self.name.replace(' ', '_')
    return sdr
def tiger_data_root(self):
    """
    Return the URL of this state's TIGER/Line directory, e.g.
    http://www2.census.gov/geo/tiger/TIGER2009/24_MARYLAND
    """
    tdr = self.TIGER_ROOT + '/'
    # TIGER state directories are named <id>_<NAME>, uppercased, with
    # underscores in place of spaces.
    tdr += str(self.id) + '_' + self.name.upper().replace(' ', '_')
    return tdr
def sf1_data_url(self):
    """
    Return the full URL of this state's SF1 zip file (data root plus
    zipfile name).
    """
    sdu = self.sf1_data_root() + '/'
    sdu += self.sf1_zipfile_name()
    return sdu
def blocks_data_url(self):
    """
    Return the full URL of this state's TIGER block zip file (TIGER
    data root plus zipfile name).
    """
    bdu = self.tiger_data_root() + '/'
    bdu += self.blocks_zipfile_name()
    return bdu
def sf1_data_path(self):
    """
    Return the local directory where this state's SF1 data are kept,
    e.g. data/census2000/maryland/sf1.
    """
    sdp = 'data/census2000/'
    sdp += self.name.lower().replace(' ', '_')
    # NOTE(review): the dataset subdirectory was missing from the
    # visible text; '/sf1' reconstructed from the per-dataset layout
    # (blocks/lines siblings) -- confirm against the repository.
    sdp += '/sf1'
    return sdp
def blocks_data_path(self):
    """
    Return the local directory where this state's TIGER block data
    are kept, e.g. data/census2000/maryland/blocks.
    """
    bdp = 'data/census2000/'
    bdp += self.name.lower().replace(' ', '_')
    # NOTE(review): the dataset subdirectory was missing from the
    # visible text; '/blocks' reconstructed -- confirm.
    bdp += '/blocks'
    return bdp
def lines_data_path(self):
    """
    Return the local directory where this state's TIGER 'all lines'
    data are kept, e.g. data/census2000/maryland/lines.
    """
    ldp = 'data/census2000/'
    ldp += self.name.lower().replace(' ', '_')
    # NOTE(review): the dataset subdirectory was missing from the
    # visible text; '/lines' reconstructed -- confirm.
    ldp += '/lines'
    return ldp
def sf1_zipfile_name(self):
    """
    The name of this state's SF1 geo zip file on the server,
    e.g. mdgeo_uf1.zip.
    """
    return '%sgeo_uf1.zip' % self.abbreviation.lower()
def blocks_zipfile_name(self):
    """
    The name of this state's TIGER block zip file on the server,
    e.g. tl_2009_24_tabblock00.zip.
    """
    return 'tl_2009_%s_tabblock00.zip' % self.id
def sf1_geo_file_path(self):
    """
    Return the local path of this state's unzipped SF1 geo file,
    e.g. <sf1_data_path>/mdgeo.uf1.
    """
    sgfp = self.sf1_data_path() + '/'
    sgfp += self.abbreviation.lower() + 'geo.uf1'
    return sgfp
def blocks_shapefile_path(self):
    """
    Return the local path of this state's unzipped TIGER block
    shapefile, e.g. <blocks_data_path>/tl_2009_24_tabblock00.shp.
    """
    bsp = self.blocks_data_path() + '/'
    bsp += 'tl_2009_' + str(self.id) + '_tabblock00.shp'
    return bsp
def add_county(self, county_id, county_name, override_name=False):
    """
    Create a County from the given id and name, and append it to our
    list of counties.

    We would like each county to have a pointer to its containing
    state. This so we can compute the file URL, directory, and so
    forth from within the county.
    """
    # NOTE(review): the County(...) call was truncated in the visible
    # text; argument order reconstructed from County.__init__ -- confirm.
    self.counties.append(County(county_id,
                                county_name,
                                self,
                                override_name))
115 A county represents either a county or city. It doesn't make
116 sense, but 'county-level' data is given for certain cities which
117 don't technically belong to any county.
def __init__(self, initial_id=None, initial_name=None,
             initial_state=None, override_name=False):
    """
    Create a County belonging to the given state.

    If this is a city, we should override our name with
    e.g. 'Baltimore city' so that full_name() doesn't transform
    'Baltimore' in to 'Baltimore County'.
    """
    # NOTE(review): the initial_name/initial_state parameters and the
    # self.id assignment were missing from the visible text and were
    # reconstructed (state_county_id reads self.id) -- confirm.
    self.id = initial_id
    self.name = initial_name
    self.state = initial_state
    self.override_name = override_name
def state_county_id(self):
    """
    The concatenation of our state's id and our own id, with our id
    zero-padded to three digits (e.g. state 24, county 5 -> '24005').
    """
    return '%s%03d' % (self.state.id, self.id)
def full_name(self):
    """
    Return our name, with ' County' appended unless the name was
    overridden at construction time.

    Some of the counties (e.g. Baltimore City, Washington D.C.),
    need to have their names overridden since they aren't
    technically counties, but are treated as such by the Census.
    """
    if (self.override_name == False):
        return self.name + ' County'
    else:
        # "Override name" basically means, "use the name I passed
        # you and don't add the word 'County' on to it."
        return self.name
def lines_data_url(self):
    """
    Return the URL of this county's TIGER/Line 'all lines' zip file:
    <state tiger root>/<state_county_id>_<Full_Name>/<zipfile>.
    """
    tdp = self.state.tiger_data_root() + '/'
    tdp += self.state_county_id()
    # County directory names use underscores in place of spaces.
    tdp += '_' + self.full_name().replace(' ', '_') + '/'
    tdp += self.lines_zipfile_name()
    return tdp
def lines_zipfile_name(self):
    """
    The name of this county's TIGER edges zip file on the server,
    e.g. tl_2009_24005_edges.zip.
    """
    return 'tl_2009_%s_edges.zip' % self.state_county_id()
def lines_shapefile_path(self):
    """
    Return the local path of this county's unzipped edges shapefile,
    e.g. <state lines_data_path>/tl_2009_24005_edges.shp.
    """
    sfp = self.state.lines_data_path() + '/'
    sfp += 'tl_2009_' + self.state_county_id() + '_edges.shp'
    return sfp
172 def download_sf1(states
):
174 Download the Summary File 1 geo file for each state.
178 # First, create the blocks data path if it doesn't exist.
179 FileUtils
.mkdir_p(state
.sf1_data_path(), 0755)
181 if not os
.path
.exists(state
.sf1_geo_file_path()):
182 url
= state
.sf1_data_url()
183 tmpfile
= state
.sf1_zipfile_name()
184 print "Grabbing SF1 data for %s." % state
.name
185 print "Downloading %s to %s..." % (url
, tmpfile
)
188 # This can fail for a bunch of reasons...
189 urllib
.urlretrieve(url
, tmpfile
)
190 print "Unzipping %s to %s..." % (tmpfile
, state
.sf1_data_path())
191 z
= zipfile
.ZipFile(tmpfile
)
192 z
.extractall(state
.sf1_data_path())
194 # That we don't care about.
197 # But we always clean up after ourselves.
198 print "Removing %s..." % tmpfile
199 FileUtils
.rm_f(tmpfile
)
204 def download_blocks(states
):
206 Download the TIGER/Line block files for each state.
210 # First, create the blocks data path if it doesn't exist.
211 FileUtils
.mkdir_p(state
.blocks_data_path(), 0755)
213 if not os
.path
.exists(state
.blocks_shapefile_path()):
214 url
= state
.blocks_data_url()
215 tmpfile
= state
.blocks_zipfile_name()
216 print "Grabbing TIGER blocks data for %s." % state
.name
217 print "Downloading %s to %s..." % (url
, tmpfile
)
220 # This can fail for a bunch of reasons...
221 urllib
.urlretrieve(url
, tmpfile
)
222 print "Unzipping %s to %s..." % (tmpfile
, state
.blocks_data_path())
223 z
= zipfile
.ZipFile(tmpfile
)
224 z
.extractall(state
.blocks_data_path())
226 # That we don't care about.
229 # But we always clean up after ourselves.
230 print "Removing %s..." % tmpfile
231 FileUtils
.rm_f(tmpfile
)
236 def download_lines(states
):
238 Download the TIGER/Line 'all lines' files for each county in states.
242 # First, create the lines data path if it doesn't exist.
243 FileUtils
.mkdir_p(state
.lines_data_path(), 0755)
245 # Now loop through the counties, and download/unzip the lines
247 for county
in state
.counties
:
248 if not os
.path
.exists(county
.lines_shapefile_path()):
249 url
= county
.lines_data_url()
250 tmpfile
= county
.lines_zipfile_name()
251 print "Grabbing TIGER lines data for %s (%s)." % (county
.full_name(), state
.name
)
252 print "Downloading %s to %s..." % (url
, tmpfile
)
255 # This can fail for a bunch of reasons...
256 urllib
.urlretrieve(url
, tmpfile
)
257 print "Unzipping %s to %s..." % (tmpfile
, state
.lines_data_path())
258 z
= zipfile
.ZipFile(tmpfile
)
259 z
.extractall(state
.lines_data_path())
261 # That we don't care about.
264 # But we always clean up after ourselves.
265 print "Removing %s..." % tmpfile
266 FileUtils
.rm_f(tmpfile
)