]> gitweb.michael.orlitzky.com - dead/census-tools.git/blob - src/Data.py
Fixed a method name in the download_lines function.
[dead/census-tools.git] / src / Data.py
1 """
2 Classes for working with (downloading, importing) the online census
3 data.
4 """
5
6 import os
7 import urllib
8 import zipfile
9
10 import FileUtils
11
12
class State(object):
    """
    A state contains zero or more counties and cities. Each state has
    its own ID, as well as its own directory on the server.

    Example:

      SF1:   http://www2.census.gov/census_2000/datasets/Summary_File_1/Maryland
      TIGER: http://www2.census.gov/geo/tiger/TIGER2009/24_MARYLAND

    """

    SF1_ROOT = 'http://www2.census.gov/census_2000/datasets/Summary_File_1'
    TIGER_ROOT = 'http://www2.census.gov/geo/tiger/TIGER2009'

    # All downloaded data is stored beneath this (relative) directory.
    LOCAL_DATA_ROOT = 'data/census2000'

    def __init__(self, initial_id=None, initial_name=None, abbreviation=None):
        """
        Store the state's numeric ID, its full name (e.g. 'Maryland')
        and its abbreviation (e.g. 'MD'). The list of counties starts
        out empty; see add_county().
        """
        self.id = initial_id
        self.abbreviation = abbreviation
        self.name = initial_name
        self.counties = []


    def _id_string(self):
        """
        Return the state ID as a string, zero-padded to two
        characters, so that an ID of 1 becomes '01'. IDs which are
        already two or more characters long are returned unchanged.
        """
        return str(self.id).zfill(2)


    def _local_directory_name(self):
        """
        Return the state's name in the form used for local
        directories: lowercase, with spaces replaced by underscores.
        """
        return self.name.lower().replace(' ', '_')


    def sf1_data_root(self):
        """
        Return the URL of the directory containing this state's SF1
        data. The state's name is used, with underscores for spaces.
        """
        return self.SF1_ROOT + '/' + self.name.replace(' ', '_')


    def tiger_data_root(self):
        """
        Return the URL of the directory containing this state's TIGER
        data, e.g. '<TIGER_ROOT>/24_MARYLAND'.
        """
        directory = self._id_string() + '_' + self.name.upper().replace(' ', '_')
        return self.TIGER_ROOT + '/' + directory


    def sf1_data_url(self):
        """
        Return the full URL of this state's SF1 zip file.
        """
        return self.sf1_data_root() + '/' + self.sf1_zipfile_name()


    def blocks_data_url(self):
        """
        Return the full URL of this state's TIGER blocks zip file.
        """
        return self.tiger_data_root() + '/' + self.blocks_zipfile_name()


    def sf1_data_path(self):
        """
        Return the local directory in which this state's SF1 data is
        stored.
        """
        return '/'.join([self.LOCAL_DATA_ROOT,
                         self._local_directory_name(),
                         'sf1'])


    def blocks_data_path(self):
        """
        Return the local directory in which this state's blocks data
        is stored.
        """
        return '/'.join([self.LOCAL_DATA_ROOT,
                         self._local_directory_name(),
                         'blocks'])


    def lines_data_path(self):
        """
        Return the local directory in which the lines data for this
        state's counties is stored.
        """
        return '/'.join([self.LOCAL_DATA_ROOT,
                         self._local_directory_name(),
                         'lines'])


    def sf1_zipfile_name(self):
        """
        Return the name of this state's SF1 zip file, which begins
        with the lowercase abbreviation (e.g. 'mdgeo_uf1.zip').
        """
        return self.abbreviation.lower() + 'geo_uf1.zip'


    def blocks_zipfile_name(self):
        """
        Return the name of this state's TIGER blocks zip file.
        """
        return 'tl_2009_' + self._id_string() + '_tabblock00.zip'


    def sf1_geo_file_path(self):
        """
        Return the local path of this state's SF1 geo file.
        """
        geo_file_name = self.abbreviation.lower() + 'geo.uf1'
        return self.sf1_data_path() + '/' + geo_file_name


    def blocks_shapefile_path(self):
        """
        Return the local path of this state's blocks shapefile.
        """
        shapefile_name = 'tl_2009_' + self._id_string() + '_tabblock00.shp'
        return self.blocks_data_path() + '/' + shapefile_name


    def add_county(self, county_id, county_name, override_name=False):
        """
        Create a County and append it to this state's list of counties.

        We would like each county to have a pointer to its containing
        state. This is so we can compute the file URL, directory, and
        so forth from within the county.
        """
        self.counties.append(County(county_id,
                                    county_name,
                                    self,
                                    override_name))
110
111
112
class County(object):
    """
    A county represents either a county or city. It doesn't make
    sense, but 'county-level' data is given for certain cities which
    don't technically belong to any county.
    """

    def __init__(self, initial_id=None,
                 initial_name=None,
                 initial_state=None,
                 override_name=False):
        """
        If this is a city, we should override our name with
        e.g. 'Baltimore city' so that full_name() doesn't transform
        'Baltimore' into 'Baltimore County'.
        """
        self.id = initial_id
        self.name = initial_name
        self.state = initial_state
        self.override_name = override_name


    def state_county_id(self):
        """
        Return the state ID (zero-padded to two characters) followed
        by the county ID (zero-padded to three digits), e.g. '24005'.
        The county ID must be a number, since it is formatted with
        '%03d'.
        """
        state_part = str(self.state.id).zfill(2)
        county_part = "%03d" % self.id
        return state_part + county_part


    def full_name(self):
        """
        Some of the counties (e.g. Baltimore City, Washington D.C.),
        need to have their names overridden since they aren't
        technically counties, but are treated as such by the Census.
        """
        # The comparison is deliberately '== False' rather than a
        # truthiness test: any other value (including None) means
        # "use the name as given".
        if self.override_name == False:
            return self.name + ' County'

        # "Override name" basically means, "use the name I passed
        # you and don't add the word 'County' on to it."
        return self.name


    def lines_data_url(self):
        """
        Return the full URL of this county's TIGER lines zip file. It
        lives in a subdirectory of the state's TIGER directory, named
        after the state/county ID and the county's full name.
        """
        directory = self.state_county_id() + '_' + self.full_name().replace(' ', '_')
        return '/'.join([self.state.tiger_data_root(),
                         directory,
                         self.lines_zipfile_name()])


    def lines_zipfile_name(self):
        """
        Return the name of this county's TIGER lines zip file.
        """
        return 'tl_2009_' + self.state_county_id() + '_edges.zip'


    def lines_shapefile_path(self):
        """
        Return the local path of this county's lines shapefile, which
        is stored in the containing state's lines data directory.
        """
        shapefile_name = 'tl_2009_' + self.state_county_id() + '_edges.shp'
        return self.state.lines_data_path() + '/' + shapefile_name
169
170
171
def download_sf1(states):
    """
    Download the Summary File 1 geo file for each state.

    For every state in ``states``, make sure its SF1 data directory
    exists. Then, unless the state's geo file is already present,
    download the SF1 zip file (to a temporary file named after the
    zip file, relative to the current directory), extract it into the
    SF1 data directory, and remove the temporary file.

    This is a best-effort operation: if the download or extraction
    fails, the reason is printed and we move on to the next state.
    """

    for state in states:
        # First, create the SF1 data path if it doesn't exist. The
        # mode is written as 0o755 (same value as the older 0755
        # spelling) because that form is accepted by Python 2.6+ and
        # Python 3 alike.
        FileUtils.mkdir_p(state.sf1_data_path(), 0o755)

        if os.path.exists(state.sf1_geo_file_path()):
            # We already have the data for this state.
            continue

        url = state.sf1_data_url()
        tmpfile = state.sf1_zipfile_name()
        print("Grabbing SF1 data for %s." % state.name)
        print("Downloading %s to %s..." % (url, tmpfile))

        try:
            # This can fail for a bunch of reasons...
            urllib.urlretrieve(url, tmpfile)
            print("Unzipping %s to %s..." % (tmpfile, state.sf1_data_path()))
            z = zipfile.ZipFile(tmpfile)
            try:
                z.extractall(state.sf1_data_path())
            finally:
                # Don't leak the open archive, even if extraction fails.
                z.close()
        except Exception as e:
            # ...none of which should stop us from trying the other
            # states, but the user deserves to know what went wrong.
            # KeyboardInterrupt and SystemExit are not caught here, so
            # the user is still able to abort.
            print("Failed to get SF1 data for %s: %s" % (state.name, e))
        finally:
            # But we always clean up after ourselves.
            print("Removing %s..." % tmpfile)
            FileUtils.rm_f(tmpfile)
            print("Done.\n")
201
202
203
def download_blocks(states):
    """
    Download the TIGER/Line block files for each state.

    For every state in ``states``, make sure its blocks data directory
    exists. Then, unless the state's blocks shapefile is already
    present, download the blocks zip file (to a temporary file named
    after the zip file, relative to the current directory), extract
    it into the blocks data directory, and remove the temporary file.

    This is a best-effort operation: if the download or extraction
    fails, the reason is printed and we move on to the next state.
    """

    for state in states:
        # First, create the blocks data path if it doesn't exist. The
        # mode is written as 0o755 (same value as the older 0755
        # spelling) because that form is accepted by Python 2.6+ and
        # Python 3 alike.
        FileUtils.mkdir_p(state.blocks_data_path(), 0o755)

        if os.path.exists(state.blocks_shapefile_path()):
            # We already have the data for this state.
            continue

        url = state.blocks_data_url()
        tmpfile = state.blocks_zipfile_name()
        print("Grabbing TIGER blocks data for %s." % state.name)
        print("Downloading %s to %s..." % (url, tmpfile))

        try:
            # This can fail for a bunch of reasons...
            urllib.urlretrieve(url, tmpfile)
            print("Unzipping %s to %s..." % (tmpfile, state.blocks_data_path()))
            z = zipfile.ZipFile(tmpfile)
            try:
                z.extractall(state.blocks_data_path())
            finally:
                # Don't leak the open archive, even if extraction fails.
                z.close()
        except Exception as e:
            # ...none of which should stop us from trying the other
            # states, but the user deserves to know what went wrong.
            # KeyboardInterrupt and SystemExit are not caught here, so
            # the user is still able to abort.
            print("Failed to get TIGER blocks data for %s: %s" % (state.name, e))
        finally:
            # But we always clean up after ourselves.
            print("Removing %s..." % tmpfile)
            FileUtils.rm_f(tmpfile)
            print("Done.\n")
233
234
235
def download_lines(states):
    """
    Download the TIGER/Line 'all lines' files for each county in states.

    For every state in ``states``, make sure its lines data directory
    exists. Then, for each of the state's counties whose lines
    shapefile is not already present, download the county's lines zip
    file (to a temporary file named after the zip file, relative to
    the current directory), extract it into the state's lines data
    directory, and remove the temporary file.

    This is a best-effort operation: if the download or extraction
    fails, the reason is printed and we move on to the next county.
    """

    for state in states:
        # First, create the lines data path if it doesn't exist. The
        # mode is written as 0o755 (same value as the older 0755
        # spelling) because that form is accepted by Python 2.6+ and
        # Python 3 alike.
        FileUtils.mkdir_p(state.lines_data_path(), 0o755)

        # Now loop through the counties, and download/unzip the lines
        # data if necessary.
        for county in state.counties:
            if os.path.exists(county.lines_shapefile_path()):
                # We already have the data for this county.
                continue

            url = county.lines_data_url()
            tmpfile = county.lines_zipfile_name()
            print("Grabbing TIGER lines data for %s (%s)." % (county.full_name(), state.name))
            print("Downloading %s to %s..." % (url, tmpfile))

            try:
                # This can fail for a bunch of reasons...
                urllib.urlretrieve(url, tmpfile)
                print("Unzipping %s to %s..." % (tmpfile, state.lines_data_path()))
                z = zipfile.ZipFile(tmpfile)
                try:
                    z.extractall(state.lines_data_path())
                finally:
                    # Don't leak the open archive, even if extraction
                    # fails.
                    z.close()
            except Exception as e:
                # ...none of which should stop us from trying the
                # other counties, but the user deserves to know what
                # went wrong. KeyboardInterrupt and SystemExit are not
                # caught here, so the user is still able to abort.
                print("Failed to get TIGER lines data for %s (%s): %s" % (county.full_name(), state.name, e))
            finally:
                # But we always clean up after ourselves.
                print("Removing %s..." % tmpfile)
                FileUtils.rm_f(tmpfile)
                print("Done.\n")