]> gitweb.michael.orlitzky.com - dead/census-tools.git/blob - src/Data.py
e08f4666788478991057996776d0ff051c6f8739
[dead/census-tools.git] / src / Data.py
1 """
2 Classes for working with (downloading, importing) the online census
3 data.
4 """
5
6 import os
7 import urllib
8 import zipfile
9
10 import FileUtils
11
12
class State:
    """
    A state contains zero or more counties and cities. Each state has
    its own ID, as well as its own directory on the server.

    Example:

      http://www2.census.gov/geo/tiger/TIGER2009/24_MARYLAND

    The server uses two-character state codes, so a single-digit id
    (e.g. 1 for Alabama) is zero-padded to '01' wherever it is
    embedded in a URL or a file name. Ids that are already two or
    more characters long are used unchanged.
    """

    TIGER_ROOT = 'http://www2.census.gov/geo/tiger/TIGER2009'

    # Local directory beneath which every state's data is stored.
    DATA_ROOT = 'data/census2000'

    def __init__(self, initial_id=None, initial_name=None):
        """
        Remember the state's id and name, and start out with an
        empty list of counties (see add_county).
        """
        self.id = initial_id
        self.name = initial_name
        self.counties = []


    def _padded_id(self):
        """
        Return the state id as text, zero-padded to two characters.
        """
        return str(self.id).zfill(2)


    def _local_data_dir(self):
        """
        Return this state's local data directory: the lowercased
        state name, with spaces replaced by underscores, beneath
        DATA_ROOT.
        """
        return self.DATA_ROOT + '/' + self.name.lower().replace(' ', '_')


    def tiger_data_root(self):
        """
        Return the URL of this state's directory on the server,
        e.g. <TIGER_ROOT>/24_MARYLAND.
        """
        dir_name = self._padded_id() + '_' + self.name.upper().replace(' ', '_')
        return self.TIGER_ROOT + '/' + dir_name


    def blocks_data_url(self):
        """
        Return the URL of this state's blocks zip file.
        """
        return self.tiger_data_root() + '/' + self.blocks_zipfile_name()


    def blocks_data_path(self):
        """
        Return the local directory holding this state's blocks data.
        """
        return self._local_data_dir() + '/blocks'


    def lines_data_path(self):
        """
        Return the local directory holding this state's lines data.
        """
        return self._local_data_dir() + '/lines'


    def blocks_zipfile_name(self):
        """
        Return the name of this state's blocks zip file.
        """
        return 'tl_2009_' + self._padded_id() + '_tabblock00.zip'


    def blocks_shapefile_path(self):
        """
        Return the local path of this state's blocks shapefile. It
        has the same base name as the zip file, inside the blocks
        data directory.
        """
        shapefile_name = 'tl_2009_' + self._padded_id() + '_tabblock00.shp'
        return self.blocks_data_path() + '/' + shapefile_name


    def add_county(self, county_id, county_name, override_name=False):
        """
        We would like each county to have a pointer to its containing
        state. This so we can compute the file URL, directory, and so
        forth from within the county.
        """
        self.counties.append(County(county_id,
                                    county_name,
                                    self,
                                    override_name))
78
79
80
class County:
    """
    A county represents either a county or city. It doesn't make
    sense, but 'county-level' data is given for certain cities which
    don't technically belong to any county.
    """

    def __init__(self, initial_id=None,
                       initial_name=None,
                       initial_state=None,
                       override_name=False):
        """
        If this is a city, we should override our name with
        e.g. 'Baltimore city' so that full_name() doesn't transform
        'Baltimore' into 'Baltimore County'.

        The initial_state is the object that contains this county;
        its id, tiger_data_root() and lines_data_path() are used to
        build the county's URLs and paths.
        """
        self.id = initial_id
        self.name = initial_name
        self.state = initial_state
        self.override_name = override_name


    def state_county_id(self):
        """
        Return the state id (zero-padded to two characters) followed
        by the county id (zero-padded to three digits), e.g. '24005'.
        The county id must be a number, since it is formatted with
        '%03d'.
        """
        return str(self.state.id).zfill(2) + ("%03d" % self.id)


    def full_name(self):
        """
        Some of the counties (e.g. Baltimore City, Washington D.C.),
        need to have their names overridden since they aren't
        technically counties, but are treated as such by the Census.
        """
        if self.override_name:
            # "Override name" basically means, "use the name I passed
            # you and don't add the word 'County' on to it."
            return self.name

        return self.name + ' County'


    def lines_data_url(self):
        """
        Return the URL of this county's lines zip file. It lives in
        a per-county directory, named <state_county_id>_<full name
        with underscores>, beneath the state's directory.
        """
        county_dir = self.state_county_id() + '_' + self.full_name().replace(' ', '_')
        return '/'.join([self.state.tiger_data_root(),
                         county_dir,
                         self.lines_zipfile_name()])


    def lines_zipfile_name(self):
        """
        Return the name of this county's lines zip file.
        """
        return 'tl_2009_' + self.state_county_id() + '_edges.zip'


    def lines_shapefile_path(self):
        """
        Return the local path of this county's lines shapefile,
        inside the state's lines data directory.
        """
        shapefile_name = 'tl_2009_' + self.state_county_id() + '_edges.shp'
        return self.state.lines_data_path() + '/' + shapefile_name
137
138
139
def download_blocks(states):
    """
    Download the TIGER/Line block files for each state.

    For every state whose blocks shapefile is not already on disk,
    the state's zip file is downloaded into the current directory,
    extracted into the state's blocks data path, and then removed.

    This is a best-effort operation: if the download or extraction
    fails for one state, the failure is reported and we move on to
    the next state.
    """

    for state in states:
        blocks_dir = state.blocks_data_path()

        # First, create the blocks data path if it doesn't exist.
        # (0o755 is the same value as the old-style literal 0755.)
        FileUtils.mkdir_p(blocks_dir, 0o755)

        if os.path.exists(state.blocks_shapefile_path()):
            # We already have this state's data.
            continue

        url = state.blocks_data_url()
        tmpfile = state.blocks_zipfile_name()
        print("Grabbing data for %s." % state.name)
        print("Downloading %s to %s..." % (url, tmpfile))

        try:
            # This can fail for a bunch of reasons...
            urllib.urlretrieve(url, tmpfile)
            print("Unzipping %s to %s..." % (tmpfile, blocks_dir))
            archive = zipfile.ZipFile(tmpfile)
            try:
                archive.extractall(blocks_dir)
            finally:
                # Release the file handle before the zip file is
                # removed below.
                archive.close()
        except Exception as e:
            # ...none of which should stop the remaining downloads,
            # but the user ought to know that this one didn't work.
            # KeyboardInterrupt is not an Exception, so Ctrl-C still
            # aborts the whole run.
            print("Failed to retrieve %s: %s" % (url, e))
        finally:
            # But we always clean up after ourselves.
            print("Removing %s..." % tmpfile)
            FileUtils.rm_f(tmpfile)
            print("Done.\n")
169
170
171
def download_lines(states):
    """
    Download the TIGER/Line 'all lines' files for each county in states.

    For every county whose lines shapefile is not already on disk,
    the county's zip file is downloaded into the current directory,
    extracted into the containing state's lines data path, and then
    removed.

    This is a best-effort operation: if the download or extraction
    fails for one county, the failure is reported and we move on to
    the next county.
    """

    for state in states:
        lines_dir = state.lines_data_path()

        # First, create the lines data path if it doesn't exist.
        # (0o755 is the same value as the old-style literal 0755.)
        FileUtils.mkdir_p(lines_dir, 0o755)

        # Now loop through the counties, and download/unzip the lines
        # data if necessary.
        for county in state.counties:
            if os.path.exists(county.lines_shapefile_path()):
                # We already have this county's data.
                continue

            # County has no tiger_data_url() method; the URL of the
            # lines zip file comes from lines_data_url().
            url = county.lines_data_url()
            tmpfile = county.lines_zipfile_name()
            print("Grabbing data for %s (%s)." % (county.full_name(), state.name))
            print("Downloading %s to %s..." % (url, tmpfile))

            try:
                # This can fail for a bunch of reasons...
                urllib.urlretrieve(url, tmpfile)
                print("Unzipping %s to %s..." % (tmpfile, lines_dir))
                archive = zipfile.ZipFile(tmpfile)
                try:
                    archive.extractall(lines_dir)
                finally:
                    # Release the file handle before the zip file is
                    # removed below.
                    archive.close()
            except Exception as e:
                # ...none of which should stop the remaining
                # downloads, but the user ought to know that this
                # one didn't work. KeyboardInterrupt is not an
                # Exception, so Ctrl-C still aborts the whole run.
                print("Failed to retrieve %s: %s" % (url, e))
            finally:
                # But we always clean up after ourselves.
                print("Removing %s..." % tmpfile)
                FileUtils.rm_f(tmpfile)
                print("Done.\n")