]> gitweb.michael.orlitzky.com - dead/census-tools.git/blob - src/SummaryFile1.py
3c82186474108e72908d3d3ded341d48470c1409
[dead/census-tools.git] / src / SummaryFile1.py
1 import os, GPS, inspect
2
try:
    # Python 2: keep the original base class so that existing
    # "except StandardError" handlers continue to catch this error.
    _RecordErrorBase = StandardError
except NameError:
    # Python 3 removed StandardError; fall back to Exception so the
    # module can still be imported.
    _RecordErrorBase = Exception


class RecordError(_RecordErrorBase):
    """
    Raised when a line of an SF1 geo file cannot be accepted as a
    record (for example, because it is too short).
    """
    pass
5
try:
    # Python 2: keep the original base class so that existing
    # "except StandardError" handlers continue to catch this error.
    _InvalidAreaErrorBase = StandardError
except NameError:
    # Python 3 removed StandardError; fall back to Exception so the
    # module can still be imported.
    _InvalidAreaErrorBase = Exception


class InvalidAreaError(_InvalidAreaErrorBase):
    """
    Raised when a block's land and water areas add up to zero.
    """
    pass
8
class GeoRecord:
    """
    A single geographic header record read from an SF1 geo file.

    Instances start out empty; GeoRecordParser.parse_line() attaches
    one string attribute per field of the record.
    """

    # Shortest line, in characters, that is accepted as a record.
    MinimumLineLength = 400
15
16
class Block:
    """
    A census block, i.e. a special case of a GeoRecord.

    Only the block and tract numbers, the population, the land and
    water areas, and the internal point coordinates are retained.
    """

    def __init__(self, geo_record):
        """
        Build a Block from the string fields of a GeoRecord.

        Raises ValueError if a field cannot be converted to the
        required numeric type, and InvalidAreaError if the land and
        water areas add up to zero.
        """
        # int() and float() raise ValueError on unparseable input; we
        # deliberately let that propagate to the caller.
        self.block_number = int(geo_record.block)
        self.tract_number = int(geo_record.tract)
        self.population = int(geo_record.pop100)
        self.area_land = float(geo_record.arealand)
        self.area_water = float(geo_record.areawatr)

        # The record's internal point becomes the block's location.
        point = GPS.Coordinates()
        point.latitude = float(geo_record.intptlat)
        point.longitude = float(geo_record.intptlon)
        self.coordinates = point

        # Rejecting zero area here guarantees that
        # population_density() never divides by zero.
        if self.total_area() == 0:
            raise InvalidAreaError('A block may not have zero area.')

    def total_area(self):
        """Return the sum of the land and water areas."""
        return self.area_land + self.area_water

    def population_density(self):
        """Return the population divided by the total area."""
        area = self.total_area()
        return self.population / area
49
50
class GeoRecordParser:
    """
    Parser for SF1 geo files: fixed-width text files holding one
    geographic header record per line.
    """

    # Layout of one record as (attribute name, start, end) triples.
    #
    # Python indexes are zero-based and slices exclude their end,
    # whereas the SF1 specification gives one-based starting positions
    # and field lengths. For example, the first field, "File
    # Identification," is defined as beginning at position 1 and
    # having length 6, which corresponds to (0, 6) here.
    #
    # The fields are contiguous: each one starts where the previous
    # one ended, and together they cover characters 0 through 399.
    _FIELD_LAYOUT = (
        ('fileid', 0, 6),        # File Identification
        ('stusab', 6, 8),        # State / US Abbreviation (USPS)
        ('sumlev', 8, 11),       # Summary Level
        ('geocomp', 11, 13),     # Geographic Component
        ('chariter', 13, 16),    # Characteristic Iteration
        ('cifsn', 16, 18),       # Characteristic Iteration File
                                 # Sequence Number
        ('logrecno', 18, 25),    # Logical Record Number
        ('region', 25, 26),      # Region
        ('division', 26, 27),    # Division
        ('statece', 27, 29),     # State (Census)
        ('state', 29, 31),       # State (FIPS)
        ('county', 31, 34),      # County
        ('countysc', 34, 36),    # County Size Code
        ('cousub', 36, 41),      # County Subdivision (FIPS)
        ('cousubcc', 41, 43),    # FIPS County Subdivision Class Code
        ('cousubsc', 43, 45),    # County Subdivision Size Code
        ('place', 45, 50),       # Place (FIPS)
        ('placecc', 50, 52),     # FIPS Place Class Code
        ('placedc', 52, 53),     # Place Description Code
        ('placesc', 53, 55),     # Place Size Code
        ('tract', 55, 61),       # Census Tract
        ('blkgrp', 61, 62),      # Block Group
        ('block', 62, 66),       # Block
        ('iuc', 66, 68),         # Internal Use Code
        # Five characters wide. This used to be read as [68:71], which
        # truncated the code and left characters 71-72 unassigned.
        ('concit', 68, 73),      # Consolidated City (FIPS)
        ('concitcc', 73, 75),    # FIPS Consolidated City Class Code
        ('concitsc', 75, 77),    # Consolidated City Size Code
        ('aianhh', 77, 81),      # American Indian Area/Alaska Native
                                 # Area/Hawaiian Home Land (Census)
        ('aianhhfp', 81, 86),    # American Indian Area/Alaska Native
                                 # Area/Hawaiian Home Land (FIPS)
        ('aianhhcc', 86, 88),    # FIPS American Indian Area/Alaska
                                 # Native Area/Hawaiian Home Land
                                 # Class Code
        ('aihhtli', 88, 89),     # American Indian Trust Land/Hawaiian
                                 # Home Land Indicator
        ('aitsce', 89, 92),      # American Indian Tribal Subdivision
                                 # (Census)
        ('aits', 92, 97),        # American Indian Tribal Subdivision
                                 # (FIPS)
        ('aitscc', 97, 99),      # FIPS American Indian Tribal
                                 # Subdivision Class Code
        ('anrc', 99, 104),       # Alaska Native Regional Corporation
                                 # (FIPS)
        ('anrccc', 104, 106),    # FIPS Alaska Native Regional
                                 # Corporation Class Code
        ('msacmsa', 106, 110),   # Metropolitan Statistical Area/
                                 # Consolidated Metropolitan
                                 # Statistical Area
        ('masc', 110, 112),      # MSA/CMSA Size Code
        ('cmsa', 112, 114),      # Consolidated Metropolitan
                                 # Statistical Area
        ('macci', 114, 115),     # Metropolitan Area Central City
                                 # Indicator
        ('pmsa', 115, 119),      # Primary Metropolitan Statistical
                                 # Area
        ('necma', 119, 123),     # New England County Metropolitan
                                 # Area
        ('necmacci', 123, 124),  # New England County Metropolitan
                                 # Area Central City Indicator
        ('necmasc', 124, 126),   # New England County Metropolitan
                                 # Area Size Code
        ('exi', 126, 127),       # Extended Place Indicator
        ('ua', 127, 132),        # Urban Area
        ('uasc', 132, 134),      # Urban Area Size Code
        ('ustype', 134, 135),    # Urban Area Type
        ('ur', 135, 136),        # Urban/Rural
        ('cd106', 136, 138),     # Congressional District (106th)
        ('cd108', 138, 140),     # Congressional District (108th)
        ('cd109', 140, 142),     # Congressional District (109th)
        ('cd110', 142, 144),     # Congressional District (110th)
        ('sldu', 144, 147),      # State Legislative District (Upper
                                 # Chamber)
        ('sldl', 147, 150),      # State Legislative District (Lower
                                 # Chamber)
        ('vtd', 150, 156),       # Voting District
        ('vtdi', 156, 157),      # Voting District Indicator
        ('zcta3', 157, 160),     # ZIP Code Tabulation Area (3 digit)
        ('zcta5', 160, 165),     # ZIP Code Tabulation Area (5 digit)
        ('submcd', 165, 170),    # Subbarrio (FIPS)
        ('submcdcc', 170, 172),  # FIPS Subbarrio Class Code
        ('arealand', 172, 186),  # Area (Land)
        ('areawatr', 186, 200),  # Area (Water)
        ('name', 200, 290),      # Area Name - Legal/Statistical Area
                                 # Description (LSAD) Term - Part
                                 # Indicator
        ('funcstat', 290, 291),  # Functional Status Code
        ('gcuni', 291, 292),     # Geographic Change User Note
                                 # Indicator
        ('pop100', 292, 301),    # Population Count (100%)
        ('hu100', 301, 310),     # Housing Unit Count (100%)
        ('intptlat', 310, 319),  # Internal Point (Latitude)
        ('intptlon', 319, 329),  # Internal Point (Longitude)
        ('lsadc', 329, 331),     # Legal/Statistical Area Description
                                 # Code
        ('partflag', 331, 332),  # Part Flag
        ('sdelm', 332, 337),     # School District (Elementary)
        ('sdsec', 337, 342),     # School District (Secondary)
        ('sduni', 342, 347),     # School District (Unified)
        ('taz', 347, 353),       # Traffic Analysis Zone
        ('uga', 353, 358),       # Oregon Urban Growth Area
        ('puma5', 358, 363),     # Public Use Microdata Area - 5% File
        ('puma1', 363, 368),     # Public Use Microdata Area - 1% File
        ('reserve2', 368, 383),  # Reserved
        ('macc', 383, 388),      # Metropolitan Area Central City
        ('uacp', 388, 393),      # Urban Area Central Place
        ('reserved', 393, 400),  # Reserved
    )

    def parse_file(self, path):
        """
        Assuming that path refers to an SF1 (geo) file, parse the
        geographic header records contained within it. Return a list
        of GeoRecord objects, one per line and in file order.

        A RecordError raised by parse_line() for any line propagates
        to the caller.
        """
        # The with-statement closes the file even when parse_line()
        # raises part-way through.
        with open(path, 'r') as f:
            return [self.parse_line(line) for line in f]

    def parse_blocks(self, path):
        """
        Parse only the blocks from a geo file and return them as a
        list of Block objects.

        Records that cannot be turned into a Block are skipped:
        either a value could not be converted to the appropriate
        type (ValueError), or something is funny with the geometry
        (InvalidAreaError).
        """
        blocks = []

        for record in self.parse_file(path):
            try:
                block = Block(record)
            except (ValueError, InvalidAreaError):
                continue
            blocks.append(block)

        return blocks

    def parse_line(self, line):
        """
        Parse one line of an SF1 geo file and return it as a
        GeoRecord with one string attribute per field.

        Only the line length is checked here; the field contents are
        stored verbatim (padding included) and are left for the
        consumer to interpret. Raises RecordError if the line is
        shorter than GeoRecord.MinimumLineLength.
        """
        if len(line) < GeoRecord.MinimumLineLength:
            raise RecordError("The input line is too short. The SF1 specification requires a line length of %d characters; this line contains only %d characters" % (GeoRecord.MinimumLineLength, len(line)))

        record = GeoRecord()

        for name, start, end in self._FIELD_LAYOUT:
            setattr(record, name, line[start:end])

        return record
367
368
369
def FindClosestBlock(blocks, target_coords):
    """
    Find the closest block (from within blocks) to the GPS
    coordinates given by target_coords.

    Distances are computed by GPS.CalculateDistance() between
    target_coords and each block's coordinates. If several blocks
    are equally close, the first one encountered wins. Returns None
    if blocks is empty.
    """
    # Empty by default. Hopefully we're passed some blocks.
    closest_block = None

    # Start from infinity rather than an arbitrary "large" number, so
    # that a block is never rejected merely for being far away.
    min_distance = float('inf')

    for block in blocks:
        this_distance = GPS.CalculateDistance(target_coords, block.coordinates)
        if this_distance < min_distance:
            closest_block = block
            min_distance = this_distance

    return closest_block
387
388
389
def FindAveragePopulationDensity(coords, geo_file_path):
    """
    Return the population density of the block closest to coords.

    The blocks are read from the SF1 geo file at geo_file_path.
    Despite the function's name, no averaging takes place: the
    result is the population_density() of the single closest block.

    Raises ValueError if no closest block could be determined (for
    example, because the file yielded no blocks at all).
    """
    grp = GeoRecordParser()
    blocks = grp.parse_blocks(geo_file_path)
    closest_block = FindClosestBlock(blocks, coords)

    # FindClosestBlock() returns None when it has nothing to choose
    # from; fail with a clear message instead of an AttributeError.
    if closest_block is None:
        raise ValueError("No closest block could be determined from '%s'." % geo_file_path)

    return closest_block.population_density()