]> gitweb.michael.orlitzky.com - dead/census-tools.git/blob - src/SummaryFile1.py
bfc62582f2d80db6c9b96f6d57b8386838d4e065
[dead/census-tools.git] / src / SummaryFile1.py
1 import os
2
3 import GPS
4 import StringUtils
5
6
# ``StandardError`` only exists on Python 2. Fall back to ``Exception`` so
# that the module can still be imported on Python 3, while leaving the
# Python 2 exception hierarchy exactly as it was.
try:
    _RecordErrorBase = StandardError
except NameError:
    _RecordErrorBase = Exception


class RecordError(_RecordErrorBase):
    """
    Raised when SF1 geo data cannot be used as requested, e.g. an
    input line is too short to be a record, or a record does not
    describe a block.
    """
    pass
9
class GeoRecord:
    """
    One record (a single line) of an SF1 geo file.

    The class itself defines no fields; whoever parses a line is
    expected to attach the field values as plain attributes.
    """

    # Shortest line, in characters, that is accepted as a full record.
    MINIMUM_LINE_LENGTH = 400
16
17
class Block:
    """
    A census block, i.e. a GeoRecord whose 'block' field is filled in.

    On top of the copied fields, a handful of helper methods make
    area, density and identifier queries more convenient.
    """

    def __init__(self, geo_record):
        """
        Build a Block out of a GeoRecord object.

        Raises RecordError when StringUtils.is_integer() rejects the
        record's 'block' field, because then the record is not a
        block at all. Raises ValueError when one of the numeric fields
        cannot be converted.
        """
        block_is_integer = StringUtils.is_integer(geo_record.block)
        if not block_is_integer:
            raise RecordError('GeoRecord object does not represent a block.')

        # The identifier pieces are kept as strings so that
        # tiger_blkidfp00() can concatenate them unchanged.
        self.state = geo_record.state
        self.county = geo_record.county
        self.tract = geo_record.tract
        self.block = geo_record.block

        # int() and float() raise a ValueError if the input string
        # cannot be converted to the requested type.
        self.pop100 = int(geo_record.pop100)
        self.arealand = float(geo_record.arealand)
        self.areawatr = float(geo_record.areawatr)

        internal_point = GPS.Coordinates()
        internal_point.latitude = float(geo_record.intptlat)
        internal_point.longitude = float(geo_record.intptlon)
        self.coordinates = internal_point


    def tiger_blkidfp00(self):
        """
        Return state + county + tract + block as a single string.

        From the Tiger/Line shapefile documentation, the block
        identifier is a concatenation of the Census 2000 state FIPS
        code, Census 2000 county FIPS code, Census 2000 census tract
        code, and Census 2000 tabulation block number.
        """
        id_parts = (self.state, self.county, self.tract, self.block)
        return ''.join(id_parts)


    def total_area(self):
        """Return the sum of the land area and the water area."""
        combined_area = self.arealand + self.areawatr
        return combined_area


    def population_density(self):
        """
        Return pop100 divided by the total area, or 0 when the total
        area is zero.

        There are some unusual cases where a block has a total area
        of zero. It seems that these blocks do in fact possess
        geometries in the Tiger database, so they are allowed to be
        parsed. Assigning them a density of 0 was an arbitrary choice.
        """
        area = self.total_area()

        if area == 0:
            return 0

        return self.pop100 / area
84
85
86
class GeoRecordParser:
    """
    Parses SF1 geo files (fixed-width text, one record per line) into
    GeoRecord objects, and optionally narrows those down to Blocks.
    """

    # Fixed-width layout of one geo record, as (attribute name, start,
    # end) triples. The offsets are zero-based, half-open Python slice
    # bounds, whereas the SF1 specification gives one-based starting
    # positions. For example, the first field, "File Identification,"
    # is defined as beginning at position 1 and having length 6, which
    # corresponds to the slice [0:6].
    #
    # The fields are contiguous: every start equals the previous end,
    # and the last field ends at GeoRecord.MINIMUM_LINE_LENGTH (400).
    _FIELD_LAYOUT = (
        ('fileid', 0, 6),        # File Identification
        ('stusab', 6, 8),        # State / US Abbreviation (USPS)
        ('sumlev', 8, 11),       # Summary Level
        ('geocomp', 11, 13),     # Geographic Component
        ('chariter', 13, 16),    # Characteristic Iteration
        ('cifsn', 16, 18),       # Characteristic Iteration File Sequence Number
        ('logrecno', 18, 25),    # Logical Record Number
        ('region', 25, 26),      # Region
        ('division', 26, 27),    # Division
        ('statece', 27, 29),     # State (Census)
        ('state', 29, 31),       # State (FIPS)
        ('county', 31, 34),      # County
        ('countysc', 34, 36),    # County Size Code
        ('cousub', 36, 41),      # County Subdivision (FIPS)
        ('cousubcc', 41, 43),    # FIPS County Subdivision Class Code
        ('cousubsc', 43, 45),    # County Subdivision Size Code
        ('place', 45, 50),       # Place (FIPS)
        ('placecc', 50, 52),     # FIPS Place Class Code
        ('placedc', 52, 53),     # Place Description Code
        ('placesc', 53, 55),     # Place Size Code
        ('tract', 55, 61),       # Census Tract
        ('blkgrp', 61, 62),      # Block Group
        ('block', 62, 66),       # Block
        ('iuc', 66, 68),         # Internal Use Code
        # Consolidated City (FIPS). Five characters wide; it runs all
        # the way up to the class code that starts at offset 73.
        ('concit', 68, 73),
        ('concitcc', 73, 75),    # FIPS Consolidated City Class Code
        ('concitsc', 75, 77),    # Consolidated City Size Code
        # American Indian Area/Alaska Native Area/Hawaiian Home Land
        # (Census)
        ('aianhh', 77, 81),
        # American Indian Area/Alaska Native Area/Hawaiian Home Land
        # (FIPS)
        ('aianhhfp', 81, 86),
        # FIPS American Indian Area/Alaska Native Area/Hawaiian Home
        # Land Class Code
        ('aianhhcc', 86, 88),
        # American Indian Trust Land/Hawaiian Home Land Indicator
        ('aihhtli', 88, 89),
        ('aitsce', 89, 92),      # American Indian Tribal Subdivision (Census)
        ('aits', 92, 97),        # American Indian Tribal Subdivision (FIPS)
        # FIPS American Indian Tribal Subdivision Class Code
        ('aitscc', 97, 99),
        ('anrc', 99, 104),       # Alaska Native Regional Corporation (FIPS)
        # FIPS Alaska Native Regional Corporation Class Code
        ('anrccc', 104, 106),
        # Metropolitan Statistical Area/Consolidated Metropolitan
        # Statistical Area
        ('msacmsa', 106, 110),
        ('masc', 110, 112),      # MSA/CMSA Size Code
        ('cmsa', 112, 114),      # Consolidated Metropolitan Statistical Area
        ('macci', 114, 115),     # Metropolitan Area Central City Indicator
        ('pmsa', 115, 119),      # Primary Metropolitan Statistical Area
        ('necma', 119, 123),     # New England County Metropolitan Area
        # New England County Metropolitan Area Central City Indicator
        ('necmacci', 123, 124),
        # New England County Metropolitan Area Size Code
        ('necmasc', 124, 126),
        ('exi', 126, 127),       # Extended Place Indicator
        ('ua', 127, 132),        # Urban Area
        ('uasc', 132, 134),      # Urban Area Size Code
        # Urban Area Type (the attribute has always been called
        # 'ustype' here; the name is kept for existing callers).
        ('ustype', 134, 135),
        ('ur', 135, 136),        # Urban/Rural
        ('cd106', 136, 138),     # Congressional District (106th)
        ('cd108', 138, 140),     # Congressional District (108th)
        ('cd109', 140, 142),     # Congressional District (109th)
        ('cd110', 142, 144),     # Congressional District (110th)
        ('sldu', 144, 147),      # State Legislative District (Upper Chamber)
        ('sldl', 147, 150),      # State Legislative District (Lower Chamber)
        ('vtd', 150, 156),       # Voting District
        ('vtdi', 156, 157),      # Voting District Indicator
        ('zcta3', 157, 160),     # ZIP Code Tabulation Area (3 digit)
        ('zcta5', 160, 165),     # ZIP Code Tabulation Area (5 digit)
        ('submcd', 165, 170),    # Subbarrio (FIPS)
        ('submcdcc', 170, 172),  # FIPS Subbarrio Class Code
        ('arealand', 172, 186),  # Area (Land)
        ('areawatr', 186, 200),  # Area (Water)
        # Area Name - Legal/Statistical Area Description (LSAD)
        # Term - Part Indicator
        ('name', 200, 290),
        ('funcstat', 290, 291),  # Functional Status Code
        ('gcuni', 291, 292),     # Geographic Change User Note Indicator
        ('pop100', 292, 301),    # Population Count (100%)
        ('hu100', 301, 310),     # Housing Unit Count (100%)
        ('intptlat', 310, 319),  # Internal Point (Latitude)
        ('intptlon', 319, 329),  # Internal Point (Longitude)
        ('lsadc', 329, 331),     # Legal/Statistical Area Description Code
        ('partflag', 331, 332),  # Part Flag
        ('sdelm', 332, 337),     # School District (Elementary)
        ('sdsec', 337, 342),     # School District (Secondary)
        ('sduni', 342, 347),     # School District (Unified)
        ('taz', 347, 353),       # Traffic Analysis Zone
        ('uga', 353, 358),       # Oregon Urban Growth Area
        ('puma5', 358, 363),     # Public Use Microdata Area - 5% File
        ('puma1', 363, 368),     # Public Use Microdata Area - 1% File
        ('reserve2', 368, 383),  # Reserved
        ('macc', 383, 388),      # Metropolitan Area Central City
        ('uacp', 388, 393),      # Urban Area Central Place
        ('reserved', 393, 400),  # Reserved
    )


    def parse_file(self, path):
        """
        Assuming that path refers to an SF1 (geo) file, parse the
        geographic header records contained within it. Return a list
        of GeoRecord objects, one per line, in file order.

        A RecordError raised by parse_line() for any line propagates
        to the caller; the file is closed either way.
        """
        # The 'with' block guarantees that the file is closed even if
        # parse_line() raises part-way through.
        with open(path, 'r') as f:
            return [self.parse_line(line) for line in f]


    def parse_blocks(self, path):
        """
        Parse only the blocks from a geo file. Return a list of Block
        objects.

        Records that cannot be turned into a Block are silently
        skipped: a RecordError means the record ain't a block, and a
        ValueError means one of its values couldn't be converted to
        the appropriate type.
        """
        blocks = []

        for record in self.parse_file(path):
            try:
                blocks.append(Block(record))
            except (RecordError, ValueError):
                continue

        return blocks


    def parse_line(self, line):
        """
        Parse one line of an SF1 geo file and return it as a GeoRecord
        with one string attribute per entry in _FIELD_LAYOUT.

        Only the line length is checked here; the field contents are
        stored as the raw, unstripped substrings. Raises RecordError
        if the line is shorter than GeoRecord.MINIMUM_LINE_LENGTH.
        """
        if len(line) < GeoRecord.MINIMUM_LINE_LENGTH:
            raise RecordError(
                "The input line is too short. The SF1 specification "
                "requires a line length of %d characters; this line "
                "contains only %d characters"
                % (GeoRecord.MINIMUM_LINE_LENGTH, len(line)))

        record = GeoRecord()

        for name, start, end in self._FIELD_LAYOUT:
            setattr(record, name, line[start:end])

        return record
403
404
405
def FindClosestBlock(blocks, target_coords):
    """
    Find the closest block (from within blocks) to the GPS
    coordinates given by target_coords.

    Distances come from GPS.CalculateDistance(target_coords,
    block.coordinates). When several blocks are equally close, the
    one that appears first in blocks is returned. Returns None if
    blocks is empty.
    """
    closest_block = None
    min_distance = None

    for block in blocks:
        this_distance = GPS.CalculateDistance(target_coords, block.coordinates)

        # The first block always becomes the initial candidate, so no
        # arbitrary "very large" starting distance is needed and a
        # non-empty input can never produce None.
        if closest_block is None or this_distance < min_distance:
            closest_block = block
            min_distance = this_distance

    return closest_block
423
424
425
def FindAveragePopulationDensity(coords, geo_file_path):
    """
    Return the population density of the block closest to coords,
    considering only the blocks parsed from the SF1 geo file at
    geo_file_path.

    If the file yields no blocks, FindClosestBlock() returns None and
    the density lookup fails with an AttributeError.
    """
    candidate_blocks = GeoRecordParser().parse_blocks(geo_file_path)
    nearest = FindClosestBlock(candidate_blocks, coords)
    return nearest.population_density()