]> gitweb.michael.orlitzky.com - dead/census-tools.git/blob - src/SummaryFile1.py
0ad410d0dc4152b6642d96fb10c3b9647e466c3c
[dead/census-tools.git] / src / SummaryFile1.py
1 import os
2
3 import GPS
4 import StringUtils
5
6
try:
    # Python 2: keep the original base class, so that existing
    # "except StandardError" handlers still catch a RecordError.
    _RecordErrorBase = StandardError
except NameError:
    # Python 3 no longer has StandardError; Exception is the
    # closest remaining ancestor.
    _RecordErrorBase = Exception


class RecordError(_RecordErrorBase):
    """
    Raised when a line of an SF1 geo file cannot be treated as a
    record (for example, when the line is too short).
    """
    pass
9
class GeoRecord:
    """
    A single record (one line) of an SF1 geo file.

    Instances start out empty. The parser attaches one attribute per
    field of the record, each holding the raw text of that field.
    """

    # Lines shorter than this many characters are rejected by the
    # parser before any field is read.
    MINIMUM_LINE_LENGTH = 400
16
17
class InvalidAreaError(ValueError):
    """
    Raised when the land and water areas of a block sum to zero.

    This derives from ValueError so that code which already skips
    records with unusable field values (by catching ValueError)
    skips zero-area blocks too.
    """
    pass


class Block:
    """
    Represents a block (which is a special case of a GeoRecord).
    All we care about here is the block number, tract number,
    population, area, and coordinates.
    """

    def __init__(self, geo_record):
        """
        We initialize from a GeoRecord object.

        Raises a ValueError if one of the fields we need cannot be
        converted to a number, and an InvalidAreaError (which is
        also a ValueError) if the total area of the block is zero.
        """
        # All of these int/float conversions will throw a ValueError
        # if the input string cannot be converted to the specified
        # type.
        self.block_number = int(geo_record.block)
        self.tract_number = int(geo_record.tract)
        self.population = int(geo_record.pop100)
        self.area_land = float(geo_record.arealand)
        self.area_water = float(geo_record.areawatr)

        # Reject the record as soon as both areas are known. A block
        # with no area would make population_density() divide by
        # zero.
        if (self.total_area() == 0):
            raise InvalidAreaError('A block may not have zero area.')

        self.coordinates = GPS.Coordinates()
        self.coordinates.latitude = float(geo_record.intptlat)
        self.coordinates.longitude = float(geo_record.intptlon)


    def total_area(self):
        """Return the sum of the land and water areas."""
        return (self.area_land + self.area_water)


    def population_density(self):
        """Return the population divided by the total area."""
        return (self.population / self.total_area())
50
51
class GeoRecordParser:
    """
    Reads SF1 geo files and turns their lines into GeoRecord (and
    Block) objects.
    """

    # The layout of one fixed-width record, as a sequence of
    #
    #   (attribute name, starting position, field size)
    #
    # tuples. The starting positions are one-based, the same way that
    # the SF1 specification gives them. For example, the first field,
    # "File Identification," is defined as beginning at position 1,
    # and having length 6. The fields are contiguous: each one begins
    # where the previous one ends, and the last one ends at
    # position 400.
    _FIELD_LAYOUT = (
        # File Identification
        ('fileid', 1, 6),
        # State / US Abbreviation (USPS)
        ('stusab', 7, 2),
        # Summary Level
        ('sumlev', 9, 3),
        # Geographic Component
        ('geocomp', 12, 2),
        # Characteristic Iteration
        ('chariter', 14, 3),
        # Characteristic Iteration File Sequence Number
        ('cifsn', 17, 2),
        # Logical Record Number
        ('logrecno', 19, 7),
        # Region
        ('region', 26, 1),
        # Division
        ('division', 27, 1),
        # State (Census)
        ('statece', 28, 2),
        # State (FIPS)
        ('state', 30, 2),
        # County
        ('county', 32, 3),
        # County Size Code
        ('countysc', 35, 2),
        # County Subdivision (FIPS)
        ('cousub', 37, 5),
        # FIPS County Subdivision Class Code
        ('cousubcc', 42, 2),
        # County Subdivision Size Code
        ('cousubsc', 44, 2),
        # Place (FIPS)
        ('place', 46, 5),
        # FIPS Place Class Code
        ('placecc', 51, 2),
        # Place Description Code
        ('placedc', 53, 1),
        # Place Size Code
        ('placesc', 54, 2),
        # Census Tract
        ('tract', 56, 6),
        # Block Group
        ('blkgrp', 62, 1),
        # Block
        ('block', 63, 4),
        # Internal Use Code
        ('iuc', 67, 2),
        # Consolidated City (FIPS). This field is five characters
        # wide; it runs right up to the class code that follows it.
        ('concit', 69, 5),
        # FIPS Consolidated City Class Code
        ('concitcc', 74, 2),
        # Consolidated City Size Code
        ('concitsc', 76, 2),
        # American Indian Area/Alaska Native Area/Hawaiian Home Land
        # (Census)
        ('aianhh', 78, 4),
        # American Indian Area/Alaska Native Area/Hawaiian Home Land
        # (FIPS)
        ('aianhhfp', 82, 5),
        # FIPS American Indian Area/Alaska Native Area/Hawaiian Home
        # Land Class Code
        ('aianhhcc', 87, 2),
        # American Indian Trust Land/Hawaiian Home Land Indicator
        ('aihhtli', 89, 1),
        # American Indian Tribal Subdivision (Census)
        ('aitsce', 90, 3),
        # American Indian Tribal Subdivision (FIPS)
        ('aits', 93, 5),
        # FIPS American Indian Tribal Subdivision Class Code
        ('aitscc', 98, 2),
        # Alaska Native Regional Corporation (FIPS)
        ('anrc', 100, 5),
        # FIPS Alaska Native Regional Corporation Class Code
        ('anrccc', 105, 2),
        # Metropolitan Statistical Area/Consolidated Metropolitan
        # Statistical Area
        ('msacmsa', 107, 4),
        # MSA/CMSA Size Code
        ('masc', 111, 2),
        # Consolidated Metropolitan Statistical Area
        ('cmsa', 113, 2),
        # Metropolitan Area Central City Indicator
        ('macci', 115, 1),
        # Primary Metropolitan Statistical Area
        ('pmsa', 116, 4),
        # New England County Metropolitan Area
        ('necma', 120, 4),
        # New England County Metropolitan Area Central City Indicator
        ('necmacci', 124, 1),
        # New England County Metropolitan Area Size Code
        ('necmasc', 125, 2),
        # Extended Place Indicator
        ('exi', 127, 1),
        # Urban Area
        ('ua', 128, 5),
        # Urban Area Size Code
        ('uasc', 133, 2),
        # Urban Area Type
        ('ustype', 135, 1),
        # Urban/Rural
        ('ur', 136, 1),
        # Congressional District (106th)
        ('cd106', 137, 2),
        # Congressional District (108th)
        ('cd108', 139, 2),
        # Congressional District (109th)
        ('cd109', 141, 2),
        # Congressional District (110th)
        ('cd110', 143, 2),
        # State Legislative District (Upper Chamber)
        ('sldu', 145, 3),
        # State Legislative District (Lower Chamber)
        ('sldl', 148, 3),
        # Voting District
        ('vtd', 151, 6),
        # Voting District Indicator
        ('vtdi', 157, 1),
        # ZIP Code Tabulation Area (3 digit)
        ('zcta3', 158, 3),
        # ZIP Code Tabulation Area (5 digit)
        ('zcta5', 161, 5),
        # Subbarrio (FIPS)
        ('submcd', 166, 5),
        # FIPS Subbarrio Class Code
        ('submcdcc', 171, 2),
        # Area (Land)
        ('arealand', 173, 14),
        # Area (Water)
        ('areawatr', 187, 14),
        # Area Name - Legal/Statistical
        # Area Description (LSAD)
        # Term - Part Indicator
        ('name', 201, 90),
        # Functional Status Code
        ('funcstat', 291, 1),
        # Geographic Change User Note Indicator
        ('gcuni', 292, 1),
        # Population Count (100%)
        ('pop100', 293, 9),
        # Housing Unit Count (100%)
        ('hu100', 302, 9),
        # Internal Point (Latitude)
        ('intptlat', 311, 9),
        # Internal Point (Longitude)
        ('intptlon', 320, 10),
        # Legal/Statistical Area Description Code
        ('lsadc', 330, 2),
        # Part Flag
        ('partflag', 332, 1),
        # School District (Elementary)
        ('sdelm', 333, 5),
        # School District (Secondary)
        ('sdsec', 338, 5),
        # School District (Unified)
        ('sduni', 343, 5),
        # Traffic Analysis Zone
        ('taz', 348, 6),
        # Oregon Urban Growth Area
        ('uga', 354, 5),
        # Public Use Microdata Area - 5% File
        ('puma5', 359, 5),
        # Public Use Microdata Area - 1% File
        ('puma1', 364, 5),
        # Reserved
        ('reserve2', 369, 15),
        # Metropolitan Area Central City
        ('macc', 384, 5),
        # Urban Area Central Place
        ('uacp', 389, 5),
        # Reserved
        ('reserved', 394, 7),
    )


    def parse_file(self, path):
        """
        Assuming that path refers to an SF1 (geo) file, parse the
        geographic header records contained within it. Return a list
        of GeoRecord objects, one per line of the file.

        Any error raised while parsing a line (see parse_line) is
        passed on to the caller.
        """

        # Our list of GeoRecord objects to return. Empty at first.
        records = []

        # The with statement closes the file even if one of the lines
        # turns out to be unparseable.
        with open(path, 'r') as f:
            for line in f:
                records.append(self.parse_line(line))

        return records


    def parse_blocks(self, path):
        """
        Parse only the blocks from a geo file. Return a list of Block
        objects.

        Every record in the file is offered to the Block constructor;
        the records for which it raises a ValueError are left out.
        """
        blocks = []

        for record in self.parse_file(path):
            try:
                blocks.append(Block(record))
            except ValueError:
                # A value couldn't be converted to the appropriate
                # type, so this record is not usable as a block.
                continue

        return blocks


    def parse_line(self, line):
        """
        Parse one line of an SF1 geo file, and return the resulting
        GeoRecord. Each field is stored, as a string, under the
        attribute name given in the field layout table.

        Only the length of the line is checked here; a RecordError is
        raised if the line is too short. Making sense of the field
        values is left to whomever uses the record.
        """
        if (len(line) < GeoRecord.MINIMUM_LINE_LENGTH):
            raise RecordError("The input line is too short. The SF1 specification requires a line length of %d characters; this line contains only %d characters" % (GeoRecord.MINIMUM_LINE_LENGTH, len(line)))

        record = GeoRecord()

        for (name, start, size) in self._FIELD_LAYOUT:
            # Python indexes are zero-based, whereas the positions in
            # the layout table are one-based.
            offset = start - 1
            setattr(record, name, line[offset:(offset + size)])

        return record
365
366
367
def FindClosestBlock(blocks, target_coords):
    """
    Find the closest block (from within blocks) to the GPS
    coordinates given by target_coords.

    Returns None if we aren't passed any blocks. If several blocks
    are equally close, the first of them is returned.
    """

    # Empty by default. Hopefully we're passed some blocks.
    closest_block = None

    # Start from infinity rather than from some "big enough" number,
    # so that the result doesn't depend on how large the computed
    # distances happen to be.
    min_distance = float('inf')

    for block in blocks:
        this_distance = GPS.CalculateDistance(target_coords, block.coordinates)
        if (this_distance < min_distance):
            closest_block = block
            min_distance = this_distance

    return closest_block
385
386
387
def FindAveragePopulationDensity(coords, geo_file_path):
    """
    Return the population density of the block closest to coords.

    The blocks are parsed from the SF1 geo file at geo_file_path.
    Despite the name of this function, nothing is averaged: the
    density that we return is that of the single closest block.

    Returns None if no blocks could be parsed from the file.
    """
    grp = GeoRecordParser()
    blocks = grp.parse_blocks(geo_file_path)
    closest_block = FindClosestBlock(blocks, coords)

    # With no blocks to choose from, there is no closest block, and
    # therefore no density to report.
    if closest_block is None:
        return None

    return closest_block.population_density()