]> gitweb.michael.orlitzky.com - dead/census-tools.git/blob - src/SummaryFile1.py
Initial commit.
[dead/census-tools.git] / src / SummaryFile1.py
1 import os, GPS, inspect
2
# StandardError only exists on Python 2; Python 3 folded it into
# Exception. Pick whichever is available so the module imports on both
# while keeping the original hierarchy on Python 2.
try:
    _RecordErrorBase = StandardError
except NameError:
    _RecordErrorBase = Exception


class RecordError(_RecordErrorBase):
    """
    Raised when a line of an SF1 geo file cannot be treated as a
    geographic header record (for example, when it is too short).
    """
    pass
5
6
class GeoRecord:
    """
    One geographic header record taken from an SF1 geo file.

    The class itself declares no fields. GeoRecordParser.parse_line
    creates an empty instance and attaches every parsed field to it as
    a plain string attribute.
    """

    # Shortest line (in characters) that is accepted as a record.
    MinimumLineLength = 400
13
14
class Block:
    """
    A census block, i.e. a GeoRecord reduced to the handful of values
    we actually use: block number, population, land/water area, and
    the coordinates of its internal point.
    """

    def __init__(self, geo_record):
        """
        Build a Block from an already-parsed GeoRecord.

        Raises ValueError when any of the needed fields cannot be
        converted to a number by int()/float().
        """
        # Integer-valued fields.
        self.block_number = int(geo_record.block)
        self.population = int(geo_record.pop100)

        # Areas are kept as floats so they can be summed and divided.
        self.area_land = float(geo_record.arealand)
        self.area_water = float(geo_record.areawatr)

        # Attach the coordinates object first, then fill it in through
        # a local alias.
        point = GPS.Coordinates()
        self.coordinates = point
        point.latitude = float(geo_record.intptlat)
        point.longitude = float(geo_record.intptlon)

    def total_area(self):
        """Return the land area plus the water area."""
        return self.area_land + self.area_water

    def population_density(self):
        """
        Return the population divided by the total area.

        Raises ZeroDivisionError when the total area is zero.
        """
        area = self.total_area()
        return self.population / area
43
44
class GeoRecordParser:
    """
    Parses the fixed-width geographic header records of an SF1 geo
    file into GeoRecord objects.
    """

    # Layout of one record as (attribute name, start, end) triples.
    #
    # Note that Python indexes are zero-based and slices are half-open,
    # whereas the SF1 specification gives the field offsets as
    # one-based. For example, the first field, "File Identification,"
    # is defined as beginning at position 1 and having length 6, which
    # corresponds to the slice [0:6] below.
    #
    # The fields are contiguous: each one starts where the previous one
    # ends, and together they cover the 400 characters of a record.
    _FIELDS = (
        ("fileid", 0, 6),        # File Identification
        ("stusab", 6, 8),        # State / US Abbreviation (USPS)
        ("sumlev", 8, 11),       # Summary Level
        ("geocomp", 11, 13),     # Geographic Component
        ("chariter", 13, 16),    # Characteristic Iteration
        ("cifsn", 16, 18),       # Characteristic Iteration File Sequence Number
        ("logrecno", 18, 25),    # Logical Record Number
        ("region", 25, 26),      # Region
        ("division", 26, 27),    # Division
        ("statece", 27, 29),     # State (Census)
        ("state", 29, 31),       # State (FIPS)
        ("county", 31, 34),      # County
        ("countysc", 34, 36),    # County Size Code
        ("cousub", 36, 41),      # County Subdivision (FIPS)
        ("cousubcc", 41, 43),    # FIPS County Subdivision Class Code
        ("cousubsc", 43, 45),    # County Subdivision Size Code
        ("place", 45, 50),       # Place (FIPS)
        ("placecc", 50, 52),     # FIPS Place Class Code
        ("placedc", 52, 53),     # Place Description Code
        ("placesc", 53, 55),     # Place Size Code
        ("tract", 55, 61),       # Census Tract
        ("blkgrp", 61, 62),      # Block Group
        ("block", 62, 66),       # Block
        ("iuc", 66, 68),         # Internal Use Code
        # Consolidated City (FIPS). This is a five-character field; it
        # used to be read as [68:71], which dropped its last two
        # characters (the next field begins at offset 73).
        ("concit", 68, 73),
        ("concitcc", 73, 75),    # FIPS Consolidated City Class Code
        ("concitsc", 75, 77),    # Consolidated City Size Code
        # American Indian Area/Alaska Native Area/Hawaiian Home Land
        # (Census)
        ("aianhh", 77, 81),
        # American Indian Area/Alaska Native Area/Hawaiian Home Land
        # (FIPS)
        ("aianhhfp", 81, 86),
        # FIPS American Indian Area/Alaska Native Area/Hawaiian Home
        # Land Class Code
        ("aianhhcc", 86, 88),
        # American Indian Trust Land/Hawaiian Home Land Indicator
        ("aihhtli", 88, 89),
        ("aitsce", 89, 92),      # American Indian Tribal Subdivision (Census)
        ("aits", 92, 97),        # American Indian Tribal Subdivision (FIPS)
        # FIPS American Indian Tribal Subdivision Class Code
        ("aitscc", 97, 99),
        ("anrc", 99, 104),       # Alaska Native Regional Corporation (FIPS)
        # FIPS Alaska Native Regional Corporation Class Code
        ("anrccc", 104, 106),
        # Metropolitan Statistical Area/Consolidated Metropolitan
        # Statistical Area
        ("msacmsa", 106, 110),
        ("masc", 110, 112),      # MSA/CMSA Size Code
        ("cmsa", 112, 114),      # Consolidated Metropolitan Statistical Area
        ("macci", 114, 115),     # Metropolitan Area Central City Indicator
        ("pmsa", 115, 119),      # Primary Metropolitan Statistical Area
        ("necma", 119, 123),     # New England County Metropolitan Area
        # New England County Metropolitan Area Central City Indicator
        ("necmacci", 123, 124),
        # New England County Metropolitan Area Size Code
        ("necmasc", 124, 126),
        ("exi", 126, 127),       # Extended Place Indicator
        ("ua", 127, 132),        # Urban Area
        ("uasc", 132, 134),      # Urban Area Size Code
        # Urban Area Type. NOTE(review): the attribute is spelled
        # "ustype"; the SF1 documentation presumably calls this field
        # UATYPE -- confirm. The name is kept for existing callers.
        ("ustype", 134, 135),
        ("ur", 135, 136),        # Urban/Rural
        ("cd106", 136, 138),     # Congressional District (106th)
        ("cd108", 138, 140),     # Congressional District (108th)
        ("cd109", 140, 142),     # Congressional District (109th)
        ("cd110", 142, 144),     # Congressional District (110th)
        ("sldu", 144, 147),      # State Legislative District (Upper Chamber)
        ("sldl", 147, 150),      # State Legislative District (Lower Chamber)
        ("vtd", 150, 156),       # Voting District
        ("vtdi", 156, 157),      # Voting District Indicator
        ("zcta3", 157, 160),     # ZIP Code Tabulation Area (3 digit)
        ("zcta5", 160, 165),     # ZIP Code Tabulation Area (5 digit)
        ("submcd", 165, 170),    # Subbarrio (FIPS)
        ("submcdcc", 170, 172),  # FIPS Subbarrio Class Code
        ("arealand", 172, 186),  # Area (Land)
        ("areawatr", 186, 200),  # Area (Water)
        # Area Name - Legal/Statistical
        # Area Description (LSAD)
        # Term - Part Indicator
        ("name", 200, 290),
        ("funcstat", 290, 291),  # Functional Status Code
        ("gcuni", 291, 292),     # Geographic Change User Note Indicator
        ("pop100", 292, 301),    # Population Count (100%)
        ("hu100", 301, 310),     # Housing Unit Count (100%)
        ("intptlat", 310, 319),  # Internal Point (Latitude)
        ("intptlon", 319, 329),  # Internal Point (Longitude)
        ("lsadc", 329, 331),     # Legal/Statistical Area Description Code
        ("partflag", 331, 332),  # Part Flag
        ("sdelm", 332, 337),     # School District (Elementary)
        ("sdsec", 337, 342),     # School District (Secondary)
        ("sduni", 342, 347),     # School District (Unified)
        ("taz", 347, 353),       # Traffic Analysis Zone
        ("uga", 353, 358),       # Oregon Urban Growth Area
        ("puma5", 358, 363),     # Public Use Microdata Area - 5% File
        ("puma1", 363, 368),     # Public Use Microdata Area - 1% File
        ("reserve2", 368, 383),  # Reserved
        ("macc", 383, 388),      # Metropolitan Area Central City
        ("uacp", 388, 393),      # Urban Area Central Place
        ("reserved", 393, 400),  # Reserved
    )

    def parse_file(self, path):
        """
        Assuming that path refers to an SF1 (geo) file, parse the
        geographic header records contained within it. Return a list
        of GeoRecord objects, one per line, in file order.

        Raises RecordError (from parse_line) if any line is too short.
        """
        # The with-block guarantees the file is closed even when
        # parse_line raises part-way through the file.
        with open(path, 'r') as f:
            return [self.parse_line(line) for line in f]

    def parse_blocks(self, path):
        """
        Parse only the blocks from a geo file.

        Every record in the file is offered to the Block constructor;
        records whose fields cannot be converted to numbers (Block
        raises ValueError for those) are skipped.
        """
        blocks = []

        for record in self.parse_file(path):
            try:
                blocks.append(Block(record))
            except ValueError:
                # Not usable as a block; move on to the next record.
                continue

        return blocks

    def parse_line(self, line):
        """
        Parse one line of an SF1 geo file into a GeoRecord.

        Only the line length is checked here. Every field is stored on
        the record as the raw string slice, with no stripping or type
        conversion, under the attribute names listed in _FIELDS.

        Raises RecordError if the line is shorter than
        GeoRecord.MinimumLineLength characters.
        """
        if len(line) < GeoRecord.MinimumLineLength:
            raise RecordError(
                "The input line is too short. The SF1 specification "
                "requires a line length of %d characters; this line "
                "contains only %d characters"
                % (GeoRecord.MinimumLineLength, len(line)))

        record = GeoRecord()

        for name, start, end in self._FIELDS:
            setattr(record, name, line[start:end])

        return record
357
358
359
def FindClosestBlock(blocks, target_coords):
    """
    Find the closest block (from within blocks) to the GPS
    coordinates given by target_coords.

    Distances come from GPS.CalculateDistance(target_coords,
    block.coordinates). When several blocks are equally close, the
    first one encountered wins. Returns None if blocks is empty.
    """

    # Empty by default. Hopefully we're passed some blocks.
    closest_block = None

    # Start from infinity rather than an arbitrary large number, so
    # that a block is never rejected merely because its distance is
    # numerically large.
    min_distance = float('inf')

    for block in blocks:
        this_distance = GPS.CalculateDistance(target_coords, block.coordinates)
        if this_distance < min_distance:
            closest_block = block
            min_distance = this_distance

    return closest_block
377
378
379
def FindAveragePopulationDensity(coords, geo_file_path):
    """
    Return the population density of the block nearest to coords.

    The blocks are read from the SF1 geo file at geo_file_path.
    Despite the name, no averaging happens here: the result is the
    density of the single closest block. If no block is found,
    FindClosestBlock returns None and the call below raises
    AttributeError.
    """
    parser = GeoRecordParser()
    nearest = FindClosestBlock(parser.parse_blocks(geo_file_path), coords)
    return nearest.population_density()