]> gitweb.michael.orlitzky.com - dead/census-tools.git/blob - src/SummaryFile1.py
Rename the run_tests script to test_suite.
[dead/census-tools.git] / src / SummaryFile1.py
1 import os
2
3 from Errors import RecordError
4 import GPS
5 import StringUtils
6
7
class GeoRecord:
    """
    A single record (one line) of an SF1 geo file.

    Instances start out empty; GeoRecordParser.parse_line() attaches
    one attribute per field of the record.
    """

    # Lines shorter than this are rejected by
    # GeoRecordParser.parse_line() with a RecordError.
    MINIMUM_LINE_LENGTH = 400
14
15
class Block:
    """
    A census block, i.e. a GeoRecord whose 'block' field is filled in.

    On top of the raw fields, a handful of helper methods are provided
    to simplify computation and querying.
    """

    def __init__(self, geo_record):
        """
        Build a Block from a GeoRecord object.

        A RecordError is raised when the record's 'block' field does
        not pass StringUtils.is_integer(), because in that case we were
        not handed a block. A ValueError is raised when one of the
        numeric fields cannot be converted.
        """
        is_block = StringUtils.is_integer(geo_record.block)
        if not is_block:
            raise RecordError('GeoRecord object does not represent a block.')

        # The identifying codes stay strings; blkidfp00() concatenates
        # them and must not lose anything to a numeric conversion.
        self.state = geo_record.state
        self.county = geo_record.county
        self.tract = geo_record.tract
        self.block = geo_record.block

        # int() and float() throw a ValueError if the input string
        # cannot be converted to the requested type.
        self.pop100 = int(geo_record.pop100)
        self.arealand = float(geo_record.arealand)
        self.areawatr = float(geo_record.areawatr)

        # The intptlat/intptlon fields carry six digits after the
        # decimal point, but the decimal point itself is not present
        # in the data. float() therefore yields a value that is too
        # large by a factor of 10**6 (+12345678 becomes 12345678.0),
        # and dividing moves the decimal point six places to the left.
        shift = 10**6
        self.coordinates = GPS.Coordinates()
        self.coordinates.latitude = float(geo_record.intptlat) / shift
        self.coordinates.longitude = float(geo_record.intptlon) / shift

    def blkidfp00(self):
        """
        Return the block identifier: the state, county, tract and
        block codes concatenated in that order.
        """
        # From the Tiger/Line shapefile documentation:
        #
        #   Current block identifier; a concatenation of Census 2000
        #   state FIPS code, Census 2000 county FIPS code, Census
        #   BLKIDFP 16 String 2000 census tract code, Census 2000
        #   tabulation block number, and current block suffix 1.
        #
        prefix = self.state + self.county
        return prefix + self.tract + self.block

    def total_area(self):
        """Return the land area plus the water area of this block."""
        return self.arealand + self.areawatr

    def population_density(self):
        """
        Return the population divided by the total area, or 0 when
        the total area is zero.
        """
        # There are some unusual cases where a block will have a
        # total area of zero. It also seems that these unusual blocks
        # do in fact possess geometries, provided in the Tiger
        # database. Therefore, we allow them to be parsed.
        #
        # The choice to assign these blocks an average density of 0
        # was arbitrary.
        area = self.total_area()
        if area == 0:
            return 0

        return self.pop100 / area
92
93
94
class GeoRecordParser:
    """
    Reads SF1 geo files and converts their fixed-width lines into
    GeoRecord objects (and, on request, Block objects).
    """

    def parse_file(self, path):
        """
        Assuming that path refers to an SF1 (geo) file, parse the
        geographic header records contained within it. Return a list
        of GeoRecord objects.

        A RecordError raised by parse_line() for a line that is too
        short is allowed to propagate to the caller.
        """
        # The 'with' block closes the file even when parse_line()
        # raises partway through; a bare open()/close() pair would
        # leak the handle in that case.
        with open(path, 'r') as f:
            return [self.parse_line(line) for line in f]

    def parse_blocks(self, path):
        """
        Parse only the blocks from a geo file.

        Every record in the file is offered to the Block constructor;
        records that it rejects are skipped silently. Return a list of
        Block objects.
        """
        blocks = []

        for record in self.parse_file(path):
            try:
                blocks.append(Block(record))
            except RecordError:
                # Ain't a block.
                continue
            except ValueError:
                # A value couldn't be converted to the appropriate type.
                continue

        return blocks

    def parse_line(self, line):
        """
        Parse one line of an SF1 geo file. Hopefully, the input will
        match the specification. We can check the line length here, or
        allow the GeoRecord class to parse the data meaningfully and
        throw an error if something doesn't look right.

        Return a GeoRecord whose attributes hold the raw (unconverted,
        unstripped) text of each field. Raise a RecordError if the
        line is shorter than GeoRecord.MINIMUM_LINE_LENGTH.
        """
        if len(line) < GeoRecord.MINIMUM_LINE_LENGTH:
            raise RecordError(
                "The input line is too short. The SF1 specification "
                "requires a line length of %d characters; this line "
                "contains only %d characters"
                % (GeoRecord.MINIMUM_LINE_LENGTH, len(line)))

        record = GeoRecord()

        # Note that Python list indexes are zero-based, whereas the SF1
        # specification gives the field offsets as one-based. For example,
        # the first field, "File Identification," is defined as beginning
        # at position 1, and having length 6. The following line corresponds
        # to this definition.
        record.fileid = line[0:6]

        # State / US Abbreviation (USPS)
        record.stusab = line[6:8]

        # Summary Level
        record.sumlev = line[8:11]

        # Geographic Component
        record.geocomp = line[11:13]

        # Characteristic Iteration
        record.chariter = line[13:16]

        # Characteristic Iteration File Sequence Number
        record.cifsn = line[16:18]

        # Logical Record Number
        record.logrecno = line[18:25]

        # Region
        record.region = line[25]

        # Division
        record.division = line[26]

        # State (Census)
        record.statece = line[27:29]

        # State (FIPS)
        record.state = line[29:31]

        # County
        record.county = line[31:34]

        # County Size Code
        record.countysc = line[34:36]

        # County Subdivision (FIPS)
        record.cousub = line[36:41]

        # FIPS County Subdivision Class Code
        record.cousubcc = line[41:43]

        # County Subdivision Size Code
        record.cousubsc = line[43:45]

        # Place (FIPS)
        record.place = line[45:50]

        # FIPS Place Class Code
        record.placecc = line[50:52]

        # Place Description Code
        record.placedc = line[52]

        # Place Size Code
        record.placesc = line[53:55]

        # Census Tract
        record.tract = line[55:61]

        # Block Group
        record.blkgrp = line[61]

        # Block
        record.block = line[62:66]

        # Internal Use Code
        record.iuc = line[66:68]

        # Consolidated City (FIPS)
        #
        # This field is five characters wide: it has to end at offset
        # 73, where the class code below begins. Stopping at 71 would
        # drop the last two characters of the code.
        record.concit = line[68:73]

        # FIPS Consolidated City Class Code
        record.concitcc = line[73:75]

        # Consolidated City Size Code
        record.concitsc = line[75:77]

        # American Indian Area/Alaska Native Area/Hawaiian Home Land
        # (Census)
        record.aianhh = line[77:81]

        # American Indian Area/Alaska Native Area/Hawaiian Home Land
        # (FIPS)
        record.aianhhfp = line[81:86]

        # FIPS American Indian Area/Alaska Native Area/Hawaiian Home
        # Land Class Code
        record.aianhhcc = line[86:88]

        # American Indian Trust Land/Hawaiian Home Land Indicator
        record.aihhtli = line[88]

        # American Indian Tribal Subdivision (Census)
        record.aitsce = line[89:92]

        # American Indian Tribal Subdivision (FIPS)
        record.aits = line[92:97]

        # FIPS American Indian Tribal Subdivision Class Code
        record.aitscc = line[97:99]

        # Alaska Native Regional Corporation (FIPS)
        record.anrc = line[99:104]

        # FIPS Alaska Native Regional Corporation Class Code
        record.anrccc = line[104:106]

        # Metropolitan Statistical Area/Consolidated Metropolitan
        # Statistical Area
        record.msacmsa = line[106:110]

        # MSA/CMSA Size Code
        record.masc = line[110:112]

        # Consolidated Metropolitan Statistical Area
        record.cmsa = line[112:114]

        # Metropolitan Area Central City Indicator
        record.macci = line[114]

        # Primary Metropolitan Statistical Area
        record.pmsa = line[115:119]

        # New England County Metropolitan Area
        record.necma = line[119:123]

        # New England County Metropolitan Area Central City Indicator
        record.necmacci = line[123]

        # New England County Metropolitan Area Size Code
        record.necmasc = line[124:126]

        # Extended Place Indicator
        record.exi = line[126]

        # Urban Area
        record.ua = line[127:132]

        # Urban Area Size Code
        record.uasc = line[132:134]

        # Urban Area Type
        #
        # NOTE(review): the attribute is spelled 'ustype' here, which
        # may be a typo for 'uatype'; the name is kept so that existing
        # callers continue to work.
        record.ustype = line[134]

        # Urban/Rural
        record.ur = line[135]

        # Congressional District (106th)
        record.cd106 = line[136:138]

        # Congressional District (108th)
        record.cd108 = line[138:140]

        # Congressional District (109th)
        record.cd109 = line[140:142]

        # Congressional District (110th)
        record.cd110 = line[142:144]

        # State Legislative District (Upper Chamber)
        record.sldu = line[144:147]

        # State Legislative District (Lower Chamber)
        record.sldl = line[147:150]

        # Voting District
        record.vtd = line[150:156]

        # Voting District Indicator
        record.vtdi = line[156]

        # ZIP Code Tabulation Area (3 digit)
        record.zcta3 = line[157:160]

        # ZIP Code Tabulation Area (5 digit)
        record.zcta5 = line[160:165]

        # Subbarrio (FIPS)
        record.submcd = line[165:170]

        # FIPS Subbarrio Class Code
        record.submcdcc = line[170:172]

        # Area (Land)
        record.arealand = line[172:186]

        # Area (Water)
        record.areawatr = line[186:200]

        # Area Name - Legal/Statistical
        # Area Description (LSAD)
        # Term - Part Indicator
        record.name = line[200:290]

        # Functional Status Code
        record.funcstat = line[290]

        # Geographic Change User Note Indicator
        record.gcuni = line[291]

        # Population Count (100%)
        record.pop100 = line[292:301]

        # Housing Unit Count (100%)
        record.hu100 = line[301:310]

        # Internal Point (Latitude)
        record.intptlat = line[310:319]

        # Internal Point (Longitude)
        record.intptlon = line[319:329]

        # Legal/Statistical Area Description Code
        record.lsadc = line[329:331]

        # Part Flag
        record.partflag = line[331]

        # School District (Elementary)
        record.sdelm = line[332:337]

        # School District (Secondary)
        record.sdsec = line[337:342]

        # School District (Unified)
        record.sduni = line[342:347]

        # Traffic Analysis Zone
        record.taz = line[347:353]

        # Oregon Urban Growth Area
        record.uga = line[353:358]

        # Public Use Microdata Area - 5% File
        record.puma5 = line[358:363]

        # Public Use Microdata Area - 1% File
        record.puma1 = line[363:368]

        # Reserved
        record.reserve2 = line[368:383]

        # Metropolitan Area Central City
        record.macc = line[383:388]

        # Urban Area Central Place
        record.uacp = line[388:393]

        # Reserved
        record.reserved = line[393:400]

        return record