]> gitweb.michael.orlitzky.com - dead/census-tools.git/blob - src/SummaryFile1.py
Modified the Data module and download script to download the Summary File 1 data.
[dead/census-tools.git] / src / SummaryFile1.py
1 import os
2
3 import GPS
4 import StringUtils
5
6
try:
    # Python 2: keep the historical base class so that any existing
    # ``except StandardError`` handlers continue to catch RecordError.
    _RecordErrorBase = StandardError
except NameError:
    # Python 3 removed StandardError; Exception is its direct successor.
    _RecordErrorBase = Exception


class RecordError(_RecordErrorBase):
    """
    Raised when a geo record is malformed or is not of the expected
    kind (for example, a line that is too short, or a GeoRecord that
    does not represent a block).
    """
    pass
9
class GeoRecord:
    """
    A single record (one line) of an SF1 geo file.

    Instances start out empty; GeoRecordParser.parse_line() attaches
    one attribute per field of the line it parses.
    """

    # Lines shorter than this many characters are rejected by
    # GeoRecordParser.parse_line() before any field is extracted.
    MINIMUM_LINE_LENGTH = 400
16
17
class Block:
    """
    A census block, built from the GeoRecord that describes it.

    Only the fields needed for identification, population and area
    are copied from the record; a few convenience queries are layered
    on top of them.
    """

    def __init__(self, geo_record):
        """
        Build a Block from a GeoRecord object.

        Raises RecordError when StringUtils.is_integer() rejects the
        record's 'block' field, since that means we were not handed a
        block. A ValueError from int()/float() propagates if one of
        the numeric fields cannot be converted.
        """
        if not StringUtils.is_integer(geo_record.block):
            raise RecordError('GeoRecord object does not represent a block.')

        # The identifying codes are kept as strings, exactly as they
        # appear in the record, because blkidfp00() concatenates them.
        for code in ('state', 'county', 'tract', 'block'):
            setattr(self, code, getattr(geo_record, code))

        # Each conversion below raises a ValueError if the input
        # string cannot be converted to the specified type.
        self.pop100 = int(geo_record.pop100)
        self.arealand = float(geo_record.arealand)
        self.areawatr = float(geo_record.areawatr)

        # The intptlat/intptlon fields carry six digits after the
        # decimal point, but the decimal point itself is not present
        # in the data. A raw value such as +12345678 is therefore
        # read as 12345678.0 and must be divided by one million to
        # shift the decimal point six places to the left.
        implied_decimal_shift = 10**6
        point = self.coordinates = GPS.Coordinates()
        point.latitude = float(geo_record.intptlat) / implied_decimal_shift
        point.longitude = float(geo_record.intptlon) / implied_decimal_shift

    def blkidfp00(self):
        """
        Return the block identifier: the state, county, tract and
        block codes concatenated in that order.

        The Tiger/Line shapefile documentation describes BLKIDFP (a
        16-character string) as:

          Current block identifier; a concatenation of Census 2000
          state FIPS code, Census 2000 county FIPS code, Census 2000
          census tract code, Census 2000 tabulation block number, and
          current block suffix 1.

        No suffix is appended here.
        """
        county_prefix = self.state + self.county
        return county_prefix + self.tract + self.block

    def total_area(self):
        """Return the land area plus the water area of this block."""
        land, water = self.arealand, self.areawatr
        return (land + water)

    def population_density(self):
        """
        Return the population (pop100) divided by the total area, or
        0 when the total area is zero.
        """
        # Some unusual blocks have a total area of zero, yet still
        # seem to possess geometries in the Tiger database, so they
        # are allowed to be parsed. Giving them a density of 0 was an
        # arbitrary choice.
        area = self.total_area()
        if area == 0:
            return 0
        return (self.pop100 / area)
94
95
96
class GeoRecordParser:
    """
    Parses Summary File 1 geographic header ("geo") files.

    Every line of a geo file is one fixed-width record. parse_line()
    slices a single line into a GeoRecord; parse_file() and
    parse_blocks() process a whole file.
    """

    # Fixed-width layout of one geo record, in file order, as
    # (GeoRecord attribute name, field width) pairs.
    #
    # The SF1 specification gives one-based starting positions; here
    # the zero-based offsets are derived by accumulating the widths,
    # which guarantees that the fields are contiguous and that no
    # column is skipped. The widths add up to 400 characters.
    _FIELD_LAYOUT = (
        ('fileid', 6),      # File Identification
        ('stusab', 2),      # State / US Abbreviation (USPS)
        ('sumlev', 3),      # Summary Level
        ('geocomp', 2),     # Geographic Component
        ('chariter', 3),    # Characteristic Iteration
        ('cifsn', 2),       # Characteristic Iteration File Sequence Number
        ('logrecno', 7),    # Logical Record Number
        ('region', 1),      # Region
        ('division', 1),    # Division
        ('statece', 2),     # State (Census)
        ('state', 2),       # State (FIPS)
        ('county', 3),      # County
        ('countysc', 2),    # County Size Code
        ('cousub', 5),      # County Subdivision (FIPS)
        ('cousubcc', 2),    # FIPS County Subdivision Class Code
        ('cousubsc', 2),    # County Subdivision Size Code
        ('place', 5),       # Place (FIPS)
        ('placecc', 2),     # FIPS Place Class Code
        ('placedc', 1),     # Place Description Code
        ('placesc', 2),     # Place Size Code
        ('tract', 6),       # Census Tract
        ('blkgrp', 1),      # Block Group
        ('block', 4),       # Block
        ('iuc', 2),         # Internal Use Code
        # Consolidated City (FIPS). This field is five characters
        # wide; it was previously read as only three characters,
        # which dropped its last two columns.
        ('concit', 5),
        ('concitcc', 2),    # FIPS Consolidated City Class Code
        ('concitsc', 2),    # Consolidated City Size Code
        # American Indian Area/Alaska Native Area/Hawaiian Home Land
        ('aianhh', 4),      # ... (Census)
        ('aianhhfp', 5),    # ... (FIPS)
        ('aianhhcc', 2),    # ... FIPS Class Code
        ('aihhtli', 1),     # American Indian Trust Land/Hawaiian Home
                            # Land Indicator
        ('aitsce', 3),      # American Indian Tribal Subdivision (Census)
        ('aits', 5),        # American Indian Tribal Subdivision (FIPS)
        ('aitscc', 2),      # FIPS American Indian Tribal Subdivision
                            # Class Code
        ('anrc', 5),        # Alaska Native Regional Corporation (FIPS)
        ('anrccc', 2),      # FIPS Alaska Native Regional Corporation
                            # Class Code
        ('msacmsa', 4),     # Metropolitan Statistical Area/Consolidated
                            # Metropolitan Statistical Area
        ('masc', 2),        # MSA/CMSA Size Code
        ('cmsa', 2),        # Consolidated Metropolitan Statistical Area
        ('macci', 1),       # Metropolitan Area Central City Indicator
        ('pmsa', 4),        # Primary Metropolitan Statistical Area
        ('necma', 4),       # New England County Metropolitan Area
        ('necmacci', 1),    # NECMA Central City Indicator
        ('necmasc', 2),     # NECMA Size Code
        ('exi', 1),         # Extended Place Indicator
        ('ua', 5),          # Urban Area
        ('uasc', 2),        # Urban Area Size Code
        # Urban Area Type. NOTE: the specification calls this field
        # UATYPE; the attribute keeps its existing 'ustype' spelling
        # so that current users of GeoRecord are not broken.
        ('ustype', 1),
        ('ur', 1),          # Urban/Rural
        ('cd106', 2),       # Congressional District (106th)
        ('cd108', 2),       # Congressional District (108th)
        ('cd109', 2),       # Congressional District (109th)
        ('cd110', 2),       # Congressional District (110th)
        ('sldu', 3),        # State Legislative District (Upper Chamber)
        ('sldl', 3),        # State Legislative District (Lower Chamber)
        ('vtd', 6),         # Voting District
        ('vtdi', 1),        # Voting District Indicator
        ('zcta3', 3),       # ZIP Code Tabulation Area (3 digit)
        ('zcta5', 5),       # ZIP Code Tabulation Area (5 digit)
        ('submcd', 5),      # Subbarrio (FIPS)
        ('submcdcc', 2),    # FIPS Subbarrio Class Code
        ('arealand', 14),   # Area (Land)
        ('areawatr', 14),   # Area (Water)
        ('name', 90),       # Area Name - Legal/Statistical Area
                            # Description (LSAD) Term - Part Indicator
        ('funcstat', 1),    # Functional Status Code
        ('gcuni', 1),       # Geographic Change User Note Indicator
        ('pop100', 9),      # Population Count (100%)
        ('hu100', 9),       # Housing Unit Count (100%)
        ('intptlat', 9),    # Internal Point (Latitude)
        ('intptlon', 10),   # Internal Point (Longitude)
        ('lsadc', 2),       # Legal/Statistical Area Description Code
        ('partflag', 1),    # Part Flag
        ('sdelm', 5),       # School District (Elementary)
        ('sdsec', 5),       # School District (Secondary)
        ('sduni', 5),       # School District (Unified)
        ('taz', 6),         # Traffic Analysis Zone
        ('uga', 5),         # Oregon Urban Growth Area
        ('puma5', 5),       # Public Use Microdata Area - 5% File
        ('puma1', 5),       # Public Use Microdata Area - 1% File
        ('reserve2', 15),   # Reserved
        ('macc', 5),        # Metropolitan Area Central City
        ('uacp', 5),        # Urban Area Central Place
        ('reserved', 7),    # Reserved
    )

    def parse_file(self, path):
        """
        Assuming that path refers to an SF1 (geo) file, parse the
        geographic header records contained within it. Return a list
        of GeoRecord objects, one per line, in file order.

        A RecordError raised by parse_line() for any line propagates
        to the caller.
        """
        # The context manager closes the file even when parse_line()
        # raises part-way through.
        with open(path, 'r') as geo_file:
            return [self.parse_line(line) for line in geo_file]

    def parse_blocks(self, path):
        """
        Parse only the blocks from a geo file.

        Returns a list of Block objects. Records that are not blocks
        (RecordError), and records with a field that cannot be
        converted to the appropriate type (ValueError), are skipped.
        """
        blocks = []

        for record in self.parse_file(path):
            try:
                blocks.append(Block(record))
            except (RecordError, ValueError):
                # Either it ain't a block, or one of its values
                # couldn't be converted. Skip it in both cases.
                continue

        return blocks

    def parse_line(self, line):
        """
        Parse one line of an SF1 geo file and return it as a
        GeoRecord with one string attribute per field.

        Only the line length is checked here; the field contents are
        stored as-is, without interpretation.

        Raises RecordError if the line is shorter than
        GeoRecord.MINIMUM_LINE_LENGTH characters.
        """
        if len(line) < GeoRecord.MINIMUM_LINE_LENGTH:
            raise RecordError(
                "The input line is too short. The SF1 specification "
                "requires a line length of %d characters; this line "
                "contains only %d characters"
                % (GeoRecord.MINIMUM_LINE_LENGTH, len(line)))

        record = GeoRecord()

        # Python indexes are zero-based, whereas the SF1 specification
        # gives one-based positions. The first field, for example, is
        # defined as beginning at position 1 with length 6, which
        # corresponds to line[0:6].
        offset = 0
        for name, width in self._FIELD_LAYOUT:
            setattr(record, name, line[offset:offset + width])
            offset += width

        return record