]>
gitweb.michael.orlitzky.com - dead/census-tools.git/blob - src/SummaryFile1.py
3 from Errors
import RecordError
class GeoRecord:
    """
    This class wraps one record in an SF1 geo file.
    """

    # Shortest line, in characters, that the parser accepts as a geo
    # record; parse_line raises RecordError for anything shorter.
    MINIMUM_LINE_LENGTH = 400
18 Represents a block (which is a special case of a GeoRecord).
19 There are some convenience methods tacked on to make computation easier.
def __init__(self, geo_record):
    """
    Initialize a block from a GeoRecord object.

    It is important that we raise some kind of error if there is no
    usable 'block' field, since that means we weren't passed a block.

    Raises RecordError when StringUtils.is_integer rejects
    geo_record.block, and ValueError when one of the numeric fields
    cannot be converted to its target type.
    """
    block_is_integer = StringUtils.is_integer(geo_record.block)
    if not block_is_integer:
        raise RecordError('GeoRecord object does not represent a block.')

    # The identifying codes are kept as strings so that they don't
    # affect the block_identifier() generation.
    for code in ('state', 'county', 'tract', 'block'):
        setattr(self, code, getattr(geo_record, code))

    # All of these int/float conversions will throw a ValueError if
    # the input string cannot be converted to the specified type.
    self.pop100 = int(geo_record.pop100)
    self.arealand = float(geo_record.arealand)
    self.areawatr = float(geo_record.areawatr)

    # Latitude and longitude are given to six digits after the
    # decimal point, but the intptlat/intptlon fields contain no
    # decimal point. For example, +12345678 parses as 12345678.0, so
    # the parsed value is divided by 10**6 to move the decimal point
    # six places to the left.
    scale = 10 ** 6
    self.coordinates = GPS.Coordinates()
    self.coordinates.latitude = float(geo_record.intptlat) / scale
    self.coordinates.longitude = float(geo_record.intptlon) / scale
62 # From the Tiger/Line shapefile documentation:
64 # Current block identifier; a concatenation of Census 2000
65 # state FIPS code, Census 2000 county FIPS code, Census
66 # BLKIDFP 16 String 2000 census tract code, Census 2000
67 # tabulation block number, and current block suffix 1.
# NOTE(review): the def line is absent from this excerpt; the name and
# the argument-less signature follow the self.total_area() calls made
# by population_density -- confirm against the full file.
def total_area(self):
    """
    Return the sum of this block's land area and water area
    (self.arealand + self.areawatr).
    """
    return self.arealand + self.areawatr
def population_density(self):
    """
    Return this block's population (pop100) divided by its total
    area, or 0 when the total area is zero.
    """
    # Computed once; the same value serves the zero check and the
    # division below.
    area = self.total_area()

    # There are some unusual cases where a block will have a total
    # area of zero. It also seems that these unusual blocks do in
    # fact possess geometries, provided in the Tiger database.
    # Therefore, we allow them to be parsed, and assign them an
    # average density of 0 rather than dividing by zero.
    if area == 0:
        # NOTE(review): the body of this branch is absent from this
        # excerpt; 0 follows the comment about assigning these blocks
        # a density of 0 -- confirm against the full file.
        return 0

    return self.pop100 / area
95 class GeoRecordParser
:
97 def parse_file ( self
, path
):
99 Assuming that path refers to an SF1 (geo) file, parse the
100 geographic header records contained within it. Return a list
101 of GeoRecord objects.
104 # Our list of GeoRecord objects to return. Empty at first.
110 record
= self
. parse_line ( line
)
111 records
. append ( record
)
118 def parse_blocks ( self
, path
):
119 """Parse only the blocks from a geo file."""
121 records
= self
. parse_file ( path
)
123 for record
in records
:
125 block
= Block ( record
)
131 # A value couldn't be converted to the appropriate type.
def parse_line(self, line):
    """
    Parse one line of an SF1 geo file.

    Hopefully, the input will match the specification. Only the line
    length is checked here; every field is copied out as the raw
    substring at its fixed position, with no conversion or further
    validation.

    Raises RecordError if the line is shorter than
    GeoRecord.MINIMUM_LINE_LENGTH characters.
    """
    if len(line) < GeoRecord.MINIMUM_LINE_LENGTH:
        raise RecordError(
            "The input line is too short. The SF1 specification "
            "requires a line length of %d characters; this line "
            "contains only %d characters"
            % (GeoRecord.MINIMUM_LINE_LENGTH, len(line)))

    # (attribute name, start, end) for each field, as a zero-based,
    # half-open slice of the line. Note that Python indexes are
    # zero-based, whereas the SF1 specification gives the field
    # offsets as one-based. For example, the first field, "File
    # Identification," is defined as beginning at position 1 and
    # having length 6, which is the slice [0:6].
    #
    # The fields are contiguous: each one starts where the previous
    # one ends, and the last one ends at 400. One-character fields
    # are written as one-wide slices, which for a string of at least
    # 400 characters gives the same value as indexing.
    fields = (
        ('fileid', 0, 6),
        ('stusab', 6, 8),         # State / US Abbreviation (USPS)
        ('sumlev', 8, 11),
        ('geocomp', 11, 13),      # Geographic Component
        ('chariter', 13, 16),     # Characteristic Iteration
        ('cifsn', 16, 18),        # Characteristic Iteration File Sequence Number
        ('logrecno', 18, 25),     # Logical Record Number
        ('region', 25, 26),
        ('division', 26, 27),
        ('statece', 27, 29),
        ('state', 29, 31),
        ('county', 31, 34),
        ('countysc', 34, 36),
        ('cousub', 36, 41),       # County Subdivision (FIPS)
        ('cousubcc', 41, 43),     # FIPS County Subdivision Class Code
        ('cousubsc', 43, 45),     # County Subdivision Size Code
        ('place', 45, 50),
        ('placecc', 50, 52),      # FIPS Place Class Code
        ('placedc', 52, 53),      # Place Description Code
        ('placesc', 53, 55),
        ('tract', 55, 61),
        ('blkgrp', 61, 62),
        ('block', 62, 66),
        ('iuc', 66, 68),
        # Consolidated City (FIPS). This field is five characters
        # wide; it was previously sliced as [68:71], which dropped
        # the two characters before concitcc begins at 73.
        ('concit', 68, 73),
        ('concitcc', 73, 75),     # FIPS Consolidated City Class Code
        ('concitsc', 75, 77),     # Consolidated City Size Code
        # American Indian Area/Alaska Native Area/Hawaiian Home Land
        ('aianhh', 77, 81),
        ('aianhhfp', 81, 86),
        ('aianhhcc', 86, 88),
        # American Indian Trust Land/Hawaiian Home Land Indicator
        ('aihhtli', 88, 89),
        ('aitsce', 89, 92),       # American Indian Tribal Subdivision (Census)
        ('aits', 92, 97),         # American Indian Tribal Subdivision (FIPS)
        ('aitscc', 97, 99),       # FIPS American Indian Tribal Subdivision Class Code
        ('anrc', 99, 104),        # Alaska Native Regional Corporation (FIPS)
        ('anrccc', 104, 106),     # FIPS Alaska Native Regional Corporation Class Code
        # Metropolitan Statistical Area/Consolidated Metropolitan
        # Statistical Area
        ('msacmsa', 106, 110),
        ('masc', 110, 112),
        ('cmsa', 112, 114),       # Consolidated Metropolitan Statistical Area
        ('macci', 114, 115),      # Metropolitan Area Central City Indicator
        ('pmsa', 115, 119),       # Primary Metropolitan Statistical Area
        ('necma', 119, 123),      # New England County Metropolitan Area
        # New England County Metropolitan Area Central City Indicator
        ('necmacci', 123, 124),
        ('necmasc', 124, 126),    # New England County Metropolitan Area Size Code
        ('exi', 126, 127),        # Extended Place Indicator
        ('ua', 127, 132),
        ('uasc', 132, 134),       # Urban Area Size Code
        ('ustype', 134, 135),
        ('ur', 135, 136),
        ('cd106', 136, 138),      # Congressional District (106th)
        ('cd108', 138, 140),      # Congressional District (108th)
        ('cd109', 140, 142),      # Congressional District (109th)
        ('cd110', 142, 144),      # Congressional District (110th)
        ('sldu', 144, 147),       # State Legislative District (Upper Chamber)
        ('sldl', 147, 150),       # State Legislative District (Lower Chamber)
        ('vtd', 150, 156),
        ('vtdi', 156, 157),       # Voting District Indicator
        ('zcta3', 157, 160),      # ZIP Code Tabulation Area (3 digit)
        ('zcta5', 160, 165),      # ZIP Code Tabulation Area (5 digit)
        ('submcd', 165, 170),
        ('submcdcc', 170, 172),   # FIPS Subbarrio Class Code
        ('arealand', 172, 186),
        ('areawatr', 186, 200),
        # Area Name - Legal/Statistical Area Description (LSAD)
        # Term - Part Indicator
        ('name', 200, 290),
        ('funcstat', 290, 291),   # Functional Status Code
        ('gcuni', 291, 292),      # Geographic Change User Note Indicator
        ('pop100', 292, 301),     # Population Count (100%)
        ('hu100', 301, 310),      # Housing Unit Count (100%)
        ('intptlat', 310, 319),   # Internal Point (Latitude)
        ('intptlon', 319, 329),   # Internal Point (Longitude)
        ('lsadc', 329, 331),      # Legal/Statistical Area Description Code
        ('partflag', 331, 332),
        ('sdelm', 332, 337),      # School District (Elementary)
        ('sdsec', 337, 342),      # School District (Secondary)
        ('sduni', 342, 347),      # School District (Unified)
        ('taz', 347, 353),        # Traffic Analysis Zone
        ('uga', 353, 358),        # Oregon Urban Growth Area
        ('puma5', 358, 363),      # Public Use Microdata Area - 5% File
        ('puma1', 363, 368),      # Public Use Microdata Area - 1% File
        ('reserve2', 368, 383),
        ('macc', 383, 388),       # Metropolitan Area Central City
        ('uacp', 388, 393),       # Urban Area Central Place
        ('reserved', 393, 400),
    )

    # NOTE(review): the statement that creates `record` and the final
    # return are not visible in this excerpt. They are reconstructed
    # here from the GeoRecord.MINIMUM_LINE_LENGTH check above and from
    # parse_file, which appends this method's result to its list of
    # GeoRecord objects -- confirm against the full file.
    record = GeoRecord()
    for attribute, start, end in fields:
        setattr(record, attribute, line[start:end])

    return record