]>
gitweb.michael.orlitzky.com - dead/census-tools.git/blob - src/SummaryFile1.py
7 class RecordError ( StandardError ):
12 This class wraps one record in an SF1 geo file.
15 MINIMUM_LINE_LENGTH
= 400
20 Represents a block (which is a special case of a GeoRecord).
21 There are some convenience methods tacked on to make computation
def __init__(self, geo_record):
    """
    Build a Block from a GeoRecord object.

    A RecordError is raised when the record's 'block' field is not
    an integer string, since that means the record we were handed
    does not describe a block at all.
    """
    is_block = StringUtils.is_integer(geo_record.block)
    if not is_block:
        raise RecordError('GeoRecord object does not represent a block.')

    # The identifying codes are copied over verbatim (i.e. kept as
    # strings) so that they don't affect the block_identifier()
    # generation.
    for code in ('state', 'county', 'tract', 'block'):
        setattr(self, code, getattr(geo_record, code))

    # Each int()/float() conversion below throws a ValueError if the
    # input string cannot be converted to the specified type.
    self.pop100 = int(geo_record.pop100)
    self.arealand = float(geo_record.arealand)
    self.areawatr = float(geo_record.areawatr)

    # The intptlat/intptlon fields carry six digits of precision
    # after the decimal point, but contain no decimal point. A value
    # such as +12345678 therefore parses as 12345678.0, and we have
    # to "move" the decimal point six places to the left by hand.
    scale = 10 ** 6
    self.coordinates = GPS.Coordinates()
    self.coordinates.latitude = float(geo_record.intptlat) / scale
    self.coordinates.longitude = float(geo_record.intptlon) / scale
64 # From the Tiger/Line shapefile documentation:
66 # Current block identifier; a concatenation of Census 2000
67 # state FIPS code, Census 2000 county FIPS code, Census
68 # BLKIDFP 16 String 2000 census tract code, Census 2000
69 # tabulation block number, and current block suffix 1.
78 return ( self
. arealand
+ self
. areawatr
)
81 def population_density ( self
):
82 # There are some unusual cases where a block will have a
83 # total area of zero. It also seems that these unusual blocks
84 # do in fact possess geometries, provided in the Tiger database.
85 # Therefore, we allow them to be parsed.
87 # The choice to assign these blocks an average density of 0
90 if ( self
. total_area () == 0 ):
93 return ( self
. pop100
/ self
. total_area ())
97 class GeoRecordParser
:
99 def parse_file ( self
, path
):
101 Assuming that path refers to an SF1 (geo) file, parse the
102 geographic header records contained within it. Return a list
103 of GeoRecord objects.
106 # Our list of GeoRecord objects to return. Empty at first.
112 record
= self
. parse_line ( line
)
113 records
. append ( record
)
120 def parse_blocks ( self
, path
):
121 """Parse only the blocks from a geo file."""
123 records
= self
. parse_file ( path
)
125 for record
in records
:
127 block
= Block ( record
)
133 # A value couldn't be converted to the appropriate type.
def parse_line(self, line):
    """
    Parse one line of an SF1 geo file into a GeoRecord.

    Hopefully, the input will match the specification. Only the line
    length is checked here; every field is stored as the raw string
    slice, and it is left to consumers of the GeoRecord to interpret
    the data meaningfully and throw an error if something doesn't
    look right.

    Raises a RecordError if the line is shorter than
    GeoRecord.MINIMUM_LINE_LENGTH characters.
    """
    if (len(line) < GeoRecord.MINIMUM_LINE_LENGTH):
        raise RecordError("The input line is too short. The SF1 specification requires a line length of %d characters; this line contains only %d characters" % (GeoRecord.MINIMUM_LINE_LENGTH, len(line)))

    record = GeoRecord()

    # Note that Python list indexes are zero-based, whereas the SF1
    # specification gives the field offsets as one-based. For example,
    # the first field, "File Identification," is defined as beginning
    # at position 1, and having length 6. The following line corresponds
    # to this definition.
    record.fileid = line[0:6]

    # State / US Abbreviation (USPS)
    record.stusab = line[6:8]

    # Summary Level
    record.sumlev = line[8:11]

    # Geographic Component
    record.geocomp = line[11:13]

    # Characteristic Iteration
    record.chariter = line[13:16]

    # Characteristic Iteration File Sequence Number
    record.cifsn = line[16:18]

    # Logical Record Number
    record.logrecno = line[18:25]

    # Region
    record.region = line[25]

    # Division
    record.division = line[26]

    # State (Census)
    record.statece = line[27:29]

    # State (FIPS)
    record.state = line[29:31]

    # County
    record.county = line[31:34]

    # County Size Code
    record.countysc = line[34:36]

    # County Subdivision (FIPS)
    record.cousub = line[36:41]

    # FIPS County Subdivision Class Code
    record.cousubcc = line[41:43]

    # County Subdivision Size Code
    record.cousubsc = line[43:45]

    # Place (FIPS)
    record.place = line[45:50]

    # FIPS Place Class Code
    record.placecc = line[50:52]

    # Place Description Code
    record.placedc = line[52]

    # Place Size Code
    record.placesc = line[53:55]

    # Census Tract
    record.tract = line[55:61]

    # Block Group
    record.blkgrp = line[61]

    # Block
    record.block = line[62:66]

    # Internal Use Code
    record.iuc = line[66:68]

    # Consolidated City (FIPS)
    #
    # This is a five-character field starting at position 69 of the
    # specification, i.e. it runs right up to the class code below.
    # Slicing only [68:71] would silently drop its last two digits.
    record.concit = line[68:73]

    # FIPS Consolidated City Class Code
    record.concitcc = line[73:75]

    # Consolidated City Size Code
    record.concitsc = line[75:77]

    # American Indian Area/Alaska Native Area/Hawaiian Home Land
    # (Census)
    record.aianhh = line[77:81]

    # American Indian Area/Alaska Native Area/Hawaiian Home Land
    # (FIPS)
    record.aianhhfp = line[81:86]

    # FIPS American Indian Area/Alaska Native Area/Hawaiian Home
    # Land Class Code
    record.aianhhcc = line[86:88]

    # American Indian Trust Land/Hawaiian Home Land Indicator
    record.aihhtli = line[88]

    # American Indian Tribal Subdivision (Census)
    record.aitsce = line[89:92]

    # American Indian Tribal Subdivision (FIPS)
    record.aits = line[92:97]

    # FIPS American Indian Tribal Subdivision Class Code
    record.aitscc = line[97:99]

    # Alaska Native Regional Corporation (FIPS)
    record.anrc = line[99:104]

    # FIPS Alaska Native Regional Corporation Class Code
    record.anrccc = line[104:106]

    # Metropolitan Statistical Area/Consolidated Metropolitan
    # Statistical Area
    record.msacmsa = line[106:110]

    # MSA/CMSA Size Code
    record.masc = line[110:112]

    # Consolidated Metropolitan Statistical Area
    record.cmsa = line[112:114]

    # Metropolitan Area Central City Indicator
    record.macci = line[114]

    # Primary Metropolitan Statistical Area
    record.pmsa = line[115:119]

    # New England County Metropolitan Area
    record.necma = line[119:123]

    # New England County Metropolitan Area Central City Indicator
    record.necmacci = line[123]

    # New England County Metropolitan Area Size Code
    record.necmasc = line[124:126]

    # Extended Place Indicator
    record.exi = line[126]

    # Urban Area
    record.ua = line[127:132]

    # Urban Area Size Code
    record.uasc = line[132:134]

    # Urban Area Type (called UATYPE in the specification; the
    # attribute name is kept as-is for existing callers).
    record.ustype = line[134]

    # Urban/Rural
    record.ur = line[135]

    # Congressional District (106th)
    record.cd106 = line[136:138]

    # Congressional District (108th)
    record.cd108 = line[138:140]

    # Congressional District (109th)
    record.cd109 = line[140:142]

    # Congressional District (110th)
    record.cd110 = line[142:144]

    # State Legislative District (Upper Chamber)
    record.sldu = line[144:147]

    # State Legislative District (Lower Chamber)
    record.sldl = line[147:150]

    # Voting District
    record.vtd = line[150:156]

    # Voting District Indicator
    record.vtdi = line[156]

    # ZIP Code Tabulation Area (3 digit)
    record.zcta3 = line[157:160]

    # ZIP Code Tabulation Area (5 digit)
    record.zcta5 = line[160:165]

    # Subbarrio (FIPS)
    record.submcd = line[165:170]

    # FIPS Subbarrio Class Code
    record.submcdcc = line[170:172]

    # Area (Land)
    record.arealand = line[172:186]

    # Area (Water)
    record.areawatr = line[186:200]

    # Area Name - Legal/Statistical
    # Area Description (LSAD)
    # Term - Part Indicator
    record.name = line[200:290]

    # Functional Status Code
    record.funcstat = line[290]

    # Geographic Change User Note Indicator
    record.gcuni = line[291]

    # Population Count (100%)
    record.pop100 = line[292:301]

    # Housing Unit Count (100%)
    record.hu100 = line[301:310]

    # Internal Point (Latitude)
    record.intptlat = line[310:319]

    # Internal Point (Longitude)
    record.intptlon = line[319:329]

    # Legal/Statistical Area Description Code
    record.lsadc = line[329:331]

    # Part Flag
    record.partflag = line[331]

    # School District (Elementary)
    record.sdelm = line[332:337]

    # School District (Secondary)
    record.sdsec = line[337:342]

    # School District (Unified)
    record.sduni = line[342:347]

    # Traffic Analysis Zone
    record.taz = line[347:353]

    # Oregon Urban Growth Area
    record.uga = line[353:358]

    # Public Use Microdata Area - 5% File
    record.puma5 = line[358:363]

    # Public Use Microdata Area - 1% File
    record.puma1 = line[363:368]

    # Reserved
    record.reserve2 = line[368:383]

    # Metropolitan Area Central City
    record.macc = line[383:388]

    # Urban Area Central Place
    record.uacp = line[388:393]

    # Reserved
    record.reserved = line[393:400]

    return record