import os

from Errors import RecordError
import GPS
import StringUtils


class GeoRecord:
    """
    This class wraps one record in an SF1 geo file.

    Instances start out empty; GeoRecordParser.parse_line() attaches
    one string attribute per field of the geographic header record.
    """

    # Number of characters in one fixed-width geographic header record.
    MINIMUM_LINE_LENGTH = 400


class Block:
    """
    Represents a block (which is a special case of a GeoRecord). There
    are some convenience methods tacked on to make computation and
    querying easier.
    """

    def __init__(self, geo_record):
        """
        We initialize from a GeoRecord object. It is important that we
        raise some kind of error if there is no 'block' field, since
        that means we weren't passed a block.

        Raises RecordError if the record has no integer 'block' field,
        and ValueError if one of the numeric fields cannot be converted.
        """
        # A record without a 'block' attribute is reported the same way
        # as one whose block field is blank: it is not a block.
        block = getattr(geo_record, 'block', None)
        if block is None or not StringUtils.is_integer(block):
            raise RecordError('GeoRecord object does not represent a block.')

        # These need to be stored as strings so they don't
        # affect the block identifier (blkidfp00) generation.
        self.state = geo_record.state
        self.county = geo_record.county
        self.tract = geo_record.tract
        self.block = block

        # All of these int/float conversions will throw a ValueError
        # if the input string cannot be converted to the specified
        # type.
        self.pop100 = int(geo_record.pop100)
        self.arealand = float(geo_record.arealand)
        self.areawatr = float(geo_record.areawatr)

        # Both latitude and longitude are given to six digits of
        # precision (i.e. after the decimal point). But, there are no
        # decimal points in the intptlon/intptlat fields, so we need
        # to add them.
        #
        # By default, the coordinates will be parsed as whole numbers.
        # For example, +12345678 will be parsed as 12345678.0. So, we
        # need to "move" that decimal point 6 places to the left,
        # which is a division by 10**6.
        self.coordinates = GPS.Coordinates()
        self.coordinates.latitude = float(geo_record.intptlat) / (10**6)
        self.coordinates.longitude = float(geo_record.intptlon) / (10**6)

    def blkidfp00(self):
        """
        Return the block identifier: the concatenation of the state,
        county, tract, and block fields (all strings).
        """
        # From the Tiger/Line shapefile documentation:
        #
        #   BLKIDFP (16, String): Current block identifier; a
        #   concatenation of Census 2000 state FIPS code, Census 2000
        #   county FIPS code, Census 2000 census tract code, Census
        #   2000 tabulation block number, and current block suffix 1.
        #
        # No block suffix is appended here.
        return self.state + self.county + self.tract + self.block

    def total_area(self):
        """Return the sum of the land and water areas."""
        return self.arealand + self.areawatr

    def population_density(self):
        """
        Return the population (pop100) divided by the total area, or 0
        when the total area is zero.
        """
        # There are some unusual cases where a block will have a
        # total area of zero. It also seems that these unusual blocks
        # do in fact possess geometries, provided in the Tiger database.
        # Therefore, we allow them to be parsed.
        #
        # The choice to assign these blocks an average density of 0
        # was arbitrary.
        area = self.total_area()
        if area == 0:
            return 0
        return self.pop100 / area


class GeoRecordParser:
    """Parses SF1 geo files into GeoRecord and Block objects."""

    # Layout of one geographic header record as (attribute, width)
    # pairs, in file order. Offsets are derived by accumulating the
    # widths, so fields can neither overlap nor leave gaps, and the
    # widths add up to GeoRecord.MINIMUM_LINE_LENGTH.
    #
    # Note that Python indexes are zero-based, whereas the SF1
    # specification gives the field offsets as one-based. For example,
    # the first field, "File Identification," is defined as beginning
    # at position 1 and having length 6; it is parsed as line[0:6].
    FIELD_WIDTHS = (
        ('fileid', 6),      # File Identification
        ('stusab', 2),      # State / US Abbreviation (USPS)
        ('sumlev', 3),      # Summary Level
        ('geocomp', 2),     # Geographic Component
        ('chariter', 3),    # Characteristic Iteration
        ('cifsn', 2),       # Characteristic Iteration File Sequence Number
        ('logrecno', 7),    # Logical Record Number
        ('region', 1),      # Region
        ('division', 1),    # Division
        ('statece', 2),     # State (Census)
        ('state', 2),       # State (FIPS)
        ('county', 3),      # County
        ('countysc', 2),    # County Size Code
        ('cousub', 5),      # County Subdivision (FIPS)
        ('cousubcc', 2),    # FIPS County Subdivision Class Code
        ('cousubsc', 2),    # County Subdivision Size Code
        ('place', 5),       # Place (FIPS)
        ('placecc', 2),     # FIPS Place Class Code
        ('placedc', 1),     # Place Description Code
        ('placesc', 2),     # Place Size Code
        ('tract', 6),       # Census Tract
        ('blkgrp', 1),      # Block Group
        ('block', 4),       # Block
        ('iuc', 2),         # Internal Use Code
        ('concit', 5),      # Consolidated City (FIPS)
        ('concitcc', 2),    # FIPS Consolidated City Class Code
        ('concitsc', 2),    # Consolidated City Size Code
        # American Indian Area/Alaska Native Area/Hawaiian Home Land
        # (Census)
        ('aianhh', 4),
        # American Indian Area/Alaska Native Area/Hawaiian Home Land
        # (FIPS)
        ('aianhhfp', 5),
        # FIPS American Indian Area/Alaska Native Area/Hawaiian Home
        # Land Class Code
        ('aianhhcc', 2),
        # American Indian Trust Land/Hawaiian Home Land Indicator
        ('aihhtli', 1),
        ('aitsce', 3),      # American Indian Tribal Subdivision (Census)
        ('aits', 5),        # American Indian Tribal Subdivision (FIPS)
        # FIPS American Indian Tribal Subdivision Class Code
        ('aitscc', 2),
        ('anrc', 5),        # Alaska Native Regional Corporation (FIPS)
        # FIPS Alaska Native Regional Corporation Class Code
        ('anrccc', 2),
        # Metropolitan Statistical Area/Consolidated Metropolitan
        # Statistical Area
        ('msacmsa', 4),
        ('masc', 2),        # MSA/CMSA Size Code
        ('cmsa', 2),        # Consolidated Metropolitan Statistical Area
        ('macci', 1),       # Metropolitan Area Central City Indicator
        ('pmsa', 4),        # Primary Metropolitan Statistical Area
        ('necma', 4),       # New England County Metropolitan Area
        # New England County Metropolitan Area Central City Indicator
        ('necmacci', 1),
        # New England County Metropolitan Area Size Code
        ('necmasc', 2),
        ('exi', 1),         # Extended Place Indicator
        ('ua', 5),          # Urban Area
        ('uasc', 2),        # Urban Area Size Code
        ('ustype', 1),      # Urban Area Type
        ('ur', 1),          # Urban/Rural
        ('cd106', 2),       # Congressional District (106th)
        ('cd108', 2),       # Congressional District (108th)
        ('cd109', 2),       # Congressional District (109th)
        ('cd110', 2),       # Congressional District (110th)
        ('sldu', 3),        # State Legislative District (Upper Chamber)
        ('sldl', 3),        # State Legislative District (Lower Chamber)
        ('vtd', 6),         # Voting District
        ('vtdi', 1),        # Voting District Indicator
        ('zcta3', 3),       # ZIP Code Tabulation Area (3 digit)
        ('zcta5', 5),       # ZIP Code Tabulation Area (5 digit)
        ('submcd', 5),      # Subbarrio (FIPS)
        ('submcdcc', 2),    # FIPS Subbarrio Class Code
        ('arealand', 14),   # Area (Land)
        ('areawatr', 14),   # Area (Water)
        # Area Name - Legal/Statistical Area Description (LSAD)
        # Term - Part Indicator
        ('name', 90),
        ('funcstat', 1),    # Functional Status Code
        ('gcuni', 1),       # Geographic Change User Note Indicator
        ('pop100', 9),      # Population Count (100%)
        ('hu100', 9),       # Housing Unit Count (100%)
        ('intptlat', 9),    # Internal Point (Latitude)
        ('intptlon', 10),   # Internal Point (Longitude)
        ('lsadc', 2),       # Legal/Statistical Area Description Code
        ('partflag', 1),    # Part Flag
        ('sdelm', 5),       # School District (Elementary)
        ('sdsec', 5),       # School District (Secondary)
        ('sduni', 5),       # School District (Unified)
        ('taz', 6),         # Traffic Analysis Zone
        ('uga', 5),         # Oregon Urban Growth Area
        ('puma5', 5),       # Public Use Microdata Area - 5% File
        ('puma1', 5),       # Public Use Microdata Area - 1% File
        ('reserve2', 15),   # Reserved
        ('macc', 5),        # Metropolitan Area Central City
        ('uacp', 5),        # Urban Area Central Place
        ('reserved', 7),    # Reserved
    )

    def parse_file(self, path):
        """
        Assuming that path refers to an SF1 (geo) file, parse the
        geographic header records contained within it. Return a list
        of GeoRecord objects.

        Raises RecordError (from parse_line) if any line is too short.
        """
        # The 'with' block closes the file even if a line fails to
        # parse.
        with open(path, 'r') as f:
            return [self.parse_line(line) for line in f]

    def parse_blocks(self, path):
        """
        Parse only the blocks from a geo file. Return a list of Block
        objects; records that are not blocks, or whose numeric fields
        cannot be converted, are skipped.
        """
        blocks = []

        for record in self.parse_file(path):
            try:
                blocks.append(Block(record))
            except RecordError:
                # Ain't a block.
                continue
            except ValueError:
                # A value couldn't be converted to the appropriate type.
                continue

        return blocks

    def parse_line(self, line):
        """
        Parse one line of an SF1 geo file. Hopefully, the input will
        match the specification. Only the line length is checked here;
        every field is stored on the returned GeoRecord as the raw
        string sliced out of the line.

        Raises RecordError if the line (without its line terminator)
        is shorter than GeoRecord.MINIMUM_LINE_LENGTH.
        """
        # The line terminator is not part of the record, so it must not
        # count towards the length or leak into the last field.
        line = line.rstrip('\r\n')

        if len(line) < GeoRecord.MINIMUM_LINE_LENGTH:
            raise RecordError(
                "The input line is too short. The SF1 specification "
                "requires a line length of %d characters; this line "
                "contains only %d characters"
                % (GeoRecord.MINIMUM_LINE_LENGTH, len(line)))

        record = GeoRecord()

        # Walk the layout table, slicing each field off at the running
        # offset.
        start = 0
        for attribute, width in GeoRecordParser.FIELD_WIDTHS:
            end = start + width
            setattr(record, attribute, line[start:end])
            start = end

        return record