X-Git-Url: https://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FSummaryFile1.py;h=d646b07e70df62fd9bc0c7d478626840fe91ab7d;hb=63f2205c95de0992570d3f83ad07786c0102c3ce;hp=50f23ad0c96a83227137d60c7ff0fd2a4057efe8;hpb=84d512e317cac78d67bec5c64fcf761d3f5e0689;p=dead%2Fcensus-tools.git diff --git a/src/SummaryFile1.py b/src/SummaryFile1.py index 50f23ad..d646b07 100644 --- a/src/SummaryFile1.py +++ b/src/SummaryFile1.py @@ -1,45 +1,97 @@ -import os, GPS, inspect +import os + +import GPS +import StringUtils + class RecordError(StandardError): pass - class GeoRecord: """ This class wraps one record in an SF1 geo file. """ - MinimumLineLength = 400 + MINIMUM_LINE_LENGTH = 400 class Block: """ - Represents a block (which is a special case of a GeoRecord. - All we care about here is the block number, population, - area, and coordinates. + Represents a block (which is a special case of a GeoRecord). + There are some convenience methods tacked on to make computation + and querying easier. """ def __init__(self, geo_record): - """We initialize from a GeoRecord object""" + """ + We initialize from a GeoRecord object. It is important that + we raise some kind of error if there is no 'block' field, since + that means we weren't passed a block. + """ + if not (StringUtils.is_integer(geo_record.block)): + raise RecordError('GeoRecord object does not represent a block.') + + # These need to be stored as strings so they don't + # affect the block_identifier() generation. + self.state = geo_record.state + self.county = geo_record.county + self.tract = geo_record.tract + self.block = geo_record.block + # All of these int/float conversions will throw a ValueError # if the input string cannot be converted o the specified # type. 
- self.block_number = int(geo_record.block) - self.population = int(geo_record.pop100) - self.area_land = float(geo_record.arealand) - self.area_water = float(geo_record.areawatr) - + self.pop100 = int(geo_record.pop100) + self.arealand = float(geo_record.arealand) + self.areawatr = float(geo_record.areawatr) + + # Both latitude and longitude are given to six digits of + # precision (i.e. after the decimal point). But, there are no + # decimal points in the intptlon/intptlat fields, so we need + # to add them. + # + # By default, the coordinates will be parsed as integers. For + # example, +12345678 will be parsed as 12345678.0. So, we need + # to "move" that decimal point 6 places to the left. We know + # how to do that. + # self.coordinates = GPS.Coordinates() - self.coordinates.latitude = float(geo_record.intptlat) - self.coordinates.longitude = float(geo_record.intptlon) + self.coordinates.latitude = (float(geo_record.intptlat) / (10**6)) + self.coordinates.longitude = (float(geo_record.intptlon) / (10**6)) + + + def tiger_blkidfp00(self): + # From the Tiger/Line shapefile documentation: + # + # BLKIDFP (16, String): Current block identifier; a concatenation + # of Census 2000 state FIPS code, Census 2000 county FIPS code, + # Census 2000 census tract code, Census 2000 tabulation block + # number, and current block suffix 1. + # + return (self.state + + self.county + + self.tract + + self.block) def total_area(self): - return (self.area_land + self.area_water) + return (self.arealand + self.areawatr) def population_density(self): - return (self.population / self.total_area()) + # There are some unusual cases where a block will have a + # total area of zero. It also seems that these unusual blocks + # do in fact possess geometries, provided in the Tiger database. + # Therefore, we allow them to be parsed. + # + # The choice to assign these blocks an average density of 0 + # was arbitrary. 
+ # + if (self.total_area() == 0): + return 0 + else: + return (self.pop100 / self.total_area()) + class GeoRecordParser: @@ -74,7 +126,11 @@ class GeoRecordParser: try: block = Block(record) blocks.append(block) + except RecordError: + # Ain't a block. + continue except ValueError: + # A value couldn't be converted to the appropriate type. continue return blocks @@ -88,8 +144,8 @@ class GeoRecordParser: allow the GeoRecord class to parse the data meaningfully and throw an error if something doesn't look right. """ - if (len(line) < GeoRecord.MinimumLineLength): - raise RecordError("The input line is too short. The SF1 specification requires a line length of %d characters; this line contains only %d characters" % (GeoRecord.MinimumLineLength, len(line))) + if (len(line) < GeoRecord.MINIMUM_LINE_LENGTH): + raise RecordError("The input line is too short. The SF1 specification requires a line length of %d characters; this line contains only %d characters" % (GeoRecord.MINIMUM_LINE_LENGTH, len(line))) record = GeoRecord()