Added tests and fixtures for the LEHD module.
Updated the run_tests script to include the new LEHD tests.
from Tests.Unit import FileUtilsTest
from Tests.Unit import GeometryTest
from Tests.Unit import KMLTest
+from Tests.Unit import LEHDTest
from Tests.Unit import SummaryFile1Test
from Tests.Unit import StringUtilsTest
suite.addTest(KMLTest.suite())
suite.addTest(SummaryFile1Test.suite())
suite.addTest(StringUtilsTest.suite())
+suite.addTest(LEHDTest.suite())
unittest.TextTestRunner(verbosity=2).run(suite)
--- /dev/null
+"""
+Classes for parsing LEHD (Longitudinal Employer-Household Dynamics)
+data. In particular, we currently parse the Origin-Destination
+data provided by OnTheMap:
+
+ http://www.vrdc.cornell.edu/onthemap/doc/index.html
+
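+A minimal usage sketch (the file name below is hypothetical; any
+Origin-Destination CSV with the version 3 header should work):
+
+    parser = OriginDestinationRecordParser()
+    records = parser.parse_file('od_main_JT00.csv')
+    records[0].w_geocode    # e.g. '240010001001011'
+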
+"""
+
+from Errors import RecordError
+
+
+class OriginDestinationRecord(object):
+ """
+ Represents one record in an Origin-Destination matrix file. The
+ field contained within one of these files are recorded in the
+ first row of the appropriate CSV files. At this time,
+ documentation does not seem to be available for the newer version
+ 3.x fields.
+ """
+    NUM_FIELDS = 13
+
+
+class OriginDestinationRecordParser(object):
+
+    # This is the header for an Origin-Destination matrix file. If
+    # this isn't the first line of our file, then something is wrong.
+    HEADER = 'w_geocode,h_geocode,total,age1,age2,age3,earn1,earn2,earn3,ind1,ind2,ind3,createdate'
+
+    def parse_file(self, path):
+        """
+        Assuming that path refers to an Origin-Destination file, parse
+        each record contained therein. These files should simply be
+        CSV with no text qualifiers, and can therefore be parsed based
+        on comma placement alone.
+        """
+
+        # Our return value, a list of records.
+        records = []
+
+        # Use a with-block so the file is closed even if a
+        # RecordError is raised partway through parsing.
+        with open(path, 'r') as f:
+            first_line = f.readline().strip()
+            if first_line != self.HEADER:
+                raise RecordError('According to the header (first row), this is not an Origin-Destination matrix file. The first line of an Origin-Destination matrix file should be:\n %s\nbut the first line of your file is:\n %s\n' % (self.HEADER, first_line))
+
+            # The header line has already been consumed, so iteration
+            # begins on the first data line.
+            for line in f:
+                records.append(self.parse_line(line))
+
+        return records
+
+
+    def parse_line(self, line):
+        """
+        Parse one line of an Origin-Destination matrix file into an
+        OriginDestinationRecord object.
+        """
+        # Strip the trailing newline so that the last field
+        # (createdate) does not end up with a '\n' attached.
+        fields = line.strip().split(',')
+
+        if len(fields) < OriginDestinationRecord.NUM_FIELDS:
+            raise RecordError("The line,\n %s \ndoes not contain enough fields. The minimum number of fields required in an Origin-Destination matrix file is %d; this line contains %d fields." % (line, OriginDestinationRecord.NUM_FIELDS, len(fields)))
+
+        od = OriginDestinationRecord()
+        od.w_geocode = fields[0]
+        od.h_geocode = fields[1]
+        od.total = fields[2]
+        od.age1 = fields[3]
+        od.age2 = fields[4]
+        od.age3 = fields[5]
+        od.earn1 = fields[6]
+        od.earn2 = fields[7]
+        od.earn3 = fields[8]
+        od.ind1 = fields[9]
+        od.ind2 = fields[10]
+        od.ind3 = fields[11]
+        od.createdate = fields[12]
+
+        return od
--- /dev/null
+240010001001011,240010001001033,1,1,0,0,0,1,0,0,1,0,20090211
+240010001001011,240010001002059,1,0,1,0,0,1,0,0,1,0,20090211
+240010001001011,240010001002070,1,1,0,0,1,0,0,0,1,0,20090211
+240010001001011,240010001002155,1,0,1,0,1,0,0,0,1,0,20090211
+240010001001011,240010004001008,1,0,1,0,1,0,0,0,1,0,20090211
+240010001001011,240010004002022,1,0,1,0,1,0,0,0,1,0,20090211
+240010001001011,240037401042006,1,0,1,0,1,0,0,0,1,0,20090211
+240010001001011,240317017032003,1,1,0,0,1,0,0,0,1,0,20090211
+240010001001011,240338005114011,1,0,0,1,1,0,0,0,1,0,20090211
+240010001001011,240430107002085,1,1,0,0,1,0,0,0,1,0,20090211
--- /dev/null
+w_geocode,h_geocode,total,age1,age2,age3,earn1,earn2,earn3,ind1,ind2,ind3
+240010001001011,240010001001033,1,1,0,0,0,1,0,0,1,0
+240010001001011,240010001002059,1,0,1,0,0,1,0,0,1,0
+240010001001011,240010001002070,1,1,0,0,1,0,0,0,1,0
+240010001001011,240010001002155,1,0,1,0,1,0,0,0,1,0
+240010001001011,240010004001008,1,0,1,0,1,0,0,0,1,0
+240010001001011,240010004002022,1,0,1,0,1,0,0,0,1,0
+240010001001011,240037401042006,1,0,1,0,1,0,0,0,1,0
+240010001001011,240317017032003,1,1,0,0,1,0,0,0,1,0
+240010001001011,240338005114011,1,0,0,1,1,0,0,0,1,0
+240010001001011,240430107002085,1,1,0,0,1,0,0,0,1,0
--- /dev/null
+w_geocode,h_geocode,total,age1,age2,age3,earn1,earn2,earn3,ind1,ind2,ind3,createdate
+240010001001011,240010001001033,1,1,0,0,0,1,0,0,1,0,20090211
+240010001001011,240010001002059,1,0,1,0,0,1,0,0,1,0,20090211
+240010001001011,240010001002070,1,1,0,0,1,0,0,0,1,0,20090211
+240010001001011,240010001002155,1,0,1,0,1,0,0,0,1,0,20090211
+240010001001011,240010004001008,1,0,1,0,1,0,0,0,1,0,20090211
+240010001001011,240010004002022,1,0,1,0,1,0,0,0,1,0,20090211
+240010001001011,240037401042006,1,0,1,0,1,0,0,0,1,0,20090211
+240010001001011,240317017032003,1,1,0,0,1,0,0,0,1,0,20090211
+240010001001011,240338005114011,1,0,0,1,1,0,0,0,1,0,20090211
+240010001001011,240430107002085,1,1,0,0,1,0,0,0,1,0,20090211
--- /dev/null
+import unittest
+
+import Tests.Fixtures
+from Errors import RecordError
+import LEHD
+
+
+class OriginDestinationRecordParserTest(unittest.TestCase):
+
+    def setUp(self):
+        self.odrp = LEHD.OriginDestinationRecordParser()
+
+
+    def testAllOfSubsetParsed(self):
+        fixture_path = Tests.Fixtures.Path() + '/LEHD/ten_records.csv'
+        records = self.odrp.parse_file(fixture_path)
+        self.assertEqual(len(records), 10)
+
+
+    def testErrorOnMissingColumns(self):
+        fixture_path = Tests.Fixtures.Path() + '/LEHD/ten_records-twelve_columns.csv'
+        self.assertRaises(RecordError, self.odrp.parse_file, fixture_path)
+
+
+    def testErrorOnMissingHeader(self):
+        fixture_path = Tests.Fixtures.Path() + '/LEHD/ten_records-no_header.csv'
+        self.assertRaises(RecordError, self.odrp.parse_file, fixture_path)
+
+
+def suite():
+    suite = unittest.TestSuite()
+    suite.addTest(unittest.makeSuite(OriginDestinationRecordParserTest))
+    return suite
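+
+
+# Convenience runner so this test module can be executed directly;
+# it mirrors the TextTestRunner call used by the run_tests script.
+if __name__ == '__main__':
+    unittest.TextTestRunner(verbosity=2).run(suite())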