--- /dev/null
+"""
+Classes for parsing LEHD (Longitudinal Employer-Household Dynamics)
+data. In particular, we are currently parsing the Origin-Destination
+data provided by OnTheMap:
+
+ http://www.vrdc.cornell.edu/onthemap/doc/index.html
+
+"""
+
+import os
+
+from Errors import RecordError
+
+
class OriginDestinationRecord(object):
    """
    Represents one record in an Origin-Destination matrix file. The
    fields contained within one of these files are recorded in the
    first row of the appropriate CSV files. At this time,
    documentation does not seem to be available for the newer version
    3.x fields.

    Every name in FIELD_NAMES is available as an instance attribute.
    A freshly constructed record has all of them set to None; whoever
    populates the record stores the raw values it read from the file.
    """

    # Column names, in file order. NOTE(review): the meaning of each
    # column (w_/h_ presumably "work"/"home" geocode, the numbered
    # age/earn/ind columns presumably category counts) is not
    # established anywhere in this module -- confirm against the LEHD
    # documentation before relying on it.
    FIELD_NAMES = (
        'w_geocode',
        'h_geocode',
        'total',
        'age1',
        'age2',
        'age3',
        'earn1',
        'earn2',
        'earn3',
        'ind1',
        'ind2',
        'ind3',
        'createdate',
    )

    # The minimum number of fields a line must contain (13). Derived
    # from FIELD_NAMES so that the two can never disagree.
    NUM_FIELDS = len(FIELD_NAMES)

    def __init__(self):
        # Declare every field up front so that reading an attribute
        # on a record that has not been populated yet yields None
        # rather than raising AttributeError.
        for name in self.FIELD_NAMES:
            setattr(self, name, None)

    def __repr__(self):
        pairs = ', '.join('%s=%r' % (name, getattr(self, name, None))
                          for name in self.FIELD_NAMES)
        return '%s(%s)' % (type(self).__name__, pairs)
+
+
+
class OriginDestinationRecordParser(object):
    """
    Parses Origin-Destination matrix files (plain comma-separated
    text with a fixed header row) into OriginDestinationRecord
    objects.
    """

    # This is the header for an Origin-Destination matrix file. If
    # this isn't the first line of our file, then something is wrong.
    HEADER = 'w_geocode,h_geocode,total,age1,age2,age3,earn1,earn2,earn3,ind1,ind2,ind3,createdate'

    def parse_file(self, path):
        """
        Assuming that path refers to an Origin-Destination file, parse
        each record contained therein. These files should simply be
        CSV with no text qualifiers, and can therefore be parsed based
        on comma placement alone.

        Returns a list of OriginDestinationRecord objects, one per
        non-blank line following the header, in file order.

        Raises RecordError if the first line of the file is not
        HEADER, or if any subsequent non-blank line contains too few
        fields.
        """

        # Our return value, a list of records.
        records = []

        # The with-block guarantees that the file is closed even when
        # a RecordError propagates out of the header check or out of
        # parse_line.
        with open(path, 'r') as f:
            first_line = f.readline().strip()
            if first_line != self.HEADER:
                raise RecordError('According to the header (first row), this is not an Origin-Destination matrix file. The first line of an Origin-Destination matrix file should be,\n %s \nBut, the first line of your file is,\n %s\n' % (self.HEADER, first_line))

            # We have already read the header line, so this process
            # should start on the first non-header line.
            for line in f:
                # A blank line (typically a stray newline at the end
                # of the file) carries no record; skip it instead of
                # rejecting the entire file.
                if not line.strip():
                    continue
                records.append(self.parse_line(line))

        return records

    def parse_line(self, line):
        """
        Parse one line of an Origin-Destination matrix file into an
        OriginDestinationRecord object.

        The values are stored as the raw strings found between the
        commas; no type conversion is performed. Any fields beyond
        the first NUM_FIELDS are ignored.

        Raises RecordError if the line contains fewer than
        OriginDestinationRecord.NUM_FIELDS fields.
        """
        # Drop the line terminator first; otherwise it would end up
        # glued onto the last field (createdate).
        text = line.rstrip('\r\n')
        fields = text.split(',')

        if len(fields) < OriginDestinationRecord.NUM_FIELDS:
            raise RecordError("The line,\n %s \n does not contain enough fields. The minimum number of fields required in an Origin-Destination matrix file is %d. This line contains %d fields." % (text, OriginDestinationRecord.NUM_FIELDS, len(fields)))

        od = OriginDestinationRecord()

        # The header names the columns in file order, so each value is
        # stored under the attribute named by its column. zip() stops
        # after the last header name, ignoring any surplus fields.
        for name, value in zip(self.HEADER.split(','), fields):
            setattr(od, name, value)

        return od