"""
Classes for parsing LEHD (Longitudinal Employer-Household Dynamics)
data. In particular, we are currently parsing the Origin-Destination
data provided by OnTheMap:

  http://www.vrdc.cornell.edu/onthemap/doc/index.html

"""

import os

from Errors import RecordError


class OriginDestinationRecord(object):
    """
    A single row of an Origin-Destination matrix file.

    The fields contained within one of these files are named in the
    first (header) row of the corresponding CSV file; the parser
    attaches one attribute per column to each instance. At this time,
    documentation does not seem to be available for the newer version
    3.x fields.
    """

    # Minimum number of comma-separated columns a data row must have.
    NUM_FIELDS = 13


class OriginDestinationRecordParser(object):
    """
    Reads Origin-Destination matrix files and turns each data row
    into an OriginDestinationRecord.
    """

    # This is the header for an Origin-Destination matrix file. If
    # this isn't the first line of our file, then something is wrong.
    HEADER = 'w_geocode,h_geocode,total,age1,age2,age3,earn1,earn2,earn3,ind1,ind2,ind3,createdate'

    # Attribute names assigned to every record, in column order. They
    # are derived once from the header so the two cannot drift apart.
    _FIELD_NAMES = tuple(HEADER.split(','))

    def parse_file(self, path):
        """
        Assuming that path refers to an Origin-Destination file, parse
        each record contained therein. These files should simply be
        CSV with no text qualifiers, and can therefore be parsed based
        on comma placement alone.

        path    -- location of the file to read.

        Returns a list of OriginDestinationRecord objects, one per
        line following the header, in file order.

        Raises RecordError if the first line (ignoring surrounding
        whitespace) is not HEADER, or if any later line has too few
        fields.
        """
        # The with-block guarantees the file is closed even when a
        # RecordError is raised part-way through the file.
        with open(path, 'r') as f:
            first_line = f.readline().strip()
            if first_line != self.HEADER:
                raise RecordError('According to the header (first row), this is not an Origin-Destination matrix file. The first line of an Origin-Destination matrix file should be,\n %s \nBut, the first line of your file is,\n %s\n' % (self.HEADER, first_line))

            # We have already read the header line, so this process
            # starts on the first non-header line.
            return [self.parse_line(line) for line in f]

    def parse_line(self, line):
        """
        Parse one line of an Origin-Destination matrix file into an
        OriginDestinationRecord object.

        line    -- one data row; a trailing line terminator, if
                   present, is ignored.

        Returns an OriginDestinationRecord whose attributes (named
        after the header columns) hold the raw, unconverted string
        values of the first NUM_FIELDS columns. Any additional
        columns are ignored.

        Raises RecordError if the line has fewer than
        OriginDestinationRecord.NUM_FIELDS comma-separated fields.
        """
        # Remove the line terminator first; otherwise it would end up
        # as part of the last column (createdate).
        line = line.rstrip('\r\n')
        fields = line.split(',')

        if len(fields) < OriginDestinationRecord.NUM_FIELDS:
            raise RecordError("The line,\n %s \n does not contain enough fields. The minimum number of fields required in an Origin-Destination matrix file is %d. This line contains %d fields." % (line, OriginDestinationRecord.NUM_FIELDS, len(fields)))

        od = OriginDestinationRecord()
        # zip stops at the shorter sequence, so surplus trailing
        # columns are silently dropped.
        for name, value in zip(self._FIELD_NAMES, fields):
            setattr(od, name, value)

        return od