module TSN.Parse (
  parse_message,
  parse_time_stamp,
  parse_xml_time_stamp,
  parse_xmlfid,
  time_format,
  time_stamp_format )
where

import Data.Either.Utils ( maybeToEither )
import Data.Time.Clock ( NominalDiffTime, UTCTime, addUTCTime )
import Data.Time.Format ( parseTime )
import System.Locale ( defaultTimeLocale )
import Text.Read ( readMaybe )
import Text.XML.HXT.Core (
  XmlTree,
  (>>>),
  (/>),
  getChildren,
  getText,
  hasName,
  runLA )


-- | Find the \"message\" element directly beneath the document root
--   and hand it back as an 'XmlTree'. When several are present, the
--   first one wins; when there are none, the result is a 'Left'
--   carrying an explanation. An Either is used (rather than a Maybe)
--   to stay consistent with the other parsers in this module.
--
--   Note: 'parse_xmlfid' and 'parse_xml_time_stamp' deliberately do
--   not build on this function; it was judged to be more trouble
--   than it's worth.
--
parse_message :: XmlTree -> Either String XmlTree
parse_message xmltree =
  first_message $ runLA (hasName "/" /> hasName "message") xmltree
  where
    -- Only the first match is of interest.
    first_message :: [XmlTree] -> Either String XmlTree
    first_message (m:_) = Right m
    first_message []    = Left "No message elements found."


-- | Collect the text of the children of every element with the given
--   name that sits directly under \"message\", which in turn must
--   sit directly under the document root. Results are returned in
--   the order that the arrow produces them. This is an internal
--   helper shared by 'parse_xmlfid' and 'parse_xml_time_stamp'.
--
message_child_texts :: String -> XmlTree -> [String]
message_child_texts name =
  runLA $ hasName "/" /> hasName "message" /> hasName name
            >>> getChildren
            >>> getText


-- | Pull the \"XML_File_ID\" out of a document as an 'Integer'. On
--   failure, the reason is returned in a 'Left'. There are two ways
--   to fail:
--
--   1. The document contains no XML_File_ID element (with text).
--
--   2. The first XML_File_ID found cannot be read as an Integer.
--
--   An Either is used instead of a Maybe because non-integer
--   XML_File_IDs really do occur. In the examples,
--   NHL_DepthChart_XML.XML has an XML_File_ID of \"49618.61\" and
--   CFL_Boxscore_XML1.xml has an XML_File_ID of \"R28916\". According
--   to Brijesh Patel of TSN, these are special category files and
--   not part of the usual feed.
--
--   TODO: This should eventually be combined with XML.parse_xmlfid
--   from the htsn package.
--
parse_xmlfid :: XmlTree -> Either String Integer
parse_xmlfid xmltree =
  case message_child_texts "XML_File_ID" xmltree of
    (s:_) -> to_integer s
    []    -> Left "No XML_File_ID elements found."
  where
    -- Read the ID, or explain which string could not be read.
    to_integer :: String -> Either String Integer
    to_integer s =
      maybeToEither ("Could not parse XML_File_ID " ++ s ++ " as an integer.")
                    (readMaybe s)


-- | The format string for times appearing in the feed.
--
time_format :: String
time_format = "%I:%M %p"


-- | The format string for a time_stamp. This omits the leading and
--   trailing space.
--
time_stamp_format :: String
time_stamp_format = "%B %-d, %Y, at " ++ time_format ++ " ET"


-- | Try to read a time stamp (in 'time_stamp_format') from a
--   'String', giving 'Nothing' if the string doesn't match.
--
--   TSN doesn't provide a proper time zone name, so the time is
--   always assumed to be Eastern Standard Time. EST is UTC-5, so
--   five hours are added to the parsed value to obtain UTC.
--
parse_time_stamp :: String -> Maybe UTCTime
parse_time_stamp s =
  fmap (addUTCTime est_offset) (parseTime defaultTimeLocale time_stamp_format s)
  where
    -- The distance from EST to UTC, in seconds.
    est_offset :: NominalDiffTime
    est_offset = 5 * 60 * 60


-- | Pull the \"time_stamp\" out of a document as a 'UTCTime'. On
--   failure, the reason is returned in a 'Left'. There are two ways
--   to fail:
--
--   1. The document contains no time_stamp element (with text).
--
--   2. The first time_stamp found cannot be read as a UTCTime by
--      'parse_time_stamp'.
--
--   Unlike 'parse_xmlfid', we don't expect to run into any
--   time_stamps that we can't parse. But since 'parse_xmlfid' returns
--   an Either, this function does too, for consistency.
--
parse_xml_time_stamp :: XmlTree -> Either String UTCTime
parse_xml_time_stamp xmltree =
  case message_child_texts "time_stamp" xmltree of
    (s:_) -> to_utctime s
    []    -> Left "No time_stamp elements found."
  where
    -- Read the time stamp, or explain which string could not be read.
    to_utctime :: String -> Either String UTCTime
    to_utctime s =
      maybeToEither ("Could not parse time_stamp " ++ s ++ " as a date/time.")
                    (parse_time_stamp s)