From 1201e197cdd5e275624959980b8de2ee7705f1fc Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Sun, 1 Jun 2014 00:12:39 -0400 Subject: [PATCH] Move some things from TSN.Picklers into TSN.Parse. Add XML tree parsing functions to TSN.Parse. --- htsn-import.cabal | 1 + src/TSN/Parse.hs | 148 ++++++++++++++++++++++++++++++++++++++++++++ src/TSN/Picklers.hs | 34 +++++----- 3 files changed, 164 insertions(+), 19 deletions(-) create mode 100644 src/TSN/Parse.hs diff --git a/htsn-import.cabal b/htsn-import.cabal index 1e291e7..0a9f285 100644 --- a/htsn-import.cabal +++ b/htsn-import.cabal @@ -109,6 +109,7 @@ executable htsn-import TSN.Codegen TSN.Database TSN.DbImport + TSN.Parse TSN.Picklers TSN.XmlImport TSN.XML.AutoRacingSchedule diff --git a/src/TSN/Parse.hs b/src/TSN/Parse.hs new file mode 100644 index 0000000..0d497b4 --- /dev/null +++ b/src/TSN/Parse.hs @@ -0,0 +1,148 @@ +module TSN.Parse ( + parse_message, + parse_time_stamp, + parse_xml_time_stamp, + parse_xmlfid, + time_format, + time_stamp_format ) +where + +import Data.Either.Utils ( maybeToEither ) +import Data.Time.Clock ( NominalDiffTime, UTCTime, addUTCTime ) +import Data.Time.Format ( parseTime ) +import System.Locale ( defaultTimeLocale ) +import Text.Read ( readMaybe ) +import Text.XML.HXT.Core ( + XmlTree, + (>>>), + (/>), + getChildren, + getText, + hasName, + runLA ) + + +-- | Parse the \"message\" element out of a document tree and return +-- it as an 'XmlTree'. We use an Either for consistency. +-- +-- Note: It's more trouble than it's worth to attempt to use this as +-- the basis for parse_xmlfid and parse_xml_time_stamp. +-- +parse_message :: XmlTree -> Either String XmlTree +parse_message xmltree = + case elements of + [] -> Left "No message elements found." + (x:_) -> Right x + where + parse :: XmlTree -> [XmlTree] + parse = runLA $ hasName "/" /> hasName "message" + + elements = parse xmltree + + + +-- | Extract the \"XML_File_ID\" element from a document. If we fail +-- to parse an XML_File_ID, we return the reason wrapped in a 'Left' +-- constructor. The reason should be one of two things: +-- +-- 1. No XML_File_ID elements were found. +-- +-- 2. An XML_File_ID element was found, but it could not be read +-- into an Integer. +-- +-- We use an Either rather than a Maybe because we do expect some +-- non-integer XML_File_IDs. In the examples, you will see +-- NHL_DepthChart_XML.XML with an XML_File_ID of \"49618.61\" and +-- CFL_Boxscore_XML1.xml with an XML_File_ID of +-- \"R28916\". According to Brijesh Patel of TSN, these are special +-- category files and not part of the usual feed. +-- +-- TODO: This should eventually be combined with XML.parse_xmlfid +-- from the htsn package. +-- +parse_xmlfid :: XmlTree -> Either String Integer +parse_xmlfid xmltree = + case parse_results of + [] -> Left "No XML_File_ID elements found." + (x:_) -> x + where + parse :: XmlTree -> [String] + parse = runLA $ hasName "/" + /> hasName "message" + /> hasName "XML_File_ID" + >>> getChildren + >>> getText + + read_either_integer :: String -> Either String Integer + read_either_integer s = + let msg = "Could not parse XML_File_ID " ++ s ++ " as an integer." + in + maybeToEither msg (readMaybe s) + + elements = parse xmltree + parse_results = map read_either_integer elements + + + +-- | The format string for times appearing in the feed. +-- +time_format :: String +time_format = "%I:%M %p" + +-- | The format string for a time_stamp. This omits the leading and +-- trailing space. +time_stamp_format :: String +time_stamp_format = "%B %-d, %Y, at " ++ time_format ++ " ET" + + +-- | Parse a time stamp from a 'String' (maybe). +-- +-- TSN doesn't provide a proper time zone name, so we assume that +-- it's always Eastern Standard Time. EST is UTC-5, so we +-- add five hours to convert to UTC. +-- +parse_time_stamp :: String -> Maybe UTCTime +parse_time_stamp = + fmap add_five . parseTime defaultTimeLocale time_stamp_format + where + five_hours :: NominalDiffTime + five_hours = 5 * 60 * 60 + + add_five :: UTCTime -> UTCTime + add_five = addUTCTime five_hours + + +-- | Extract the \"time_stamp\" element from a document. If we fail +-- to parse a time_stamp, we return the reason wrapped in a 'Left' +-- constructor. The reason should be one of two things: +-- +-- 1. No time_Stamp elements were found. +-- +-- 2. A time_stamp element was found, but it could not be read +-- into a UTCTime. +-- +-- Unline 'parse_xmlfid', we don't expect to run into any time_stamps +-- that we can't parse. But since parse_xmlfid returns an Either, we +-- do for consistency. +-- +parse_xml_time_stamp :: XmlTree -> Either String UTCTime +parse_xml_time_stamp xmltree = + case parse_results of + [] -> Left "No time_stamp elements found." + (x:_) -> x + where + parse :: XmlTree -> [String] + parse = runLA $ hasName "/" + /> hasName "message" + /> hasName "time_stamp" + >>> getChildren + >>> getText + + read_either_utctime :: String -> Either String UTCTime + read_either_utctime s = + let msg = "Could not parse time_stamp " ++ s ++ " as a date/time." + in + maybeToEither msg (parse_time_stamp s) + + elements = parse xmltree + parse_results = map read_either_utctime elements diff --git a/src/TSN/Picklers.hs b/src/TSN/Picklers.hs index e188953..608f907 100644 --- a/src/TSN/Picklers.hs +++ b/src/TSN/Picklers.hs @@ -19,6 +19,12 @@ import Text.XML.HXT.Arrow.Pickle ( xpWrapMaybe ) import Text.XML.HXT.Arrow.Pickle.Xml ( PU ) +-- Local imports. +import TSN.Parse ( + parse_time_stamp, + time_format, + time_stamp_format ) + -- | (Un)pickle a UTCTime without the time portion. -- @@ -83,10 +89,7 @@ xp_gamedate = --- | The time format string used in 'xp_time' and 'xp_time_stamp'. --- -xp_time_format :: String -xp_time_format = "%I:%M %p" + -- | (Un)pickle a UTCTime without the date portion. @@ -96,10 +99,10 @@ xp_time = (to_time, from_time) `xpWrapMaybe` xpText where to_time :: String -> Maybe UTCTime - to_time = parseTime defaultTimeLocale xp_time_format + to_time = parseTime defaultTimeLocale time_format from_time :: UTCTime -> String - from_time = formatTime defaultTimeLocale xp_time_format + from_time = formatTime defaultTimeLocale time_format -- | (Un)pickle a UTCTime without the date portion, allowing for a @@ -112,11 +115,12 @@ xp_tba_time = to_time :: String -> Maybe UTCTime to_time s | s == "TBA" = Nothing - | otherwise = parseTime defaultTimeLocale xp_time_format s + | otherwise = parseTime defaultTimeLocale time_format s from_time :: Maybe UTCTime -> String from_time Nothing = "" - from_time (Just t) = formatTime defaultTimeLocale xp_time_format t + from_time (Just t) = formatTime defaultTimeLocale time_format t + -- | (Un)pickle the \ element format to/from a 'UTCTime'. @@ -129,22 +133,14 @@ xp_tba_time = -- xp_time_stamp :: PU UTCTime xp_time_stamp = - (to_time_stamp, from_time_stamp) `xpWrapMaybe` xpText + (parse_time_stamp, from_time_stamp) `xpWrapMaybe` xpText where - -- This omits the timezone and trailing space. - format = "%B %-d, %Y, at " ++ xp_time_format ++ " ET" - five_hours :: NominalDiffTime five_hours = 5 * 60 * 60 - add_five :: UTCTime -> UTCTime - add_five = addUTCTime five_hours - subtract_five :: UTCTime -> UTCTime subtract_five = addUTCTime (-1 * five_hours) - to_time_stamp :: String -> Maybe UTCTime - to_time_stamp = fmap add_five . parseTime defaultTimeLocale format - from_time_stamp :: UTCTime -> String - from_time_stamp = formatTime defaultTimeLocale format . subtract_five + from_time_stamp = + formatTime defaultTimeLocale time_stamp_format . subtract_five -- 2.44.2