--- /dev/null
+module TSN.Parse (
+ parse_message,
+ parse_time_stamp,
+ parse_xml_time_stamp,
+ parse_xmlfid,
+ time_format,
+ time_stamp_format )
+where
+
+import Data.Either.Utils ( maybeToEither )
+import Data.Time.Clock ( NominalDiffTime, UTCTime, addUTCTime )
+import Data.Time.Format ( parseTime )
+import System.Locale ( defaultTimeLocale )
+import Text.Read ( readMaybe )
+import Text.XML.HXT.Core (
+ XmlTree,
+ (>>>),
+ (/>),
+ getChildren,
+ getText,
+ hasName,
+ runLA )
+
+
+-- | Parse the \"message\" element out of a document tree and return
+-- it as an 'XmlTree'. We use an Either for consistency.
+--
+-- Note: It's more trouble than it's worth to attempt to use this as
+-- the basis for parse_xmlfid and parse_xml_time_stamp.
+--
+parse_message :: XmlTree -> Either String XmlTree
+parse_message xmltree =
+ case elements of
+ [] -> Left "No message elements found."
+ (x:_) -> Right x
+ where
+ parse :: XmlTree -> [XmlTree]
+ parse = runLA $ hasName "/" /> hasName "message"
+
+ elements = parse xmltree
+
+
+
+-- | Extract the \"XML_File_ID\" element from a document. If we fail
+-- to parse an XML_File_ID, we return the reason wrapped in a 'Left'
+-- constructor. The reason should be one of two things:
+--
+-- 1. No XML_File_ID elements were found.
+--
+-- 2. An XML_File_ID element was found, but it could not be read
+-- into an Integer.
+--
+-- We use an Either rather than a Maybe because we do expect some
+-- non-integer XML_File_IDs. In the examples, you will see
+-- NHL_DepthChart_XML.XML with an XML_File_ID of \"49618.61\" and
+-- CFL_Boxscore_XML1.xml with an XML_File_ID of
+-- \"R28916\". According to Brijesh Patel of TSN, these are special
+-- category files and not part of the usual feed.
+--
+-- TODO: This should eventually be combined with XML.parse_xmlfid
+-- from the htsn package.
+--
+parse_xmlfid :: XmlTree -> Either String Integer
+parse_xmlfid xmltree =
+ case parse_results of
+ [] -> Left "No XML_File_ID elements found."
+ (x:_) -> x
+ where
+ parse :: XmlTree -> [String]
+ parse = runLA $ hasName "/"
+ /> hasName "message"
+ /> hasName "XML_File_ID"
+ >>> getChildren
+ >>> getText
+
+ read_either_integer :: String -> Either String Integer
+ read_either_integer s =
+ let msg = "Could not parse XML_File_ID " ++ s ++ " as an integer."
+ in
+ maybeToEither msg (readMaybe s)
+
+ elements = parse xmltree
+ parse_results = map read_either_integer elements
+
+
+
+-- | The format string for times appearing in the feed.
+--
+time_format :: String
+time_format = "%I:%M %p"
+
+-- | The format string for a time_stamp. This omits the leading and
+-- trailing space.
+time_stamp_format :: String
+time_stamp_format = "%B %-d, %Y, at " ++ time_format ++ " ET"
+
+
+-- | Parse a time stamp from a 'String' (maybe).
+--
+-- TSN doesn't provide a proper time zone name, so we assume that
+-- it's always Eastern Standard Time. EST is UTC-5, so we
+-- add five hours to convert to UTC.
+--
+parse_time_stamp :: String -> Maybe UTCTime
+parse_time_stamp =
+ fmap add_five . parseTime defaultTimeLocale time_stamp_format
+ where
+ five_hours :: NominalDiffTime
+ five_hours = 5 * 60 * 60
+
+ add_five :: UTCTime -> UTCTime
+ add_five = addUTCTime five_hours
+
+
+-- | Extract the \"time_stamp\" element from a document. If we fail
+-- to parse a time_stamp, we return the reason wrapped in a 'Left'
+-- constructor. The reason should be one of two things:
+--
+-- 1. No time_Stamp elements were found.
+--
+-- 2. A time_stamp element was found, but it could not be read
+-- into a UTCTime.
+--
+-- Unline 'parse_xmlfid', we don't expect to run into any time_stamps
+-- that we can't parse. But since parse_xmlfid returns an Either, we
+-- do for consistency.
+--
+parse_xml_time_stamp :: XmlTree -> Either String UTCTime
+parse_xml_time_stamp xmltree =
+ case parse_results of
+ [] -> Left "No time_stamp elements found."
+ (x:_) -> x
+ where
+ parse :: XmlTree -> [String]
+ parse = runLA $ hasName "/"
+ /> hasName "message"
+ /> hasName "time_stamp"
+ >>> getChildren
+ >>> getText
+
+ read_either_utctime :: String -> Either String UTCTime
+ read_either_utctime s =
+ let msg = "Could not parse time_stamp " ++ s ++ " as a date/time."
+ in
+ maybeToEither msg (parse_time_stamp s)
+
+ elements = parse xmltree
+ parse_results = map read_either_utctime elements
xpWrapMaybe )
import Text.XML.HXT.Arrow.Pickle.Xml ( PU )
+-- Local imports.
+import TSN.Parse (
+ parse_time_stamp,
+ time_format,
+ time_stamp_format )
+
-- | (Un)pickle a UTCTime without the time portion.
--
--- | The time format string used in 'xp_time' and 'xp_time_stamp'.
---
-xp_time_format :: String
-xp_time_format = "%I:%M %p"
+
-- | (Un)pickle a UTCTime without the date portion.
(to_time, from_time) `xpWrapMaybe` xpText
where
to_time :: String -> Maybe UTCTime
- to_time = parseTime defaultTimeLocale xp_time_format
+ to_time = parseTime defaultTimeLocale time_format
from_time :: UTCTime -> String
- from_time = formatTime defaultTimeLocale xp_time_format
+ from_time = formatTime defaultTimeLocale time_format
-- | (Un)pickle a UTCTime without the date portion, allowing for a
to_time :: String -> Maybe UTCTime
to_time s
| s == "TBA" = Nothing
- | otherwise = parseTime defaultTimeLocale xp_time_format s
+ | otherwise = parseTime defaultTimeLocale time_format s
from_time :: Maybe UTCTime -> String
from_time Nothing = ""
- from_time (Just t) = formatTime defaultTimeLocale xp_time_format t
+ from_time (Just t) = formatTime defaultTimeLocale time_format t
+
-- | (Un)pickle the \<time_stamp\> element format to/from a 'UTCTime'.
--
xp_time_stamp :: PU UTCTime
xp_time_stamp =
- (to_time_stamp, from_time_stamp) `xpWrapMaybe` xpText
+ (parse_time_stamp, from_time_stamp) `xpWrapMaybe` xpText
where
- -- This omits the timezone and trailing space.
- format = "%B %-d, %Y, at " ++ xp_time_format ++ " ET"
-
five_hours :: NominalDiffTime
five_hours = 5 * 60 * 60
- add_five :: UTCTime -> UTCTime
- add_five = addUTCTime five_hours
-
subtract_five :: UTCTime -> UTCTime
subtract_five = addUTCTime (-1 * five_hours)
- to_time_stamp :: String -> Maybe UTCTime
- to_time_stamp = fmap add_five . parseTime defaultTimeLocale format
-
from_time_stamp :: UTCTime -> String
- from_time_stamp = formatTime defaultTimeLocale format . subtract_five
+ from_time_stamp =
+ formatTime defaultTimeLocale time_stamp_format . subtract_five