module TSN.Parse (
+ ParseError,
+ format_parse_error,
+ parse_game_id,
parse_message,
+ parse_schedule_id,
+ parse_tests,
parse_time_stamp,
parse_xml_time_stamp,
parse_xmlfid,
where
import Data.Either.Utils ( maybeToEither )
-import Data.Time.Clock ( NominalDiffTime, UTCTime, addUTCTime )
+import Data.Time.Clock ( UTCTime )
import Data.Time.Format ( parseTime )
import System.Locale ( defaultTimeLocale )
+import Test.Tasty ( TestTree, testGroup )
+import Test.Tasty.HUnit ( (@?=), testCase )
import Text.Read ( readMaybe )
import Text.XML.HXT.Core (
XmlTree,
hasName,
runLA )
+-- Local imports
+import Xml ( unsafe_read_document )
--- | Parse the \"message\" element out of a document tree and return
--- it as an 'XmlTree'. We use an Either for consistency.
+
+-- | When parsing an element from an XML document (like the
+-- XML_File_ID), there are a few things that can happen. First of
+-- all, it can work. Good for you.
+--
+-- Or, you may find nothing. Like, the element is missing. We
+-- represent that with a 'ParseNotFound' containing the name of
+-- thing thing not-found as a 'String'.
--
--- Note: It's more trouble than it's worth to attempt to use this as
--- the basis for parse_xmlfid and parse_xml_time_stamp.
+-- Finally, you could find something, but be unable to interpret it
+-- as the type you were expecting. For example, if you parse
+-- \"WHATSUP\" out of a \<game_id\> which is supposed to contain
+-- integers. We represent this case with a 'ParseMismatch'
+-- containing the name of the thing that you were looking for, the
+-- value that had the unexpected type, and finally the name of the
+-- expected type (used in error messages).
+--
+data ParseError =
+ ParseNotFound String | ParseMismatch String String String
+ deriving (Eq, Show)
+
+
+-- | Take a 'ParseError' and turn it into a human-readable description
+-- of the problem.
--
-parse_message :: XmlTree -> Either String XmlTree
+format_parse_error :: ParseError -> String
+format_parse_error (ParseNotFound item) =
+ "No " ++ item ++ " elements found."
+format_parse_error (ParseMismatch item val expected_type) =
+ "Could not parse " ++ item ++ " " ++ val ++ " as " ++ expected_type ++ "."
+
+
+
+-- | Parse the \"message\" element out of a document tree and return
+-- it as an 'XmlTree'. We use an 'Either' for consistency.
+--
+parse_message :: XmlTree -> Either ParseError XmlTree
parse_message xmltree =
case elements of
- [] -> Left "No message elements found."
+ [] -> Left $ ParseNotFound "message"
(x:_) -> Right x
where
parse :: XmlTree -> [XmlTree]
--- | Extract the \"XML_File_ID\" element from a document. If we fail
--- to parse an XML_File_ID, we return the reason wrapped in a 'Left'
--- constructor. The reason should be one of two things:
---
--- 1. No XML_File_ID elements were found.
---
--- 2. An XML_File_ID element was found, but it could not be read
--- into an Integer.
---
--- We use an Either rather than a Maybe because we do expect some
--- non-integer XML_File_IDs. In the examples, you will see
--- NHL_DepthChart_XML.XML with an XML_File_ID of \"49618.61\" and
--- CFL_Boxscore_XML1.xml with an XML_File_ID of
--- \"R28916\". According to Brijesh Patel of TSN, these are special
--- category files and not part of the usual feed.
+
+
+-- | Parse an 'Int' from a direct descendent of the (top-level)
+-- \<message\> element in an XmlTree. This is used to implement the
+-- XML_File_ID, game_id, and schedule_id (the last two are specific
+-- to "TSN.XML.GameInfo") parsers.
--
--- TODO: This should eventually be combined with XML.parse_xmlfid
--- from the htsn package.
+-- If the parse fails, we return the corresponding 'ParseError'
+-- wrapped in a 'Left'. Otherwise the parsed value is returned in a
+-- 'Right'.
--
-parse_xmlfid :: XmlTree -> Either String Integer
-parse_xmlfid xmltree =
+parse_message_int :: String -> XmlTree -> Either ParseError Int
+parse_message_int child xmltree =
case parse_results of
- [] -> Left "No XML_File_ID elements found."
+ [] -> Left $ ParseNotFound child
(x:_) -> x
where
parse :: XmlTree -> [String]
parse = runLA $ hasName "/"
/> hasName "message"
- /> hasName "XML_File_ID"
+ /> hasName child
>>> getChildren
>>> getText
- read_either_integer :: String -> Either String Integer
- read_either_integer s =
- let msg = "Could not parse XML_File_ID " ++ s ++ " as an integer."
- in
- maybeToEither msg (readMaybe s)
+ read_either_int :: String -> Either ParseError Int
+ read_either_int s =
+ maybeToEither (ParseMismatch child s "integer") (readMaybe s)
elements = parse xmltree
- parse_results = map read_either_integer elements
+ parse_results = map read_either_int elements
+
+
+
+-- | Parse an optional 'Int' from a direct descendent of the
+-- (top-level) \<message\> element in an XmlTree. This is just like
+-- 'parse_message_int', except we expect the element/value to be
+-- missing sometimes.
+--
+-- To handle the fact that the element/value is optional, we pattern
+-- match on the 'ParseError' that comes back in case of failure. If
+-- we didn't find anything, we turn that into a \"successful
+-- nothing\". But if we find a value and it can't be parsed, we let
+-- the error propagate, because that shouldn't happen. Of course, if
+-- the parse worked, that's nice too: we wrap the parsed value in a
+-- 'Just' and return that wrapped in a 'Right'
+--
+parse_message_int_optional :: String
+ -> XmlTree
+ -> Either ParseError (Maybe Int)
+parse_message_int_optional child xmltree =
+ case (parse_message_int child xmltree) of
+ Left (ParseNotFound _) -> Right Nothing
+ Left pm@(ParseMismatch {}) -> Left pm
+ Right whatever -> Right (Just whatever)
+
+
+
+-- | Extract the \"XML_File_ID\" element from a document. If we fail
+-- to parse an XML_File_ID, we return an appropriate 'ParseError'
+-- wrapped in a 'Left' constructor. The reason should be one of two
+-- things:
+--
+-- 1. No XML_File_ID elements were found.
+--
+-- 2. An XML_File_ID element was found, but it could not be read
+-- into an Int.
+--
+-- In general we expect some non-integer XML_File_IDs, because they
+-- appear on the feed. But the htsn daemon refuses to save them at
+-- the moment, so if we ever see an XML_File_ID that we can't parse,
+-- it's truly an error.
+--
+parse_xmlfid :: XmlTree -> Either ParseError Int
+parse_xmlfid = parse_message_int "XML_File_ID"
+
+
+
+-- | Extract the \<game_id\> element from within the top-level
+-- \<message\> of a document. These appear in the "TSN.XML.GameInfo"
+-- documents. Unlike the \<XML_File_ID\> elements, the \<game_id\>
+-- can be missing from GameInfo documents, so for our implementation
+-- we use 'parse_message_int_optional' instead.
+--
+parse_game_id :: XmlTree -> Either ParseError (Maybe Int)
+parse_game_id = parse_message_int_optional "game_id"
+
+
+
+-- | Extract the \<schedule_id\> element from within the top-level
+-- \<message\> of a document. Identical to 'parse_game_id' except
+-- for the element name.
+--
+parse_schedule_id :: XmlTree -> Either ParseError (Maybe Int)
+parse_schedule_id = parse_message_int_optional "schedule_id"
time_format :: String
time_format = "%I:%M %p"
--- | The format string for a time_stamp. This omits the leading and
--- trailing space.
+
+
+-- | The format string for a time_stamp. We have removed the
+-- leading/trailing space so that parseTime and formatTime are NOT
+-- inverses of one another. We should be able to rectify this once
+-- everything is updated to support time-1.5. See,
+--
+-- <https://ghc.haskell.org/trac/ghc/ticket/9150>
+--
time_stamp_format :: String
time_stamp_format = "%B %-d, %Y, at " ++ time_format ++ " ET"
--- | Parse a time stamp from a 'String' (maybe).
---
--- TSN doesn't provide a proper time zone name, so we assume that
--- it's always Eastern Standard Time. EST is UTC-5, so we
--- add five hours to convert to UTC.
+
+-- | Parse a time stamp from a 'String' (maybe). TSN doesn't provide a
+-- proper time zone name, so we parse it as UTC, and maybe our
+-- eventual consumer can figure out a way to deduce the time zone.
--
parse_time_stamp :: String -> Maybe UTCTime
parse_time_stamp =
- fmap add_five . parseTime defaultTimeLocale time_stamp_format
- where
- five_hours :: NominalDiffTime
- five_hours = 5 * 60 * 60
+ parseTime defaultTimeLocale time_stamp_format
- add_five :: UTCTime -> UTCTime
- add_five = addUTCTime five_hours
--- | Extract the \"time_stamp\" element from a document. If we fail
--- to parse a time_stamp, we return the reason wrapped in a 'Left'
--- constructor. The reason should be one of two things:
+-- | Extract the \"time_stamp\" element from a document. If we fail to
+-- parse a time_stamp, we return an appropriate 'ParseError'. The
+-- reason should be one of two things:
--
-- 1. No time_Stamp elements were found.
--
-- 2. A time_stamp element was found, but it could not be read
-- into a UTCTime.
--
--- Unline 'parse_xmlfid', we don't expect to run into any time_stamps
--- that we can't parse. But since parse_xmlfid returns an Either, we
--- do for consistency.
+-- We don't expect to run into any time_stamps that we can't parse,
+-- and they can never be missing, so both conditions are truly
+-- errors.
--
-parse_xml_time_stamp :: XmlTree -> Either String UTCTime
+parse_xml_time_stamp :: XmlTree -> Either ParseError UTCTime
parse_xml_time_stamp xmltree =
case parse_results of
- [] -> Left "No time_stamp elements found."
+ [] -> Left $ ParseNotFound "time_stamp"
(x:_) -> x
where
parse :: XmlTree -> [String]
>>> getChildren
>>> getText
- read_either_utctime :: String -> Either String UTCTime
+ read_either_utctime :: String -> Either ParseError UTCTime
read_either_utctime s =
- let msg = "Could not parse time_stamp " ++ s ++ " as a date/time."
- in
- maybeToEither msg (parse_time_stamp s)
+ maybeToEither (ParseMismatch "time_stamp" s "date/time")
+ (parse_time_stamp s)
elements = parse xmltree
parse_results = map read_either_utctime elements
+
+
+
+--
+-- * Tests
+--
+
+-- | A list of all tests for this module.
+--
+parse_tests :: TestTree
+parse_tests =
+ testGroup
+ "TSN.Parse tests"
+ [ test_parse_game_id,
+ test_parse_missing_game_id,
+ test_parse_missing_schedule_id,
+ test_parse_schedule_id,
+ test_parse_xmlfid ]
+ where
+ sample_path :: String
+ sample_path = "test/xml/gameinfo/CBASK_Lineup_XML.xml"
+
+ desc :: String -> String
+ desc child = "a known " ++ child ++ " is parsed correctly"
+
+
+ -- | Actual implementation of the test for parse_xmlfid,
+ -- parse_game_id, and parse_schedule_id.
+ --
+ test_child :: String -> Int -> TestTree
+ test_child child expected =
+ testCase (desc child) $ do
+ xmltree <- unsafe_read_document sample_path
+ let actual = parse_message_int child xmltree
+ actual @?= (Right expected)
+
+
+ -- | Make sure we can parse a game_id into the expected value.
+ --
+ test_parse_game_id :: TestTree
+ test_parse_game_id = test_child "game_id" 97865
+
+
+ -- | Make sure we can parse a schedule_id (different from the
+ -- game_id) into the expected value.
+ --
+ test_parse_schedule_id :: TestTree
+ test_parse_schedule_id = test_child "schedule_id" 10199
+
+
+ -- | Make sure we can parse an XML_File_ID into the expected value.
+ --
+ test_parse_xmlfid :: TestTree
+ test_parse_xmlfid = test_child "XML_File_ID" 17
+
+
+
+-- | The game_id element can be missing, so we test that too.
+--
+test_parse_missing_game_id :: TestTree
+test_parse_missing_game_id =
+ testCase "missing game_id is not an error" $ do
+ xmltree <- unsafe_read_document "test/xml/gameinfo/MLB_Matchup_XML.xml"
+ let actual = parse_game_id xmltree
+ let expected = Right Nothing
+ actual @?= expected
+
+
+-- | The schedule_id element can be missing, so we test that too.
+--
+test_parse_missing_schedule_id :: TestTree
+test_parse_missing_schedule_id =
+ testCase "missing schedule_id is not an error" $ do
+ let path = "test/xml/gameinfo/recapxml-no-game-schedule-ids.xml"
+ xmltree <- unsafe_read_document path
+ let actual = parse_schedule_id xmltree
+ let expected = Right Nothing
+ actual @?= expected