module TSN.Parse ( ParseError, format_parse_error, parse_game_id, parse_message, parse_schedule_id, parse_tests, parse_time_stamp, parse_xml_time_stamp, parse_xmlfid, time_format, time_stamp_format ) where import Data.Either.Utils ( maybeToEither ) import Data.Time.Clock ( UTCTime ) import Data.Time.Format ( parseTime ) import System.Locale ( defaultTimeLocale ) import Test.Tasty ( TestTree, testGroup ) import Test.Tasty.HUnit ( (@?=), testCase ) import Text.Read ( readMaybe ) import Text.XML.HXT.Core ( XmlTree, (>>>), (/>), getChildren, getText, hasName, runLA ) -- Local imports import Xml ( unsafe_read_document ) -- | When parsing an element from an XML document (like the -- XML_File_ID), there are a few things that can happen. First of -- all, it can work. Good for you. -- -- Or, you may find nothing. Like, the element is missing. We -- represent that with a 'ParseNotFound' containing the name of -- thing thing not-found as a 'String'. -- -- Finally, you could find something, but be unable to interpret it -- as the type you were expecting. For example, if you parse -- \"WHATSUP\" out of a \ which is supposed to contain -- integers. We represent this case with a 'ParseMismatch' -- containing the name of the thing that you were looking for, the -- value that had the unexpected type, and finally the name of the -- expected type (used in error messages). -- data ParseError = ParseNotFound String | ParseMismatch String String String deriving (Eq, Show) -- | Take a 'ParseError' and turn it into a human-readable description -- of the problem. -- format_parse_error :: ParseError -> String format_parse_error (ParseNotFound item) = "No " ++ item ++ " elements found." format_parse_error (ParseMismatch item val expected_type) = "Could not parse " ++ item ++ " " ++ val ++ " as " ++ expected_type ++ "." -- | Parse the \"message\" element out of a document tree and return -- it as an 'XmlTree'. We use an 'Either' for consistency. -- parse_message :: XmlTree -> Either ParseError XmlTree parse_message xmltree = case elements of [] -> Left $ ParseNotFound "message" (x:_) -> Right x where parse :: XmlTree -> [XmlTree] parse = runLA $ hasName "/" /> hasName "message" elements = parse xmltree -- | Parse an 'Int' from a direct descendent of the (top-level) -- \ element in an XmlTree. This is used to implement the -- XML_File_ID, game_id, and schedule_id (the last two are specific -- to "TSN.XML.GameInfo") parsers. -- -- If the parse fails, we return the corresponding 'ParseError' -- wrapped in a 'Left'. Otherwise the parsed value is returned in a -- 'Right'. -- parse_message_int :: String -> XmlTree -> Either ParseError Int parse_message_int child xmltree = case parse_results of [] -> Left $ ParseNotFound child (x:_) -> x where parse :: XmlTree -> [String] parse = runLA $ hasName "/" /> hasName "message" /> hasName child >>> getChildren >>> getText read_either_int :: String -> Either ParseError Int read_either_int s = maybeToEither (ParseMismatch child s "integer") (readMaybe s) elements = parse xmltree parse_results = map read_either_int elements -- | Extract the \"XML_File_ID\" element from a document. If we fail -- to parse an XML_File_ID, we return an appropriate 'ParseError' -- wrapped in a 'Left' constructor. The reason should be one of two -- things: -- -- 1. No XML_File_ID elements were found. -- -- 2. An XML_File_ID element was found, but it could not be read -- into an Int. -- -- In general we expect some non-integer XML_File_IDs, because they -- appear on the feed. But the htsn daemon refuses to save them at -- the moment, so if we ever see an XML_File_ID that we can't parse, -- it's truly an error. -- parse_xmlfid :: XmlTree -> Either ParseError Int parse_xmlfid = parse_message_int "XML_File_ID" -- | Extract the \ element from within the top-level -- \ of a document. These appear in the "TSN.XML.GameInfo" -- documents. Unlike the \ and \ -- elements, the \ can be missing from GameInfo -- documents. So even the 'Right' value of the 'Either' can be -- \"missing\". There are two reasons that the parse might fail. -- -- 1. No such elements were found. This is expected sometimes, and -- should be returned as a 'Right' 'Nothing'. -- -- 2. An element was found, but it could not be read into an -- 'Int'. This is NOT expected, and will be returned as a -- 'ParseError', wrapped in a 'Left'. -- -- Most of implementation for this ('parse_message_int') is shared, -- but to handle the fact that game_id is optional, we pattern match -- on the 'ParseError' that comes back in case of failure. If we -- didn't find any game_id elements, we turn that into a -- \"successful nothing\". But if we find a game_id and it can't be -- parsed, we let the error propagate, because that shouldn't -- happen. Of course, if the parse worked, that's nice too: we wrap -- the parsed value in a 'Just' and return that wrapped in a 'Right' -- parse_game_id :: XmlTree -> Either ParseError (Maybe Int) parse_game_id xml = case (parse_message_int "game_id" xml) of Left (ParseNotFound _) -> Right Nothing Left pm@(ParseMismatch {}) -> Left pm Right whatever -> Right (Just whatever) -- | Extract the \ element from within the top-level -- \ of a document. These appear in the -- "TSN.XML.GameInfo" documents. If we fail to parse a schedule_id, -- we return the reason wrapped in an appropriate 'ParseError'. The reason -- should be one of two things: -- -- 1. No such elements were found. -- -- 2. An element was found, but it could not be read -- into an Int. -- -- Both of these are truly errors in the case of schedule_id. The -- implementation for this ('parse_message_int') is shared among a -- few functions. -- parse_schedule_id :: XmlTree -> Either ParseError Int parse_schedule_id = parse_message_int "schedule_id" -- | The format string for times appearing in the feed. -- time_format :: String time_format = "%I:%M %p" -- | The format string for a time_stamp. We keep the leading/trailing -- space so that parseTime and formatTime are inverses are one -- another, even though there is some confusion as to how these two -- functions should behave: -- -- -- time_stamp_format :: String time_stamp_format = " %B %-d, %Y, at " ++ time_format ++ " ET " -- | Parse a time stamp from a 'String' (maybe). TSN doesn't provide a -- proper time zone name, so we parse it as UTC, and maybe our -- eventual consumer can figure out a way to deduce the time zone. -- parse_time_stamp :: String -> Maybe UTCTime parse_time_stamp = parseTime defaultTimeLocale time_stamp_format -- | Extract the \"time_stamp\" element from a document. If we fail to -- parse a time_stamp, we return an appropriate 'ParseError'. The -- reason should be one of two things: -- -- 1. No time_Stamp elements were found. -- -- 2. A time_stamp element was found, but it could not be read -- into a UTCTime. -- -- We don't expect to run into any time_stamps that we can't parse, -- and they can never be missing, so both conditions are truly -- errors. -- parse_xml_time_stamp :: XmlTree -> Either ParseError UTCTime parse_xml_time_stamp xmltree = case parse_results of [] -> Left $ ParseNotFound "time_stamp" (x:_) -> x where parse :: XmlTree -> [String] parse = runLA $ hasName "/" /> hasName "message" /> hasName "time_stamp" >>> getChildren >>> getText read_either_utctime :: String -> Either ParseError UTCTime read_either_utctime s = maybeToEither (ParseMismatch "time_stamp" s "date/time") (parse_time_stamp s) elements = parse xmltree parse_results = map read_either_utctime elements -- -- * Tests -- -- | A list of all tests for this module. -- parse_tests :: TestTree parse_tests = testGroup "TSN.Parse tests" [ test_parse_game_id, test_parse_missing_game_id, test_parse_schedule_id, test_parse_xmlfid ] where sample_path :: String sample_path = "test/xml/gameinfo/CBASK_Lineup_XML.xml" desc :: String -> String desc child = "a known " ++ child ++ " is parsed correctly" -- | Actual implementation of the test for parse_xmlfid, -- parse_game_id, and parse_schedule_id. -- test_child :: String -> Int -> TestTree test_child child expected = testCase (desc child) $ do xmltree <- unsafe_read_document sample_path let actual = parse_message_int child xmltree actual @?= (Right expected) -- | Make sure we can parse a game_id into the expected value. -- test_parse_game_id :: TestTree test_parse_game_id = test_child "game_id" 97865 -- | Make sure we can parse a schedule_id (different from the -- game_id) into the expected value. -- test_parse_schedule_id :: TestTree test_parse_schedule_id = test_child "schedule_id" 10199 -- | Make sure we can parse an XML_File_ID into the expected value. -- test_parse_xmlfid :: TestTree test_parse_xmlfid = test_child "XML_File_ID" 17 -- | The game_id element can be missing, so we test that too. -- test_parse_missing_game_id :: TestTree test_parse_missing_game_id = testCase "missing game_id is not an error" $ do xmltree <- unsafe_read_document "test/xml/gameinfo/MLB_Matchup_XML.xml" let actual = parse_game_id xmltree let expected = Right Nothing actual @?= expected