15 import Data.Either.Utils ( maybeToEither )
16 import Data.Time.Clock ( UTCTime )
17 import Data.Time.Format ( parseTime )
18 import System.Locale ( defaultTimeLocale )
19 import Test.Tasty ( TestTree, testGroup )
20 import Test.Tasty.HUnit ( (@?=), testCase )
21 import Text.Read ( readMaybe )
22 import Text.XML.HXT.Core (
32 import Xml ( unsafe_read_document )
35 -- | When parsing an element from an XML document (like the
36 -- XML_File_ID), there are a few things that can happen. First of
37 -- all, it can work. Good for you.
39 -- Or, you may find nothing. Like, the element is missing. We
40 -- represent that with a 'ParseNotFound' containing the name of
41 -- thing thing not-found as a 'String'.
43 -- Finally, you could find something, but be unable to interpret it
44 -- as the type you were expecting. For example, if you parse
45 -- \"WHATSUP\" out of a \<game_id\> which is supposed to contain
46 -- integers. We represent this case with a 'ParseMismatch'
47 -- containing the name of the thing that you were looking for, the
48 -- value that had the unexpected type, and finally the name of the
49 -- expected type (used in error messages).
52 ParseNotFound String | ParseMismatch String String String
56 -- | Take a 'ParseError' and turn it into a human-readable description
59 format_parse_error :: ParseError -> String
60 format_parse_error (ParseNotFound item) =
61 "No " ++ item ++ " elements found."
62 format_parse_error (ParseMismatch item val expected_type) =
63 "Could not parse " ++ item ++ " " ++ val ++ " as " ++ expected_type ++ "."
67 -- | Parse the \"message\" element out of a document tree and return
68 -- it as an 'XmlTree'. We use an 'Either' for consistency.
70 parse_message :: XmlTree -> Either ParseError XmlTree
71 parse_message xmltree =
73 [] -> Left $ ParseNotFound "message"
76 parse :: XmlTree -> [XmlTree]
77 parse = runLA $ hasName "/" /> hasName "message"
79 elements = parse xmltree
85 -- | Parse an 'Int' from a direct descendent of the (top-level)
86 -- \<message\> element in an XmlTree. This is used to implement the
87 -- XML_File_ID, game_id, and schedule_id (the last two are specific
88 -- to "TSN.XML.GameInfo") parsers.
90 -- If the parse fails, we return the corresponding 'ParseError'
91 -- wrapped in a 'Left'. Otherwise the parsed value is returned in a
94 parse_message_int :: String -> XmlTree -> Either ParseError Int
95 parse_message_int child xmltree =
97 [] -> Left $ ParseNotFound child
100 parse :: XmlTree -> [String]
101 parse = runLA $ hasName "/"
107 read_either_int :: String -> Either ParseError Int
109 maybeToEither (ParseMismatch child s "integer") (readMaybe s)
111 elements = parse xmltree
112 parse_results = map read_either_int elements
116 -- | Parse an optional 'Int' from a direct descendent of the
117 -- (top-level) \<message\> element in an XmlTree. This is just like
118 -- 'parse_message_int', except we expect the element/value to be
119 -- missing sometimes.
121 -- To handle the fact that the element/value is optional, we pattern
122 -- match on the 'ParseError' that comes back in case of failure. If
123 -- we didn't find anything, we turn that into a \"successful
124 -- nothing\". But if we find a value and it can't be parsed, we let
125 -- the error propagate, because that shouldn't happen. Of course, if
126 -- the parse worked, that's nice too: we wrap the parsed value in a
127 -- 'Just' and return that wrapped in a 'Right'
129 parse_message_int_optional :: String
131 -> Either ParseError (Maybe Int)
132 parse_message_int_optional child xmltree =
133 case (parse_message_int child xmltree) of
134 Left (ParseNotFound _) -> Right Nothing
135 Left pm@(ParseMismatch {}) -> Left pm
136 Right whatever -> Right (Just whatever)
140 -- | Extract the \"XML_File_ID\" element from a document. If we fail
141 -- to parse an XML_File_ID, we return an appropriate 'ParseError'
142 -- wrapped in a 'Left' constructor. The reason should be one of two
145 -- 1. No XML_File_ID elements were found.
147 -- 2. An XML_File_ID element was found, but it could not be read
150 -- In general we expect some non-integer XML_File_IDs, because they
151 -- appear on the feed. But the htsn daemon refuses to save them at
152 -- the moment, so if we ever see an XML_File_ID that we can't parse,
153 -- it's truly an error.
155 parse_xmlfid :: XmlTree -> Either ParseError Int
156 parse_xmlfid = parse_message_int "XML_File_ID"
160 -- | Extract the \<game_id\> element from within the top-level
161 -- \<message\> of a document. These appear in the "TSN.XML.GameInfo"
162 -- documents. Unlike the \<XML_File_ID\> elements, the \<game_id\>
163 -- can be missing from GameInfo documents, so for our implementation
164 -- we use 'parse_message_int_optional' instead.
166 parse_game_id :: XmlTree -> Either ParseError (Maybe Int)
167 parse_game_id = parse_message_int_optional "game_id"
171 -- | Extract the \<schedule_id\> element from within the top-level
172 -- \<message\> of a document. Identical to 'parse_game_id' except
173 -- for the element name.
175 parse_schedule_id :: XmlTree -> Either ParseError (Maybe Int)
176 parse_schedule_id = parse_message_int_optional "schedule_id"
180 -- | The format string for times appearing in the feed.
182 time_format :: String
183 time_format = "%I:%M %p"
187 -- | The format string for a time_stamp. We have removed the
188 -- leading/trailing space so that parseTime and formatTime are NOT
189 -- inverses of one another. We should be able to rectify this once
190 -- everything is updated to support time-1.5. See,
192 -- <https://ghc.haskell.org/trac/ghc/ticket/9150>
194 time_stamp_format :: String
195 time_stamp_format = "%B %-d, %Y, at " ++ time_format ++ " ET"
199 -- | Parse a time stamp from a 'String' (maybe). TSN doesn't provide a
200 -- proper time zone name, so we parse it as UTC, and maybe our
201 -- eventual consumer can figure out a way to deduce the time zone.
203 parse_time_stamp :: String -> Maybe UTCTime
205 parseTime defaultTimeLocale time_stamp_format
209 -- | Extract the \"time_stamp\" element from a document. If we fail to
210 -- parse a time_stamp, we return an appropriate 'ParseError'. The
211 -- reason should be one of two things:
213 -- 1. No time_Stamp elements were found.
215 -- 2. A time_stamp element was found, but it could not be read
218 -- We don't expect to run into any time_stamps that we can't parse,
219 -- and they can never be missing, so both conditions are truly
222 parse_xml_time_stamp :: XmlTree -> Either ParseError UTCTime
223 parse_xml_time_stamp xmltree =
224 case parse_results of
225 [] -> Left $ ParseNotFound "time_stamp"
228 parse :: XmlTree -> [String]
229 parse = runLA $ hasName "/"
231 /> hasName "time_stamp"
235 read_either_utctime :: String -> Either ParseError UTCTime
236 read_either_utctime s =
237 maybeToEither (ParseMismatch "time_stamp" s "date/time")
240 elements = parse xmltree
241 parse_results = map read_either_utctime elements
249 -- | A list of all tests for this module.
251 parse_tests :: TestTree
255 [ test_parse_game_id,
256 test_parse_missing_game_id,
257 test_parse_missing_schedule_id,
258 test_parse_schedule_id,
261 sample_path :: String
262 sample_path = "test/xml/gameinfo/CBASK_Lineup_XML.xml"
264 desc :: String -> String
265 desc child = "a known " ++ child ++ " is parsed correctly"
268 -- | Actual implementation of the test for parse_xmlfid,
269 -- parse_game_id, and parse_schedule_id.
271 test_child :: String -> Int -> TestTree
272 test_child child expected =
273 testCase (desc child) $ do
274 xmltree <- unsafe_read_document sample_path
275 let actual = parse_message_int child xmltree
276 actual @?= (Right expected)
279 -- | Make sure we can parse a game_id into the expected value.
281 test_parse_game_id :: TestTree
282 test_parse_game_id = test_child "game_id" 97865
285 -- | Make sure we can parse a schedule_id (different from the
286 -- game_id) into the expected value.
288 test_parse_schedule_id :: TestTree
289 test_parse_schedule_id = test_child "schedule_id" 10199
292 -- | Make sure we can parse an XML_File_ID into the expected value.
294 test_parse_xmlfid :: TestTree
295 test_parse_xmlfid = test_child "XML_File_ID" 17
299 -- | The game_id element can be missing, so we test that too.
301 test_parse_missing_game_id :: TestTree
302 test_parse_missing_game_id =
303 testCase "missing game_id is not an error" $ do
304 xmltree <- unsafe_read_document "test/xml/gameinfo/MLB_Matchup_XML.xml"
305 let actual = parse_game_id xmltree
306 let expected = Right Nothing
310 -- | The schedule_id element can be missing, so we test that too.
312 test_parse_missing_schedule_id :: TestTree
313 test_parse_missing_schedule_id =
314 testCase "missing schedule_id is not an error" $ do
315 let path = "test/xml/gameinfo/recapxml-no-game-schedule-ids.xml"
316 xmltree <- unsafe_read_document path
317 let actual = parse_schedule_id xmltree
318 let expected = Right Nothing