]> gitweb.michael.orlitzky.com - dead/htsn.git/blob - src/TSN/Xml.hs
9d83f58ba448c0a63a048c3329a28ea657b1234a
[dead/htsn.git] / src / TSN / Xml.hs
1 -- | Minimal XML functionality needed to parse each document's
2 -- XML_File_ID.
3 --
4 module TSN.Xml (
5 parse_xmlfid,
6 xml_tests )
7 where
8
9 import Data.Either.Utils ( maybeToEither )
10 import Test.Tasty ( TestTree, testGroup )
11 import Test.Tasty.HUnit ( (@?=), Assertion, testCase )
12 import Text.Read ( readMaybe )
13 import Text.XML.HXT.Core (
14 (>>>),
15 (/>),
16 getChildren,
17 getText,
18 hasName,
19 runLA,
20 xreadDoc )
21
22
-- | A tiny parser written in HXT to extract the @XML_File_ID@ element
--   from a document. Only @XML_File_ID@ elements that are children of
--   a top-level @message@ element are considered, and only the first
--   one found is used.
--
--   If we fail to parse an XML_File_ID, we return the reason wrapped
--   in a 'Left' constructor. The reason should be one of two things:
--
--     1. No XML_File_ID elements were found.
--
--     2. An XML_File_ID element was found, but it could not be read
--        into an Integer.
--
--   We use an Either rather than a Maybe because we do expect some
--   non-integer XML_File_IDs. In the examples, you will see
--   NHL_DepthChart_XML.XML with an XML_File_ID of \"49618.61\" and
--   CFL_Boxscore_XML1.xml with an XML_File_ID of \"R28916\". According
--   to Brijesh Patel of TSN, these are special category files and not
--   part of the usual feed.
--
--   We want to report them differently, \"just in case.\"
--
parse_xmlfid :: String -- ^ The XML Document
             -> Either String Integer
parse_xmlfid doc =
  case extract_xmlfids doc of
    []          -> Left "No XML_File_ID elements found."
    -- Only the first XML_File_ID is ever consulted; the rest are
    -- ignored, exactly as before.
    (first_id:_) -> read_either_integer first_id
  where
    -- Collect the text content of every XML_File_ID element sitting
    -- directly beneath a "message" element.
    extract_xmlfids :: String -> [String]
    extract_xmlfids =
      runLA (xreadDoc
               >>> hasName "message"
               /> hasName "XML_File_ID"
               >>> getChildren
               >>> getText)

    -- Convert the element text to an Integer, or explain why we
    -- couldn't. Note the space after "XML_File_ID": without it the
    -- offending value gets glued onto the element name in the message.
    read_either_integer :: String -> Either String Integer
    read_either_integer s =
      maybeToEither msg (readMaybe s)
      where
        msg = "Could not parse XML_File_ID " ++ s ++ " as an integer."
65
66
67 -- * Tasty Tests
-- | Every test defined in this module, gathered into a single group
--   named \"XML tests\".
xml_tests :: TestTree
xml_tests = testGroup "XML tests" members
  where
    members :: [TestTree]
    members = [ xml_file_id_tests ]
73
74
-- | For each sample ID, read @test\/xml\/\<id\>.xml@ and make sure that
--   'parse_xmlfid' recovers that same ID from the file's contents.
xml_file_id_tests :: TestTree
xml_file_id_tests =
  testCase "XML_File_ID is parsed correctly" (mapM_ verify sample_ids)
  where
    -- NOTE(review): "19908216" appears twice in this list, so that
    -- file is checked twice. Possibly a typo for another sample.
    sample_ids :: [String]
    sample_ids =
      ["19908216", "19908216", "19908245", "19908246", "19908247"]

    verify :: String -> Assertion
    verify sample_id = do
      contents <- readFile ("test/xml/" ++ sample_id ++ ".xml")
      -- Every sample ID is a string of digits, so the readMaybe below
      -- is expected to succeed and the "derp" message is never seen.
      parse_xmlfid contents @?= maybeToEither "derp" (readMaybe sample_id)