]> gitweb.michael.orlitzky.com - dead/htsn.git/blob - src/Xml.hs
Bump to v0.1.1 and allow >= network-2.6.
[dead/htsn.git] / src / Xml.hs
1 -- | Minimal XML functionality needed to parse each document's
2 -- XML_File_ID.
3 --
4 module Xml (
5 parse_xmlfid,
6 -- * Tests
7 xml_tests )
8 where
9
10 import Data.Either.Utils ( maybeToEither )
11 import Test.Tasty ( TestTree, testGroup )
12 import Test.Tasty.HUnit ( (@?=), Assertion, testCase )
13 import Text.Read ( readMaybe )
14 import Text.XML.HXT.Core (
15 (>>>),
16 (/>),
17 getChildren,
18 getText,
19 hasName,
20 runLA,
21 xreadDoc )
22
23
-- | Pull the \"XML_File_ID\" out of a document with a small HXT arrow
--   pipeline and read it as an 'Integer'.
--
--   Only the first piece of XML_File_ID text found beneath a
--   \"message\" element is examined. When no 'Integer' can be
--   produced, the explanation comes back in a 'Left'; there are two
--   possibilities:
--
--     * The pipeline produced no XML_File_ID text at all.
--
--     * Some text was produced, but it does not read as an 'Integer'.
--
--   An 'Either' is used in place of a 'Maybe' because non-integer
--   XML_File_IDs are expected now and then. Among the examples,
--   NHL_DepthChart_XML.XML has an XML_File_ID of \"49618.61\" and
--   CFL_Boxscore_XML1.xml has one of \"R28916\". According to Brijesh
--   Patel of TSN, these are special category files and not part of
--   the usual feed.
--
--   Carrying the reason along lets callers report those cases
--   differently, \"just in case.\"
--
parse_xmlfid :: String -- ^ The XML Document
             -> Either String Integer
parse_xmlfid doc =
  case candidates of
    (raw : _) -> maybeToEither (unreadable raw) (readMaybe raw)
    []        -> Left "No XML_File_ID elements found."
  where
    -- The text of every XML_File_ID child of a "message" element,
    -- in the order the arrow yields them.
    candidates :: [String]
    candidates =
      runLA (xreadDoc
             >>> hasName "message"
             /> hasName "XML_File_ID"
             >>> getChildren
             >>> getText) doc

    -- The failure message for text that is not an integer.
    unreadable :: String -> String
    unreadable raw =
      "Could not parse XML_File_ID " ++ raw ++ " as an integer."
66
67
68 --
69 -- Tasty Tests
70 --
71
-- | Every test defined in this module, collected under a single
--   \"XML tests\" group.
--
xml_tests :: TestTree
xml_tests = testGroup "XML tests" members
  where
    members :: [TestTree]
    members = [ xml_file_id_tests ]
79
80
-- | Check that 'parse_xmlfid' recovers the right XML_File_ID from a
--   handful of known example documents. Each example is read from
--   @test\/xml\/\<id\>.xml@, so the expected id is the file's own name.
--
xml_file_id_tests :: TestTree
xml_file_id_tests =
  testCase "XML_File_ID is parsed correctly" (mapM_ verify known_ids)
  where
    known_ids :: [String]
    known_ids =
      [ "19908216"
      , "19908216"
      , "19908245"
      , "19908246"
      , "19908247" ]

    verify :: String -> Assertion
    verify fid = do
      contents <- readFile ("test/xml/" ++ fid ++ ".xml")
      -- Every known id is written with digits only, so readMaybe is
      -- expected to succeed and the "derp" message should never
      -- actually appear.
      parse_xmlfid contents @?= maybeToEither "derp" (readMaybe fid)