+-- | Minimal XML functionality needed to parse each document's
+-- XML_File_ID.
+--
module TSN.Xml (
parse_xmlfid,
- xml_prologue )
+ xml_tests )
where
-import Data.Maybe (listToMaybe, mapMaybe)
-import Text.Read (readMaybe)
+import Data.Either.Utils ( maybeToEither )
+import Test.Tasty ( TestTree, testGroup )
+import Test.Tasty.HUnit ( (@?=), Assertion, testCase )
+import Text.Read ( readMaybe )
import Text.XML.HXT.Core (
(>>>),
(/>),
runLA,
xreadDoc )
+
-- | A tiny parser written in HXT to extract the "XML_File_ID" element
--- from a document.
-parse_xmlfid :: String -> Maybe Integer
-parse_xmlfid =
- listToMaybe . mapMaybe readMaybe . parse
+-- from a document. If we fail to parse an XML_File_ID, we return
+-- the reason wrapped in a 'Left' constructor. The reason should be
+-- one of two things:
+--
+-- 1. No XML_File_ID elements were found.
+--
+-- 2. An XML_File_ID element was found, but it could not be read
+-- into an Integer.
+--
+-- We use an Either rather than a Maybe because we do expect some
+-- non-integer XML_File_IDs. In the examples, you will see
+-- NHL_DepthChart_XML.XML with an XML_File_ID of "49618.61" and
+-- CFL_Boxscore_XML1.xml with an XML_File_ID of "R28916". According
+-- to Brijesh Patel of TSN, these are special category files and not
+-- part of the usual feed.
+--
+-- We want to report them differently, "just in case."
+--
+parse_xmlfid :: String -- ^ The XML Document
+ -> Either String Integer -- ^ The ID, or a message saying why none could be read
+parse_xmlfid doc =
+ case parse_results of
+ [] -> Left "No XML_File_ID elements found." -- parse produced no strings at all
+ (x:_) -> x -- only the first result is used; later ones are ignored
where
parse :: String -> [String]
parse =
>>> getChildren
>>> getText)
+ read_either_integer :: String -> Either String Integer
+ read_either_integer s =
+ let msg = "Could not parse XML_File_ID " ++ s ++ " as an integer."
+ in
+ maybeToEither msg (readMaybe s) -- Nothing from readMaybe becomes Left msg
+
+ elements = parse doc -- strings pulled out of the document by parse
+ parse_results = map read_either_integer elements -- one Either per extracted string, in the same order
+
--- | The opening "tag" for the XML prologue.
-xml_prologue :: String
-xml_prologue = "<?xml "
+-- * Tasty Tests
+
+-- | Every test defined in this module, gathered under a single
+--   group so that a test runner can pull them in with one name.
+--
+xml_tests :: TestTree
+xml_tests = testGroup "XML tests" [ xml_file_id_tests ]
+
+
+-- | Check that 'parse_xmlfid' recovers the XML_File_ID from each
+--   sample document. Every fixture lives in @test/xml/@ and is named
+--   after the ID it is expected to contain, so the expected value
+--   comes straight from the file name.
+--
+xml_file_id_tests :: TestTree
+xml_file_id_tests =
+  testCase "XML_File_ID is parsed correctly" $ mapM_ check xmlfids
+  where
+    -- Each fixture is listed exactly once; reading the same file a
+    -- second time would only repeat an identical assertion.
+    xmlfids :: [String]
+    xmlfids = ["19908216", "19908245", "19908246", "19908247"]
+
+    -- Read one fixture and compare the parsed ID with its file name.
+    check :: String -> Assertion
+    check xmlfid = do
+      xml <- readFile ("test/xml/" ++ xmlfid ++ ".xml")
+      let actual = parse_xmlfid xml
+      -- The IDs above are all numeric, so readMaybe succeeds and the
+      -- placeholder error message is never used.
+      let expected = maybeToEither "derp" (readMaybe xmlfid)
+      actual @?= expected