]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/Parse.hs
Use UTC instead of EST to store the timestamps; they're both wrong anyway.
[dead/htsn-import.git] / src / TSN / Parse.hs
1 module TSN.Parse (
2 parse_message,
3 parse_time_stamp,
4 parse_xml_time_stamp,
5 parse_xmlfid,
6 time_format,
7 time_stamp_format )
8 where
9
10 import Data.Either.Utils ( maybeToEither )
11 import Data.Time.Clock ( UTCTime )
12 import Data.Time.Format ( parseTime )
13 import System.Locale ( defaultTimeLocale )
14 import Text.Read ( readMaybe )
15 import Text.XML.HXT.Core (
16 XmlTree,
17 (>>>),
18 (/>),
19 getChildren,
20 getText,
21 hasName,
22 runLA )
23
24
-- | Find the \"message\" element in a document tree and hand it back
--   as an 'XmlTree'. The result is wrapped in an Either so that this
--   function has the same shape as the other parsers in this module.
--
--   If several \"message\" elements match, only the first is returned;
--   if none match, the 'Left' explains that nothing was found.
--
--   Note: It's more trouble than it's worth to attempt to use this as
--   the basis for parse_xmlfid and parse_xml_time_stamp.
--
parse_message :: XmlTree -> Either String XmlTree
parse_message xmltree =
  first_message (runLA (hasName "/" /> hasName "message") xmltree)
  where
    -- Keep the first match and discard the rest; an empty list of
    -- matches is the only failure case.
    first_message :: [XmlTree] -> Either String XmlTree
    first_message (m:_) = Right m
    first_message []    = Left "No message elements found."
41
42
43
-- | Pull the \"XML_File_ID\" out of a document and read it as an
--   'Integer'. On failure, the reason is returned in a 'Left'. There
--   are two ways to fail:
--
--     1. The document contains no XML_File_ID elements.
--
--     2. An XML_File_ID element exists, but its text could not be
--        read as an Integer.
--
--   Only the first XML_File_ID found is considered.
--
--   An Either is used instead of a Maybe because non-integer
--   XML_File_IDs are expected to show up. In the examples,
--   NHL_DepthChart_XML.XML has an XML_File_ID of \"49618.61\" and
--   CFL_Boxscore_XML1.xml has an XML_File_ID of \"R28916\". According
--   to Brijesh Patel of TSN, these are special category files and not
--   part of the usual feed.
--
--   TODO: This should eventually be combined with XML.parse_xmlfid
--   from the htsn package.
--
parse_xmlfid :: XmlTree -> Either String Integer
parse_xmlfid xmltree =
  case map to_integer (xmlfid_texts xmltree) of
    (first_result:_) -> first_result
    []               -> Left "No XML_File_ID elements found."
  where
    -- The text content of every /message/XML_File_ID element.
    xmlfid_texts :: XmlTree -> [String]
    xmlfid_texts =
      runLA (hasName "/" /> hasName "message" /> hasName "XML_File_ID"
               >>> getChildren
               >>> getText)

    -- Read one candidate, explaining the failure if it isn't an
    -- integer.
    to_integer :: String -> Either String Integer
    to_integer text =
      maybeToEither
        ("Could not parse XML_File_ID " ++ text ++ " as an integer.")
        (readMaybe text)
84
85
86
-- | The format string used for the time-of-day portion of times in
--   the feed: a twelve-hour clock with minutes, followed by an AM/PM
--   marker. It is also embedded in 'time_stamp_format'.
--
time_format :: String
time_format = "%I:%M %p"
91
92
-- | The format string for a time_stamp: the date, the literal text
--   \", at \", the time of day (see 'time_format'), and a literal
--   \" ET \" suffix.
--
--   The leading and trailing spaces are kept on purpose so that
--   parseTime and formatTime are inverses of one another, even though
--   there is some confusion as to how these two functions should
--   behave:
--
--   <https://ghc.haskell.org/trac/ghc/ticket/9150>
--
time_stamp_format :: String
time_stamp_format = concat [ " %B %-d, %Y, at ", time_format, " ET " ]
102
103
104
-- | Try to parse a time stamp from a 'String' laid out according to
--   'time_stamp_format'. The result is 'Nothing' when the string does
--   not match that format.
--
--   TSN doesn't provide a proper time zone name, so the value is
--   parsed as if it were UTC; maybe our eventual consumer can figure
--   out a way to deduce the real time zone.
--
parse_time_stamp :: String -> Maybe UTCTime
parse_time_stamp s = parseTime defaultTimeLocale time_stamp_format s
112
113
114
-- | Pull the \"time_stamp\" out of a document and parse it into a
--   'UTCTime'. On failure, the reason is returned in a 'Left'. There
--   are two ways to fail:
--
--     1. The document contains no time_stamp elements.
--
--     2. A time_stamp element exists, but its text could not be
--        parsed by 'parse_time_stamp'.
--
--   Only the first time_stamp found is considered.
--
--   Unlike 'parse_xmlfid', we don't expect to run into any
--   time_stamps that we can't parse. But since parse_xmlfid returns
--   an Either, this function does too, for consistency.
--
parse_xml_time_stamp :: XmlTree -> Either String UTCTime
parse_xml_time_stamp xmltree =
  case map to_utctime (time_stamp_texts xmltree) of
    (first_result:_) -> first_result
    []               -> Left "No time_stamp elements found."
  where
    -- The text content of every /message/time_stamp element.
    time_stamp_texts :: XmlTree -> [String]
    time_stamp_texts =
      runLA (hasName "/" /> hasName "message" /> hasName "time_stamp"
               >>> getChildren
               >>> getText)

    -- Parse one candidate, explaining the failure if it doesn't
    -- look like a time stamp.
    to_utctime :: String -> Either String UTCTime
    to_utctime text =
      maybeToEither
        ("Could not parse time_stamp " ++ text ++ " as a date/time.")
        (parse_time_stamp text)
146
147 elements = parse xmltree
148 parse_results = map read_either_utctime elements