]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/Picklers.hs
Add tests for existing picklers.
[dead/htsn-import.git] / src / TSN / Picklers.hs
1 -- | (Un)picklers for data types present in The Sports Network XML
2 -- feed.
3 --
4 module TSN.Picklers (
5 pickler_tests,
6 xp_ambiguous_time,
7 xp_date,
8 xp_date_padded,
9 xp_datetime,
10 xp_earnings,
11 xp_gamedate,
12 xp_tba_time,
13 xp_time,
14 xp_time_dots,
15 xp_time_stamp )
16 where
17
-- System imports.
import Data.List ( intercalate )
import Data.List.Split ( chunksOf )
import Data.String.Utils ( replace )
import Data.Time.Clock ( NominalDiffTime, UTCTime, addUTCTime )
import Data.Time.Format ( formatTime, parseTime )
import Data.Tree.NTree.TypeDefs ( NTree(..) )
import System.Locale ( defaultTimeLocale )
import Test.Tasty ( TestTree, testGroup )
import Test.Tasty.HUnit ( (@?=), testCase )
import Text.Read ( readMaybe )
import Text.XML.HXT.Arrow.Pickle (
  xpText,
  xpWrap,
  xpWrapMaybe )
import Text.XML.HXT.Arrow.Pickle.Xml ( PU )
import Text.XML.HXT.Core (
  XmlTree,
  XNode( XTag, XText ),
  mkName,
  pickleDoc,
  unpickleDoc )
39
40 -- Local imports.
41 import TSN.Parse (
42 parse_time_stamp,
43 time_format,
44 time_stamp_format )
45
46
-- | Format string describing a bare date written as m/d/yyyy, where
--   neither the month nor the day is padded. Strings such as the
--   following match it:
--
--   * 2\/15\/1983
--
--   * 1\/1\/0000
--
date_format :: String
date_format = intercalate "/" ["%-m", "%-d", "%Y"]
56
57
-- | Format string describing a bare date written as mm/dd/yyyy,
--   where both the month and the day are zero-padded to two
--   characters. Strings such as the following match it:
--
--   * 02\/15\/1983
--
--   * 01\/01\/0000
--
date_format_padded :: String
date_format_padded = intercalate "/" ["%0m", "%0d", "%Y"]
68
69
-- | Pickler for a 'UTCTime' whose textual form is only a date (no
--   time of day), written according to 'date_format'. Unpickling
--   fails when the text does not parse with that format.
--
xp_date :: PU UTCTime
xp_date =
  xpWrapMaybe (parse_date, show_date) xpText
  where
    -- Text -> value; 'Nothing' makes the unpickler fail.
    parse_date :: String -> Maybe UTCTime
    parse_date str = parseTime defaultTimeLocale date_format str

    -- Value -> text.
    show_date :: UTCTime -> String
    show_date utc = formatTime defaultTimeLocale date_format utc
81
82
-- | Pickler for a 'UTCTime' whose textual form is only a date (no
--   time of day), written according to 'date_format_padded'; that
--   is, with the month and day zero-padded to two characters.
--   Unpickling fails when the text does not parse with that format.
--
xp_date_padded :: PU UTCTime
xp_date_padded =
  xpWrapMaybe (parse_date, show_date) xpText
  where
    -- Text -> value; 'Nothing' makes the unpickler fail.
    parse_date :: String -> Maybe UTCTime
    parse_date str = parseTime defaultTimeLocale date_format_padded str

    -- Value -> text.
    show_date :: UTCTime -> String
    show_date utc = formatTime defaultTimeLocale date_format_padded utc
95
96
97
-- | Format a number as a string using a comma as the thousands
--   separator. A leading minus sign is kept in front of the grouped
--   digits and never takes part in the grouping.
--
--   Examples:
--
--   >>> format_commas 0
--   "0"
--   >>> format_commas 10
--   "10"
--   >>> format_commas 100
--   "100"
--   >>> format_commas 1000
--   "1,000"
--   >>> format_commas 10000
--   "10,000"
--   >>> format_commas 100000
--   "100,000"
--   >>> format_commas 1000000
--   "1,000,000"
--   >>> format_commas (-100)
--   "-100"
--   >>> format_commas (-1000)
--   "-1,000"
--
format_commas :: Int -> String
format_commas x
  -- Drop the sign from the shown string rather than negating the
  -- number, so that 'minBound' (which has no positive counterpart)
  -- is handled too.
  | x < 0     = '-' : group_digits (drop 1 (show x))
  | otherwise = group_digits (show x)
  where
    -- Split the digits into groups of three starting from the right
    -- by working on the reversed string.
    group_digits :: String -> String
    group_digits = reverse . intercalate "," . chunksOf 3 . reverse
121
-- | Parse \<Earnings\> from an 'AutoRaceResultsListing'. These are
--   essentially 'Int's, but they look like,
--
--   * \<Earnings\>336,826\</Earnings\>
--
--   * \<Earnings\>1,000,191\</Earnings\>
--
--   * \<Earnings\>TBA\</Earnings\>
--
--   The text \"TBA\" unpickles to 'Nothing', and 'Nothing' pickles
--   back to \"TBA\". Any other text has its commas removed and must
--   then read as an 'Int'; if it does not, unpickling fails instead
--   of producing a value that throws when it is evaluated.
--
xp_earnings :: PU (Maybe Int)
xp_earnings =
  (to_earnings, from_earnings) `xpWrapMaybe` xpText
  where
    strip_commas :: String -> String
    strip_commas = replace "," ""

    -- The outer 'Maybe' reports an unpickling failure to
    -- 'xpWrapMaybe'; the inner one separates \"TBA\" from a real
    -- amount.
    to_earnings :: String -> Maybe (Maybe Int)
    to_earnings "TBA" = Just Nothing
    to_earnings s     = fmap Just (readMaybe (strip_commas s))

    from_earnings :: Maybe Int -> String
    from_earnings Nothing  = "TBA"
    from_earnings (Just i) = format_commas i
146
147
-- | Pickler for a 'UTCTime' written as an unpadded date followed by
--   a 12-hour time with seconds and an AM\/PM marker. This is the
--   shape of, for example, the \<RaceDate\> elements in an
--   'AutoRaceResults' message.
--
--   Examples:
--
--   * \<RaceDate\>6/1/2014 1:00:00 PM\</RaceDate\>
--
--   * \<RaceDate\>5/24/2014 2:45:00 PM\</RaceDate\>
--
xp_datetime :: PU UTCTime
xp_datetime =
  xpWrapMaybe (parse_datetime, show_datetime) xpText
  where
    -- The date part and the time part, separated by a single space.
    datetime_format :: String
    datetime_format = unwords [date_format, "%-I:%M:%S %p"]

    -- Text -> value; 'Nothing' makes the unpickler fail.
    parse_datetime :: String -> Maybe UTCTime
    parse_datetime str = parseTime defaultTimeLocale datetime_format str

    -- Value -> text.
    show_datetime :: UTCTime -> String
    show_datetime utc = formatTime defaultTimeLocale datetime_format utc
168
169
-- | (Un)pickle a UTCTime from a weather forecast's gamedate. Example
--   input looks like,
--
--   \<forecast gamedate=\"Monday, December 30th\"\>
--
--   When unpickling we get rid of the suffixes \"st\", \"nd\", \"rd\", and
--   \"th\". During pickling, we add them back based on the day of the
--   month: days whose tens digit is 1 (10 through 19, so in
--   particular 11, 12 and 13) always get \"th\"; otherwise the last
--   digit decides between \"st\", \"nd\", \"rd\" and \"th\".
--
xp_gamedate :: PU UTCTime
xp_gamedate =
  (to_gamedate, from_gamedate) `xpWrapMaybe` xpText
  where
    format :: String
    format = "%A, %B %-d"

    to_gamedate :: String -> Maybe UTCTime
    to_gamedate = parseTime defaultTimeLocale format . strip_suffix

    -- Remove a trailing ordinal suffix, if there is one. Strings
    -- shorter than two characters, or ending in anything else, are
    -- left alone.
    strip_suffix :: String -> String
    strip_suffix s =
      case reverse s of
        (c2:c1:cs) | [c1,c2] `elem` ["st", "nd", "rd", "th"] -> reverse cs
        _ -> s

    from_gamedate :: UTCTime -> String
    from_gamedate d = s ++ suffix s
      where
        s = formatTime defaultTimeLocale format d

    -- Choose the ordinal suffix for a string that ends with the day
    -- of the month. The string is examined back to front, so the
    -- first character of the reversed string is the last digit.
    suffix :: String -> String
    suffix cs =
      case reverse cs of
        []        -> []
        (_:'1':_) -> "th" -- 10 through 19: 11th, 12th, 13th, ...
        ('1':_)   -> "st"
        ('2':_)   -> "nd"
        ('3':_)   -> "rd"
        _         -> "th"
215
216
217
218
219
220
-- | Pickler for a 'UTCTime' whose textual form is only a time of day
--   (no date), written according to 'time_format'. Unpickling fails
--   when the text does not parse with that format.
--
xp_time :: PU UTCTime
xp_time =
  xpWrapMaybe (parse_clock, show_clock) xpText
  where
    -- Text -> value; 'Nothing' makes the unpickler fail.
    parse_clock :: String -> Maybe UTCTime
    parse_clock str = parseTime defaultTimeLocale time_format str

    -- Value -> text.
    show_clock :: UTCTime -> String
    show_clock utc = formatTime defaultTimeLocale time_format utc
232
233
-- | Pickler for a 'UTCTime' whose textual form is only a time of day
--   (no date). Unlike 'xp_time', the AM\/PM marker is written with
--   periods, i.e. \"A.M.\" and \"P.M.\", and the hours are not padded.
--
--   Examples:
--
--   * \<CurrentTimeStamp\>11:30 A.M.\</CurrentTimeStamp\>
--
xp_time_dots :: PU UTCTime
xp_time_dots =
  xpWrapMaybe (parse_dotted, show_dotted) xpText
  where
    -- The hours aren't padded with zeros.
    dotless_format :: String
    dotless_format = "%-I:%M %p"

    -- Remove every period from the text, then parse what is left.
    parse_dotted :: String -> Maybe UTCTime
    parse_dotted str =
      parseTime defaultTimeLocale dotless_format (replace "." "" str)

    -- Format without periods, then put them back into the marker.
    show_dotted :: UTCTime -> String
    show_dotted =
      replace "AM" "A.M."
        . replace "PM" "P.M."
        . formatTime defaultTimeLocale dotless_format
258
259
-- | Pickler for an optional 'UTCTime' whose textual form is only a
--   time of day (no date). The text \"TBA\" stands for a missing
--   time and becomes 'Nothing'; so does any text that does not parse
--   with 'time_format'.
--
--   /Examples/:
--
--   A failed parse will return 'Nothing':
--
--   >>> let tn = text_node "YO"
--   >>> unpickleDoc xp_tba_time tn
--   Just Nothing
--
--   And so will parsing a \"TBA\":
--
--   >>> let tn = text_node "TBA"
--   >>> unpickleDoc xp_tba_time tn
--   Just Nothing
--
--   But re-pickling 'Nothing' gives only \"TBA\":
--
--   >>> pickleDoc xp_tba_time Nothing
--   NTree (XTag "/" []) [NTree (XText "TBA") []]
--
--   A normal time is also parsed successfully, of course:
--
--   >>> let tn = text_node "08:10 PM"
--   >>> unpickleDoc xp_tba_time tn
--   Just (Just 1970-01-01 20:10:00 UTC)
--
xp_tba_time :: PU (Maybe UTCTime)
xp_tba_time =
  xpWrap (parse_tba, show_tba) xpText
  where
    -- \"TBA\" is recognized before any parsing is attempted.
    parse_tba :: String -> Maybe UTCTime
    parse_tba "TBA" = Nothing
    parse_tba str   = parseTime defaultTimeLocale time_format str

    -- A missing time is always written out as \"TBA\".
    show_tba :: Maybe UTCTime -> String
    show_tba = maybe "TBA" (formatTime defaultTimeLocale time_format)
300
301
302
-- | Pickler converting between the \<time_stamp\> element format and
--   a 'UTCTime'. A time_stamp element looks something like,
--
--   \<time_stamp\> January 6, 2014, at 10:11 PM ET \</time_stamp\>
--
--   TSN doesn't provide a proper time zone name, so the zone is
--   assumed to always be Eastern Standard Time (UTC-5). Parsing is
--   delegated to 'parse_time_stamp'; when pickling, five hours are
--   subtracted from the UTC value before it is formatted with
--   'time_stamp_format'.
--
--   Examples:
--
--   >>> let tn = text_node " January 6, 2014, at 10:11 PM ET "
--   >>> unpickleDoc xp_time_stamp tn
--   Just 2014-01-07 03:11:00 UTC
--
xp_time_stamp :: PU UTCTime
xp_time_stamp =
  xpWrapMaybe (parse_time_stamp, show_time_stamp) xpText
  where
    -- Five hours, expressed in seconds.
    est_offset :: NominalDiffTime
    est_offset = 5 * 60 * 60

    -- Move a UTC time back by the offset to get the Eastern time.
    utc_to_est :: UTCTime -> UTCTime
    utc_to_est = addUTCTime (negate est_offset)

    show_time_stamp :: UTCTime -> String
    show_time_stamp utc =
      formatTime defaultTimeLocale time_stamp_format (utc_to_est utc)
331
332
-- | Pickler for a 12-hour time of day that has no AM\/PM marker,
--   which is what makes it ambiguous. The hours are not padded.
--
--   Examples:
--
--   >>> let tn = text_node "8:00"
--   >>> unpickleDoc xp_ambiguous_time tn
--   Just 1970-01-01 08:00:00 UTC
--
xp_ambiguous_time :: PU UTCTime
xp_ambiguous_time =
  xpWrapMaybe (parse_clock, show_clock) xpText
  where
    -- Unpadded hours, a colon, then minutes; nothing else.
    hour_minute_format :: String
    hour_minute_format = "%-I:%M"

    -- Text -> value; 'Nothing' makes the unpickler fail.
    parse_clock :: String -> Maybe UTCTime
    parse_clock str = parseTime defaultTimeLocale hour_minute_format str

    -- Value -> text.
    show_clock :: UTCTime -> String
    show_clock utc = formatTime defaultTimeLocale hour_minute_format utc
355
356
-- | Build an 'XmlTree' that consists of a single text node holding
--   the given string and having no children. Handy when testing
--   (un)picklers, since it spares us from constructing a dummy XML
--   document.
--
--   Examples:
--
--   >>> text_node "8:00"
--   NTree (XText "8:00") []
--
text_node :: String -> XmlTree
text_node = flip NTree [] . XText
368
369
370
371 --
372 -- * Tasty Tests
373 --
374
-- | All of the tests for this module, collected into one group. This
--   primarily exists to eliminate the unused import/export warnings
--   for 'unpickleDoc' and 'text_node', which are otherwise only used
--   in the doctests.
--
pickler_tests :: TestTree
pickler_tests = testGroup "Pickler tests" all_tests
  where
    all_tests :: [TestTree]
    all_tests = [ test_pickle_of_unpickle_is_identity ]
384
385
-- | If we unpickle something and then pickle it, we should wind up
--   with the same thing we started with (plus an additional root
--   element).
--
--   The comparison is made on 'Maybe' values rather than by
--   pattern-matching on 'Just', so that a failed unpickle shows up
--   as an ordinary assertion failure (got 'Nothing') and not as a
--   pattern-match exception.
--
test_pickle_of_unpickle_is_identity :: TestTree
test_pickle_of_unpickle_is_identity =
  testCase "pickle composed with unpickle is (almost) the identity" $
    actual @?= expected
  where
    tn :: XmlTree
    tn = text_node "8:00"

    -- Unpickle the text node, then pickle the result again.
    actual :: Maybe XmlTree
    actual =
      fmap (pickleDoc xp_ambiguous_time) (unpickleDoc xp_ambiguous_time tn)

    -- The original node, wrapped in the root element that pickling
    -- adds.
    expected :: Maybe XmlTree
    expected = Just (NTree (XTag (mkName "/") []) [tn])