]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/Picklers.hs
Add more tests for existing picklers.
[dead/htsn-import.git] / src / TSN / Picklers.hs
1 -- | (Un)picklers for data types present in The Sports Network XML
2 -- feed.
3 --
4 module TSN.Picklers (
5 pickler_tests,
6 xp_ambiguous_time,
7 xp_date,
8 xp_date_padded,
9 xp_datetime,
10 xp_earnings,
11 xp_gamedate,
12 xp_tba_time,
13 xp_time,
14 xp_time_dots,
15 xp_time_stamp )
16 where
17
18 -- System imports.
19 import Data.List ( intercalate )
20 import Data.List.Split ( chunksOf )
21 import Data.String.Utils ( replace )
22 import Data.Time.Clock ( NominalDiffTime, UTCTime, addUTCTime )
23 import Data.Time.Format ( formatTime, parseTime )
24 import Data.Tree.NTree.TypeDefs ( NTree(..) )
25 import System.Locale ( defaultTimeLocale )
26 import Test.Tasty ( TestTree, testGroup )
27 import Test.Tasty.HUnit ( (@?=), testCase )
28 import Text.XML.HXT.Arrow.Pickle (
29 xpText,
30 xpWrap,
31 xpWrapMaybe )
32 import Text.XML.HXT.Arrow.Pickle.Xml ( PU )
33 import Text.XML.HXT.Core (
34 XmlTree,
35 XNode( XTag, XText ),
36 mkName,
37 pickleDoc,
38 unpickleDoc )
39
40 -- Local imports.
41 import TSN.Parse (
42 parse_time_stamp,
43 time_format,
44 time_stamp_format )
45
46
-- | 'Data.Time.Format' pattern for a bare date written as m/d/yyyy,
--   where neither the month nor the day is padded. Strings such as
--   the following match:
--
--   * 2\/15\/1983
--
--   * 1\/1\/0000
--
date_format :: String
date_format = intercalate "/" ["%-m", "%-d", "%Y"]
56
57
-- | 'Data.Time.Format' pattern for a bare date written as mm/dd/yyyy,
--   where both the month and the day are zero-padded to two
--   characters. Strings such as the following match:
--
--   * 02\/15\/1983
--
--   * 01\/01\/0000
--
date_format_padded :: String
date_format_padded = intercalate "/" ["%0m", "%0d", "%Y"]
68
69
-- | Pickler for a 'UTCTime' that carries only a date (no time of
--   day), written according to 'date_format'.
--
--   /Examples/:
--
--   The unpadded form is what we expect to see:
--
--   >>> let tn = text_node "2/15/1983"
--   >>> unpickleDoc xp_date tn
--   Just 1983-02-15 00:00:00 UTC
--
--   A leading zero in the month happens to be accepted as well. We
--   don't rely on that; it's merely what the parser does.
--
--   >>> let tn = text_node "02/15/1983"
--   >>> unpickleDoc xp_date tn
--   Just 1983-02-15 00:00:00 UTC
--
xp_date :: PU UTCTime
xp_date =
  xpWrapMaybe (parse_date, show_date) xpText
  where
    -- Text -> date; Nothing when the text doesn't match 'date_format'.
    parse_date :: String -> Maybe UTCTime
    parse_date str = parseTime defaultTimeLocale date_format str

    -- Date -> text, using the same format as the parser.
    show_date :: UTCTime -> String
    show_date utc = formatTime defaultTimeLocale date_format utc
96
97
-- | Pickler for a 'UTCTime' that carries only a date (no time of
--   day), written according to 'date_format_padded'; that is, with
--   the day and month zero-padded to two characters.
--
--   Examples:
--
--   >>> let tn = text_node "02/15/1983"
--   >>> unpickleDoc xp_date_padded tn
--   Just 1983-02-15 00:00:00 UTC
--
xp_date_padded :: PU UTCTime
xp_date_padded =
  xpWrapMaybe (parse_padded, show_padded) xpText
  where
    -- Text -> date; Nothing when the text doesn't match the format.
    parse_padded :: String -> Maybe UTCTime
    parse_padded str = parseTime defaultTimeLocale date_format_padded str

    -- Date -> text, using the same format as the parser.
    show_padded :: UTCTime -> String
    show_padded utc = formatTime defaultTimeLocale date_format_padded utc
116
117
118
-- | Format a number as a string using a comma as the thousands
--   separator.
--
--   The sign of a negative number is kept out of the digit grouping;
--   otherwise a comma could wind up directly after the minus sign
--   (for example \"-,100\").
--
--   Examples:
--
--   >>> format_commas 0
--   "0"
--   >>> format_commas 10
--   "10"
--   >>> format_commas 100
--   "100"
--   >>> format_commas 1000
--   "1,000"
--   >>> format_commas 10000
--   "10,000"
--   >>> format_commas 100000
--   "100,000"
--   >>> format_commas 1000000
--   "1,000,000"
--   >>> format_commas (-100)
--   "-100"
--   >>> format_commas (-1000)
--   "-1,000"
--
format_commas :: Int -> String
format_commas x
    -- Drop the sign from the shown string rather than negating x, so
    -- that 'minBound' (which has no positive counterpart) still works.
  | x < 0     = '-' : group_digits (drop 1 (show x))
  | otherwise = group_digits (show x)
  where
    -- Group from the right: reverse, chunk into threes, join with
    -- commas, and reverse back.
    group_digits :: String -> String
    group_digits = reverse . intercalate "," . chunksOf 3 . reverse
142
143
-- | Parse \<Earnings\> from an 'AutoRaceResultsListing'. These are
--   essentially 'Int's, but they look like,
--
--   * \<Earnings\>336,826\</Earnings\>
--
--   * \<Earnings\>1,000,191\</Earnings\>
--
--   * \<Earnings\>TBA\</Earnings\>
--
--   The literal \"TBA\" unpickles to 'Nothing'. Anything else has its
--   commas removed and is parsed as an 'Int'; if that fails, the
--   result is also 'Nothing' (the same convention 'xp_tba_time' uses)
--   instead of an exception hiding inside the unpickled value.
--   Pickling 'Nothing' always produces \"TBA\".
--
--   Examples:
--
--   >>> let tn = text_node "1,000,191"
--   >>> unpickleDoc xp_earnings tn
--   Just (Just 1000191)
--
--   >>> let tn = text_node "TBA"
--   >>> unpickleDoc xp_earnings tn
--   Just Nothing
--
--   >>> let tn = text_node "garbage"
--   >>> unpickleDoc xp_earnings tn
--   Just Nothing
--
xp_earnings :: PU (Maybe Int)
xp_earnings =
  (to_earnings, from_earnings) `xpWrap` xpText
  where
    strip_commas :: String -> String
    strip_commas = replace "," ""

    -- A total stand-in for 'read'. It accepts exactly what 'read'
    -- accepts (one complete parse, with only whitespace left over)
    -- but returns Nothing where 'read' would call 'error'.
    parse_int :: String -> Maybe Int
    parse_int s =
      case [ n | (n, rest) <- reads s, ("", "") <- lex rest ] of
        [n] -> Just n
        _   -> Nothing

    to_earnings :: String -> Maybe Int
    to_earnings s
      | s == "TBA" = Nothing
      | otherwise  = parse_int (strip_commas s)

    from_earnings :: Maybe Int -> String
    from_earnings = maybe "TBA" format_commas
178
179
180
-- | Pickler for a 'UTCTime' written as an unpadded date followed by
--   a 12-hour time with seconds and an AM/PM marker. The \<RaceDate\>
--   elements in an 'AutoRaceResults' message look like this.
--
--   Examples:
--
--   >>> let tn = text_node "6/1/2014 1:00:00 PM"
--   >>> unpickleDoc xp_datetime tn
--   Just 2014-06-01 13:00:00 UTC
--
--   >>> let tn = text_node "5/24/2014 2:45:00 PM"
--   >>> unpickleDoc xp_datetime tn
--   Just 2014-05-24 14:45:00 UTC
--
--   A single leading zero on the month is tolerated by the parser.
--   That's incidental; nothing requires (or particularly wants) it.
--
--   >>> let tn = text_node "05/24/2014 2:45:00 PM"
--   >>> unpickleDoc xp_datetime tn
--   Just 2014-05-24 14:45:00 UTC
--
xp_datetime :: PU UTCTime
xp_datetime =
  xpWrapMaybe (parse_datetime, show_datetime) xpText
  where
    -- The date part, one space, then the time part.
    datetime_format :: String
    datetime_format = unwords [date_format, "%-I:%M:%S %p"]

    parse_datetime :: String -> Maybe UTCTime
    parse_datetime str = parseTime defaultTimeLocale datetime_format str

    show_datetime :: UTCTime -> String
    show_datetime utc = formatTime defaultTimeLocale datetime_format utc
212
213
-- | (Un)pickle a UTCTime from a weather forecast's gamedate. Example
--   input looks like \"Monday, December 30th\".
--
--   When unpickling we get rid of the suffixes \"st\", \"nd\", \"rd\", and
--   \"th\". During pickling, we add them back based on the day of the
--   month: days 10 through 19 always get \"th\" (so \"11th\", \"12th\",
--   \"13th\"), and every other day is decided by its last digit.
--
--   The text contains no year, so (as the first example shows) the
--   parsed date lands in 1970 and the weekday name is recomputed from
--   that date when pickling. The weekday in the input is therefore
--   not necessarily the one that comes back out.
--
--   Examples:
--
--   >>> let tn = text_node "Monday, December 30th"
--   >>> let (Just gd) = unpickleDoc xp_gamedate tn
--   >>> gd
--   1970-12-30 00:00:00 UTC
--   >>> pickleDoc xp_gamedate gd
--   NTree (XTag "/" []) [NTree (XText "Wednesday, December 30th") []]
--
--   The \"teens\" take \"th\" even though they end in 1, 2, or 3:
--
--   >>> let tn = text_node "Friday, December 11th"
--   >>> let (Just gd) = unpickleDoc xp_gamedate tn
--   >>> pickleDoc xp_gamedate gd
--   NTree (XTag "/" []) [NTree (XText "Friday, December 11th") []]
--
xp_gamedate :: PU UTCTime
xp_gamedate =
  (to_gamedate, from_gamedate) `xpWrapMaybe` xpText
  where
    format :: String
    format = "%A, %B %-d"

    to_gamedate :: String -> Maybe UTCTime
    to_gamedate s =
      parseTime defaultTimeLocale format (strip_suffix s)

    -- Remove a trailing two-letter ordinal suffix, if there is one.
    -- Strings that are too short or end in something else are
    -- returned untouched.
    strip_suffix :: String -> String
    strip_suffix s =
      case reverse s of
        (c2:c1:cs) | [c1,c2] `elem` ["st", "nd", "rd", "th"] -> reverse cs
        _                                                    -> s

    from_gamedate :: UTCTime -> String
    from_gamedate d = s ++ (suffix s)
      where
        s = formatTime defaultTimeLocale format d

    -- Choose the ordinal suffix by looking at the end of the
    -- formatted string, which is the (unpadded) day of the month.
    suffix :: String -> String
    suffix cs =
      case reverse cs of
        []        -> []
        (_:'1':_) -> "th" -- Tens digit is 1: days 10 through 19.
        ('1':_)   -> "st"
        ('2':_)   -> "nd"
        ('3':_)   -> "rd"
        _         -> "th"
266
267
268
269
270
271
-- | Pickler for a 'UTCTime' that carries only a time of day (no
--   date), written according to 'time_format'. The fields must be
--   zero-padded to two characters or the parse fails.
--
--   /Examples/:
--
--   Padded, should work:
--
--   >>> let tn = text_node "04:35 PM"
--   >>> unpickleDoc xp_time tn
--   Just 1970-01-01 16:35:00 UTC
--
--   Unpadded, should fail:
--
--   >>> let tn = text_node "4:35 PM"
--   >>> unpickleDoc xp_time tn
--   Nothing
--
xp_time :: PU UTCTime
xp_time =
  xpWrapMaybe (parse_clock, show_clock) xpText
  where
    parse_clock :: String -> Maybe UTCTime
    parse_clock str = parseTime defaultTimeLocale time_format str

    show_clock :: UTCTime -> String
    show_clock utc = formatTime defaultTimeLocale time_format utc
298
299
-- | Pickler for a 'UTCTime' that carries only a time of day (no
--   date). Unlike 'xp_time', the meridiem is written with periods
--   (\"A.M.\" and \"P.M.\") and the hour is not zero-padded.
--
--   /Examples/:
--
--   A standard example of the correct form:
--
--   >>> let tn = text_node "11:30 A.M."
--   >>> let (Just result) = unpickleDoc xp_time_dots tn
--   >>> result
--   1970-01-01 11:30:00 UTC
--   >>> pickleDoc xp_time_dots result
--   NTree (XTag "/" []) [NTree (XText "11:30 A.M.") []]
--
--   A leading zero on the hour is nevertheless accepted:
--
--   >>> let tn = text_node "01:30 A.M."
--   >>> unpickleDoc xp_time_dots tn
--   Just 1970-01-01 01:30:00 UTC
--
xp_time_dots :: PU UTCTime
xp_time_dots =
  xpWrapMaybe (parse_dotted, show_dotted) xpText
  where
    -- Unpadded hour, minutes, and a plain AM/PM marker.
    dotless_format :: String
    dotless_format = "%-I:%M %p"

    -- Throw away every period, then parse what's left.
    parse_dotted :: String -> Maybe UTCTime
    parse_dotted dotted =
      parseTime defaultTimeLocale dotless_format (replace "." "" dotted)

    -- Format without periods, then put them back in the meridiem.
    show_dotted :: UTCTime -> String
    show_dotted =
      replace "AM" "A.M."
        . replace "PM" "P.M."
        . formatTime defaultTimeLocale dotless_format
337
338
-- | Pickler for an optional time of day (no date) in 'time_format'.
--   The text \"TBA\" stands for a missing time and becomes 'Nothing'.
--
--   /Examples/:
--
--   A failed parse will return 'Nothing':
--
--   >>> let tn = text_node "YO"
--   >>> unpickleDoc xp_tba_time tn
--   Just Nothing
--
--   And so will parsing a \"TBA\":
--
--   >>> let tn = text_node "TBA"
--   >>> unpickleDoc xp_tba_time tn
--   Just Nothing
--
--   But re-pickling 'Nothing' gives only \"TBA\":
--
--   >>> pickleDoc xp_tba_time Nothing
--   NTree (XTag "/" []) [NTree (XText "TBA") []]
--
--   A normal time is also parsed successfully, of course:
--
--   >>> let tn = text_node "08:10 PM"
--   >>> unpickleDoc xp_tba_time tn
--   Just (Just 1970-01-01 20:10:00 UTC)
--
xp_tba_time :: PU (Maybe UTCTime)
xp_tba_time =
  xpWrap (parse_tba, show_tba) xpText
  where
    parse_tba :: String -> Maybe UTCTime
    parse_tba "TBA" = Nothing
    parse_tba other = parseTime defaultTimeLocale time_format other

    show_tba :: Maybe UTCTime -> String
    show_tba = maybe "TBA" (formatTime defaultTimeLocale time_format)
379
380
381
-- | Pickler that converts the \<time_stamp\> element format to and
--   from a 'UTCTime'. A time_stamp element looks something like,
--
--   \<time_stamp\> January 6, 2014, at 10:11 PM ET \</time_stamp\>
--
--   TSN gives us \"ET\" rather than a real time zone name, so we
--   treat every stamp as Eastern Standard Time. EST is UTC-5, which
--   means five hours are subtracted here when going from UTC back to
--   the text form. Parsing is delegated to 'parse_time_stamp'.
--
--   Examples:
--
--   >>> let tn = text_node " January 6, 2014, at 10:11 PM ET "
--   >>> unpickleDoc xp_time_stamp tn
--   Just 2014-01-07 03:11:00 UTC
--
xp_time_stamp :: PU UTCTime
xp_time_stamp =
  xpWrapMaybe (parse_time_stamp, show_time_stamp) xpText
  where
    -- Five hours, expressed in seconds.
    est_offset :: NominalDiffTime
    est_offset = 5 * 60 * 60

    -- Shift from UTC to EST before formatting.
    show_time_stamp :: UTCTime -> String
    show_time_stamp utc =
      formatTime defaultTimeLocale time_stamp_format
        (addUTCTime (negate est_offset) utc)
410
411
-- | Pickler for a 12-hour time of day that has no AM/PM marker,
--   and is therefore ambiguous.
--
--   Examples:
--
--   >>> let tn = text_node "8:00"
--   >>> unpickleDoc xp_ambiguous_time tn
--   Just 1970-01-01 08:00:00 UTC
--
xp_ambiguous_time :: PU UTCTime
xp_ambiguous_time =
  xpWrapMaybe (parse_bare, show_bare) xpText
  where
    -- Unpadded 12-hour hour and minutes, nothing else.
    hour_minute_format :: String
    hour_minute_format = "%-I:%M"

    parse_bare :: String -> Maybe UTCTime
    parse_bare str = parseTime defaultTimeLocale hour_minute_format str

    show_bare :: UTCTime -> String
    show_bare utc = formatTime defaultTimeLocale hour_minute_format utc
434
435
-- | Wrap a string in an 'XmlTree' consisting of a single, childless
--   text node. Handy when testing (un)picklers, since it spares us
--   from building a dummy XML document.
--
--   Examples:
--
--   >>> text_node "8:00"
--   NTree (XText "8:00") []
--
text_node :: String -> XmlTree
text_node = (`NTree` []) . XText
447
448
449
450 --
451 -- * Tasty Tests
452 --
453
-- | All of the tests for this module, collected into one group. It
--   exists primarily to silence the unused import/export warnings
--   for 'unpickleDoc' and 'text_node', which would otherwise appear
--   only in the doctests.
--
pickler_tests :: TestTree
pickler_tests =
  testGroup "Pickler tests" tests
  where
    tests :: [TestTree]
    tests = [ test_pickle_of_unpickle_is_identity ]
463
464
-- | If we unpickle something and then pickle it, we should wind up
--   with the same thing we started with (plus an additional root
--   element).
--
--   The comparison is made inside 'Maybe', so a failed unpickle
--   shows up as an ordinary assertion failure ('Nothing' versus the
--   expected tree) rather than as a pattern-match exception.
--
test_pickle_of_unpickle_is_identity :: TestTree
test_pickle_of_unpickle_is_identity =
  testCase "pickle composed with unpickle is (almost) the identity" $
    actual @?= Just expected
  where
    tn :: XmlTree
    tn = text_node "8:00"

    -- Unpickle, then pickle again if that worked.
    actual :: Maybe XmlTree
    actual =
      fmap (pickleDoc xp_ambiguous_time) (unpickleDoc xp_ambiguous_time tn)

    -- Pickling wraps the text node in a root element named "/".
    expected :: XmlTree
    expected = NTree (XTag (mkName "/") []) [tn]
474 let actual = pickleDoc xp_ambiguous_time utctime
475 let expected = NTree (XTag (mkName "/") []) [tn]
476 actual @?= expected