]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/Picklers.hs
Allow "TBA" laps in TSN.XML.AutoRacingSchedule.
[dead/htsn-import.git] / src / TSN / Picklers.hs
1 {-# LANGUAGE ScopedTypeVariables #-}
2
3 -- | (Un)picklers for data types present in The Sports Network XML
4 -- feed.
5 --
6 module TSN.Picklers (
7 pickler_tests,
8 xp_ambiguous_time,
9 xp_attr_option,
10 xp_date,
11 xp_date_padded,
12 xp_datetime,
13 xp_early_line_date,
14 xp_earnings,
15 xp_fracpart_only_double,
16 xp_gamedate,
17 xp_tba_int,
18 xp_tba_time,
19 xp_time,
20 xp_time_dots,
21 xp_time_stamp )
22 where
23
24 -- System imports.
25 import Data.Char ( toUpper )
26 import Data.List ( intercalate )
27 import Data.List.Split ( chunksOf )
28 import Data.Maybe ( catMaybes, listToMaybe )
29 import Data.String.Utils ( replace )
30 import Data.Time.Clock ( UTCTime )
31 import Data.Time.Format ( formatTime, parseTime )
32 import Data.Tree.NTree.TypeDefs ( NTree(..) )
33 import System.Locale ( TimeLocale( wDays, months ), defaultTimeLocale )
34 import Test.Tasty ( TestTree, testGroup )
35 import Test.Tasty.HUnit ( (@?=), testCase )
36 import Text.Read ( readMaybe )
37 import Text.XML.HXT.Arrow.Pickle (
38 xpText,
39 xpWrap,
40 xpWrapMaybe )
41 import Text.XML.HXT.Arrow.Pickle.Xml ( PU )
42 import Text.XML.HXT.Core (
43 XmlTree,
44 XNode( XTag, XText ),
45 mkName,
46 pickleDoc,
47 unpickleDoc )
48
49 -- Local imports.
50 import TSN.Parse (
51 parse_time_stamp,
52 time_format,
53 time_stamp_format )
54
55
56 -- | The format string for a base date in m/d/yyyy format. The
57 -- day/month are not padded at all. This will match for example,
58 --
59 -- * 2\/15\/1983
60 --
61 -- * 1\/1\/0000
62 --
63 date_format :: String
64 date_format = "%-m/%-d/%Y"
65
66
67 -- | The format string for a base date in mm/dd/yyyy format. The
68 -- day/month are padded to two characters with zeros. This will
69 -- match for example,
70 --
71 -- * 02\/15\/1983
72 --
73 -- * 01\/01\/0000
74 --
75 date_format_padded :: String
76 date_format_padded = "%0m/%0d/%Y"
77
78
79 -- | (Un)pickle a UTCTime without the time portion.
80 --
81 -- /Examples/:
82 --
83 -- This should parse:
84 --
85 -- >>> let tn = text_node "2/15/1983"
86 -- >>> unpickleDoc xp_date tn
87 -- Just 1983-02-15 00:00:00 UTC
88 --
89 -- But for some reason, it can also parse a leading zero in the
90 -- month. Whatever. This isn't required behavior.
91 --
92 -- >>> let tn = text_node "02/15/1983"
93 -- >>> unpickleDoc xp_date tn
94 -- Just 1983-02-15 00:00:00 UTC
95 --
96 xp_date :: PU UTCTime
97 xp_date =
98 (to_date, from_date) `xpWrapMaybe` xpText
99 where
100 to_date :: String -> Maybe UTCTime
101 to_date = parseTime defaultTimeLocale date_format
102
103 from_date :: UTCTime -> String
104 from_date = formatTime defaultTimeLocale date_format
105
106
107 -- | (Un)pickle a UTCTime without the time portion. The day/month are
108 -- padded to two characters with zeros.
109 --
110 -- Examples:
111 --
112 -- >>> let tn = text_node "02/15/1983"
113 -- >>> unpickleDoc xp_date_padded tn
114 -- Just 1983-02-15 00:00:00 UTC
115 --
116 -- >>> let tn = text_node "06/07/2014"
117 -- >>> unpickleDoc xp_date_padded tn
118 -- Just 2014-06-07 00:00:00 UTC
119 --
120 xp_date_padded :: PU UTCTime
121 xp_date_padded =
122 (to_date, from_date) `xpWrapMaybe` xpText
123 where
124 to_date :: String -> Maybe UTCTime
125 to_date = parseTime defaultTimeLocale date_format_padded
126
127 from_date :: UTCTime -> String
128 from_date = formatTime defaultTimeLocale date_format_padded
129
130
131
132 -- | Format a number as a string using a comma as the thousands
133 -- separator.
134 --
135 -- Examples:
136 --
137 -- >>> format_commas 0
138 -- "0"
139 -- >>> format_commas 10
140 -- "10"
141 -- >>> format_commas 100
142 -- "100"
143 -- >>> format_commas 1000
144 -- "1,000"
145 -- >>> format_commas 10000
146 -- "10,000"
147 -- >>> format_commas 100000
148 -- "100,000"
149 -- >>> format_commas 1000000
150 -- "1,000,000"
151 --
152 format_commas :: Int -> String
153 format_commas x =
154 reverse (intercalate "," $ chunksOf 3 $ reverse $ show x)
155
156
157
158 -- | Parse \<Earnings\> from an 'AutoRaceResultsListing'. These are
159 -- essentially 'Int's, but they look like,
160 --
161 -- * \<Earnings\>336,826\</Earnings\>
162 --
163 -- * \<Earnings\>1,000,191\</Earnings\>
164 --
165 -- * \<Earnings\>TBA\</Earnings\>
166 --
167 -- Examples:
168 --
169 -- >>> let tn = text_node "1,000,191"
170 -- >>> unpickleDoc xp_earnings tn
171 -- Just (Just 1000191)
172 --
173 -- >>> let tn = text_node "TBA"
174 -- >>> unpickleDoc xp_earnings tn
175 -- Just Nothing
176 --
177 xp_earnings :: PU (Maybe Int)
178 xp_earnings =
179 (to_earnings, from_earnings) `xpWrap` xpText
180 where
181 strip_commas :: String -> String
182 strip_commas = replace "," ""
183
184 to_earnings :: String -> Maybe Int
185 to_earnings s
186 | s == "TBA" = Nothing
187 | otherwise = Just $ (read . strip_commas) s
188
189 from_earnings :: Maybe Int -> String
190 from_earnings Nothing = "TBA"
191 from_earnings (Just i) = format_commas i
192
193
194
195 -- | Pickle a 'Double' that can be missing its leading zero (for
196 -- values less than one). For example, we've seen,
197 --
198 -- <TrackLength KPH=".805">0.5</TrackLength>
199 --
200 -- Which 'xpPrim' can't handle without the leading
201 -- zero. Unfortunately there's no way pickle/unpickle can be
202 -- inverses of each other here, since \"0.5\" and \".5\" should
203 -- unpickle to the same 'Double'.
204 --
205 -- Examples:
206 --
207 -- >>> let tn = text_node "0.5"
208 -- >>> unpickleDoc xp_fracpart_only_double tn
209 -- Just 0.5
210 --
211 -- >>> let tn = text_node ".5"
212 -- >>> unpickleDoc xp_fracpart_only_double tn
213 -- Just 0.5
214 --
215 -- >>> let tn = text_node "foo"
216 -- >>> unpickleDoc xp_fracpart_only_double tn
217 -- Nothing
218 --
219 xp_fracpart_only_double :: PU Double
220 xp_fracpart_only_double =
221 (to_double, from_double) `xpWrapMaybe` xpText
222 where
223 -- | Convert a 'String' to a 'Double', maybe. We always prepend a
224 -- zero, since it will fix the fraction-only values, and not hurt
225 -- the ones that already have a leading integer.
226 to_double :: String -> Maybe Double
227 to_double s = readMaybe ("0" ++ s)
228
229 from_double :: Double -> String
230 from_double = show
231
232
233
234 -- | (Un)pickle an unpadded 'UTCTime'. Used for example on the
235 -- \<RaceDate\> elements in an 'AutoRaceResults' message.
236 --
237 -- Examples:
238 --
239 -- >>> let tn = text_node "6/1/2014 1:00:00 PM"
240 -- >>> unpickleDoc xp_datetime tn
241 -- Just 2014-06-01 13:00:00 UTC
242 --
243 -- >>> let tn = text_node "5/24/2014 2:45:00 PM"
244 -- >>> unpickleDoc xp_datetime tn
245 -- Just 2014-05-24 14:45:00 UTC
246 --
247 -- Padded! For some reason it works with only one zero in front. I
248 -- dunno man. NOT required (or even desired?) behavior.
249 --
250 -- >>> let tn = text_node "05/24/2014 2:45:00 PM"
251 -- >>> unpickleDoc xp_datetime tn
252 -- Just 2014-05-24 14:45:00 UTC
253 --
254 xp_datetime :: PU UTCTime
255 xp_datetime =
256 (to_datetime, from_datetime) `xpWrapMaybe` xpText
257 where
258 format = date_format ++ " " ++ "%-I:%M:%S %p"
259
260 to_datetime :: String -> Maybe UTCTime
261 to_datetime = parseTime defaultTimeLocale format
262
263 from_datetime :: UTCTime -> String
264 from_datetime = formatTime defaultTimeLocale format
265
266
267
268 -- | Takes a 'UTCTime', and returns the English suffix that would be
269 -- appropriate after the day of the month. For example, if we have a
270 -- UTCTime representing Christmas, this would return \"th\" because
271 -- \"th\" is the right suffix of \"December 25th\".
272 --
273 -- Examples:
274 --
275 -- >>> import Data.Maybe ( fromJust )
276 -- >>> :{
277 -- let parse_date :: String -> Maybe UTCTime
278 -- parse_date = parseTime defaultTimeLocale date_format
279 -- :}
280 --
281 -- >>> let dates = [ "1/" ++ (d : "/1970") | d <- ['1'..'9'] ]
282 -- >>> let suffixes = map (date_suffix . fromJust . parse_date) dates
283 -- >>> suffixes
284 -- ["st","nd","rd","th","th","th","th","th","th"]
285 --
286 date_suffix :: UTCTime -> String
287 date_suffix t =
288 case (reverse daystr) of
289 [] -> []
290 ('1':_) -> "st"
291 ('2':_) -> "nd"
292 ('3':_) -> "rd"
293 _ -> "th"
294 where
295 daystr = formatTime defaultTimeLocale "%d" t
296
297
298 -- | (Un)pickle a UTCTime from a weather forecast's gamedate. Example
299 -- input looks like,
300 --
301 -- When unpickling we get rid of the suffixes \"st\", \"nd\", \"rd\", and
302 -- \"th\". During pickling, we add them back based on the last digit
303 -- of the date.
304 --
305 -- Examples:
306 --
307 -- >>> let tn = text_node "Monday, December 30th"
308 -- >>> let (Just gd) = unpickleDoc xp_gamedate tn
309 -- >>> gd
310 -- 1970-12-30 00:00:00 UTC
311 -- >>> pickleDoc xp_gamedate gd
312 -- NTree (XTag "/" []) [NTree (XText "Wednesday, December 30th") []]
313 --
314 xp_gamedate :: PU UTCTime
315 xp_gamedate =
316 (to_gamedate, from_gamedate) `xpWrapMaybe` xpText
317 where
318 format = "%A, %B %-d"
319
320 to_gamedate :: String -> Maybe UTCTime
321 to_gamedate s =
322 parseTime defaultTimeLocale format s'
323 where
324 s' = case (reverse s) of
325 (c2:c1:cs) -> let suffix = [c1,c2]
326 in
327 if suffix `elem` ["st","nd","rd","th"]
328 then reverse cs
329 else s -- Unknown suffix, leave it alone.
330
331 _ -> s -- The String is less than two characters long,
332 -- leave it alone.
333
334
335 from_gamedate :: UTCTime -> String
336 from_gamedate d = s ++ (date_suffix d)
337 where
338 s = formatTime defaultTimeLocale format d
339
340
341
342
343
344
345
346 -- | (Un)pickle a UTCTime without the date portion. Doesn't work if
347 -- the fields aren't zero-padded to two characters.
348 --
349 -- /Examples/:
350 --
351 -- Padded, should work:
352 --
353 -- >>> let tn = text_node "04:35 PM"
354 -- >>> unpickleDoc xp_time tn
355 -- Just 1970-01-01 16:35:00 UTC
356 --
357 -- Unpadded, should fail:
358 --
359 -- >>> let tn = text_node "4:35 PM"
360 -- >>> unpickleDoc xp_time tn
361 -- Nothing
362 --
363 xp_time :: PU UTCTime
364 xp_time =
365 (to_time, from_time) `xpWrapMaybe` xpText
366 where
367 to_time :: String -> Maybe UTCTime
368 to_time = parseTime defaultTimeLocale time_format
369
370 from_time :: UTCTime -> String
371 from_time = formatTime defaultTimeLocale time_format
372
373
374 -- | (Un)pickle a UTCTime without the date portion. This differs from
375 -- 'xp_time' in that it uses periods in the AM/PM part, i.e. \"A.M.\"
376 -- and \"P.M.\" It also doesn't use padding for the \"hours\" part.
377 --
378 -- /Examples/:
379 --
380 -- A standard example of the correct form:
381 --
382 -- >>> let tn = text_node "11:30 A.M."
383 -- >>> let (Just result) = unpickleDoc xp_time_dots tn
384 -- >>> result
385 -- 1970-01-01 11:30:00 UTC
386 -- >>> pickleDoc xp_time_dots result
387 -- NTree (XTag "/" []) [NTree (XText "11:30 A.M.") []]
388 --
389 -- Another miracle, it still parses with a leading zero!
390 --
391 -- >>> let tn = text_node "01:30 A.M."
392 -- >>> unpickleDoc xp_time_dots tn
393 -- Just 1970-01-01 01:30:00 UTC
394 --
395 xp_time_dots :: PU UTCTime
396 xp_time_dots =
397 (to_time, from_time) `xpWrapMaybe` xpText
398 where
399 -- | The hours arent padded with zeros.
400 nopad_time_format :: String
401 nopad_time_format = "%-I:%M %p"
402
403 to_time :: String -> Maybe UTCTime
404 to_time = (parseTime defaultTimeLocale nopad_time_format) . (replace "." "")
405
406 from_time :: UTCTime -> String
407 from_time t =
408 replace "AM" "A.M." (replace "PM" "P.M." s)
409 where
410 s = formatTime defaultTimeLocale nopad_time_format t
411
412
413 -- | (Un)pickle a UTCTime without the date portion, allowing for a
414 -- value of \"TBA\" (which gets translated to 'Nothing').
415 --
416 -- /Examples/:
417 --
418 -- A failed parse will return 'Nothing':
419 --
420 -- >>> let tn = text_node "YO"
421 -- >>> unpickleDoc xp_tba_time tn
422 -- Just Nothing
423 --
424 -- And so will parsing a \"TBA\":
425 --
426 -- >>> let tn = text_node "TBA"
427 -- >>> unpickleDoc xp_tba_time tn
428 -- Just Nothing
429 --
430 -- But re-pickling 'Nothing' gives only \"TBA\":
431 --
432 -- >>> pickleDoc xp_tba_time Nothing
433 -- NTree (XTag "/" []) [NTree (XText "TBA") []]
434 --
435 -- A normal time is also parsed successfully, of course:
436 --
437 -- >>> let tn = text_node "08:10 PM"
438 -- >>> unpickleDoc xp_tba_time tn
439 -- Just (Just 1970-01-01 20:10:00 UTC)
440 --
441 xp_tba_time :: PU (Maybe UTCTime)
442 xp_tba_time =
443 (to_time, from_time) `xpWrap` xpText
444 where
445 to_time :: String -> Maybe UTCTime
446 to_time s
447 | s == "TBA" = Nothing
448 | otherwise = parseTime defaultTimeLocale time_format s
449
450 from_time :: Maybe UTCTime -> String
451 from_time Nothing = "TBA"
452 from_time (Just t) = formatTime defaultTimeLocale time_format t
453
454
455 -- | (Un)pickle a 'Int', allowing for a value of \"TBA\" (which gets
456 -- translated to 'Nothing').
457 --
458 -- /Examples/:
459 --
460 -- A failed parse will return 'Nothing':
461 --
462 -- >>> let tn = text_node "YO"
463 -- >>> unpickleDoc xp_tba_int tn
464 -- Just Nothing
465 --
466 -- And so will parsing a \"TBA\":
467 --
468 -- >>> let tn = text_node "TBA"
469 -- >>> unpickleDoc xp_tba_int tn
470 -- Just Nothing
471 --
472 -- But re-pickling 'Nothing' gives only \"TBA\":
473 --
474 -- >>> pickleDoc xp_tba_int Nothing
475 -- NTree (XTag "/" []) [NTree (XText "TBA") []]
476 --
477 -- A normal integer is also parsed successfully, of course:
478 --
479 -- >>> let tn = text_node "110"
480 -- >>> unpickleDoc xp_tba_int tn
481 -- Just (Just 110)
482 --
483 xp_tba_int :: PU (Maybe Int)
484 xp_tba_int =
485 (to_int, from_int) `xpWrap` xpText
486 where
487 to_int :: String -> Maybe Int
488 to_int = readMaybe
489
490 from_int :: Maybe Int -> String
491 from_int Nothing = "TBA"
492 from_int (Just t) = show t
493
494
495
496 -- | (Un)pickle the \<time_stamp\> element format to/from a 'UTCTime'.
497 -- The time_stamp elements look something like,
498 --
499 -- \<time_stamp\> January 6, 2014, at 10:11 PM ET \</time_stamp\>
500 --
501 -- TSN doesn't provide a proper time zone name, only \"ET\" for
502 -- \"Eastern Time\". But \"Eastern Time\" changes throughout the
503 -- year, depending on one's location, for daylight-savings
504 -- time. It's really not any more useful to be off by one hour than
505 -- it is to be off by 5 hours, so rather than guess at EDT/EST, we
506 -- just store the timestamp as UTC.
507 --
508 -- Examples:
509 --
510 -- >>> let tn = text_node " January 6, 2014, at 10:11 PM ET "
511 -- >>> let (Just tstamp) = unpickleDoc xp_time_stamp tn
512 -- >>> tstamp
513 -- 2014-01-06 22:11:00 UTC
514 -- >>> pickleDoc xp_time_stamp tstamp
515 -- NTree (XTag "/" []) [NTree (XText " January 6, 2014, at 10:11 PM ET ") []]
516 --
517 xp_time_stamp :: PU UTCTime
518 xp_time_stamp =
519 (parse_time_stamp, from_time_stamp) `xpWrapMaybe` xpText
520 where
521 -- | We have to re-pad the time_stamp_format with a leading and
522 -- trailing space; see the documentation of 'time_stamp_format'
523 -- for more information.
524 from_time_stamp :: UTCTime -> String
525 from_time_stamp =
526 formatTime defaultTimeLocale (" " ++ time_stamp_format ++ " ")
527
528
529
530 -- | (Un)pickle an ambiguous 12-hour AM/PM time, which is ambiguous
531 -- because it's missing the AM/PM part.
532 --
533 -- Examples:
534 --
535 -- >>> let tn = text_node "8:00"
536 -- >>> unpickleDoc xp_ambiguous_time tn
537 -- Just 1970-01-01 08:00:00 UTC
538 --
539 xp_ambiguous_time :: PU UTCTime
540 xp_ambiguous_time =
541 (to_time, from_time) `xpWrapMaybe` xpText
542 where
543 ambiguous_time_format :: String
544 ambiguous_time_format = "%-I:%M"
545
546 to_time :: String -> Maybe UTCTime
547 to_time = parseTime defaultTimeLocale ambiguous_time_format
548
549 from_time :: UTCTime -> String
550 from_time =
551 formatTime defaultTimeLocale ambiguous_time_format
552
553
554 -- | Pickle a date value from a \<date\> element as they appear in the
555 -- early lines. This is a particularly wacky format, but then so is
556 -- the associated time (see 'xp_ambiguous_time').
557 --
558 -- Examples:
559 --
560 -- >>> let tn = text_node "SUNDAY, MAY 25TH (05/25/2014)"
561 -- >>> let (Just result) = unpickleDoc xp_early_line_date tn
562 -- >>> result
563 -- 2014-05-25 00:00:00 UTC
564 -- >>> pickleDoc xp_early_line_date result
565 -- NTree (XTag "/" []) [NTree (XText "SUNDAY, MAY 25TH (05/25/2014)") []]
566 --
567 -- >>> let tn = text_node "SATURDAY, JUNE 7TH (06/07/2014)"
568 -- >>> let (Just result) = unpickleDoc xp_early_line_date tn
569 -- >>> result
570 -- 2014-06-07 00:00:00 UTC
571 -- >>> pickleDoc xp_early_line_date result
572 -- NTree (XTag "/" []) [NTree (XText "SATURDAY, JUNE 7TH (06/07/2014)") []]
573 --
574 xp_early_line_date :: PU UTCTime
575 xp_early_line_date =
576 (to_time, from_time) `xpWrapMaybe` xpText
577 where
578 -- | We need to create our own time locale that talks IN ALL CAPS.
579 -- Actually, 'parseTime' doesn't seem to care about the
580 -- case. But when we spit it back out again ('formatTime'),
581 -- we'll want it to be in all caps.
582 --
583 caps_time_locale :: TimeLocale
584 caps_time_locale =
585 defaultTimeLocale { wDays = caps_days, months = caps_months }
586
587 caps_days :: [(String,String)]
588 caps_days = map both_to_upper (wDays defaultTimeLocale)
589
590 caps_months :: [(String,String)]
591 caps_months = map both_to_upper (months defaultTimeLocale)
592
593 both_to_upper :: (String,String) -> (String,String)
594 both_to_upper (s1,s2) = (map toUpper s1, map toUpper s2)
595
596 wacko_date_formats :: [String]
597 wacko_date_formats =
598 ["%A, %B %-d" ++ suffix ++ " (" ++ date_format_padded ++ ")" |
599 suffix <- ["ST", "ND", "RD","TH"] ]
600
601 to_time :: String -> Maybe UTCTime
602 to_time s =
603 listToMaybe $ catMaybes possible_parses
604 where
605 possible_parses = [ parseTime caps_time_locale fmt s |
606 fmt <- wacko_date_formats ]
607
608 from_time :: UTCTime -> String
609 from_time t =
610 formatTime caps_time_locale fmt t
611 where
612 upper_suffix = map toUpper (date_suffix t)
613 fmt = "%A, %B %-d" ++ upper_suffix ++ " (" ++ date_format_padded ++ ")"
614
615
616 -- | This is a replacement for @xpOption xpFoo@ within an 'xpAttr'.
617 -- There's a bug <https://github.com/UweSchmidt/hxt/issues/39> in
618 -- newer versions of HXT that prevents us from using the usual
619 -- 'xpOption' solution, so this is our stopgap. It should work on
620 -- any type that can be unpickled with a plain read/show.
621 --
622 xp_attr_option :: forall a. (Read a, Show a) => PU (Maybe a)
623 xp_attr_option =
624 (to_a, from_a) `xpWrap` xpText
625 where
626 to_a :: String -> Maybe a
627 to_a = readMaybe
628
629 from_a :: Maybe a -> String
630 from_a Nothing = ""
631 from_a (Just x) = show x
632
633
634 -- | Create an 'XmlTree' containing only the given text. This is
635 -- useful for testing (un)picklers, where we don't want to have to
636 -- bother to create a dummy XML document.
637 --
638 -- Examples:
639 --
640 -- >>> text_node "8:00"
641 -- NTree (XText "8:00") []
642 --
643 text_node :: String -> XmlTree
644 text_node s = NTree (XText s) []
645
646
647
648 --
649 -- * Tasty Tests
650 --
651
652 -- | A list of all tests for this module. This primary exists to
653 -- eliminate the unused import/export warnings for 'unpickleDoc' and
654 -- 'text_node' which are otherwise only used in the doctests.
655 --
656 pickler_tests :: TestTree
657 pickler_tests =
658 testGroup
659 "Pickler tests"
660 [ test_pickle_of_unpickle_is_identity ]
661
662
663 -- | If we unpickle something and then pickle it, we should wind up
664 -- with the same thing we started with (plus an additional root
665 -- element).
666 --
667 test_pickle_of_unpickle_is_identity :: TestTree
668 test_pickle_of_unpickle_is_identity =
669 testCase "pickle composed with unpickle is (almost) the identity" $ do
670 let tn = text_node "8:00"
671 let (Just utctime) = unpickleDoc xp_ambiguous_time tn
672 let actual = pickleDoc xp_ambiguous_time utctime
673 let expected = NTree (XTag (mkName "/") []) [tn]
674 actual @?= expected