]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/Picklers.hs
Add the 'xp_attr_option' pickler and use it to fix tests broken by HXT.
[dead/htsn-import.git] / src / TSN / Picklers.hs
1 {-# LANGUAGE ScopedTypeVariables #-}
2
3 -- | (Un)picklers for data types present in The Sports Network XML
4 -- feed.
5 --
6 module TSN.Picklers (
7 pickler_tests,
8 xp_ambiguous_time,
9 xp_attr_option,
10 xp_date,
11 xp_date_padded,
12 xp_datetime,
13 xp_early_line_date,
14 xp_earnings,
15 xp_fracpart_only_double,
16 xp_gamedate,
17 xp_tba_time,
18 xp_time,
19 xp_time_dots,
20 xp_time_stamp )
21 where
22
23 -- System imports.
24 import Data.Char ( toUpper )
25 import Data.List ( intercalate )
26 import Data.List.Split ( chunksOf )
27 import Data.Maybe ( catMaybes, listToMaybe )
28 import Data.String.Utils ( replace )
29 import Data.Time.Clock ( UTCTime )
30 import Data.Time.Format ( formatTime, parseTime )
31 import Data.Tree.NTree.TypeDefs ( NTree(..) )
32 import System.Locale ( TimeLocale( wDays, months ), defaultTimeLocale )
33 import Test.Tasty ( TestTree, testGroup )
34 import Test.Tasty.HUnit ( (@?=), testCase )
35 import Text.Read ( readMaybe )
36 import Text.XML.HXT.Arrow.Pickle (
37 xpText,
38 xpWrap,
39 xpWrapMaybe )
40 import Text.XML.HXT.Arrow.Pickle.Xml ( PU )
41 import Text.XML.HXT.Core (
42 XmlTree,
43 XNode( XTag, XText ),
44 mkName,
45 pickleDoc,
46 unpickleDoc )
47
48 -- Local imports.
49 import TSN.Parse (
50 parse_time_stamp,
51 time_format,
52 time_stamp_format )
53
54
55 -- | The format string for a base date in m/d/yyyy format. The
56 -- day/month are not padded at all. This will match for example,
57 --
58 -- * 2\/15\/1983
59 --
60 -- * 1\/1\/0000
61 --
62 date_format :: String
63 date_format = "%-m/%-d/%Y"
64
65
66 -- | The format string for a base date in mm/dd/yyyy format. The
67 -- day/month are padded to two characters with zeros. This will
68 -- match for example,
69 --
70 -- * 02\/15\/1983
71 --
72 -- * 01\/01\/0000
73 --
74 date_format_padded :: String
75 date_format_padded = "%0m/%0d/%Y"
76
77
78 -- | (Un)pickle a UTCTime without the time portion.
79 --
80 -- /Examples/:
81 --
82 -- This should parse:
83 --
84 -- >>> let tn = text_node "2/15/1983"
85 -- >>> unpickleDoc xp_date tn
86 -- Just 1983-02-15 00:00:00 UTC
87 --
88 -- But for some reason, it can also parse a leading zero in the
89 -- month. Whatever. This isn't required behavior.
90 --
91 -- >>> let tn = text_node "02/15/1983"
92 -- >>> unpickleDoc xp_date tn
93 -- Just 1983-02-15 00:00:00 UTC
94 --
95 xp_date :: PU UTCTime
96 xp_date =
97 (to_date, from_date) `xpWrapMaybe` xpText
98 where
99 to_date :: String -> Maybe UTCTime
100 to_date = parseTime defaultTimeLocale date_format
101
102 from_date :: UTCTime -> String
103 from_date = formatTime defaultTimeLocale date_format
104
105
106 -- | (Un)pickle a UTCTime without the time portion. The day/month are
107 -- padded to two characters with zeros.
108 --
109 -- Examples:
110 --
111 -- >>> let tn = text_node "02/15/1983"
112 -- >>> unpickleDoc xp_date_padded tn
113 -- Just 1983-02-15 00:00:00 UTC
114 --
115 -- >>> let tn = text_node "06/07/2014"
116 -- >>> unpickleDoc xp_date_padded tn
117 -- Just 2014-06-07 00:00:00 UTC
118 --
119 xp_date_padded :: PU UTCTime
120 xp_date_padded =
121 (to_date, from_date) `xpWrapMaybe` xpText
122 where
123 to_date :: String -> Maybe UTCTime
124 to_date = parseTime defaultTimeLocale date_format_padded
125
126 from_date :: UTCTime -> String
127 from_date = formatTime defaultTimeLocale date_format_padded
128
129
130
131 -- | Format a number as a string using a comma as the thousands
132 -- separator.
133 --
134 -- Examples:
135 --
136 -- >>> format_commas 0
137 -- "0"
138 -- >>> format_commas 10
139 -- "10"
140 -- >>> format_commas 100
141 -- "100"
142 -- >>> format_commas 1000
143 -- "1,000"
144 -- >>> format_commas 10000
145 -- "10,000"
146 -- >>> format_commas 100000
147 -- "100,000"
148 -- >>> format_commas 1000000
149 -- "1,000,000"
150 --
151 format_commas :: Int -> String
152 format_commas x =
153 reverse (intercalate "," $ chunksOf 3 $ reverse $ show x)
154
155
156
157 -- | Parse \<Earnings\> from an 'AutoRaceResultsListing'. These are
158 -- essentially 'Int's, but they look like,
159 --
160 -- * \<Earnings\>336,826\</Earnings\>
161 --
162 -- * \<Earnings\>1,000,191\</Earnings\>
163 --
164 -- * \<Earnings\>TBA\</Earnings\>
165 --
166 -- Examples:
167 --
168 -- >>> let tn = text_node "1,000,191"
169 -- >>> unpickleDoc xp_earnings tn
170 -- Just (Just 1000191)
171 --
172 -- >>> let tn = text_node "TBA"
173 -- >>> unpickleDoc xp_earnings tn
174 -- Just Nothing
175 --
176 xp_earnings :: PU (Maybe Int)
177 xp_earnings =
178 (to_earnings, from_earnings) `xpWrap` xpText
179 where
180 strip_commas :: String -> String
181 strip_commas = replace "," ""
182
183 to_earnings :: String -> Maybe Int
184 to_earnings s
185 | s == "TBA" = Nothing
186 | otherwise = Just $ (read . strip_commas) s
187
188 from_earnings :: Maybe Int -> String
189 from_earnings Nothing = "TBA"
190 from_earnings (Just i) = format_commas i
191
192
193
194 -- | Pickle a 'Double' that can be missing its leading zero (for
195 -- values less than one). For example, we've seen,
196 --
197 -- <TrackLength KPH=".805">0.5</TrackLength>
198 --
199 -- Which 'xpPrim' can't handle without the leading
200 -- zero. Unfortunately there's no way pickle/unpickle can be
201 -- inverses of each other here, since \"0.5\" and \".5\" should
202 -- unpickle to the same 'Double'.
203 --
204 -- Examples:
205 --
206 -- >>> let tn = text_node "0.5"
207 -- >>> unpickleDoc xp_fracpart_only_double tn
208 -- Just 0.5
209 --
210 -- >>> let tn = text_node ".5"
211 -- >>> unpickleDoc xp_fracpart_only_double tn
212 -- Just 0.5
213 --
214 -- >>> let tn = text_node "foo"
215 -- >>> unpickleDoc xp_fracpart_only_double tn
216 -- Nothing
217 --
218 xp_fracpart_only_double :: PU Double
219 xp_fracpart_only_double =
220 (to_double, from_double) `xpWrapMaybe` xpText
221 where
222 -- | Convert a 'String' to a 'Double', maybe. We always prepend a
223 -- zero, since it will fix the fraction-only values, and not hurt
224 -- the ones that already have a leading integer.
225 to_double :: String -> Maybe Double
226 to_double s = readMaybe ("0" ++ s)
227
228 from_double :: Double -> String
229 from_double = show
230
231
232
233 -- | (Un)pickle an unpadded 'UTCTime'. Used for example on the
234 -- \<RaceDate\> elements in an 'AutoRaceResults' message.
235 --
236 -- Examples:
237 --
238 -- >>> let tn = text_node "6/1/2014 1:00:00 PM"
239 -- >>> unpickleDoc xp_datetime tn
240 -- Just 2014-06-01 13:00:00 UTC
241 --
242 -- >>> let tn = text_node "5/24/2014 2:45:00 PM"
243 -- >>> unpickleDoc xp_datetime tn
244 -- Just 2014-05-24 14:45:00 UTC
245 --
246 -- Padded! For some reason it works with only one zero in front. I
247 -- dunno man. NOT required (or even desired?) behavior.
248 --
249 -- >>> let tn = text_node "05/24/2014 2:45:00 PM"
250 -- >>> unpickleDoc xp_datetime tn
251 -- Just 2014-05-24 14:45:00 UTC
252 --
253 xp_datetime :: PU UTCTime
254 xp_datetime =
255 (to_datetime, from_datetime) `xpWrapMaybe` xpText
256 where
257 format = date_format ++ " " ++ "%-I:%M:%S %p"
258
259 to_datetime :: String -> Maybe UTCTime
260 to_datetime = parseTime defaultTimeLocale format
261
262 from_datetime :: UTCTime -> String
263 from_datetime = formatTime defaultTimeLocale format
264
265
266
267 -- | Takes a 'UTCTime', and returns the English suffix that would be
268 -- appropriate after the day of the month. For example, if we have a
269 -- UTCTime representing Christmas, this would return \"th\" because
270 -- \"th\" is the right suffix of \"December 25th\".
271 --
272 -- Examples:
273 --
274 -- >>> import Data.Maybe ( fromJust )
275 -- >>> :{
276 -- let parse_date :: String -> Maybe UTCTime
277 -- parse_date = parseTime defaultTimeLocale date_format
278 -- :}
279 --
280 -- >>> let dates = [ "1/" ++ (d : "/1970") | d <- ['1'..'9'] ]
281 -- >>> let suffixes = map (date_suffix . fromJust . parse_date) dates
282 -- >>> suffixes
283 -- ["st","nd","rd","th","th","th","th","th","th"]
284 --
285 date_suffix :: UTCTime -> String
286 date_suffix t =
287 case (reverse daystr) of
288 [] -> []
289 ('1':_) -> "st"
290 ('2':_) -> "nd"
291 ('3':_) -> "rd"
292 _ -> "th"
293 where
294 daystr = formatTime defaultTimeLocale "%d" t
295
296
297 -- | (Un)pickle a UTCTime from a weather forecast's gamedate. Example
298 -- input looks like,
299 --
300 -- When unpickling we get rid of the suffixes \"st\", \"nd\", \"rd\", and
301 -- \"th\". During pickling, we add them back based on the last digit
302 -- of the date.
303 --
304 -- Examples:
305 --
306 -- >>> let tn = text_node "Monday, December 30th"
307 -- >>> let (Just gd) = unpickleDoc xp_gamedate tn
308 -- >>> gd
309 -- 1970-12-30 00:00:00 UTC
310 -- >>> pickleDoc xp_gamedate gd
311 -- NTree (XTag "/" []) [NTree (XText "Wednesday, December 30th") []]
312 --
313 xp_gamedate :: PU UTCTime
314 xp_gamedate =
315 (to_gamedate, from_gamedate) `xpWrapMaybe` xpText
316 where
317 format = "%A, %B %-d"
318
319 to_gamedate :: String -> Maybe UTCTime
320 to_gamedate s =
321 parseTime defaultTimeLocale format s'
322 where
323 s' = case (reverse s) of
324 (c2:c1:cs) -> let suffix = [c1,c2]
325 in
326 if suffix `elem` ["st","nd","rd","th"]
327 then reverse cs
328 else s -- Unknown suffix, leave it alone.
329
330 _ -> s -- The String is less than two characters long,
331 -- leave it alone.
332
333
334 from_gamedate :: UTCTime -> String
335 from_gamedate d = s ++ (date_suffix d)
336 where
337 s = formatTime defaultTimeLocale format d
338
339
340
341
342
343
344
345 -- | (Un)pickle a UTCTime without the date portion. Doesn't work if
346 -- the fields aren't zero-padded to two characters.
347 --
348 -- /Examples/:
349 --
350 -- Padded, should work:
351 --
352 -- >>> let tn = text_node "04:35 PM"
353 -- >>> unpickleDoc xp_time tn
354 -- Just 1970-01-01 16:35:00 UTC
355 --
356 -- Unpadded, should fail:
357 --
358 -- >>> let tn = text_node "4:35 PM"
359 -- >>> unpickleDoc xp_time tn
360 -- Nothing
361 --
362 xp_time :: PU UTCTime
363 xp_time =
364 (to_time, from_time) `xpWrapMaybe` xpText
365 where
366 to_time :: String -> Maybe UTCTime
367 to_time = parseTime defaultTimeLocale time_format
368
369 from_time :: UTCTime -> String
370 from_time = formatTime defaultTimeLocale time_format
371
372
373 -- | (Un)pickle a UTCTime without the date portion. This differs from
374 -- 'xp_time' in that it uses periods in the AM/PM part, i.e. \"A.M.\"
375 -- and \"P.M.\" It also doesn't use padding for the \"hours\" part.
376 --
377 -- /Examples/:
378 --
379 -- A standard example of the correct form:
380 --
381 -- >>> let tn = text_node "11:30 A.M."
382 -- >>> let (Just result) = unpickleDoc xp_time_dots tn
383 -- >>> result
384 -- 1970-01-01 11:30:00 UTC
385 -- >>> pickleDoc xp_time_dots result
386 -- NTree (XTag "/" []) [NTree (XText "11:30 A.M.") []]
387 --
388 -- Another miracle, it still parses with a leading zero!
389 --
390 -- >>> let tn = text_node "01:30 A.M."
391 -- >>> unpickleDoc xp_time_dots tn
392 -- Just 1970-01-01 01:30:00 UTC
393 --
394 xp_time_dots :: PU UTCTime
395 xp_time_dots =
396 (to_time, from_time) `xpWrapMaybe` xpText
397 where
398 -- | The hours arent padded with zeros.
399 nopad_time_format :: String
400 nopad_time_format = "%-I:%M %p"
401
402 to_time :: String -> Maybe UTCTime
403 to_time = (parseTime defaultTimeLocale nopad_time_format) . (replace "." "")
404
405 from_time :: UTCTime -> String
406 from_time t =
407 replace "AM" "A.M." (replace "PM" "P.M." s)
408 where
409 s = formatTime defaultTimeLocale nopad_time_format t
410
411
412 -- | (Un)pickle a UTCTime without the date portion, allowing for a
413 -- value of \"TBA\" (which gets translated to 'Nothing').
414 --
415 -- /Examples/:
416 --
417 -- A failed parse will return 'Nothing':
418 --
419 -- >>> let tn = text_node "YO"
420 -- >>> unpickleDoc xp_tba_time tn
421 -- Just Nothing
422 --
423 -- And so will parsing a \"TBA\":
424 --
425 -- >>> let tn = text_node "TBA"
426 -- >>> unpickleDoc xp_tba_time tn
427 -- Just Nothing
428 --
429 -- But re-pickling 'Nothing' gives only \"TBA\":
430 --
431 -- >>> pickleDoc xp_tba_time Nothing
432 -- NTree (XTag "/" []) [NTree (XText "TBA") []]
433 --
434 -- A normal time is also parsed successfully, of course:
435 --
436 -- >>> let tn = text_node "08:10 PM"
437 -- >>> unpickleDoc xp_tba_time tn
438 -- Just (Just 1970-01-01 20:10:00 UTC)
439 --
440 xp_tba_time :: PU (Maybe UTCTime)
441 xp_tba_time =
442 (to_time, from_time) `xpWrap` xpText
443 where
444 to_time :: String -> Maybe UTCTime
445 to_time s
446 | s == "TBA" = Nothing
447 | otherwise = parseTime defaultTimeLocale time_format s
448
449 from_time :: Maybe UTCTime -> String
450 from_time Nothing = "TBA"
451 from_time (Just t) = formatTime defaultTimeLocale time_format t
452
453
454
455 -- | (Un)pickle the \<time_stamp\> element format to/from a 'UTCTime'.
456 -- The time_stamp elements look something like,
457 --
458 -- \<time_stamp\> January 6, 2014, at 10:11 PM ET \</time_stamp\>
459 --
460 -- TSN doesn't provide a proper time zone name, only \"ET\" for
461 -- \"Eastern Time\". But \"Eastern Time\" changes throughout the
462 -- year, depending on one's location, for daylight-savings
463 -- time. It's really not any more useful to be off by one hour than
464 -- it is to be off by 5 hours, so rather than guess at EDT/EST, we
465 -- just store the timestamp as UTC.
466 --
467 -- Examples:
468 --
469 -- >>> let tn = text_node " January 6, 2014, at 10:11 PM ET "
470 -- >>> let (Just tstamp) = unpickleDoc xp_time_stamp tn
471 -- >>> tstamp
472 -- 2014-01-06 22:11:00 UTC
473 -- >>> pickleDoc xp_time_stamp tstamp
474 -- NTree (XTag "/" []) [NTree (XText " January 6, 2014, at 10:11 PM ET ") []]
475 --
476 xp_time_stamp :: PU UTCTime
477 xp_time_stamp =
478 (parse_time_stamp, from_time_stamp) `xpWrapMaybe` xpText
479 where
480 -- | We have to re-pad the time_stamp_format with a leading and
481 -- trailing space; see the documentation of 'time_stamp_format'
482 -- for more information.
483 from_time_stamp :: UTCTime -> String
484 from_time_stamp =
485 formatTime defaultTimeLocale (" " ++ time_stamp_format ++ " ")
486
487
488
489 -- | (Un)pickle an ambiguous 12-hour AM/PM time, which is ambiguous
490 -- because it's missing the AM/PM part.
491 --
492 -- Examples:
493 --
494 -- >>> let tn = text_node "8:00"
495 -- >>> unpickleDoc xp_ambiguous_time tn
496 -- Just 1970-01-01 08:00:00 UTC
497 --
498 xp_ambiguous_time :: PU UTCTime
499 xp_ambiguous_time =
500 (to_time, from_time) `xpWrapMaybe` xpText
501 where
502 ambiguous_time_format :: String
503 ambiguous_time_format = "%-I:%M"
504
505 to_time :: String -> Maybe UTCTime
506 to_time = parseTime defaultTimeLocale ambiguous_time_format
507
508 from_time :: UTCTime -> String
509 from_time =
510 formatTime defaultTimeLocale ambiguous_time_format
511
512
513 -- | Pickle a date value from a \<date\> element as they appear in the
514 -- early lines. This is a particularly wacky format, but then so is
515 -- the associated time (see 'xp_ambiguous_time').
516 --
517 -- Examples:
518 --
519 -- >>> let tn = text_node "SUNDAY, MAY 25TH (05/25/2014)"
520 -- >>> let (Just result) = unpickleDoc xp_early_line_date tn
521 -- >>> result
522 -- 2014-05-25 00:00:00 UTC
523 -- >>> pickleDoc xp_early_line_date result
524 -- NTree (XTag "/" []) [NTree (XText "SUNDAY, MAY 25TH (05/25/2014)") []]
525 --
526 -- >>> let tn = text_node "SATURDAY, JUNE 7TH (06/07/2014)"
527 -- >>> let (Just result) = unpickleDoc xp_early_line_date tn
528 -- >>> result
529 -- 2014-06-07 00:00:00 UTC
530 -- >>> pickleDoc xp_early_line_date result
531 -- NTree (XTag "/" []) [NTree (XText "SATURDAY, JUNE 7TH (06/07/2014)") []]
532 --
533 xp_early_line_date :: PU UTCTime
534 xp_early_line_date =
535 (to_time, from_time) `xpWrapMaybe` xpText
536 where
537 -- | We need to create our own time locale that talks IN ALL CAPS.
538 -- Actually, 'parseTime' doesn't seem to care about the
539 -- case. But when we spit it back out again ('formatTime'),
540 -- we'll want it to be in all caps.
541 --
542 caps_time_locale :: TimeLocale
543 caps_time_locale =
544 defaultTimeLocale { wDays = caps_days, months = caps_months }
545
546 caps_days :: [(String,String)]
547 caps_days = map both_to_upper (wDays defaultTimeLocale)
548
549 caps_months :: [(String,String)]
550 caps_months = map both_to_upper (months defaultTimeLocale)
551
552 both_to_upper :: (String,String) -> (String,String)
553 both_to_upper (s1,s2) = (map toUpper s1, map toUpper s2)
554
555 wacko_date_formats :: [String]
556 wacko_date_formats =
557 ["%A, %B %-d" ++ suffix ++ " (" ++ date_format_padded ++ ")" |
558 suffix <- ["ST", "ND", "RD","TH"] ]
559
560 to_time :: String -> Maybe UTCTime
561 to_time s =
562 listToMaybe $ catMaybes possible_parses
563 where
564 possible_parses = [ parseTime caps_time_locale fmt s |
565 fmt <- wacko_date_formats ]
566
567 from_time :: UTCTime -> String
568 from_time t =
569 formatTime caps_time_locale fmt t
570 where
571 upper_suffix = map toUpper (date_suffix t)
572 fmt = "%A, %B %-d" ++ upper_suffix ++ " (" ++ date_format_padded ++ ")"
573
574
575 -- | This is a replacement for @xpOption xpFoo@ within an 'xpAttr'.
576 -- There's a bug <https://github.com/UweSchmidt/hxt/issues/39> in
577 -- newer versions of HXT that prevents us from using the usual
578 -- 'xpOption' solution, so this is our stopgap. It should work on
579 -- any type that can be unpickled with a plain read/show.
580 --
581 xp_attr_option :: forall a. (Read a, Show a) => PU (Maybe a)
582 xp_attr_option =
583 (to_a, from_a) `xpWrap` xpText
584 where
585 to_a :: String -> Maybe a
586 to_a = readMaybe
587
588 from_a :: Maybe a -> String
589 from_a Nothing = ""
590 from_a (Just x) = show x
591
592
593 -- | Create an 'XmlTree' containing only the given text. This is
594 -- useful for testing (un)picklers, where we don't want to have to
595 -- bother to create a dummy XML document.
596 --
597 -- Examples:
598 --
599 -- >>> text_node "8:00"
600 -- NTree (XText "8:00") []
601 --
602 text_node :: String -> XmlTree
603 text_node s = NTree (XText s) []
604
605
606
607 --
608 -- * Tasty Tests
609 --
610
611 -- | A list of all tests for this module. This primary exists to
612 -- eliminate the unused import/export warnings for 'unpickleDoc' and
613 -- 'text_node' which are otherwise only used in the doctests.
614 --
615 pickler_tests :: TestTree
616 pickler_tests =
617 testGroup
618 "Pickler tests"
619 [ test_pickle_of_unpickle_is_identity ]
620
621
622 -- | If we unpickle something and then pickle it, we should wind up
623 -- with the same thing we started with (plus an additional root
624 -- element).
625 --
626 test_pickle_of_unpickle_is_identity :: TestTree
627 test_pickle_of_unpickle_is_identity =
628 testCase "pickle composed with unpickle is (almost) the identity" $ do
629 let tn = text_node "8:00"
630 let (Just utctime) = unpickleDoc xp_ambiguous_time tn
631 let actual = pickleDoc xp_ambiguous_time utctime
632 let expected = NTree (XTag (mkName "/") []) [tn]
633 actual @?= expected