]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/Picklers.hs
5e620699bbfdc43e53181511493be47f433349ba
[dead/htsn-import.git] / src / TSN / Picklers.hs
1 -- | (Un)picklers for data types present in The Sports Network XML
2 -- feed.
3 --
4 module TSN.Picklers (
5 pickler_tests,
6 xp_ambiguous_time,
7 xp_date,
8 xp_date_padded,
9 xp_datetime,
10 xp_early_line_date,
11 xp_earnings,
12 xp_fracpart_only_double,
13 xp_gamedate,
14 xp_tba_time,
15 xp_time,
16 xp_time_dots,
17 xp_time_stamp )
18 where
19
20 -- System imports.
21 import Data.Char ( toUpper )
22 import Data.List ( intercalate )
23 import Data.List.Split ( chunksOf )
24 import Data.Maybe ( catMaybes, listToMaybe )
25 import Data.String.Utils ( replace )
26 import Data.Time.Clock ( UTCTime )
27 import Data.Time.Format ( formatTime, parseTime )
28 import Data.Tree.NTree.TypeDefs ( NTree(..) )
29 import System.Locale ( TimeLocale( wDays, months ), defaultTimeLocale )
30 import Test.Tasty ( TestTree, testGroup )
31 import Test.Tasty.HUnit ( (@?=), testCase )
32 import Text.Read ( readMaybe )
33 import Text.XML.HXT.Arrow.Pickle (
34 xpText,
35 xpWrap,
36 xpWrapMaybe )
37 import Text.XML.HXT.Arrow.Pickle.Xml ( PU )
38 import Text.XML.HXT.Core (
39 XmlTree,
40 XNode( XTag, XText ),
41 mkName,
42 pickleDoc,
43 unpickleDoc )
44
45 -- Local imports.
46 import TSN.Parse (
47 parse_time_stamp,
48 time_format,
49 time_stamp_format )
50
51
-- | Format string describing a bare date written as m/d/yyyy, where
--   neither the month nor the day is padded. Strings of the
--   following shape match it:
--
--   * 2\/15\/1983
--
--   * 1\/1\/0000
--
date_format :: String
date_format = "%-m/%-d/%Y"
61
62
-- | Format string describing a bare date written as mm/dd/yyyy. Here
--   both the month and the day are zero-padded to a width of two
--   characters, so strings of the following shape match it:
--
--   * 02\/15\/1983
--
--   * 01\/01\/0000
--
date_format_padded :: String
date_format_padded = "%0m/%0d/%Y"
73
74
-- | Pickler for a 'UTCTime' that only carries a date; the text is
--   read and written using 'date_format'.
--
--   /Examples/:
--
--   The unpadded form is what we expect to see:
--
--   >>> let tn = text_node "2/15/1983"
--   >>> unpickleDoc xp_date tn
--   Just 1983-02-15 00:00:00 UTC
--
--   A leading zero in the month happens to be accepted as well. We
--   don't rely on that; it is merely what the parser does.
--
--   >>> let tn = text_node "02/15/1983"
--   >>> unpickleDoc xp_date tn
--   Just 1983-02-15 00:00:00 UTC
--
xp_date :: PU UTCTime
xp_date =
  xpWrapMaybe (parse_day, show_day) xpText
  where
    -- | Read the text with 'date_format'; 'Nothing' if it doesn't fit.
    parse_day :: String -> Maybe UTCTime
    parse_day str = parseTime defaultTimeLocale date_format str

    -- | Write the time back out with 'date_format'.
    show_day :: UTCTime -> String
    show_day utc = formatTime defaultTimeLocale date_format utc
101
102
-- | Pickler for a 'UTCTime' that only carries a date, using
--   'date_format_padded': month and day are zero-padded to two
--   characters.
--
--   Examples:
--
--   >>> let tn = text_node "02/15/1983"
--   >>> unpickleDoc xp_date_padded tn
--   Just 1983-02-15 00:00:00 UTC
--
--   >>> let tn = text_node "06/07/2014"
--   >>> unpickleDoc xp_date_padded tn
--   Just 2014-06-07 00:00:00 UTC
--
xp_date_padded :: PU UTCTime
xp_date_padded =
  xpWrapMaybe (parse_padded, show_padded) xpText
  where
    -- | Read the text with 'date_format_padded'; 'Nothing' on failure.
    parse_padded :: String -> Maybe UTCTime
    parse_padded str = parseTime defaultTimeLocale date_format_padded str

    -- | Write the time back out with 'date_format_padded'.
    show_padded :: UTCTime -> String
    show_padded utc = formatTime defaultTimeLocale date_format_padded utc
125
126
127
-- | Format a number as a string using a comma as the thousands
--   separator. A leading minus sign is kept out of the digit
--   grouping, so negative numbers are grouped exactly like their
--   absolute values.
--
--   Examples:
--
--   >>> format_commas 0
--   "0"
--   >>> format_commas 10
--   "10"
--   >>> format_commas 100
--   "100"
--   >>> format_commas 1000
--   "1,000"
--   >>> format_commas 10000
--   "10,000"
--   >>> format_commas 100000
--   "100,000"
--   >>> format_commas 1000000
--   "1,000,000"
--   >>> format_commas (-100)
--   "-100"
--   >>> format_commas (-1000)
--   "-1,000"
--
format_commas :: Int -> String
format_commas x
  -- Work on the shown string rather than negating the number, so that
  -- 'minBound' (whose negation overflows) is handled too.
  | x < 0     = '-' : group_digits (drop 1 shown)
  | otherwise = group_digits shown
  where
    shown :: String
    shown = show x

    -- | Insert a comma between every group of three digits, counting
    --   from the right-hand side.
    group_digits :: String -> String
    group_digits = reverse . intercalate "," . chunksOf 3 . reverse
151
152
153
-- | Parse \<Earnings\> from an 'AutoRaceResultsListing'. These are
--   essentially 'Int's, but they look like,
--
--   * \<Earnings\>336,826\</Earnings\>
--
--   * \<Earnings\>1,000,191\</Earnings\>
--
--   * \<Earnings\>TBA\</Earnings\>
--
--   The literal text \"TBA\" unpickles to 'Nothing'. Anything that
--   is neither \"TBA\" nor a (comma-separated) integer makes the
--   unpickling itself fail, rather than throwing an exception from a
--   partial 'read' when the value is eventually forced.
--
--   Examples:
--
--   >>> let tn = text_node "1,000,191"
--   >>> unpickleDoc xp_earnings tn
--   Just (Just 1000191)
--
--   >>> let tn = text_node "TBA"
--   >>> unpickleDoc xp_earnings tn
--   Just Nothing
--
--   >>> let tn = text_node "a lot"
--   >>> unpickleDoc xp_earnings tn
--   Nothing
--
xp_earnings :: PU (Maybe Int)
xp_earnings =
  (to_earnings, from_earnings) `xpWrapMaybe` xpText
  where
    -- | Drop the thousands separators so that the rest can be read
    --   as a plain 'Int'.
    strip_commas :: String -> String
    strip_commas = replace "," ""

    -- | The outer 'Maybe' signals whether or not the text was
    --   understood at all; the inner one is the (possibly \"TBA\")
    --   value.
    to_earnings :: String -> Maybe (Maybe Int)
    to_earnings s
      | s == "TBA" = Just Nothing
      | otherwise  = fmap Just (readMaybe (strip_commas s))

    -- | Inverse of 'to_earnings': \"TBA\" for a missing value,
    --   otherwise the number with its commas put back.
    from_earnings :: Maybe Int -> String
    from_earnings Nothing  = "TBA"
    from_earnings (Just i) = format_commas i
188
189
190
-- | Pickle a 'Double' that can be missing its leading zero (for
--   values of magnitude less than one). For example, we've seen,
--
--   \<TrackLength KPH=\".805\"\>0.5\</TrackLength\>
--
--   Which 'xpPrim' can't handle without the leading
--   zero. Unfortunately there's no way pickle/unpickle can be
--   inverses of each other here, since \"0.5\" and \".5\" should
--   unpickle to the same 'Double'.
--
--   A leading minus sign is handled separately from the digits, so
--   that negative values (which the pickling direction can produce)
--   can be read back in.
--
--   Examples:
--
--   >>> let tn = text_node "0.5"
--   >>> unpickleDoc xp_fracpart_only_double tn
--   Just 0.5
--
--   >>> let tn = text_node ".5"
--   >>> unpickleDoc xp_fracpart_only_double tn
--   Just 0.5
--
--   >>> let tn = text_node "-.5"
--   >>> unpickleDoc xp_fracpart_only_double tn
--   Just (-0.5)
--
--   >>> let tn = text_node "foo"
--   >>> unpickleDoc xp_fracpart_only_double tn
--   Nothing
--
xp_fracpart_only_double :: PU Double
xp_fracpart_only_double =
  (to_double, from_double) `xpWrapMaybe` xpText
  where
    -- | Convert a 'String' to a 'Double', maybe. We always prepend a
    --   zero to the digits, since it will fix the fraction-only
    --   values, and not hurt the ones that already have a leading
    --   integer. The zero has to go after a minus sign, if there is
    --   one; a lone \"-\" is not a number and falls through to the
    --   second equation, where it fails to parse.
    to_double :: String -> Maybe Double
    to_double ('-':rest@(_:_)) = fmap negate (readMaybe ('0' : rest))
    to_double s                = readMaybe ('0' : s)

    -- | We always write the full representation, leading zero
    --   included.
    from_double :: Double -> String
    from_double = show
227
228
229
-- | Pickler for a 'UTCTime' written as an unpadded date followed by a
--   12-hour clock time with seconds and an AM\/PM marker. The
--   \<RaceDate\> elements in an 'AutoRaceResults' message look like
--   this, for example.
--
--   Examples:
--
--   >>> let tn = text_node "6/1/2014 1:00:00 PM"
--   >>> unpickleDoc xp_datetime tn
--   Just 2014-06-01 13:00:00 UTC
--
--   >>> let tn = text_node "5/24/2014 2:45:00 PM"
--   >>> unpickleDoc xp_datetime tn
--   Just 2014-05-24 14:45:00 UTC
--
--   A zero-padded month is accepted too. That is neither required nor
--   necessarily wanted; it's simply how the parser behaves.
--
--   >>> let tn = text_node "05/24/2014 2:45:00 PM"
--   >>> unpickleDoc xp_datetime tn
--   Just 2014-05-24 14:45:00 UTC
--
xp_datetime :: PU UTCTime
xp_datetime =
  xpWrapMaybe (parse_datetime, show_datetime) xpText
  where
    -- | The date part is 'date_format'; the time part is tacked on
    --   after a single space.
    datetime_format :: String
    datetime_format = date_format ++ " " ++ "%-I:%M:%S %p"

    parse_datetime :: String -> Maybe UTCTime
    parse_datetime str = parseTime defaultTimeLocale datetime_format str

    show_datetime :: UTCTime -> String
    show_datetime utc = formatTime defaultTimeLocale datetime_format utc
261
262
263
-- | Takes a 'UTCTime', and returns the English suffix that would be
--   appropriate after the day of the month. For example, if we have a
--   UTCTime representing Christmas, this would return \"th\" because
--   \"th\" is the right suffix of \"December 25th\".
--
--   The 11th, 12th, and 13th are special: they take \"th\" even
--   though they end in 1, 2, and 3.
--
--   Examples:
--
--   >>> import Data.Maybe ( fromJust )
--   >>> :{
--     let parse_date :: String -> Maybe UTCTime
--         parse_date = parseTime defaultTimeLocale date_format
--   :}
--
--   >>> let dates = [ "1/" ++ (d : "/1970") | d <- ['1'..'9'] ]
--   >>> let suffixes = map (date_suffix . fromJust . parse_date) dates
--   >>> suffixes
--   ["st","nd","rd","th","th","th","th","th","th"]
--
--   >>> let teens = [ "1/" ++ d ++ "/1970" | d <- ["11","12","13"] ]
--   >>> map (date_suffix . fromJust . parse_date) teens
--   ["th","th","th"]
--
--   >>> let twenties = [ "1/" ++ d ++ "/1970" | d <- ["21","22","23"] ]
--   >>> map (date_suffix . fromJust . parse_date) twenties
--   ["st","nd","rd"]
--
date_suffix :: UTCTime -> String
date_suffix t
  -- These three would otherwise be caught by the last-digit rules
  -- below and come out as "11st", "12nd", and "13rd".
  | daystr `elem` ["11", "12", "13"] = "th"
  | otherwise =
      case reverse daystr of
        []      -> []
        ('1':_) -> "st"
        ('2':_) -> "nd"
        ('3':_) -> "rd"
        _       -> "th"
  where
    -- The day of the month, zero-padded to two characters.
    daystr :: String
    daystr = formatTime defaultTimeLocale "%d" t
292
293
-- | Pickler for the gamedate of a weather forecast, which is a
--   weekday name, a month name, and a day of the month with an
--   English ordinal suffix; for example, \"Monday, December 30th\".
--
--   Before parsing, a trailing \"st\", \"nd\", \"rd\", or \"th\" is
--   cut off. When pickling, 'date_suffix' supplies the suffix that is
--   glued back on.
--
--   The text contains no year, and (as the example below shows) the
--   weekday that comes back out need not be the one that went in.
--
--   Examples:
--
--   >>> let tn = text_node "Monday, December 30th"
--   >>> let (Just gd) = unpickleDoc xp_gamedate tn
--   >>> gd
--   1970-12-30 00:00:00 UTC
--   >>> pickleDoc xp_gamedate gd
--   NTree (XTag "/" []) [NTree (XText "Wednesday, December 30th") []]
--
xp_gamedate :: PU UTCTime
xp_gamedate =
  xpWrapMaybe (parse_gamedate, show_gamedate) xpText
  where
    -- | Everything except the ordinal suffix.
    gamedate_format :: String
    gamedate_format = "%A, %B %-d"

    known_suffixes :: [String]
    known_suffixes = ["st","nd","rd","th"]

    -- | Remove the last two characters if (and only if) they form
    --   one of the 'known_suffixes'. Strings shorter than two
    --   characters, and strings with some other ending, are returned
    --   untouched.
    strip_suffix :: String -> String
    strip_suffix str =
      case reverse str of
        (z:y:rest) | [y,z] `elem` known_suffixes -> reverse rest
        _                                        -> str

    parse_gamedate :: String -> Maybe UTCTime
    parse_gamedate =
      parseTime defaultTimeLocale gamedate_format . strip_suffix

    show_gamedate :: UTCTime -> String
    show_gamedate utc =
      formatTime defaultTimeLocale gamedate_format utc ++ date_suffix utc
335
336
337
338
339
340
341
-- | Pickler for a 'UTCTime' that only carries a time of day, read
--   and written using 'time_format'. The fields must be zero-padded
--   to two characters, or the parse fails.
--
--   /Examples/:
--
--   With padding, this works:
--
--   >>> let tn = text_node "04:35 PM"
--   >>> unpickleDoc xp_time tn
--   Just 1970-01-01 16:35:00 UTC
--
--   Without padding, it doesn't:
--
--   >>> let tn = text_node "4:35 PM"
--   >>> unpickleDoc xp_time tn
--   Nothing
--
xp_time :: PU UTCTime
xp_time =
  xpWrapMaybe (parse_clock, show_clock) xpText
  where
    -- | Read the text with 'time_format'; 'Nothing' on failure.
    parse_clock :: String -> Maybe UTCTime
    parse_clock str = parseTime defaultTimeLocale time_format str

    -- | Write the time back out with 'time_format'.
    show_clock :: UTCTime -> String
    show_clock utc = formatTime defaultTimeLocale time_format utc
368
369
-- | Pickler for a 'UTCTime' that only carries a time of day, where
--   the AM\/PM marker is spelled with periods (\"A.M.\" and
--   \"P.M.\") and the hour is not padded. Compare with 'xp_time'.
--
--   /Examples/:
--
--   The form we expect round-trips:
--
--   >>> let tn = text_node "11:30 A.M."
--   >>> let (Just result) = unpickleDoc xp_time_dots tn
--   >>> result
--   1970-01-01 11:30:00 UTC
--   >>> pickleDoc xp_time_dots result
--   NTree (XTag "/" []) [NTree (XText "11:30 A.M.") []]
--
--   A padded hour parses just the same:
--
--   >>> let tn = text_node "01:30 A.M."
--   >>> unpickleDoc xp_time_dots tn
--   Just 1970-01-01 01:30:00 UTC
--
xp_time_dots :: PU UTCTime
xp_time_dots =
  xpWrapMaybe (parse_dotted, show_dotted) xpText
  where
    -- | Hour (without padding), minute, and a dot-free AM\/PM.
    dotless_format :: String
    dotless_format = "%-I:%M %p"

    -- | Throw away every period in the input.
    remove_dots :: String -> String
    remove_dots = replace "." ""

    -- | Put the periods back into the AM\/PM marker.
    insert_dots :: String -> String
    insert_dots = replace "AM" "A.M." . replace "PM" "P.M."

    -- | The periods are removed first, after which the text is an
    --   ordinary AM\/PM time.
    parse_dotted :: String -> Maybe UTCTime
    parse_dotted = parseTime defaultTimeLocale dotless_format . remove_dots

    -- | Format without the periods, then add them.
    show_dotted :: UTCTime -> String
    show_dotted = insert_dots . formatTime defaultTimeLocale dotless_format
407
408
-- | Pickler for an optional time of day in 'time_format'. The text
--   \"TBA\" stands for a missing time and becomes 'Nothing'.
--
--   /Examples/:
--
--   Text that can't be parsed results in 'Nothing':
--
--   >>> let tn = text_node "YO"
--   >>> unpickleDoc xp_tba_time tn
--   Just Nothing
--
--   As does a \"TBA\":
--
--   >>> let tn = text_node "TBA"
--   >>> unpickleDoc xp_tba_time tn
--   Just Nothing
--
--   Going the other way, 'Nothing' is always written as \"TBA\":
--
--   >>> pickleDoc xp_tba_time Nothing
--   NTree (XTag "/" []) [NTree (XText "TBA") []]
--
--   And naturally, an actual time is parsed as one:
--
--   >>> let tn = text_node "08:10 PM"
--   >>> unpickleDoc xp_tba_time tn
--   Just (Just 1970-01-01 20:10:00 UTC)
--
xp_tba_time :: PU (Maybe UTCTime)
xp_tba_time =
  xpWrap (parse_tba, show_tba) xpText
  where
    -- | \"TBA\" is recognized before we even try to parse.
    parse_tba :: String -> Maybe UTCTime
    parse_tba "TBA" = Nothing
    parse_tba str   = parseTime defaultTimeLocale time_format str

    -- | A missing time is \"TBA\"; anything else is formatted.
    show_tba :: Maybe UTCTime -> String
    show_tba = maybe "TBA" (formatTime defaultTimeLocale time_format)
449
450
451
-- | Pickler that converts the contents of a \<time_stamp\> element
--   to and from a 'UTCTime'. Those elements look roughly like,
--
--   \<time_stamp\> January 6, 2014, at 10:11 PM ET \</time_stamp\>
--
--   The only time zone information TSN gives us is \"ET\", for
--   \"Eastern Time\". What that means depends on the time of year
--   (and on where you are), thanks to daylight-savings time. Being
--   wrong by one hour is no better than being wrong by five, so we
--   don't try to guess between EDT and EST; the timestamp is stored
--   as if it were UTC.
--
--   Examples:
--
--   >>> let tn = text_node " January 6, 2014, at 10:11 PM ET "
--   >>> let (Just tstamp) = unpickleDoc xp_time_stamp tn
--   >>> tstamp
--   2014-01-06 22:11:00 UTC
--   >>> pickleDoc xp_time_stamp tstamp
--   NTree (XTag "/" []) [NTree (XText " January 6, 2014, at 10:11 PM ET ") []]
--
xp_time_stamp :: PU UTCTime
xp_time_stamp =
  xpWrapMaybe (parse_time_stamp, show_time_stamp) xpText
  where
    -- | 'time_stamp_format' surrounded by one space on either side;
    --   the documentation of 'time_stamp_format' explains why the
    --   spaces have to be added back.
    spaced_format :: String
    spaced_format = " " ++ time_stamp_format ++ " "

    show_time_stamp :: UTCTime -> String
    show_time_stamp utc = formatTime defaultTimeLocale spaced_format utc
483
484
485
-- | Pickler for a 12-hour clock time that has no AM\/PM marker, and
--   is therefore ambiguous.
--
--   Examples:
--
--   >>> let tn = text_node "8:00"
--   >>> unpickleDoc xp_ambiguous_time tn
--   Just 1970-01-01 08:00:00 UTC
--
xp_ambiguous_time :: PU UTCTime
xp_ambiguous_time =
  xpWrapMaybe (parse_hm, show_hm) xpText
  where
    -- | Hour (without padding) and minute; nothing else.
    hm_format :: String
    hm_format = "%-I:%M"

    parse_hm :: String -> Maybe UTCTime
    parse_hm str = parseTime defaultTimeLocale hm_format str

    show_hm :: UTCTime -> String
    show_hm utc = formatTime defaultTimeLocale hm_format utc
508
509
-- | Pickler for the \<date\> elements found in the early lines. The
--   format is an odd one: an upper-case weekday, month, and day with
--   ordinal suffix, followed by the padded numeric date in
--   parentheses. The time that goes with it is just as odd; see
--   'xp_ambiguous_time'.
--
--   Examples:
--
--   >>> let tn = text_node "SUNDAY, MAY 25TH (05/25/2014)"
--   >>> let (Just result) = unpickleDoc xp_early_line_date tn
--   >>> result
--   2014-05-25 00:00:00 UTC
--   >>> pickleDoc xp_early_line_date result
--   NTree (XTag "/" []) [NTree (XText "SUNDAY, MAY 25TH (05/25/2014)") []]
--
--   >>> let tn = text_node "SATURDAY, JUNE 7TH (06/07/2014)"
--   >>> let (Just result) = unpickleDoc xp_early_line_date tn
--   >>> result
--   2014-06-07 00:00:00 UTC
--   >>> pickleDoc xp_early_line_date result
--   NTree (XTag "/" []) [NTree (XText "SATURDAY, JUNE 7TH (06/07/2014)") []]
--
xp_early_line_date :: PU UTCTime
xp_early_line_date =
  xpWrapMaybe (parse_wacko, show_wacko) xpText
  where
    shout :: String -> String
    shout = map toUpper

    shout_pair :: (String,String) -> (String,String)
    shout_pair (full, abbrev) = (shout full, shout abbrev)

    -- | The default locale, except that the day and month names are
    --   in capital letters. Parsing doesn't appear to be sensitive
    --   to case, but the text we write back out should be in caps.
    loud_locale :: TimeLocale
    loud_locale =
      defaultTimeLocale { wDays  = map shout_pair (wDays defaultTimeLocale),
                          months = map shout_pair (months defaultTimeLocale) }

    -- | The complete format string for one particular (upper-case)
    --   ordinal suffix.
    wacko_format :: String -> String
    wacko_format suffix =
      "%A, %B %-d" ++ suffix ++ " (" ++ date_format_padded ++ ")"

    -- | We can't know the suffix beforehand, so each of the four
    --   candidates is tried in turn and the first successful parse
    --   wins.
    parse_wacko :: String -> Maybe UTCTime
    parse_wacko str =
      listToMaybe (catMaybes attempts)
      where
        attempts = [ parseTime loud_locale (wacko_format sfx) str |
                       sfx <- ["ST", "ND", "RD","TH"] ]

    -- | The suffix comes from 'date_suffix', converted to upper case.
    show_wacko :: UTCTime -> String
    show_wacko utc =
      formatTime loud_locale (wacko_format (shout (date_suffix utc))) utc
570
571
572
-- | Wrap the given text in an 'XmlTree' consisting of a single text
--   node with no children. When testing (un)picklers, this saves us
--   from having to build a dummy XML document.
--
--   Examples:
--
--   >>> text_node "8:00"
--   NTree (XText "8:00") []
--
text_node :: String -> XmlTree
text_node str = NTree content children
  where
    content  = XText str
    children = []
584
585
586
587 --
588 -- * Tasty Tests
589 --
590
-- | All of the tests for this module, as one group. It primarily
--   exists to silence the unused import/export warnings for
--   'unpickleDoc' and 'text_node', which are otherwise used only in
--   the doctests.
--
pickler_tests :: TestTree
pickler_tests =
  testGroup "Pickler tests" tests
  where
    tests :: [TestTree]
    tests = [ test_pickle_of_unpickle_is_identity ]
600
601
-- | If we unpickle something and then pickle it, we should wind up
--   with the same thing we started with (plus an additional root
--   element).
--
--   The unpickled value is never pattern-matched out of its 'Maybe'.
--   Instead, the pickling step is mapped over it and the comparison
--   is made against a 'Just'. If unpickling were to fail, we would
--   get an ordinary test failure (@Nothing@ versus the expected
--   tree) rather than a pattern-match exception.
--
test_pickle_of_unpickle_is_identity :: TestTree
test_pickle_of_unpickle_is_identity =
  testCase "pickle composed with unpickle is (almost) the identity" $ do
    let tn = text_node "8:00"
    let actual = fmap (pickleDoc xp_ambiguous_time)
                      (unpickleDoc xp_ambiguous_time tn)
    let expected = Just (NTree (XTag (mkName "/") []) [tn])
    actual @?= expected