]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/Picklers.hs
3d7215a07744436055a7eff73a8169201d394301
[dead/htsn-import.git] / src / TSN / Picklers.hs
1 -- | (Un)picklers for data types present in The Sports Network XML
2 -- feed.
3 --
4 module TSN.Picklers (
5 pickler_tests,
6 xp_ambiguous_time,
7 xp_date,
8 xp_date_padded,
9 xp_datetime,
10 xp_early_line_date,
11 xp_earnings,
12 xp_gamedate,
13 xp_tba_time,
14 xp_time,
15 xp_time_dots,
16 xp_time_stamp )
17 where
18
19 -- System imports.
20 import Data.Char ( toUpper )
21 import Data.List ( intercalate )
22 import Data.List.Split ( chunksOf )
23 import Data.Maybe ( catMaybes, listToMaybe )
24 import Data.String.Utils ( replace )
25 import Data.Time.Clock ( NominalDiffTime, UTCTime, addUTCTime )
26 import Data.Time.Format ( formatTime, parseTime )
27 import Data.Tree.NTree.TypeDefs ( NTree(..) )
28 import System.Locale ( TimeLocale( wDays, months ), defaultTimeLocale )
29 import Test.Tasty ( TestTree, testGroup )
30 import Test.Tasty.HUnit ( (@?=), testCase )
31 import Text.XML.HXT.Arrow.Pickle (
32 xpText,
33 xpWrap,
34 xpWrapMaybe )
35 import Text.XML.HXT.Arrow.Pickle.Xml ( PU )
36 import Text.XML.HXT.Core (
37 XmlTree,
38 XNode( XTag, XText ),
39 mkName,
40 pickleDoc,
41 unpickleDoc )
42
43 -- Local imports.
44 import TSN.Parse (
45 parse_time_stamp,
46 time_format,
47 time_stamp_format )
48
49
-- | Format string for a bare date written as m\/d\/yyyy, where
--   neither the month nor the day carries any padding. It matches,
--   for instance,
--
--   * 2\/15\/1983
--
--   * 1\/1\/0000
--
date_format :: String
date_format = intercalate "/" ["%-m", "%-d", "%Y"]
59
60
-- | Format string for a bare date written as mm\/dd\/yyyy, where the
--   month and the day are both zero-padded to two characters. It
--   matches, for instance,
--
--   * 02\/15\/1983
--
--   * 01\/01\/0000
--
date_format_padded :: String
date_format_padded = intercalate "/" ["%0m", "%0d", "%Y"]
71
72
-- | (Un)pickle a 'UTCTime' that carries a date but no time of day,
--   using the unpadded 'date_format' in both directions.
--
--   /Examples/:
--
--   The expected, unpadded form parses:
--
--   >>> let tn = text_node "2/15/1983"
--   >>> unpickleDoc xp_date tn
--   Just 1983-02-15 00:00:00 UTC
--
--   A leading zero in the month happens to be accepted as well. This
--   is not behavior that anything is supposed to rely on.
--
--   >>> let tn = text_node "02/15/1983"
--   >>> unpickleDoc xp_date tn
--   Just 1983-02-15 00:00:00 UTC
--
xp_date :: PU UTCTime
xp_date =
  xpWrapMaybe (parse, render) xpText
  where
    -- Text -> value; 'Nothing' makes the unpickler fail.
    parse :: String -> Maybe UTCTime
    parse = parseTime defaultTimeLocale date_format

    -- Value -> text, in the same format.
    render :: UTCTime -> String
    render = formatTime defaultTimeLocale date_format
99
100
-- | (Un)pickle a 'UTCTime' that carries a date but no time of day,
--   where the day and month are zero-padded to two characters (see
--   'date_format_padded').
--
--   Examples:
--
--   >>> let tn = text_node "02/15/1983"
--   >>> unpickleDoc xp_date_padded tn
--   Just 1983-02-15 00:00:00 UTC
--
--   >>> let tn = text_node "06/07/2014"
--   >>> unpickleDoc xp_date_padded tn
--   Just 2014-06-07 00:00:00 UTC
--
xp_date_padded :: PU UTCTime
xp_date_padded =
  xpWrapMaybe (parse, render) xpText
  where
    -- Text -> value; 'Nothing' makes the unpickler fail.
    parse :: String -> Maybe UTCTime
    parse = parseTime defaultTimeLocale date_format_padded

    -- Value -> text, in the same format.
    render :: UTCTime -> String
    render = formatTime defaultTimeLocale date_format_padded
123
124
125
-- | Format a number as a string using a comma as the thousands
--   separator. A negative number keeps its leading minus sign, and
--   only the digits are grouped.
--
--   Examples:
--
--   >>> format_commas 0
--   "0"
--   >>> format_commas 10
--   "10"
--   >>> format_commas 100
--   "100"
--   >>> format_commas 1000
--   "1,000"
--   >>> format_commas 10000
--   "10,000"
--   >>> format_commas 100000
--   "100,000"
--   >>> format_commas 1000000
--   "1,000,000"
--
--   The sign is never treated as a digit:
--
--   >>> format_commas (-100)
--   "-100"
--   >>> format_commas (-1000)
--   "-1,000"
--
format_commas :: Int -> String
format_commas x
  -- Work on the shown string rather than negating, so that 'minBound'
  -- (which has no positive counterpart) is handled too.
  | x < 0     = '-' : group_digits (drop 1 (show x))
  | otherwise = group_digits (show x)
  where
    -- Group from the right: reverse, cut into threes, join with
    -- commas, and reverse back.
    group_digits :: String -> String
    group_digits = reverse . intercalate "," . chunksOf 3 . reverse
149
150
-- | Parse \<Earnings\> from an 'AutoRaceResultsListing'. These are
--   essentially 'Int's, but they look like,
--
--   * \<Earnings\>336,826\</Earnings\>
--
--   * \<Earnings\>1,000,191\</Earnings\>
--
--   * \<Earnings\>TBA\</Earnings\>
--
--   The literal \"TBA\" unpickles to 'Nothing' and is what 'Nothing'
--   pickles back to. Any other text must be a number once its commas
--   are removed; if it is not, the unpickler fails instead of
--   producing a value that blows up when it is eventually forced.
--
--   Examples:
--
--   >>> let tn = text_node "1,000,191"
--   >>> unpickleDoc xp_earnings tn
--   Just (Just 1000191)
--
--   >>> let tn = text_node "TBA"
--   >>> unpickleDoc xp_earnings tn
--   Just Nothing
--
--   Garbage is rejected:
--
--   >>> let tn = text_node "N/A"
--   >>> unpickleDoc xp_earnings tn
--   Nothing
--
xp_earnings :: PU (Maybe Int)
xp_earnings =
  (to_earnings, from_earnings) `xpWrapMaybe` xpText
  where
    strip_commas :: String -> String
    strip_commas = replace "," ""

    -- The outer 'Maybe' says whether the text was understood at all;
    -- the inner one distinguishes \"TBA\" from a real amount.
    to_earnings :: String -> Maybe (Maybe Int)
    to_earnings s
      | s == "TBA" = Just Nothing
      | otherwise  =
          -- A total version of 'read': accept exactly one parse that
          -- consumes the whole string (trailing whitespace aside).
          case [ n | (n, rest) <- reads (strip_commas s)
                   , ("", "") <- lex rest ] of
            [n] -> Just (Just n)
            _   -> Nothing

    from_earnings :: Maybe Int -> String
    from_earnings Nothing = "TBA"
    from_earnings (Just i) = format_commas i
185
186
187
-- | (Un)pickle a 'UTCTime' made of an unpadded date followed by a
--   12-hour time with seconds and an AM\/PM marker. Used for example
--   on the \<RaceDate\> elements in an 'AutoRaceResults' message.
--
--   Examples:
--
--   >>> let tn = text_node "6/1/2014 1:00:00 PM"
--   >>> unpickleDoc xp_datetime tn
--   Just 2014-06-01 13:00:00 UTC
--
--   >>> let tn = text_node "5/24/2014 2:45:00 PM"
--   >>> unpickleDoc xp_datetime tn
--   Just 2014-05-24 14:45:00 UTC
--
--   A zero-padded month is accepted too. That is neither required
--   nor (probably) desired behavior.
--
--   >>> let tn = text_node "05/24/2014 2:45:00 PM"
--   >>> unpickleDoc xp_datetime tn
--   Just 2014-05-24 14:45:00 UTC
--
xp_datetime :: PU UTCTime
xp_datetime =
  xpWrapMaybe (parse, render) xpText
  where
    -- The date part and the time part, separated by a single space.
    datetime_format :: String
    datetime_format = unwords [date_format, "%-I:%M:%S %p"]

    parse :: String -> Maybe UTCTime
    parse = parseTime defaultTimeLocale datetime_format

    render :: UTCTime -> String
    render = formatTime defaultTimeLocale datetime_format
219
220
221
-- | Takes a 'UTCTime', and returns the English suffix that would be
--   appropriate after the day of the month. For example, if we have a
--   UTCTime representing Christmas, this would return \"th\" because
--   \"th\" is the right suffix of \"December 25th\".
--
--   The suffix follows the last digit of the day, except that every
--   day whose tens digit is one (10 through 19) takes \"th\": it is
--   \"11th\", \"12th\" and \"13th\", not \"11st\", \"12nd\", \"13rd\".
--
--   Examples:
--
--   >>> import Data.Maybe ( fromJust )
--   >>> :{
--     let parse_date :: String -> Maybe UTCTime;
--         parse_date = parseTime defaultTimeLocale date_format;
--   :}
--
--   >>> let dates = [ "1/" ++ (d : "/1970") | d <- ['1'..'9'] ]
--   >>> let suffixes = map (date_suffix . fromJust . parse_date) dates
--   >>> suffixes
--   ["st","nd","rd","th","th","th","th","th","th"]
--
--   The teens are all \"th\":
--
--   >>> let teens = [ "1/1" ++ (d : "/1970") | d <- ['1'..'3'] ]
--   >>> map (date_suffix . fromJust . parse_date) teens
--   ["th","th","th"]
--
--   But the twenties follow the last digit again:
--
--   >>> let twenties = [ "1/2" ++ (d : "/1970") | d <- ['1'..'3'] ]
--   >>> map (date_suffix . fromJust . parse_date) twenties
--   ["st","nd","rd"]
--
date_suffix :: UTCTime -> String
date_suffix t =
  -- Reversed, so the ones digit comes first and the tens digit second.
  case (reverse daystr) of
    []        -> []
    (_:'1':_) -> "th" -- 10 through 19, whatever the last digit is.
    ('1':_)   -> "st"
    ('2':_)   -> "nd"
    ('3':_)   -> "rd"
    _         -> "th"
  where
    -- The day of the month, rendered with the \"%d\" specifier.
    daystr = formatTime defaultTimeLocale "%d" t
250
251
-- | (Un)pickle a 'UTCTime' from a weather forecast's gamedate. The
--   input looks like \"Monday, December 30th\": a weekday name, a
--   month name, and a day of the month with an English ordinal
--   suffix. There is no year in the text.
--
--   When unpickling, a trailing \"st\", \"nd\", \"rd\" or \"th\" is
--   dropped before the rest is parsed. When pickling, 'date_suffix'
--   supplies the suffix to put back.
--
--   Examples:
--
--   >>> let tn = text_node "Monday, December 30th"
--   >>> let (Just gd) = unpickleDoc xp_gamedate tn
--   >>> gd
--   1970-12-30 00:00:00 UTC
--   >>> pickleDoc xp_gamedate gd
--   NTree (XTag "/" []) [NTree (XText "Wednesday, December 30th") []]
--
xp_gamedate :: PU UTCTime
xp_gamedate =
  xpWrapMaybe (parse, render) xpText
  where
    gamedate_format :: String
    gamedate_format = "%A, %B %-d"

    known_suffixes :: [String]
    known_suffixes = ["st", "nd", "rd", "th"]

    -- Cut the text two characters from the end. If the tail is one of
    -- the known suffixes, keep only the front part; otherwise (this
    -- includes text shorter than two characters) leave it untouched.
    strip_suffix :: String -> String
    strip_suffix s
      | ending `elem` known_suffixes = front
      | otherwise                    = s
      where
        (front, ending) = splitAt (length s - 2) s

    parse :: String -> Maybe UTCTime
    parse = parseTime defaultTimeLocale gamedate_format . strip_suffix

    render :: UTCTime -> String
    render d = formatTime defaultTimeLocale gamedate_format d ++ date_suffix d
293
294
295
296
297
298
299
-- | (Un)pickle a 'UTCTime' that carries a time of day but no date,
--   using 'time_format' in both directions. The fields have to be
--   zero-padded to two characters or the parse fails.
--
--   /Examples/:
--
--   Padded, should work:
--
--   >>> let tn = text_node "04:35 PM"
--   >>> unpickleDoc xp_time tn
--   Just 1970-01-01 16:35:00 UTC
--
--   Unpadded, should fail:
--
--   >>> let tn = text_node "4:35 PM"
--   >>> unpickleDoc xp_time tn
--   Nothing
--
xp_time :: PU UTCTime
xp_time =
  xpWrapMaybe (parse, render) xpText
  where
    parse :: String -> Maybe UTCTime
    parse = parseTime defaultTimeLocale time_format

    render :: UTCTime -> String
    render = formatTime defaultTimeLocale time_format
326
327
-- | (Un)pickle a 'UTCTime' that carries a time of day but no date.
--   Unlike 'xp_time', the AM\/PM part is written with periods
--   (\"A.M.\" and \"P.M.\") and the hour is not padded.
--
--   /Examples/:
--
--   A standard example of the correct form:
--
--   >>> let tn = text_node "11:30 A.M."
--   >>> let (Just result) = unpickleDoc xp_time_dots tn
--   >>> result
--   1970-01-01 11:30:00 UTC
--   >>> pickleDoc xp_time_dots result
--   NTree (XTag "/" []) [NTree (XText "11:30 A.M.") []]
--
--   An hour with a leading zero still parses:
--
--   >>> let tn = text_node "01:30 A.M."
--   >>> unpickleDoc xp_time_dots tn
--   Just 1970-01-01 01:30:00 UTC
--
xp_time_dots :: PU UTCTime
xp_time_dots =
  xpWrapMaybe (parse, render) xpText
  where
    -- The hours aren't padded with zeros.
    dotless_format :: String
    dotless_format = "%-I:%M %p"

    -- Every period is removed from the input before it is parsed.
    drop_dots :: String -> String
    drop_dots = replace "." ""

    -- Put the periods back into whichever marker was rendered.
    add_dots :: String -> String
    add_dots = replace "AM" "A.M." . replace "PM" "P.M."

    parse :: String -> Maybe UTCTime
    parse raw = parseTime defaultTimeLocale dotless_format (drop_dots raw)

    render :: UTCTime -> String
    render t = add_dots (formatTime defaultTimeLocale dotless_format t)
365
366
-- | (Un)pickle a 'UTCTime' that carries a time of day but no date,
--   where the text may also be \"TBA\". Both \"TBA\" and anything
--   that fails to parse become 'Nothing'.
--
--   /Examples/:
--
--   A failed parse will return 'Nothing':
--
--   >>> let tn = text_node "YO"
--   >>> unpickleDoc xp_tba_time tn
--   Just Nothing
--
--   And so will parsing a \"TBA\":
--
--   >>> let tn = text_node "TBA"
--   >>> unpickleDoc xp_tba_time tn
--   Just Nothing
--
--   But re-pickling 'Nothing' gives only \"TBA\":
--
--   >>> pickleDoc xp_tba_time Nothing
--   NTree (XTag "/" []) [NTree (XText "TBA") []]
--
--   A normal time is also parsed successfully, of course:
--
--   >>> let tn = text_node "08:10 PM"
--   >>> unpickleDoc xp_tba_time tn
--   Just (Just 1970-01-01 20:10:00 UTC)
--
xp_tba_time :: PU (Maybe UTCTime)
xp_tba_time =
  xpWrap (parse, render) xpText
  where
    parse :: String -> Maybe UTCTime
    parse "TBA" = Nothing
    parse other = parseTime defaultTimeLocale time_format other

    render :: Maybe UTCTime -> String
    render = maybe "TBA" (formatTime defaultTimeLocale time_format)
407
408
409
-- | (Un)pickle the \<time_stamp\> element format to/from a 'UTCTime'.
--   The time_stamp elements look something like,
--
--   \<time_stamp\> January 6, 2014, at 10:11 PM ET \</time_stamp\>
--
--   TSN doesn't give a proper time zone name, so the time is always
--   taken to be Eastern Standard Time, which is UTC-5. Unpickling is
--   delegated to 'parse_time_stamp'; pickling moves the UTC value
--   back by five hours before formatting it with 'time_stamp_format'.
--
--   Examples:
--
--   >>> let tn = text_node " January 6, 2014, at 10:11 PM ET "
--   >>> unpickleDoc xp_time_stamp tn
--   Just 2014-01-07 03:11:00 UTC
--
xp_time_stamp :: PU UTCTime
xp_time_stamp =
  xpWrapMaybe (parse_time_stamp, render) xpText
  where
    -- Five hours, in seconds, going backwards: UTC to EST.
    utc_to_est :: NominalDiffTime
    utc_to_est = negate (5 * 60 * 60)

    render :: UTCTime -> String
    render utc =
      formatTime defaultTimeLocale time_stamp_format eastern
      where
        eastern = addUTCTime utc_to_est utc
438
439
-- | (Un)pickle a 12-hour time that has lost its AM\/PM part, which is
--   what makes it ambiguous. The hour is not padded.
--
--   Examples:
--
--   >>> let tn = text_node "8:00"
--   >>> unpickleDoc xp_ambiguous_time tn
--   Just 1970-01-01 08:00:00 UTC
--
xp_ambiguous_time :: PU UTCTime
xp_ambiguous_time =
  xpWrapMaybe (parse, render) xpText
  where
    hour_minute_format :: String
    hour_minute_format = "%-I:%M"

    parse :: String -> Maybe UTCTime
    parse = parseTime defaultTimeLocale hour_minute_format

    render :: UTCTime -> String
    render = formatTime defaultTimeLocale hour_minute_format
462
463
-- | (Un)pickle a date value from a \<date\> element as they appear in
--   the early lines: an upper-case weekday, month and ordinal day,
--   followed by the zero-padded date in parentheses. The associated
--   time is just as odd; see 'xp_ambiguous_time'.
--
--   Examples:
--
--   >>> let tn = text_node "SUNDAY, MAY 25TH (05/25/2014)"
--   >>> let (Just result) = unpickleDoc xp_early_line_date tn
--   >>> result
--   2014-05-25 00:00:00 UTC
--   >>> pickleDoc xp_early_line_date result
--   NTree (XTag "/" []) [NTree (XText "SUNDAY, MAY 25TH (05/25/2014)") []]
--
--   >>> let tn = text_node "SATURDAY, JUNE 7TH (06/07/2014)"
--   >>> let (Just result) = unpickleDoc xp_early_line_date tn
--   >>> result
--   2014-06-07 00:00:00 UTC
--   >>> pickleDoc xp_early_line_date result
--   NTree (XTag "/" []) [NTree (XText "SATURDAY, JUNE 7TH (06/07/2014)") []]
--
xp_early_line_date :: PU UTCTime
xp_early_line_date =
  xpWrapMaybe (parse, render) xpText
  where
    shout :: String -> String
    shout = map toUpper

    -- A locale whose weekday and month names are IN ALL CAPS.
    -- 'parseTime' doesn't seem to care about the case, but
    -- 'formatTime' has to produce capitals on the way back out.
    shouting_locale :: TimeLocale
    shouting_locale =
      defaultTimeLocale
        { wDays  = [ (shout x, shout y) | (x, y) <- wDays defaultTimeLocale ]
        , months = [ (shout x, shout y) | (x, y) <- months defaultTimeLocale ]
        }

    -- The whole format string, built around one ordinal suffix.
    format_with :: String -> String
    format_with suffix =
      concat ["%A, %B %-d", suffix, " (", date_format_padded, ")"]

    -- Try one format per possible suffix and keep the first success.
    parse :: String -> Maybe UTCTime
    parse s =
      listToMaybe (catMaybes attempts)
      where
        attempts = [ parseTime shouting_locale (format_with suffix) s
                   | suffix <- ["ST", "ND", "RD", "TH"] ]

    -- Only the suffix that 'date_suffix' picks is used for output.
    render :: UTCTime -> String
    render t =
      formatTime shouting_locale (format_with (shout (date_suffix t))) t
524
525
526
-- | Build an 'XmlTree' that is a single text node holding the given
--   string and having no children. Handy when testing (un)picklers,
--   since it saves constructing a dummy XML document.
--
--   Examples:
--
--   >>> text_node "8:00"
--   NTree (XText "8:00") []
--
text_node :: String -> XmlTree
text_node =
  leaf . XText
  where
    -- A tree node without any children.
    leaf node = NTree node []
538
539
540
541 --
542 -- * Tasty Tests
543 --
544
-- | All of the tests for this module, gathered into one group. It
--   exists primarily to silence the unused import/export warnings
--   for 'unpickleDoc' and 'text_node', which would otherwise be used
--   only by the doctests.
--
pickler_tests :: TestTree
pickler_tests =
  testGroup "Pickler tests" all_tests
  where
    all_tests :: [TestTree]
    all_tests = [ test_pickle_of_unpickle_is_identity ]
554
555
-- | If we unpickle something and then pickle it, we should wind up
--   with the same thing we started with (plus an additional root
--   element).
--
--   The comparison is made inside 'Maybe', so a failed unpickle shows
--   up as an ordinary assertion failure (expected a 'Just', got
--   'Nothing') rather than as a pattern-match exception.
--
test_pickle_of_unpickle_is_identity :: TestTree
test_pickle_of_unpickle_is_identity =
  testCase "pickle composed with unpickle is (almost) the identity" $
    actual @?= Just expected
  where
    tn = text_node "8:00"

    -- Unpickle, then pickle the result again if there was one.
    actual =
      fmap (pickleDoc xp_ambiguous_time) (unpickleDoc xp_ambiguous_time tn)

    -- Pickling wraps the text node in a root element named \"/\".
    expected = NTree (XTag (mkName "/") []) [tn]