src/TSN/Picklers.hs

   1 -- | (Un)picklers for data types present in The Sports Network XML
   2 --   feed.
   3 --
   4 module TSN.Picklers (
   5   pickler_tests,
   6   xp_ambiguous_time,
   7   xp_date,
   8   xp_date_padded,
   9   xp_datetime,
  10   xp_early_line_date,
  11   xp_earnings,
  12   xp_fracpart_only_double,
  13   xp_gamedate,
  14   xp_tba_time,
  15   xp_time,
  16   xp_time_dots,
  17   xp_time_stamp )
  18 where
  19
  20 -- System imports.
  21 import Data.Char ( toUpper )
  22 import Data.List ( intercalate )
  23 import Data.List.Split ( chunksOf )
  24 import Data.Maybe ( catMaybes, listToMaybe )
  25 import Data.String.Utils ( replace )
  26 import Data.Time.Clock ( UTCTime )
  27 import Data.Time.Format ( formatTime, parseTime )
  28 import Data.Tree.NTree.TypeDefs ( NTree(..) )
  29 import System.Locale ( TimeLocale( wDays, months ), defaultTimeLocale )
  30 import Test.Tasty ( TestTree, testGroup )
  31 import Test.Tasty.HUnit ( (@?=), testCase )
  32 import Text.Read ( readMaybe )
  33 import Text.XML.HXT.Arrow.Pickle (
  34   xpText,
  35   xpWrap,
  36   xpWrapMaybe )
  37 import Text.XML.HXT.Arrow.Pickle.Xml ( PU )
  38 import Text.XML.HXT.Core (
  39   XmlTree,
  40   XNode( XTag, XText ),
  41   mkName,
  42   pickleDoc,
  43   unpickleDoc )
  44
  45 -- Local imports.
  46 import TSN.Parse (
  47   parse_time_stamp,
  48   time_format,
  49   time_stamp_format )
  50
  51
  52 -- | The format string for a base date in m/d/yyyy format. The
  53 --   day/month are not padded at all. This will match for example,
  54 --
  55 --   * 2\/15\/1983
  56 --
  57 --   * 1\/1\/0000
  58 --
  59 date_format :: String
  60 date_format = "%-m/%-d/%Y"
  61
  62
  63 -- | The format string for a base date in mm/dd/yyyy format. The
  64 --   day/month are padded to two characters with zeros. This will
  65 --   match for example,
  66 --
  67 --   * 02\/15\/1983
  68 --
  69 --   * 01\/01\/0000
  70 --
  71 date_format_padded :: String
  72 date_format_padded = "%0m/%0d/%Y"
  73
  74
  75 -- | (Un)pickle a UTCTime without the time portion.
  76 --
  77 --   /Examples/:
  78 --
  79 --   This should parse:
  80 --
  81 --   >>> let tn = text_node "2/15/1983"
  82 --   >>> unpickleDoc xp_date tn
  83 --   Just 1983-02-15 00:00:00 UTC
  84 --
  85 --   But for some reason, it can also parse a leading zero in the
  86 --   month. Whatever. This isn't required behavior.
  87 --
  88 --   >>> let tn = text_node "02/15/1983"
  89 --   >>> unpickleDoc xp_date tn
  90 --   Just 1983-02-15 00:00:00 UTC
  91 --
  92 xp_date :: PU UTCTime
  93 xp_date =
  94   (to_date, from_date) `xpWrapMaybe` xpText
  95   where
  96     to_date :: String -> Maybe UTCTime
  97     to_date = parseTime defaultTimeLocale date_format
  98
  99     from_date :: UTCTime -> String
 100     from_date = formatTime defaultTimeLocale date_format
 101
 102
 103 -- | (Un)pickle a UTCTime without the time portion. The day/month are
 104 --   padded to two characters with zeros.
 105 --
 106 --   Examples:
 107 --
 108 --   >>> let tn = text_node "02/15/1983"
 109 --   >>> unpickleDoc xp_date_padded tn
 110 --   Just 1983-02-15 00:00:00 UTC
 111 --
 112 --   >>> let tn = text_node "06/07/2014"
 113 --   >>> unpickleDoc xp_date_padded tn
 114 --   Just 2014-06-07 00:00:00 UTC
 115 --
 116 xp_date_padded :: PU UTCTime
 117 xp_date_padded =
 118   (to_date, from_date) `xpWrapMaybe` xpText
 119   where
 120     to_date :: String -> Maybe UTCTime
 121     to_date = parseTime defaultTimeLocale date_format_padded
 122
 123     from_date :: UTCTime -> String
 124     from_date = formatTime defaultTimeLocale date_format_padded
 125
 126
 127
 128 -- | Format a number as a string using a comma as the thousands
 129 --   separator.
 130 --
 131 --   Examples:
 132 --
 133 --   >>> format_commas 0
 134 --   "0"
 135 --   >>> format_commas 10
 136 --   "10"
 137 --   >>> format_commas 100
 138 --   "100"
 139 --   >>> format_commas 1000
 140 --   "1,000"
 141 --   >>> format_commas 10000
 142 --   "10,000"
 143 --   >>> format_commas 100000
 144 --   "100,000"
 145 --   >>> format_commas 1000000
 146 --   "1,000,000"
 147 --
 148 format_commas :: Int -> String
 149 format_commas x =
 150   reverse (intercalate "," $ chunksOf 3 $ reverse $ show x)
 151
 152
 153
 154 -- | Parse \<Earnings\> from an 'AutoRaceResultsListing'. These are
 155 --   essentially 'Int's, but they look like,
 156 --
 157 --   * \<Earnings\>336,826\</Earnings\>
 158 --
 159 --   * \<Earnings\>1,000,191\</Earnings\>
 160 --
 161 --   * \<Earnings\>TBA\</Earnings\>
 162 --
 163 --   Examples:
 164 --
 165 --   >>> let tn = text_node "1,000,191"
 166 --   >>> unpickleDoc xp_earnings tn
 167 --   Just (Just 1000191)
 168 --
 169 --   >>> let tn = text_node "TBA"
 170 --   >>> unpickleDoc xp_earnings tn
 171 --   Just Nothing
 172 --
 173 xp_earnings :: PU (Maybe Int)
 174 xp_earnings =
 175   (to_earnings, from_earnings) `xpWrap` xpText
 176   where
 177     strip_commas :: String -> String
 178     strip_commas = replace "," ""
 179
 180     to_earnings :: String -> Maybe Int
 181     to_earnings s
 182       | s == "TBA" = Nothing
 183       | otherwise = Just  $ (read . strip_commas) s
 184
 185     from_earnings :: Maybe Int -> String
 186     from_earnings Nothing = "TBA"
 187     from_earnings (Just i) = format_commas i
 188
 189
 190
 191 -- | Pickle a 'Double' that can be missing its leading zero (for
 192 --   values less than one). For example, we've seen,
 193 --
 194 --   <TrackLength KPH=".805">0.5</TrackLength>
 195 --
 196 --   Which 'xpPrim' can't handle without the leading
 197 --   zero. Unfortunately there's no way pickle/unpickle can be
 198 --   inverses of each other here, since \"0.5\" and \".5\" should
 199 --   unpickle to the same 'Double'.
 200 --
 201 --   Examples:
 202 --
 203 --   >>> let tn = text_node "0.5"
 204 --   >>> unpickleDoc xp_fracpart_only_double tn
 205 --   Just 0.5
 206 --
 207 --   >>> let tn = text_node ".5"
 208 --   >>> unpickleDoc xp_fracpart_only_double tn
 209 --   Just 0.5
 210 --
 211 --   >>> let tn = text_node "foo"
 212 --   >>> unpickleDoc xp_fracpart_only_double tn
 213 --   Nothing
 214 --
 215 xp_fracpart_only_double :: PU Double
 216 xp_fracpart_only_double =
 217   (to_double, from_double) `xpWrapMaybe` xpText
 218   where
 219     -- | Convert a 'String' to a 'Double', maybe. We always prepend a
 220     -- zero, since it will fix the fraction-only values, and not hurt
 221     -- the ones that already have a leading integer.
 222     to_double :: String -> Maybe Double
 223     to_double s = readMaybe ("0" ++ s)
 224
 225     from_double :: Double -> String
 226     from_double = show
 227
 228
 229
 230 -- | (Un)pickle an unpadded 'UTCTime'. Used for example on the
 231 --   \<RaceDate\> elements in an 'AutoRaceResults' message.
 232 --
 233 --   Examples:
 234 --
 235 --   >>> let tn = text_node "6/1/2014 1:00:00 PM"
 236 --   >>> unpickleDoc xp_datetime tn
 237 --   Just 2014-06-01 13:00:00 UTC
 238 --
 239 --   >>> let tn = text_node "5/24/2014 2:45:00 PM"
 240 --   >>> unpickleDoc xp_datetime tn
 241 --   Just 2014-05-24 14:45:00 UTC
 242 --
 243 --   Padded! For some reason it works with only one zero in front. I
 244 --   dunno man. NOT required (or even desired?) behavior.
 245 --
 246 --   >>> let tn = text_node "05/24/2014 2:45:00 PM"
 247 --   >>> unpickleDoc xp_datetime tn
 248 --   Just 2014-05-24 14:45:00 UTC
 249 --
 250 xp_datetime :: PU UTCTime
 251 xp_datetime =
 252   (to_datetime, from_datetime) `xpWrapMaybe` xpText
 253   where
 254     format = date_format ++ " " ++ "%-I:%M:%S %p"
 255
 256     to_datetime :: String -> Maybe UTCTime
 257     to_datetime = parseTime defaultTimeLocale format
 258
 259     from_datetime :: UTCTime -> String
 260     from_datetime = formatTime defaultTimeLocale format
 261
 262
 263
 264 -- | Takes a 'UTCTime', and returns the English suffix that would be
 265 --   appropriate after the day of the month. For example, if we have a
 266 --   UTCTime representing Christmas, this would return \"th\" because
 267 --   \"th\" is the right suffix of \"December 25th\".
 268 --
 269 --   Examples:
 270 --
 271 --   >>> import Data.Maybe ( fromJust )
 272 --   >>> :{
 273 --         let parse_date :: String -> Maybe UTCTime
 274 --             parse_date = parseTime defaultTimeLocale date_format
 275 --       :}
 276 --
 277 --   >>> let dates = [ "1/" ++ (d : "/1970") | d <- ['1'..'9'] ]
 278 --   >>> let suffixes = map (date_suffix . fromJust . parse_date) dates
 279 --   >>> suffixes
 280 --   ["st","nd","rd","th","th","th","th","th","th"]
 281 --
 282 date_suffix :: UTCTime -> String
 283 date_suffix t =
 284   case (reverse daystr) of
 285     []       -> []
 286     ('1':_) -> "st"
 287     ('2':_) -> "nd"
 288     ('3':_) -> "rd"
 289     _        -> "th"
 290   where
 291     daystr = formatTime defaultTimeLocale "%d" t
 292
 293
 294 -- | (Un)pickle a UTCTime from a weather forecast's gamedate. Example
 295 --   input looks like,
 296 --
 297 --   When unpickling we get rid of the suffixes \"st\", \"nd\", \"rd\", and
 298 --   \"th\". During pickling, we add them back based on the last digit
 299 --   of the date.
 300 --
 301 --   Examples:
 302 --
 303 --   >>> let tn = text_node "Monday, December 30th"
 304 --   >>> let (Just gd) = unpickleDoc xp_gamedate tn
 305 --   >>> gd
 306 --   1970-12-30 00:00:00 UTC
 307 --   >>> pickleDoc xp_gamedate gd
 308 --   NTree (XTag "/" []) [NTree (XText "Wednesday, December 30th") []]
 309 --
 310 xp_gamedate :: PU UTCTime
 311 xp_gamedate =
 312   (to_gamedate, from_gamedate) `xpWrapMaybe` xpText
 313   where
 314     format = "%A, %B %-d"
 315
 316     to_gamedate :: String -> Maybe UTCTime
 317     to_gamedate s =
 318       parseTime defaultTimeLocale format s'
 319       where
 320         s' = case (reverse s) of
 321                (c2:c1:cs) -> let suffix = [c1,c2]
 322                              in
 323                                if suffix `elem` ["st","nd","rd","th"]
 324                                then reverse cs
 325                                else s -- Unknown suffix, leave it alone.
 326
 327                _ -> s -- The String is less than two characters long,
 328                       -- leave it alone.
 329
 330
 331     from_gamedate :: UTCTime -> String
 332     from_gamedate d = s ++ (date_suffix d)
 333       where
 334         s = formatTime defaultTimeLocale format d
 335
 336
 337
 338
 339
 340
 341
 342 -- | (Un)pickle a UTCTime without the date portion. Doesn't work if
 343 --   the fields aren't zero-padded to two characters.
 344 --
 345 --   /Examples/:
 346 --
 347 --   Padded, should work:
 348 --
 349 --   >>> let tn = text_node "04:35 PM"
 350 --   >>> unpickleDoc xp_time tn
 351 --   Just 1970-01-01 16:35:00 UTC
 352 --
 353 --   Unpadded, should fail:
 354 --
 355 --   >>> let tn = text_node "4:35 PM"
 356 --   >>> unpickleDoc xp_time tn
 357 --   Nothing
 358 --
 359 xp_time :: PU UTCTime
 360 xp_time =
 361   (to_time, from_time) `xpWrapMaybe` xpText
 362   where
 363     to_time :: String -> Maybe UTCTime
 364     to_time = parseTime defaultTimeLocale time_format
 365
 366     from_time :: UTCTime -> String
 367     from_time = formatTime defaultTimeLocale time_format
 368
 369
 370 -- | (Un)pickle a UTCTime without the date portion. This differs from
 371 --   'xp_time' in that it uses periods in the AM/PM part, i.e. \"A.M.\"
 372 --   and \"P.M.\" It also doesn't use padding for the \"hours\" part.
 373 --
 374 --   /Examples/:
 375 --
 376 --   A standard example of the correct form:
 377 --
 378 --   >>> let tn = text_node "11:30 A.M."
 379 --   >>> let (Just result) = unpickleDoc xp_time_dots tn
 380 --   >>> result
 381 --   1970-01-01 11:30:00 UTC
 382 --   >>> pickleDoc xp_time_dots result
 383 --   NTree (XTag "/" []) [NTree (XText "11:30 A.M.") []]
 384 --
 385 --   Another miracle, it still parses with a leading zero!
 386 --
 387 --   >>> let tn = text_node "01:30 A.M."
 388 --   >>> unpickleDoc xp_time_dots tn
 389 --   Just 1970-01-01 01:30:00 UTC
 390 --
 391 xp_time_dots :: PU UTCTime
 392 xp_time_dots =
 393   (to_time, from_time) `xpWrapMaybe` xpText
 394   where
 395     -- | The hours arent padded with zeros.
 396     nopad_time_format :: String
 397     nopad_time_format = "%-I:%M %p"
 398
 399     to_time :: String -> Maybe UTCTime
 400     to_time = (parseTime defaultTimeLocale nopad_time_format) . (replace "." "")
 401
 402     from_time :: UTCTime -> String
 403     from_time t =
 404         replace "AM" "A.M." (replace "PM" "P.M." s)
 405       where
 406         s = formatTime defaultTimeLocale nopad_time_format t
 407
 408
 409 -- | (Un)pickle a UTCTime without the date portion, allowing for a
 410 --   value of \"TBA\" (which gets translated to 'Nothing').
 411 --
 412 --   /Examples/:
 413 --
 414 --   A failed parse will return 'Nothing':
 415 --
 416 --   >>> let tn = text_node "YO"
 417 --   >>> unpickleDoc xp_tba_time tn
 418 --   Just Nothing
 419 --
 420 --   And so will parsing a \"TBA\":
 421 --
 422 --   >>> let tn = text_node "TBA"
 423 --   >>> unpickleDoc xp_tba_time tn
 424 --   Just Nothing
 425 --
 426 --   But re-pickling 'Nothing' gives only \"TBA\":
 427 --
 428 --   >>> pickleDoc xp_tba_time Nothing
 429 --   NTree (XTag "/" []) [NTree (XText "TBA") []]
 430 --
 431 --   A normal time is also parsed successfully, of course:
 432 --
 433 --   >>> let tn = text_node "08:10 PM"
 434 --   >>> unpickleDoc xp_tba_time tn
 435 --   Just (Just 1970-01-01 20:10:00 UTC)
 436 --
 437 xp_tba_time :: PU (Maybe UTCTime)
 438 xp_tba_time =
 439   (to_time, from_time) `xpWrap` xpText
 440   where
 441     to_time :: String -> Maybe UTCTime
 442     to_time s
 443       | s == "TBA" = Nothing
 444       | otherwise = parseTime defaultTimeLocale time_format s
 445
 446     from_time :: Maybe UTCTime -> String
 447     from_time Nothing = "TBA"
 448     from_time (Just t) = formatTime defaultTimeLocale time_format t
 449
 450
 451
 452 -- | (Un)pickle the \<time_stamp\> element format to/from a 'UTCTime'.
 453 --   The time_stamp elements look something like,
 454 --
 455 --   \<time_stamp\> January 6, 2014, at 10:11 PM ET \</time_stamp\>
 456 --
 457 --   TSN doesn't provide a proper time zone name, only \"ET\" for
 458 --   \"Eastern Time\". But \"Eastern Time\" changes throughout the
 459 --   year, depending on one's location, for daylight-savings
 460 --   time. It's really not any more useful to be off by one hour than
 461 --   it is to be off by 5 hours, so rather than guess at EDT/EST, we
 462 --   just store the timestamp as UTC.
 463 --
 464 --   Examples:
 465 --
 466 --   >>> let tn = text_node " January 6, 2014, at 10:11 PM ET "
 467 --   >>> let (Just tstamp) = unpickleDoc xp_time_stamp tn
 468 --   >>> tstamp
 469 --   2014-01-06 22:11:00 UTC
 470 --   >>> pickleDoc xp_time_stamp tstamp
 471 --   NTree (XTag "/" []) [NTree (XText " January 6, 2014, at 10:11 PM ET ") []]
 472 --
 473 xp_time_stamp :: PU UTCTime
 474 xp_time_stamp =
 475   (parse_time_stamp, from_time_stamp) `xpWrapMaybe` xpText
 476   where
 477     -- | We have to re-pad the time_stamp_format with a leading and
 478     --   trailing space; see the documentation of 'time_stamp_format'
 479     --   for more information.
 480     from_time_stamp :: UTCTime -> String
 481     from_time_stamp =
 482       formatTime defaultTimeLocale (" " ++ time_stamp_format ++ " ")
 483
 484
 485
 486 -- | (Un)pickle an ambiguous 12-hour AM/PM time, which is ambiguous
 487 --   because it's missing the AM/PM part.
 488 --
 489 --   Examples:
 490 --
 491 --   >>> let tn = text_node "8:00"
 492 --   >>> unpickleDoc xp_ambiguous_time tn
 493 --   Just 1970-01-01 08:00:00 UTC
 494 --
 495 xp_ambiguous_time :: PU UTCTime
 496 xp_ambiguous_time =
 497   (to_time, from_time) `xpWrapMaybe` xpText
 498   where
 499     ambiguous_time_format :: String
 500     ambiguous_time_format = "%-I:%M"
 501
 502     to_time :: String -> Maybe UTCTime
 503     to_time = parseTime defaultTimeLocale ambiguous_time_format
 504
 505     from_time :: UTCTime -> String
 506     from_time =
 507       formatTime defaultTimeLocale ambiguous_time_format
 508
 509
 510 -- | Pickle a date value from a \<date\> element as they appear in the
 511 --   early lines. This is a particularly wacky format, but then so is
 512 --   the associated time (see 'xp_ambiguous_time').
 513 --
 514 --   Examples:
 515 --
 516 --   >>> let tn = text_node "SUNDAY, MAY 25TH (05/25/2014)"
 517 --   >>> let (Just result) = unpickleDoc xp_early_line_date tn
 518 --   >>> result
 519 --   2014-05-25 00:00:00 UTC
 520 --   >>> pickleDoc xp_early_line_date result
 521 --   NTree (XTag "/" []) [NTree (XText "SUNDAY, MAY 25TH (05/25/2014)") []]
 522 --
 523 --   >>> let tn = text_node "SATURDAY, JUNE 7TH (06/07/2014)"
 524 --   >>> let (Just result) = unpickleDoc xp_early_line_date tn
 525 --   >>> result
 526 --   2014-06-07 00:00:00 UTC
 527 --   >>> pickleDoc xp_early_line_date result
 528 --   NTree (XTag "/" []) [NTree (XText "SATURDAY, JUNE 7TH (06/07/2014)") []]
 529 --
 530 xp_early_line_date :: PU UTCTime
 531 xp_early_line_date =
 532   (to_time, from_time) `xpWrapMaybe` xpText
 533   where
 534     -- | We need to create our own time locale that talks IN ALL CAPS.
 535     --   Actually, 'parseTime' doesn't seem to care about the
 536     --   case. But when we spit it back out again ('formatTime'),
 537     --   we'll want it to be in all caps.
 538     --
 539     caps_time_locale :: TimeLocale
 540     caps_time_locale =
 541       defaultTimeLocale { wDays = caps_days, months = caps_months }
 542
 543     caps_days :: [(String,String)]
 544     caps_days = map both_to_upper (wDays defaultTimeLocale)
 545
 546     caps_months :: [(String,String)]
 547     caps_months = map both_to_upper (months defaultTimeLocale)
 548
 549     both_to_upper :: (String,String) -> (String,String)
 550     both_to_upper (s1,s2) = (map toUpper s1, map toUpper s2)
 551
 552     wacko_date_formats :: [String]
 553     wacko_date_formats =
 554       ["%A, %B %-d" ++ suffix ++ " (" ++ date_format_padded ++ ")" |
 555          suffix <- ["ST", "ND", "RD","TH"] ]
 556
 557     to_time :: String -> Maybe UTCTime
 558     to_time s =
 559       listToMaybe $ catMaybes possible_parses
 560       where
 561         possible_parses = [ parseTime caps_time_locale fmt s |
 562                               fmt <- wacko_date_formats ]
 563
 564     from_time :: UTCTime -> String
 565     from_time t =
 566       formatTime caps_time_locale fmt t
 567       where
 568         upper_suffix = map toUpper (date_suffix t)
 569         fmt = "%A, %B %-d" ++ upper_suffix ++ " (" ++ date_format_padded ++ ")"
 570
 571
 572
 573 -- | Create an 'XmlTree' containing only the given text. This is
 574 --   useful for testing (un)picklers, where we don't want to have to
 575 --   bother to create a dummy XML document.
 576 --
 577 --   Examples:
 578 --
 579 --   >>> text_node "8:00"
 580 --   NTree (XText "8:00") []
 581 --
 582 text_node :: String -> XmlTree
 583 text_node s = NTree (XText s) []
 584
 585
 586
 587 --
 588 -- * Tasty Tests
 589 --
 590
 591 -- | A list of all tests for this module. This primary exists to
 592 --   eliminate the unused import/export warnings for 'unpickleDoc' and
 593 --   'text_node' which are otherwise only used in the doctests.
 594 --
 595 pickler_tests :: TestTree
 596 pickler_tests =
 597   testGroup
 598     "Pickler tests"
 599     [ test_pickle_of_unpickle_is_identity ]
 600
 601
 602 -- | If we unpickle something and then pickle it, we should wind up
 603 --   with the same thing we started with (plus an additional root
 604 --   element).
 605 --
 606 test_pickle_of_unpickle_is_identity :: TestTree
 607 test_pickle_of_unpickle_is_identity =
 608   testCase "pickle composed with unpickle is (almost) the identity" $ do
 609     let tn = text_node "8:00"
 610     let (Just utctime) = unpickleDoc xp_ambiguous_time tn
 611     let actual = pickleDoc xp_ambiguous_time utctime
 612     let expected = NTree (XTag (mkName "/") []) [tn]
 613     actual @?= expected