-- feed.
--
module TSN.Picklers (
+ pickler_tests,
+ xp_ambiguous_time,
xp_date,
- xp_team_id )
+ xp_date_padded,
+ xp_datetime,
+ xp_early_line_date,
+ xp_earnings,
+ xp_fracpart_only_double,
+ xp_gamedate,
+ xp_tba_time,
+ xp_time,
+ xp_time_dots,
+ xp_time_stamp )
where
-- System imports.
+import Data.Char ( toUpper )
+import Data.List ( intercalate )
+import Data.List.Split ( chunksOf )
+import Data.Maybe ( catMaybes, listToMaybe )
+import Data.String.Utils ( replace )
import Data.Time.Clock ( UTCTime )
import Data.Time.Format ( formatTime, parseTime )
-import System.Locale ( defaultTimeLocale )
-import Text.Printf ( printf )
+import Data.Tree.NTree.TypeDefs ( NTree(..) )
+import System.Locale ( TimeLocale( wDays, months ), defaultTimeLocale )
+import Test.Tasty ( TestTree, testGroup )
+import Test.Tasty.HUnit ( (@?=), testCase )
import Text.Read ( readMaybe )
import Text.XML.HXT.Arrow.Pickle (
xpText,
+ xpWrap,
xpWrapMaybe )
import Text.XML.HXT.Arrow.Pickle.Xml ( PU )
+import Text.XML.HXT.Core (
+ XmlTree,
+ XNode( XTag, XText ),
+ mkName,
+ pickleDoc,
+ unpickleDoc )
+
+-- Local imports.
+import TSN.Parse (
+ parse_time_stamp,
+ time_format,
+ time_stamp_format )
+
+
+-- | The format string for a base date in m/d/yyyy format. The
+--   day/month are not padded at all. This will match for example,
+--
+--   * 2\/15\/1983
+--
+--   * 1\/1\/0000
+--
+--   It is the format used by 'xp_date', and it forms the date half
+--   of the format used by 'xp_datetime'.
+--
+date_format :: String
+date_format = "%-m/%-d/%Y"
+
+
+-- | The format string for a base date in mm/dd/yyyy format. The
+--   day/month are padded to two characters with zeros. This will
+--   match for example,
+--
+--   * 02\/15\/1983
+--
+--   * 01\/01\/0000
+--
+--   It is the format used by 'xp_date_padded', and it supplies the
+--   parenthesized date in the format used by 'xp_early_line_date'.
+--
+date_format_padded :: String
+date_format_padded = "%0m/%0d/%Y"
-- | (Un)pickle a UTCTime without the time portion.
--
+--   The text is both parsed and rendered with 'date_format'. As the
+--   examples show, the time of day of the result is midnight.
+--
+--   /Examples/:
+--
+--   This should parse:
+--
+--   >>> let tn = text_node "2/15/1983"
+--   >>> unpickleDoc xp_date tn
+--   Just 1983-02-15 00:00:00 UTC
+--
+--   But for some reason, it can also parse a leading zero in the
+--   month. Whatever. This isn't required behavior.
+--
+--   >>> let tn = text_node "02/15/1983"
+--   >>> unpickleDoc xp_date tn
+--   Just 1983-02-15 00:00:00 UTC
+--
xp_date :: PU UTCTime
xp_date =
(to_date, from_date) `xpWrapMaybe` xpText
where
- format = "%-m/%-d/%Y"
+ -- Unpickling direction: parse the text, which may fail.
+ to_date :: String -> Maybe UTCTime
+ to_date = parseTime defaultTimeLocale date_format
+ -- Pickling direction: render the date back to text.
+ from_date :: UTCTime -> String
+ from_date = formatTime defaultTimeLocale date_format
+
+
+-- | (Un)pickle a UTCTime without the time portion. The day/month are
+--   padded to two characters with zeros.
+--
+--   The text is both parsed and rendered with 'date_format_padded'.
+--   As the examples show, the time of day of the result is midnight.
+--
+--   Examples:
+--
+--   >>> let tn = text_node "02/15/1983"
+--   >>> unpickleDoc xp_date_padded tn
+--   Just 1983-02-15 00:00:00 UTC
+--
+--   >>> let tn = text_node "06/07/2014"
+--   >>> unpickleDoc xp_date_padded tn
+--   Just 2014-06-07 00:00:00 UTC
+--
+xp_date_padded :: PU UTCTime
+xp_date_padded =
+ (to_date, from_date) `xpWrapMaybe` xpText
+ where
to_date :: String -> Maybe UTCTime
- to_date = parseTime defaultTimeLocale format
+ to_date = parseTime defaultTimeLocale date_format_padded
from_date :: UTCTime -> String
- from_date = formatTime defaultTimeLocale format
+ from_date = formatTime defaultTimeLocale date_format_padded
+
+
+
+-- | Format a number as a string using a comma as the thousands
+--   separator.
+--
+--   The digits are grouped in threes starting from the right. A
+--   leading minus sign is set aside before grouping and put back
+--   afterwards, so that it can never end up in a \"group\" of its
+--   own (which would produce nonsense like \"-,100\").
+--
+--   Examples:
+--
+--   >>> format_commas 0
+--   "0"
+--   >>> format_commas 10
+--   "10"
+--   >>> format_commas 100
+--   "100"
+--   >>> format_commas 1000
+--   "1,000"
+--   >>> format_commas 10000
+--   "10,000"
+--   >>> format_commas 100000
+--   "100,000"
+--   >>> format_commas 1000000
+--   "1,000,000"
+--
+--   Negative numbers keep their sign in front of the first group:
+--
+--   >>> format_commas (-100)
+--   "-100"
+--   >>> format_commas (-1000)
+--   "-1,000"
+--   >>> format_commas (-100000)
+--   "-100,000"
+--
+format_commas :: Int -> String
+format_commas x
+  -- Drop the sign from the shown string rather than negating the
+  -- number; negating 'minBound' would overflow.
+  | x < 0     = '-' : group_digits (drop 1 (show x))
+  | otherwise = group_digits (show x)
+  where
+    -- | Insert a comma between every group of three digits, counting
+    --   from the right-hand end of the string.
+    group_digits :: String -> String
+    group_digits = reverse . intercalate "," . chunksOf 3 . reverse
+
+
+
+-- | Parse \<Earnings\> from an 'AutoRaceResultsListing'. These are
+--   essentially 'Int's, but they look like,
+--
+--   * \<Earnings\>336,826\</Earnings\>
+--
+--   * \<Earnings\>1,000,191\</Earnings\>
+--
+--   * \<Earnings\>TBA\</Earnings\>
+--
+--   The literal text \"TBA\" unpickles to 'Nothing'. Anything else
+--   has its commas removed and is then read as an 'Int'; if that
+--   fails, the unpickling itself fails instead of throwing an
+--   exception when the value is eventually used.
+--
+--   When pickling, 'Nothing' becomes \"TBA\" and a number is written
+--   with 'format_commas'.
+--
+--   Examples:
+--
+--   >>> let tn = text_node "1,000,191"
+--   >>> unpickleDoc xp_earnings tn
+--   Just (Just 1000191)
+--
+--   >>> let tn = text_node "TBA"
+--   >>> unpickleDoc xp_earnings tn
+--   Just Nothing
+--
+--   Garbage is rejected:
+--
+--   >>> let tn = text_node "lots"
+--   >>> unpickleDoc xp_earnings tn
+--   Nothing
+--
+xp_earnings :: PU (Maybe Int)
+xp_earnings =
+  (to_earnings, from_earnings) `xpWrapMaybe` xpText
+  where
+    strip_commas :: String -> String
+    strip_commas = replace "," ""
+
+    -- | The outer 'Maybe' signals a parse failure to 'xpWrapMaybe';
+    --   the inner one distinguishes \"TBA\" from a real amount.
+    to_earnings :: String -> Maybe (Maybe Int)
+    to_earnings s
+      | s == "TBA" = Just Nothing
+      | otherwise  = fmap Just (readMaybe (strip_commas s))
+
+    from_earnings :: Maybe Int -> String
+    from_earnings Nothing  = "TBA"
+    from_earnings (Just i) = format_commas i
+
+
+
+-- | Pickle a 'Double' that can be missing its leading zero (for
+--   values less than one). For example, we've seen,
+--
+--   <TrackLength KPH=".805">0.5</TrackLength>
+--
+--   Which 'xpPrim' can't handle without the leading
+--   zero. Unfortunately there's no way pickle/unpickle can be
+--   inverses of each other here, since \"0.5\" and \".5\" should
+--   unpickle to the same 'Double'.
+--
+--   A leading minus sign is handled separately, so that whatever
+--   'show' produces for a negative 'Double' while pickling can be
+--   unpickled again.
+--
+--   Examples:
+--
+--   >>> let tn = text_node "0.5"
+--   >>> unpickleDoc xp_fracpart_only_double tn
+--   Just 0.5
+--
+--   >>> let tn = text_node ".5"
+--   >>> unpickleDoc xp_fracpart_only_double tn
+--   Just 0.5
+--
+--   >>> let tn = text_node "-.5"
+--   >>> unpickleDoc xp_fracpart_only_double tn
+--   Just (-0.5)
+--
+--   >>> let tn = text_node "foo"
+--   >>> unpickleDoc xp_fracpart_only_double tn
+--   Nothing
+--
+xp_fracpart_only_double :: PU Double
+xp_fracpart_only_double =
+  (to_double, from_double) `xpWrapMaybe` xpText
+  where
+    -- | Convert a 'String' to a 'Double', maybe. We always prepend a
+    --   zero to the digits, since it will fix the fraction-only
+    --   values, and not hurt the ones that already have a leading
+    --   integer. The zero has to go /after/ a minus sign, though, so
+    --   the sign is peeled off first. A lone \"-\" is not a number
+    --   and falls through to the second equation, which rejects it.
+    to_double :: String -> Maybe Double
+    to_double ('-':digits)
+      | not (null digits) = fmap negate (readMaybe ('0' : digits))
+    to_double s = readMaybe ('0' : s)
+
+    from_double :: Double -> String
+    from_double = show
+
+
+
+-- | (Un)pickle a 'UTCTime' whose date and hour fields are not
+--   zero-padded, as found for example in the \<RaceDate\> elements
+--   of an 'AutoRaceResults' message. The format is 'date_format',
+--   a space, and then a 12-hour time with seconds and AM/PM.
+--
+--   Examples:
+--
+--   >>> let tn = text_node "6/1/2014 1:00:00 PM"
+--   >>> unpickleDoc xp_datetime tn
+--   Just 2014-06-01 13:00:00 UTC
+--
+--   >>> let tn = text_node "5/24/2014 2:45:00 PM"
+--   >>> unpickleDoc xp_datetime tn
+--   Just 2014-05-24 14:45:00 UTC
+--
+--   A single leading zero on the month happens to parse as well.
+--   This is neither required nor (probably) desired behavior.
+--
+--   >>> let tn = text_node "05/24/2014 2:45:00 PM"
+--   >>> unpickleDoc xp_datetime tn
+--   Just 2014-05-24 14:45:00 UTC
+--
+xp_datetime :: PU UTCTime
+xp_datetime =
+  xpWrapMaybe (parse_datetime, show_datetime) xpText
+  where
+    -- | The time-of-day half of the format; the hour is unpadded.
+    time_part :: String
+    time_part = "%-I:%M:%S %p"
+
+    -- | Date and time halves, separated by a single space.
+    datetime_format :: String
+    datetime_format = unwords [date_format, time_part]
+
+    parse_datetime :: String -> Maybe UTCTime
+    parse_datetime = parseTime defaultTimeLocale datetime_format
+
+    show_datetime :: UTCTime -> String
+    show_datetime = formatTime defaultTimeLocale datetime_format
+
+
+
+-- | Takes a 'UTCTime', and returns the English suffix that would be
+--   appropriate after the day of the month. For example, if we have a
+--   UTCTime representing Christmas, this would return \"th\" because
+--   \"th\" is the right suffix of \"December 25th\".
+--
+--   The suffix normally depends only on the last digit of the day,
+--   but the 11th, 12th, and 13th are exceptions: they take \"th\"
+--   even though they end in 1, 2, and 3.
+--
+--   Examples:
+--
+--   >>> import Data.Maybe ( fromJust )
+--   >>> :{
+--     let parse_date :: String -> Maybe UTCTime
+--         parse_date = parseTime defaultTimeLocale date_format
+--   :}
+--
+--   >>> let dates = [ "1/" ++ (d : "/1970") | d <- ['1'..'9'] ]
+--   >>> let suffixes = map (date_suffix . fromJust . parse_date) dates
+--   >>> suffixes
+--   ["st","nd","rd","th","th","th","th","th","th"]
+--
+--   The \"teens\" all get \"th\", but the twenties go back to normal:
+--
+--   >>> let days = ["11", "12", "13", "21", "22", "23"]
+--   >>> let dates = [ "1/" ++ d ++ "/1970" | d <- days ]
+--   >>> map (date_suffix . fromJust . parse_date) dates
+--   ["th","th","th","st","nd","rd"]
+--
+date_suffix :: UTCTime -> String
+date_suffix t =
+  -- The day string is reversed so that the ones digit comes first
+  -- and the tens digit second.
+  case (reverse daystr) of
+    []        -> []
+    (_:'1':_) -> "th" -- Tens digit is one: 10th through 19th.
+    ('1':_)   -> "st"
+    ('2':_)   -> "nd"
+    ('3':_)   -> "rd"
+    _         -> "th"
+  where
+    -- The day of the month, zero-padded to two digits.
+    daystr = formatTime defaultTimeLocale "%d" t
+
+
+-- | (Un)pickle a UTCTime from a weather forecast's gamedate. Example
+--   input looks like,
+--
+--   * Monday, December 30th
+--
+--   When unpickling we get rid of the suffixes \"st\", \"nd\", \"rd\", and
+--   \"th\". During pickling, we add back whichever suffix
+--   'date_suffix' chooses for the date.
+--
+--   The text carries no year, and in the example below the parsed
+--   date lands in 1970. That is why the day of the week changes
+--   when the value is pickled again.
+--
+--   Examples:
+--
+--   >>> let tn = text_node "Monday, December 30th"
+--   >>> let (Just gd) = unpickleDoc xp_gamedate tn
+--   >>> gd
+--   1970-12-30 00:00:00 UTC
+--   >>> pickleDoc xp_gamedate gd
+--   NTree (XTag "/" []) [NTree (XText "Wednesday, December 30th") []]
+--
+xp_gamedate :: PU UTCTime
+xp_gamedate =
+ (to_gamedate, from_gamedate) `xpWrapMaybe` xpText
+ where
+ format = "%A, %B %-d"
+
+ to_gamedate :: String -> Maybe UTCTime
+ to_gamedate s =
+ parseTime defaultTimeLocale format s'
+ where
+ -- The string is reversed so that its last two characters (the
+ -- candidate suffix) can be matched at the head of the list.
+ s' = case (reverse s) of
+ (c2:c1:cs) -> let suffix = [c1,c2]
+ in
+ if suffix `elem` ["st","nd","rd","th"]
+ then reverse cs
+ else s -- Unknown suffix, leave it alone.
+ _ -> s -- The String is less than two characters long,
+ -- leave it alone.
--- | Parse a team_id. These are (so far!) three characters long, and
--- not necessarily numeric. For simplicity, we return a 'String'
--- rather than e.g. a @(Char, Char, Char)@. But unpickling will fail
--- if the team_id is longer than three characters.
+
+ from_gamedate :: UTCTime -> String
+ from_gamedate d = s ++ (date_suffix d)
+ where
+ s = formatTime defaultTimeLocale format d
+
+
+
+
+
+
+
+-- | (Un)pickle a UTCTime without the date portion. Doesn't work if
+--   the fields aren't zero-padded to two characters.
+--
+--   The text is both parsed and rendered with 'time_format', which
+--   is imported from "TSN.Parse". As the first example shows, the
+--   date of the result is 1970-01-01.
+--
+--   /Examples/:
+--
+--   Padded, should work:
+--
+--   >>> let tn = text_node "04:35 PM"
+--   >>> unpickleDoc xp_time tn
+--   Just 1970-01-01 16:35:00 UTC
--
-xp_team_id :: PU String
-xp_team_id =
- (to_team_id, from_team_id) `xpWrapMaybe` xpText
+--   Unpadded, should fail:
+--
+--   >>> let tn = text_node "4:35 PM"
+--   >>> unpickleDoc xp_time tn
+--   Nothing
+--
+xp_time :: PU UTCTime
+xp_time =
+ (to_time, from_time) `xpWrapMaybe` xpText
where
- to_team_id :: String -> Maybe String
- to_team_id s
- | length s <= 3 = Just s
- | otherwise = Nothing
+ -- Unpickling direction: parse the text, which may fail.
+ to_time :: String -> Maybe UTCTime
+ to_time = parseTime defaultTimeLocale time_format
+
+ -- Pickling direction: render the time back to text.
+ from_time :: UTCTime -> String
+ from_time = formatTime defaultTimeLocale time_format
- from_team_id :: String -> String
- from_team_id = id
+
+-- | (Un)pickle a UTCTime that has no date portion and whose AM/PM
+--   marker is written with periods, i.e. \"A.M.\" and \"P.M.\"
+--   Unlike 'xp_time', the \"hours\" part is not padded.
+--
+--   /Examples/:
+--
+--   A standard example of the correct form:
+--
+--   >>> let tn = text_node "11:30 A.M."
+--   >>> let (Just result) = unpickleDoc xp_time_dots tn
+--   >>> result
+--   1970-01-01 11:30:00 UTC
+--   >>> pickleDoc xp_time_dots result
+--   NTree (XTag "/" []) [NTree (XText "11:30 A.M.") []]
+--
+--   An hour with a leading zero happens to parse too:
+--
+--   >>> let tn = text_node "01:30 A.M."
+--   >>> unpickleDoc xp_time_dots tn
+--   Just 1970-01-01 01:30:00 UTC
+--
+xp_time_dots :: PU UTCTime
+xp_time_dots =
+  xpWrapMaybe (parse_dotted, show_dotted) xpText
+  where
+    -- | Hours (not padded with zeros), minutes, and a plain AM/PM.
+    hour_minute_format :: String
+    hour_minute_format = "%-I:%M %p"
+
+    -- | Throw away every period, then parse what's left.
+    parse_dotted :: String -> Maybe UTCTime
+    parse_dotted dotted =
+      parseTime defaultTimeLocale hour_minute_format (replace "." "" dotted)
+
+    -- | Format the time, then put the periods back into the AM/PM
+    --   marker.
+    show_dotted :: UTCTime -> String
+    show_dotted =
+      replace "AM" "A.M."
+      . replace "PM" "P.M."
+      . formatTime defaultTimeLocale hour_minute_format
+
+
+-- | (Un)pickle a UTCTime that has no date portion, where the text
+--   may also be \"TBA\" (which gets translated to 'Nothing').
+--
+--   /Examples/:
+--
+--   Text that fails to parse comes back as 'Nothing':
+--
+--   >>> let tn = text_node "YO"
+--   >>> unpickleDoc xp_tba_time tn
+--   Just Nothing
+--
+--   And so does a \"TBA\":
+--
+--   >>> let tn = text_node "TBA"
+--   >>> unpickleDoc xp_tba_time tn
+--   Just Nothing
+--
+--   But 'Nothing' is always pickled as \"TBA\":
+--
+--   >>> pickleDoc xp_tba_time Nothing
+--   NTree (XTag "/" []) [NTree (XText "TBA") []]
+--
+--   A normal time is also parsed successfully, of course:
+--
+--   >>> let tn = text_node "08:10 PM"
+--   >>> unpickleDoc xp_tba_time tn
+--   Just (Just 1970-01-01 20:10:00 UTC)
+--
+xp_tba_time :: PU (Maybe UTCTime)
+xp_tba_time =
+  xpWrap (parse_tba, show_tba) xpText
+  where
+    -- | \"TBA\" is 'Nothing' outright; anything else is whatever
+    --   the parser makes of it.
+    parse_tba :: String -> Maybe UTCTime
+    parse_tba "TBA" = Nothing
+    parse_tba other = parseTime defaultTimeLocale time_format other
+
+    show_tba :: Maybe UTCTime -> String
+    show_tba = maybe "TBA" (formatTime defaultTimeLocale time_format)
+
+
+
+-- | (Un)pickle a 'UTCTime' to/from the \<time_stamp\> element
+--   format. The time_stamp elements look something like,
+--
+--   \<time_stamp\> January 6, 2014, at 10:11 PM ET \</time_stamp\>
+--
+--   TSN doesn't provide a proper time zone name, only \"ET\" for
+--   \"Eastern Time\". But \"Eastern Time\" changes throughout the
+--   year, depending on one's location, for daylight-savings
+--   time. It's really not any more useful to be off by one hour than
+--   it is to be off by 5 hours, so rather than guess at EDT/EST, we
+--   just store the timestamp as UTC.
+--
+--   Examples:
+--
+--   >>> let tn = text_node " January 6, 2014, at 10:11 PM ET "
+--   >>> let (Just tstamp) = unpickleDoc xp_time_stamp tn
+--   >>> tstamp
+--   2014-01-06 22:11:00 UTC
+--   >>> pickleDoc xp_time_stamp tstamp
+--   NTree (XTag "/" []) [NTree (XText " January 6, 2014, at 10:11 PM ET ") []]
+--
+xp_time_stamp :: PU UTCTime
+xp_time_stamp =
+  xpWrapMaybe (parse_time_stamp, show_time_stamp) xpText
+  where
+    -- | The 'time_stamp_format' with one space put back on each
+    --   side; see the documentation of 'time_stamp_format' for more
+    --   information.
+    padded_format :: String
+    padded_format = concat [" ", time_stamp_format, " "]
+
+    show_time_stamp :: UTCTime -> String
+    show_time_stamp = formatTime defaultTimeLocale padded_format
+
+
+
+-- | (Un)pickle a 12-hour time that lacks its AM/PM part, which is
+--   what makes it ambiguous.
+--
+--   Examples:
+--
+--   >>> let tn = text_node "8:00"
+--   >>> unpickleDoc xp_ambiguous_time tn
+--   Just 1970-01-01 08:00:00 UTC
+--
+xp_ambiguous_time :: PU UTCTime
+xp_ambiguous_time =
+  xpWrapMaybe (parse_hm, show_hm) xpText
+  where
+    -- | Unpadded hours and minutes, and nothing else.
+    hour_minute :: String
+    hour_minute = "%-I:%M"
+
+    parse_hm :: String -> Maybe UTCTime
+    parse_hm = parseTime defaultTimeLocale hour_minute
+
+    show_hm :: UTCTime -> String
+    show_hm = formatTime defaultTimeLocale hour_minute
+
+
+-- | (Un)pickle the date found in a \<date\> element of the early
+--   lines. The format is a particularly wacky one, but then so is
+--   that of the associated time (see 'xp_ambiguous_time').
+--
+--   Examples:
+--
+--   >>> let tn = text_node "SUNDAY, MAY 25TH (05/25/2014)"
+--   >>> let (Just result) = unpickleDoc xp_early_line_date tn
+--   >>> result
+--   2014-05-25 00:00:00 UTC
+--   >>> pickleDoc xp_early_line_date result
+--   NTree (XTag "/" []) [NTree (XText "SUNDAY, MAY 25TH (05/25/2014)") []]
+--
+--   >>> let tn = text_node "SATURDAY, JUNE 7TH (06/07/2014)"
+--   >>> let (Just result) = unpickleDoc xp_early_line_date tn
+--   >>> result
+--   2014-06-07 00:00:00 UTC
+--   >>> pickleDoc xp_early_line_date result
+--   NTree (XTag "/" []) [NTree (XText "SATURDAY, JUNE 7TH (06/07/2014)") []]
+--
+xp_early_line_date :: PU UTCTime
+xp_early_line_date =
+  xpWrapMaybe (parse_early, show_early) xpText
+  where
+    shout :: String -> String
+    shout = map toUpper
+
+    shout_pair :: (String,String) -> (String,String)
+    shout_pair (first, second) = (shout first, shout second)
+
+    -- | A time locale whose day and month names are IN ALL CAPS.
+    --   'parseTime' doesn't seem to care about the case, but
+    --   'formatTime' has to produce capitals on the way back out.
+    loud_locale :: TimeLocale
+    loud_locale =
+      defaultTimeLocale {
+        wDays = map shout_pair (wDays defaultTimeLocale),
+        months = map shout_pair (months defaultTimeLocale) }
+
+    -- | The full format string for one particular day-of-the-month
+    --   suffix, which appears in it as literal text.
+    format_with :: String -> String
+    format_with suffix =
+      "%A, %B %-d" ++ suffix ++ " (" ++ date_format_padded ++ ")"
+
+    -- | Try each of the four suffixes in turn and keep the first
+    --   parse that succeeds, if there is one.
+    parse_early :: String -> Maybe UTCTime
+    parse_early s =
+      listToMaybe $ catMaybes
+        [ parseTime loud_locale (format_with suffix) s
+        | suffix <- ["ST", "ND", "RD","TH"] ]
+
+    -- | Format using the capitalized suffix that belongs to this
+    --   particular day of the month.
+    show_early :: UTCTime -> String
+    show_early t =
+      formatTime loud_locale (format_with (shout (date_suffix t))) t
+
+
+
+-- | Wrap the given text in an 'XmlTree' that contains nothing
+--   else. This is handy when testing (un)picklers, since it saves
+--   us from having to build a dummy XML document.
+--
+--   Examples:
+--
+--   >>> text_node "8:00"
+--   NTree (XText "8:00") []
+--
+text_node :: String -> XmlTree
+text_node = leaf . XText
+  where
+    -- | A tree consisting of a single node with no children.
+    leaf :: XNode -> XmlTree
+    leaf node = NTree node []
+
+
+
+--
+-- * Tasty Tests
+--
+
+-- | All of the tests for this module, in one group. This exists
+--   primarily to eliminate the unused import/export warnings for
+--   'unpickleDoc' and 'text_node', which are otherwise used only in
+--   the doctests.
+--
+pickler_tests :: TestTree
+pickler_tests = testGroup "Pickler tests" all_tests
+  where
+    all_tests :: [TestTree]
+    all_tests = [ test_pickle_of_unpickle_is_identity ]
+
+
+-- | If we unpickle something and then pickle it, we should wind up
+--   with the same thing we started with (plus an additional root
+--   element).
+--
+--   The unpickled value is kept inside its 'Maybe' and compared
+--   against a 'Just', so that a failure to unpickle is reported as
+--   an ordinary failed assertion rather than as a pattern-match
+--   exception.
+--
+test_pickle_of_unpickle_is_identity :: TestTree
+test_pickle_of_unpickle_is_identity =
+  testCase "pickle composed with unpickle is (almost) the identity" $
+    actual @?= expected
+  where
+    tn :: XmlTree
+    tn = text_node "8:00"
+
+    -- | Unpickle the text node, and pickle the result again if
+    --   there was one.
+    actual :: Maybe XmlTree
+    actual =
+      fmap (pickleDoc xp_ambiguous_time) (unpickleDoc xp_ambiguous_time tn)
+
+    -- | The original text node, underneath the root element that
+    --   pickling adds.
+    expected :: Maybe XmlTree
+    expected = Just (NTree (XTag (mkName "/") []) [tn])