X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FTSN%2FPicklers.hs;h=4f0020b6ad167123c10a66224a0b5092f09b8489;hb=7b33eb87f59d46ec97d619f7521c067d8a6b9308;hp=232f3579c7a6f257c018b7ad24d6d4c01e40c095;hpb=84b0b8621593800f0fba275b86ad9f9961a07530;p=dead%2Fhtsn-import.git diff --git a/src/TSN/Picklers.hs b/src/TSN/Picklers.hs index 232f357..4f0020b 100644 --- a/src/TSN/Picklers.hs +++ b/src/TSN/Picklers.hs @@ -1,15 +1,20 @@ +{-# LANGUAGE ScopedTypeVariables #-} + -- | (Un)picklers for data types present in The Sports Network XML -- feed. -- module TSN.Picklers ( pickler_tests, xp_ambiguous_time, + xp_attr_option, xp_date, xp_date_padded, xp_datetime, xp_early_line_date, xp_earnings, + xp_fracpart_only_double, xp_gamedate, + xp_tba_int, xp_tba_time, xp_time, xp_time_dots, @@ -22,12 +27,13 @@ import Data.List ( intercalate ) import Data.List.Split ( chunksOf ) import Data.Maybe ( catMaybes, listToMaybe ) import Data.String.Utils ( replace ) -import Data.Time.Clock ( NominalDiffTime, UTCTime, addUTCTime ) +import Data.Time.Clock ( UTCTime ) import Data.Time.Format ( formatTime, parseTime ) import Data.Tree.NTree.TypeDefs ( NTree(..) ) import System.Locale ( TimeLocale( wDays, months ), defaultTimeLocale ) import Test.Tasty ( TestTree, testGroup ) import Test.Tasty.HUnit ( (@?=), testCase ) +import Text.Read ( readMaybe ) import Text.XML.HXT.Arrow.Pickle ( xpText, xpWrap, @@ -107,6 +113,10 @@ xp_date = -- >>> unpickleDoc xp_date_padded tn -- Just 1983-02-15 00:00:00 UTC -- +-- >>> let tn = text_node "06/07/2014" +-- >>> unpickleDoc xp_date_padded tn +-- Just 2014-06-07 00:00:00 UTC +-- xp_date_padded :: PU UTCTime xp_date_padded = (to_date, from_date) `xpWrapMaybe` xpText @@ -144,6 +154,7 @@ format_commas x = reverse (intercalate "," $ chunksOf 3 $ reverse $ show x) + -- | Parse \ from an 'AutoRaceResultsListing'. These are -- essentially 'Int's, but they look like, -- @@ -181,6 +192,45 @@ xp_earnings = +-- | Pickle a 'Double' that can be missing its leading zero (for +-- values less than one). For example, we've seen, +-- +-- 0.5 +-- +-- Which 'xpPrim' can't handle without the leading +-- zero. Unfortunately there's no way pickle/unpickle can be +-- inverses of each other here, since \"0.5\" and \".5\" should +-- unpickle to the same 'Double'. +-- +-- Examples: +-- +-- >>> let tn = text_node "0.5" +-- >>> unpickleDoc xp_fracpart_only_double tn +-- Just 0.5 +-- +-- >>> let tn = text_node ".5" +-- >>> unpickleDoc xp_fracpart_only_double tn +-- Just 0.5 +-- +-- >>> let tn = text_node "foo" +-- >>> unpickleDoc xp_fracpart_only_double tn +-- Nothing +-- +xp_fracpart_only_double :: PU Double +xp_fracpart_only_double = + (to_double, from_double) `xpWrapMaybe` xpText + where + -- | Convert a 'String' to a 'Double', maybe. We always prepend a + -- zero, since it will fix the fraction-only values, and not hurt + -- the ones that already have a leading integer. + to_double :: String -> Maybe Double + to_double s = readMaybe ("0" ++ s) + + from_double :: Double -> String + from_double = show + + + -- | (Un)pickle an unpadded 'UTCTime'. Used for example on the -- \ elements in an 'AutoRaceResults' message. -- @@ -222,26 +272,16 @@ xp_datetime = -- -- Examples: -- +-- >>> import Data.Maybe ( fromJust ) -- >>> :{ --- let parse_date :: String -> Maybe UTCTime; --- parse_date = parseTime defaultTimeLocale date_format; +-- let parse_date :: String -> Maybe UTCTime +-- parse_date = parseTime defaultTimeLocale date_format -- :} -- --- >>> let (Just d1) = parse_date "1/1/1970" --- >>> date_suffix d1 --- "st" --- --- >>> let (Just d2) = parse_date "1/2/1970" --- >>> date_suffix d2 --- "nd" --- --- >>> let (Just d3) = parse_date "1/3/1970" --- >>> date_suffix d3 --- "rd" --- --- >>> let (Just d4) = parse_date "1/4/1970" --- >>> date_suffix d4 --- "th" +-- >>> let dates = [ "1/" ++ (d : "/1970") | d <- ['1'..'9'] ] +-- >>> let suffixes = map (date_suffix . fromJust . parse_date) dates +-- >>> suffixes +-- ["st","nd","rd","th","th","th","th","th","th"] -- date_suffix :: UTCTime -> String date_suffix t = @@ -412,35 +452,79 @@ xp_tba_time = from_time (Just t) = formatTime defaultTimeLocale time_format t +-- | (Un)pickle a 'Int', allowing for a value of \"TBA\" (which gets +-- translated to 'Nothing'). +-- +-- /Examples/: +-- +-- A failed parse will return 'Nothing': +-- +-- >>> let tn = text_node "YO" +-- >>> unpickleDoc xp_tba_int tn +-- Just Nothing +-- +-- And so will parsing a \"TBA\": +-- +-- >>> let tn = text_node "TBA" +-- >>> unpickleDoc xp_tba_int tn +-- Just Nothing +-- +-- But re-pickling 'Nothing' gives only \"TBA\": +-- +-- >>> pickleDoc xp_tba_int Nothing +-- NTree (XTag "/" []) [NTree (XText "TBA") []] +-- +-- A normal integer is also parsed successfully, of course: +-- +-- >>> let tn = text_node "110" +-- >>> unpickleDoc xp_tba_int tn +-- Just (Just 110) +-- +xp_tba_int :: PU (Maybe Int) +xp_tba_int = + (to_int, from_int) `xpWrap` xpText + where + to_int :: String -> Maybe Int + to_int = readMaybe + + from_int :: Maybe Int -> String + from_int Nothing = "TBA" + from_int (Just t) = show t + + -- | (Un)pickle the \ element format to/from a 'UTCTime'. -- The time_stamp elements look something like, -- -- \ January 6, 2014, at 10:11 PM ET \ -- --- TSN doesn't provide a proper time zone name, so we assume that --- it's always Eastern Standard Time. EST is UTC-5, so we --- add/subtract 5 hours to convert to/from UTC. +-- TSN doesn't provide a proper time zone name, only \"ET\" for +-- \"Eastern Time\". But \"Eastern Time\" changes throughout the +-- year, depending on one's location, for daylight-savings +-- time. It's really not any more useful to be off by one hour than +-- it is to be off by 5 hours, so rather than guess at EDT/EST, we +-- just store the timestamp as UTC. -- -- Examples: -- -- >>> let tn = text_node " January 6, 2014, at 10:11 PM ET " --- >>> unpickleDoc xp_time_stamp tn --- Just 2014-01-07 03:11:00 UTC +-- >>> let (Just tstamp) = unpickleDoc xp_time_stamp tn +-- >>> tstamp +-- 2014-01-06 22:11:00 UTC +-- >>> pickleDoc xp_time_stamp tstamp +-- NTree (XTag "/" []) [NTree (XText " January 6, 2014, at 10:11 PM ET ") []] -- xp_time_stamp :: PU UTCTime xp_time_stamp = (parse_time_stamp, from_time_stamp) `xpWrapMaybe` xpText where - five_hours :: NominalDiffTime - five_hours = 5 * 60 * 60 - - subtract_five :: UTCTime -> UTCTime - subtract_five = addUTCTime (-1 * five_hours) - + -- | We have to re-pad the time_stamp_format with a leading and + -- trailing space; see the documentation of 'time_stamp_format' + -- for more information. from_time_stamp :: UTCTime -> String from_time_stamp = - formatTime defaultTimeLocale time_stamp_format . subtract_five + formatTime defaultTimeLocale (" " ++ time_stamp_format ++ " ") + -- | (Un)pickle an ambiguous 12-hour AM/PM time, which is ambiguous @@ -480,6 +564,13 @@ xp_ambiguous_time = -- >>> pickleDoc xp_early_line_date result -- NTree (XTag "/" []) [NTree (XText "SUNDAY, MAY 25TH (05/25/2014)") []] -- +-- >>> let tn = text_node "SATURDAY, JUNE 7TH (06/07/2014)" +-- >>> let (Just result) = unpickleDoc xp_early_line_date tn +-- >>> result +-- 2014-06-07 00:00:00 UTC +-- >>> pickleDoc xp_early_line_date result +-- NTree (XTag "/" []) [NTree (XText "SATURDAY, JUNE 7TH (06/07/2014)") []] +-- xp_early_line_date :: PU UTCTime xp_early_line_date = (to_time, from_time) `xpWrapMaybe` xpText @@ -504,7 +595,7 @@ xp_early_line_date = wacko_date_formats :: [String] wacko_date_formats = - ["%A, %B %d" ++ suffix ++ " (" ++ date_format_padded ++ ")" | + ["%A, %B %-d" ++ suffix ++ " (" ++ date_format_padded ++ ")" | suffix <- ["ST", "ND", "RD","TH"] ] to_time :: String -> Maybe UTCTime @@ -519,7 +610,26 @@ xp_early_line_date = formatTime caps_time_locale fmt t where upper_suffix = map toUpper (date_suffix t) - fmt = "%A, %B %d" ++ upper_suffix ++ " (" ++ date_format_padded ++ ")" + fmt = "%A, %B %-d" ++ upper_suffix ++ " (" ++ date_format_padded ++ ")" + + +-- | This is a replacement for @xpOption xpFoo@ within an 'xpAttr'. +-- There's a bug in +-- newer versions of HXT that prevents us from using the usual +-- 'xpOption' solution, so this is our stopgap. It should work on +-- any type that can be unpickled with a plain read/show. +-- +xp_attr_option :: forall a. (Read a, Show a) => PU (Maybe a) +xp_attr_option = + (to_a, from_a) `xpWrap` xpText + where + to_a :: String -> Maybe a + to_a = readMaybe + + from_a :: Maybe a -> String + from_a Nothing = "" + from_a (Just x) = show x + -- | Create an 'XmlTree' containing only the given text. This is -- useful for testing (un)picklers, where we don't want to have to