X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FTSN%2FPicklers.hs;h=a64db8a2023760375ac467d888b45f96294d7322;hb=acd67d75aab3f2350b488fe6402bc0a20e476a18;hp=387f4cf4fe4926d0c436678ca0e2ed509b075049;hpb=8c663269b4a2981fd35b60dcaad8ba2de9915dc2;p=dead%2Fhtsn-import.git diff --git a/src/TSN/Picklers.hs b/src/TSN/Picklers.hs index 387f4cf..a64db8a 100644 --- a/src/TSN/Picklers.hs +++ b/src/TSN/Picklers.hs @@ -2,26 +2,40 @@ -- feed. -- module TSN.Picklers ( + pickler_tests, + xp_ambiguous_time, xp_date, xp_date_padded, + xp_datetime, xp_earnings, xp_gamedate, - xp_racedate, xp_tba_time, xp_time, + xp_time_dots, xp_time_stamp ) where -- System imports. +import Data.List ( intercalate ) +import Data.List.Split ( chunksOf ) import Data.String.Utils ( replace ) import Data.Time.Clock ( NominalDiffTime, UTCTime, addUTCTime ) import Data.Time.Format ( formatTime, parseTime ) +import Data.Tree.NTree.TypeDefs ( NTree(..) ) import System.Locale ( defaultTimeLocale ) +import Test.Tasty ( TestTree, testGroup ) +import Test.Tasty.HUnit ( (@?=), testCase ) import Text.XML.HXT.Arrow.Pickle ( xpText, xpWrap, xpWrapMaybe ) import Text.XML.HXT.Arrow.Pickle.Xml ( PU ) +import Text.XML.HXT.Core ( + XmlTree, + XNode( XTag, XText ), + mkName, + pickleDoc, + unpickleDoc ) -- Local imports. import TSN.Parse ( @@ -33,8 +47,9 @@ import TSN.Parse ( -- | The format string for a base date in m/d/yyyy format. The -- day/month are not padded at all. This will match for example, -- --- * 2/15/1983 --- * 1/1/0000 +-- * 2\/15\/1983 +-- +-- * 1\/1\/0000 -- date_format :: String date_format = "%-m/%-d/%Y" @@ -44,8 +59,9 @@ date_format = "%-m/%-d/%Y" -- day/month are padded to two characters with zeros. This will -- match for example, -- --- * 02/15/1983 --- * 01/01/0000 +-- * 02\/15\/1983 +-- +-- * 01\/01\/0000 -- date_format_padded :: String date_format_padded = "%0m/%0d/%Y" @@ -53,6 +69,21 @@ date_format_padded = "%0m/%0d/%Y" -- | (Un)pickle a UTCTime without the time portion. -- +-- /Examples/: +-- +-- This should parse: +-- +-- >>> let tn = text_node "2/15/1983" +-- >>> unpickleDoc xp_date tn +-- Just 1983-02-15 00:00:00 UTC +-- +-- But for some reason, it can also parse a leading zero in the +-- month. Whatever. This isn't required behavior. +-- +-- >>> let tn = text_node "02/15/1983" +-- >>> unpickleDoc xp_date tn +-- Just 1983-02-15 00:00:00 UTC +-- xp_date :: PU UTCTime xp_date = (to_date, from_date) `xpWrapMaybe` xpText @@ -67,6 +98,12 @@ xp_date = -- | (Un)pickle a UTCTime without the time portion. The day/month are -- padded to two characters with zeros. -- +-- Examples: +-- +-- >>> let tn = text_node "02/15/1983" +-- >>> unpickleDoc xp_date_padded tn +-- Just 1983-02-15 00:00:00 UTC +-- xp_date_padded :: PU UTCTime xp_date_padded = (to_date, from_date) `xpWrapMaybe` xpText @@ -78,12 +115,50 @@ xp_date_padded = from_date = formatTime defaultTimeLocale date_format_padded + +-- | Format a number as a string using a comma as the thousands +-- separator. +-- +-- Examples: +-- +-- >>> format_commas 0 +-- "0" +-- >>> format_commas 10 +-- "10" +-- >>> format_commas 100 +-- "100" +-- >>> format_commas 1000 +-- "1,000" +-- >>> format_commas 10000 +-- "10,000" +-- >>> format_commas 100000 +-- "100,000" +-- >>> format_commas 1000000 +-- "1,000,000" +-- +format_commas :: Int -> String +format_commas x = + reverse (intercalate "," $ chunksOf 3 $ reverse $ show x) + + -- | Parse \ from an 'AutoRaceResultsListing'. These are -- essentially 'Int's, but they look like, -- --- * 336,826 --- * 1,000,191 --- * TBA +-- * \336,826\ +-- +-- * \1,000,191\ +-- +-- * \TBA\ +-- +-- Examples: +-- +-- >>> let tn = text_node "1,000,191" +-- >>> unpickleDoc xp_earnings tn +-- Just (Just 1000191) +-- +-- >>> let tn = text_node "TBA" +-- >>> unpickleDoc xp_earnings tn +-- Just Nothing -- xp_earnings :: PU (Maybe Int) xp_earnings = @@ -95,43 +170,63 @@ xp_earnings = to_earnings :: String -> Maybe Int to_earnings s | s == "TBA" = Nothing - | otherwise = (read . strip_commas . show) s + | otherwise = Just $ (read . strip_commas) s from_earnings :: Maybe Int -> String - from_earnings Nothing = "" - from_earnings (Just i) = show i + from_earnings Nothing = "TBA" + from_earnings (Just i) = format_commas i + --- | (Un)pickle a 'UTCTime' from a \ element in an --- 'AutoRaceResults' message. +-- | (Un)pickle an unpadded 'UTCTime'. Used for example on the +-- \ elements in an 'AutoRaceResults' message. -- --- Example: +-- Examples: -- --- 6/1/2014 1:00:00 PM --- 5/24/2014 2:45:00 PM +-- >>> let tn = text_node "6/1/2014 1:00:00 PM" +-- >>> unpickleDoc xp_datetime tn +-- Just 2014-06-01 13:00:00 UTC -- -xp_racedate :: PU UTCTime -xp_racedate = - (to_racedate, from_racedate) `xpWrapMaybe` xpText +-- >>> let tn = text_node "5/24/2014 2:45:00 PM" +-- >>> unpickleDoc xp_datetime tn +-- Just 2014-05-24 14:45:00 UTC +-- +-- Padded! For some reason it works with only one zero in front. I +-- dunno man. NOT required (or even desired?) behavior. +-- +-- >>> let tn = text_node "05/24/2014 2:45:00 PM" +-- >>> unpickleDoc xp_datetime tn +-- Just 2014-05-24 14:45:00 UTC +-- +xp_datetime :: PU UTCTime +xp_datetime = + (to_datetime, from_datetime) `xpWrapMaybe` xpText where format = date_format ++ " " ++ "%-I:%M:%S %p" - to_racedate :: String -> Maybe UTCTime - to_racedate = parseTime defaultTimeLocale format + to_datetime :: String -> Maybe UTCTime + to_datetime = parseTime defaultTimeLocale format - from_racedate :: UTCTime -> String - from_racedate = formatTime defaultTimeLocale format + from_datetime :: UTCTime -> String + from_datetime = formatTime defaultTimeLocale format -- | (Un)pickle a UTCTime from a weather forecast's gamedate. Example -- input looks like, -- --- \ --- -- When unpickling we get rid of the suffixes \"st\", \"nd\", \"rd\", and -- \"th\". During pickling, we add them back based on the last digit -- of the date. -- +-- Examples: +-- +-- >>> let tn = text_node "Monday, December 30th" +-- >>> let (Just gd) = unpickleDoc xp_gamedate tn +-- >>> gd +-- 1970-12-30 00:00:00 UTC +-- >>> pickleDoc xp_gamedate gd +-- NTree (XTag "/" []) [NTree (XText "Wednesday, December 30th") []] +-- xp_gamedate :: PU UTCTime xp_gamedate = (to_gamedate, from_gamedate) `xpWrapMaybe` xpText @@ -174,7 +269,22 @@ xp_gamedate = --- | (Un)pickle a UTCTime without the date portion. +-- | (Un)pickle a UTCTime without the date portion. Doesn't work if +-- the fields aren't zero-padded to two characters. +-- +-- /Examples/: +-- +-- Padded, should work: +-- +-- >>> let tn = text_node "04:35 PM" +-- >>> unpickleDoc xp_time tn +-- Just 1970-01-01 16:35:00 UTC +-- +-- Unpadded, should fail: +-- +-- >>> let tn = text_node "4:35 PM" +-- >>> unpickleDoc xp_time tn +-- Nothing -- xp_time :: PU UTCTime xp_time = @@ -187,9 +297,73 @@ xp_time = from_time = formatTime defaultTimeLocale time_format +-- | (Un)pickle a UTCTime without the date portion. This differs from +-- 'xp_time' in that it uses periods in the AM/PM part, i.e. \"A.M.\" +-- and \"P.M.\" It also doesn't use padding for the \"hours\" part. +-- +-- /Examples/: +-- +-- A standard example of the correct form: +-- +-- >>> let tn = text_node "11:30 A.M." +-- >>> let (Just result) = unpickleDoc xp_time_dots tn +-- >>> result +-- 1970-01-01 11:30:00 UTC +-- >>> pickleDoc xp_time_dots result +-- NTree (XTag "/" []) [NTree (XText "11:30 A.M.") []] +-- +-- Another miracle, it still parses with a leading zero! +-- +-- >>> let tn = text_node "01:30 A.M." +-- >>> unpickleDoc xp_time_dots tn +-- Just 1970-01-01 01:30:00 UTC +-- +xp_time_dots :: PU UTCTime +xp_time_dots = + (to_time, from_time) `xpWrapMaybe` xpText + where + -- | The hours arent padded with zeros. + nopad_time_format :: String + nopad_time_format = "%-I:%M %p" + + to_time :: String -> Maybe UTCTime + to_time = (parseTime defaultTimeLocale nopad_time_format) . (replace "." "") + + from_time :: UTCTime -> String + from_time t = + replace "AM" "A.M." (replace "PM" "P.M." s) + where + s = formatTime defaultTimeLocale nopad_time_format t + + -- | (Un)pickle a UTCTime without the date portion, allowing for a -- value of \"TBA\" (which gets translated to 'Nothing'). -- +-- /Examples/: +-- +-- A failed parse will return 'Nothing': +-- +-- >>> let tn = text_node "YO" +-- >>> unpickleDoc xp_tba_time tn +-- Just Nothing +-- +-- And so will parsing a \"TBA\": +-- +-- >>> let tn = text_node "TBA" +-- >>> unpickleDoc xp_tba_time tn +-- Just Nothing +-- +-- But re-pickling 'Nothing' gives only \"TBA\": +-- +-- >>> pickleDoc xp_tba_time Nothing +-- NTree (XTag "/" []) [NTree (XText "TBA") []] +-- +-- A normal time is also parsed successfully, of course: +-- +-- >>> let tn = text_node "08:10 PM" +-- >>> unpickleDoc xp_tba_time tn +-- Just (Just 1970-01-01 20:10:00 UTC) +-- xp_tba_time :: PU (Maybe UTCTime) xp_tba_time = (to_time, from_time) `xpWrap` xpText @@ -200,19 +374,26 @@ xp_tba_time = | otherwise = parseTime defaultTimeLocale time_format s from_time :: Maybe UTCTime -> String - from_time Nothing = "" + from_time Nothing = "TBA" from_time (Just t) = formatTime defaultTimeLocale time_format t -- | (Un)pickle the \ element format to/from a 'UTCTime'. +-- The time_stamp elements look something like, -- --- Example: \ January 6, 2014, at 10:11 PM ET \ +-- \ January 6, 2014, at 10:11 PM ET \ -- -- TSN doesn't provide a proper time zone name, so we assume that -- it's always Eastern Standard Time. EST is UTC-5, so we -- add/subtract 5 hours to convert to/from UTC. -- +-- Examples: +-- +-- >>> let tn = text_node " January 6, 2014, at 10:11 PM ET " +-- >>> unpickleDoc xp_time_stamp tn +-- Just 2014-01-07 03:11:00 UTC +-- xp_time_stamp :: PU UTCTime xp_time_stamp = (parse_time_stamp, from_time_stamp) `xpWrapMaybe` xpText @@ -226,3 +407,70 @@ xp_time_stamp = from_time_stamp :: UTCTime -> String from_time_stamp = formatTime defaultTimeLocale time_stamp_format . subtract_five + + +-- | (Un)pickle an ambiguous 12-hour AM/PM time, which is ambiguous +-- because it's missing the AM/PM part. +-- +-- Examples: +-- +-- >>> let tn = text_node "8:00" +-- >>> unpickleDoc xp_ambiguous_time tn +-- Just 1970-01-01 08:00:00 UTC +-- +xp_ambiguous_time :: PU UTCTime +xp_ambiguous_time = + (to_time, from_time) `xpWrapMaybe` xpText + where + ambiguous_time_format :: String + ambiguous_time_format = "%-I:%M" + + to_time :: String -> Maybe UTCTime + to_time = parseTime defaultTimeLocale ambiguous_time_format + + from_time :: UTCTime -> String + from_time = + formatTime defaultTimeLocale ambiguous_time_format + + +-- | Create an 'XmlTree' containing only the given text. This is +-- useful for testing (un)picklers, where we don't want to have to +-- bother to create a dummy XML document. +-- +-- Examples: +-- +-- >>> text_node "8:00" +-- NTree (XText "8:00") [] +-- +text_node :: String -> XmlTree +text_node s = NTree (XText s) [] + + + +-- +-- * Tasty Tests +-- + +-- | A list of all tests for this module. This primary exists to +-- eliminate the unused import/export warnings for 'unpickleDoc' and +-- 'text_node' which are otherwise only used in the doctests. +-- +pickler_tests :: TestTree +pickler_tests = + testGroup + "Pickler tests" + [ test_pickle_of_unpickle_is_identity ] + + +-- | If we unpickle something and then pickle it, we should wind up +-- with the same thing we started with (plus an additional root +-- element). +-- +test_pickle_of_unpickle_is_identity :: TestTree +test_pickle_of_unpickle_is_identity = + testCase "pickle composed with unpickle is (almost) the identity" $ do + let tn = text_node "8:00" + let (Just utctime) = unpickleDoc xp_ambiguous_time tn + let actual = pickleDoc xp_ambiguous_time utctime + let expected = NTree (XTag (mkName "/") []) [tn] + actual @?= expected