X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FTSN%2FPicklers.hs;h=232f3579c7a6f257c018b7ad24d6d4c01e40c095;hb=84b0b8621593800f0fba275b86ad9f9961a07530;hp=a8b0567819c89a1c03297a086ac4165a6f77c7e7;hpb=86b11d329ffa739f9b3ae83997e6bbb38af19b9b;p=dead%2Fhtsn-import.git diff --git a/src/TSN/Picklers.hs b/src/TSN/Picklers.hs index a8b0567..232f357 100644 --- a/src/TSN/Picklers.hs +++ b/src/TSN/Picklers.hs @@ -7,6 +7,7 @@ module TSN.Picklers ( xp_date, xp_date_padded, xp_datetime, + xp_early_line_date, xp_earnings, xp_gamedate, xp_tba_time, @@ -16,13 +17,15 @@ module TSN.Picklers ( where -- System imports. +import Data.Char ( toUpper ) import Data.List ( intercalate ) import Data.List.Split ( chunksOf ) +import Data.Maybe ( catMaybes, listToMaybe ) import Data.String.Utils ( replace ) import Data.Time.Clock ( NominalDiffTime, UTCTime, addUTCTime ) import Data.Time.Format ( formatTime, parseTime ) import Data.Tree.NTree.TypeDefs ( NTree(..) ) -import System.Locale ( defaultTimeLocale ) +import System.Locale ( TimeLocale( wDays, months ), defaultTimeLocale ) import Test.Tasty ( TestTree, testGroup ) import Test.Tasty.HUnit ( (@?=), testCase ) import Text.XML.HXT.Arrow.Pickle ( @@ -69,6 +72,21 @@ date_format_padded = "%0m/%0d/%Y" -- | (Un)pickle a UTCTime without the time portion. -- +-- /Examples/: +-- +-- This should parse: +-- +-- >>> let tn = text_node "2/15/1983" +-- >>> unpickleDoc xp_date tn +-- Just 1983-02-15 00:00:00 UTC +-- +-- But for some reason, it can also parse a leading zero in the +-- month. Whatever. This isn't required behavior. +-- +-- >>> let tn = text_node "02/15/1983" +-- >>> unpickleDoc xp_date tn +-- Just 1983-02-15 00:00:00 UTC +-- xp_date :: PU UTCTime xp_date = (to_date, from_date) `xpWrapMaybe` xpText @@ -83,6 +101,12 @@ xp_date = -- | (Un)pickle a UTCTime without the time portion. The day/month are -- padded to two characters with zeros. -- +-- Examples: +-- +-- >>> let tn = text_node "02/15/1983" +-- >>> unpickleDoc xp_date_padded tn +-- Just 1983-02-15 00:00:00 UTC +-- xp_date_padded :: PU UTCTime xp_date_padded = (to_date, from_date) `xpWrapMaybe` xpText @@ -119,6 +143,7 @@ format_commas :: Int -> String format_commas x = reverse (intercalate "," $ chunksOf 3 $ reverse $ show x) + -- | Parse \ from an 'AutoRaceResultsListing'. These are -- essentially 'Int's, but they look like, -- @@ -128,6 +153,16 @@ format_commas x = -- -- * \TBA\ -- +-- Examples: +-- +-- >>> let tn = text_node "1,000,191" +-- >>> unpickleDoc xp_earnings tn +-- Just (Just 1000191) +-- +-- >>> let tn = text_node "TBA" +-- >>> unpickleDoc xp_earnings tn +-- Just Nothing +-- xp_earnings :: PU (Maybe Int) xp_earnings = (to_earnings, from_earnings) `xpWrap` xpText @@ -145,14 +180,26 @@ xp_earnings = from_earnings (Just i) = format_commas i + -- | (Un)pickle an unpadded 'UTCTime'. Used for example on the -- \ elements in an 'AutoRaceResults' message. -- -- Examples: -- --- * \6/1/2014 1:00:00 PM\ +-- >>> let tn = text_node "6/1/2014 1:00:00 PM" +-- >>> unpickleDoc xp_datetime tn +-- Just 2014-06-01 13:00:00 UTC +-- +-- >>> let tn = text_node "5/24/2014 2:45:00 PM" +-- >>> unpickleDoc xp_datetime tn +-- Just 2014-05-24 14:45:00 UTC +-- +-- Padded! For some reason it works with only one zero in front. I +-- dunno man. NOT required (or even desired?) behavior. -- --- * \5/24/2014 2:45:00 PM\ +-- >>> let tn = text_node "05/24/2014 2:45:00 PM" +-- >>> unpickleDoc xp_datetime tn +-- Just 2014-05-24 14:45:00 UTC -- xp_datetime :: PU UTCTime xp_datetime = @@ -167,15 +214,63 @@ xp_datetime = from_datetime = formatTime defaultTimeLocale format + +-- | Takes a 'UTCTime', and returns the English suffix that would be +-- appropriate after the day of the month. For example, if we have a +-- UTCTime representing Christmas, this would return \"th\" because +-- \"th\" is the right suffix of \"December 25th\". +-- +-- Examples: +-- +-- >>> :{ +-- let parse_date :: String -> Maybe UTCTime; +-- parse_date = parseTime defaultTimeLocale date_format; +-- :} +-- +-- >>> let (Just d1) = parse_date "1/1/1970" +-- >>> date_suffix d1 +-- "st" +-- +-- >>> let (Just d2) = parse_date "1/2/1970" +-- >>> date_suffix d2 +-- "nd" +-- +-- >>> let (Just d3) = parse_date "1/3/1970" +-- >>> date_suffix d3 +-- "rd" +-- +-- >>> let (Just d4) = parse_date "1/4/1970" +-- >>> date_suffix d4 +-- "th" +-- +date_suffix :: UTCTime -> String +date_suffix t = + case (reverse daystr) of + [] -> [] + ('1':_) -> "st" + ('2':_) -> "nd" + ('3':_) -> "rd" + _ -> "th" + where + daystr = formatTime defaultTimeLocale "%d" t + + -- | (Un)pickle a UTCTime from a weather forecast's gamedate. Example -- input looks like, -- --- \ --- -- When unpickling we get rid of the suffixes \"st\", \"nd\", \"rd\", and -- \"th\". During pickling, we add them back based on the last digit -- of the date. -- +-- Examples: +-- +-- >>> let tn = text_node "Monday, December 30th" +-- >>> let (Just gd) = unpickleDoc xp_gamedate tn +-- >>> gd +-- 1970-12-30 00:00:00 UTC +-- >>> pickleDoc xp_gamedate gd +-- NTree (XTag "/" []) [NTree (XText "Wednesday, December 30th") []] +-- xp_gamedate :: PU UTCTime xp_gamedate = (to_gamedate, from_gamedate) `xpWrapMaybe` xpText @@ -189,36 +284,41 @@ xp_gamedate = s' = case (reverse s) of (c2:c1:cs) -> let suffix = [c1,c2] in - case suffix of - "st" -> reverse cs - "nd" -> reverse cs - "rd" -> reverse cs - "th" -> reverse cs - _ -> s -- Unknown suffix, leave it alone. + if suffix `elem` ["st","nd","rd","th"] + then reverse cs + else s -- Unknown suffix, leave it alone. + _ -> s -- The String is less than two characters long, -- leave it alone. from_gamedate :: UTCTime -> String - from_gamedate d = s ++ (suffix s) + from_gamedate d = s ++ (date_suffix d) where s = formatTime defaultTimeLocale format d - suffix :: String -> String - suffix cs = - case (reverse cs) of - [] -> [] - ('1':_) -> "st" - ('2':_) -> "nd" - ('3':_) -> "rd" - _ -> "th" --- | (Un)pickle a UTCTime without the date portion. +-- | (Un)pickle a UTCTime without the date portion. Doesn't work if +-- the fields aren't zero-padded to two characters. +-- +-- /Examples/: +-- +-- Padded, should work: +-- +-- >>> let tn = text_node "04:35 PM" +-- >>> unpickleDoc xp_time tn +-- Just 1970-01-01 16:35:00 UTC +-- +-- Unpadded, should fail: +-- +-- >>> let tn = text_node "4:35 PM" +-- >>> unpickleDoc xp_time tn +-- Nothing -- xp_time :: PU UTCTime xp_time = @@ -235,9 +335,22 @@ xp_time = -- 'xp_time' in that it uses periods in the AM/PM part, i.e. \"A.M.\" -- and \"P.M.\" It also doesn't use padding for the \"hours\" part. -- --- Examples: +-- /Examples/: +-- +-- A standard example of the correct form: +-- +-- >>> let tn = text_node "11:30 A.M." +-- >>> let (Just result) = unpickleDoc xp_time_dots tn +-- >>> result +-- 1970-01-01 11:30:00 UTC +-- >>> pickleDoc xp_time_dots result +-- NTree (XTag "/" []) [NTree (XText "11:30 A.M.") []] +-- +-- Another miracle, it still parses with a leading zero! -- --- * \11:30 A.M.\ +-- >>> let tn = text_node "01:30 A.M." +-- >>> unpickleDoc xp_time_dots tn +-- Just 1970-01-01 01:30:00 UTC -- xp_time_dots :: PU UTCTime xp_time_dots = @@ -354,6 +467,60 @@ xp_ambiguous_time = formatTime defaultTimeLocale ambiguous_time_format +-- | Pickle a date value from a \ element as they appear in the +-- early lines. This is a particularly wacky format, but then so is +-- the associated time (see 'xp_ambiguous_time'). +-- +-- Examples: +-- +-- >>> let tn = text_node "SUNDAY, MAY 25TH (05/25/2014)" +-- >>> let (Just result) = unpickleDoc xp_early_line_date tn +-- >>> result +-- 2014-05-25 00:00:00 UTC +-- >>> pickleDoc xp_early_line_date result +-- NTree (XTag "/" []) [NTree (XText "SUNDAY, MAY 25TH (05/25/2014)") []] +-- +xp_early_line_date :: PU UTCTime +xp_early_line_date = + (to_time, from_time) `xpWrapMaybe` xpText + where + -- | We need to create our own time locale that talks IN ALL CAPS. + -- Actually, 'parseTime' doesn't seem to care about the + -- case. But when we spit it back out again ('formatTime'), + -- we'll want it to be in all caps. + -- + caps_time_locale :: TimeLocale + caps_time_locale = + defaultTimeLocale { wDays = caps_days, months = caps_months } + + caps_days :: [(String,String)] + caps_days = map both_to_upper (wDays defaultTimeLocale) + + caps_months :: [(String,String)] + caps_months = map both_to_upper (months defaultTimeLocale) + + both_to_upper :: (String,String) -> (String,String) + both_to_upper (s1,s2) = (map toUpper s1, map toUpper s2) + + wacko_date_formats :: [String] + wacko_date_formats = + ["%A, %B %d" ++ suffix ++ " (" ++ date_format_padded ++ ")" | + suffix <- ["ST", "ND", "RD","TH"] ] + + to_time :: String -> Maybe UTCTime + to_time s = + listToMaybe $ catMaybes possible_parses + where + possible_parses = [ parseTime caps_time_locale fmt s | + fmt <- wacko_date_formats ] + + from_time :: UTCTime -> String + from_time t = + formatTime caps_time_locale fmt t + where + upper_suffix = map toUpper (date_suffix t) + fmt = "%A, %B %d" ++ upper_suffix ++ " (" ++ date_format_padded ++ ")" + -- | Create an 'XmlTree' containing only the given text. This is -- useful for testing (un)picklers, where we don't want to have to -- bother to create a dummy XML document.