\(dqET\(dq is not a valid time zone. It stands for \(dqEastern
Time\(dq, which can belong to either of two time zones, EST or EDT,
based on the time of the year (that is, whether or not daylight
-savings time is in effect). Since we can't tell from the timestamp, we
-always parse these as EST which is UTC-5. When daylight savings is in
-effect, they will be off by an hour.
+savings time is in effect) and one's location (for example, Arizona
+doesn't observe daylight savings time). It's not much more useful to
+be off by one hour than it is to be off by five hours, and since we
+can't determine the true offset from the timestamp, we always parse
+and store these as UTC.
Here's a list of the ones that may cause surprises:
where
import Data.Either.Utils ( maybeToEither )
-import Data.Time.Clock ( NominalDiffTime, UTCTime, addUTCTime )
+import Data.Time.Clock ( UTCTime )
import Data.Time.Format ( parseTime )
import System.Locale ( defaultTimeLocale )
import Text.Read ( readMaybe )
time_format :: String
time_format = "%I:%M %p"
--- | The format string for a time_stamp. This omits the leading and
--- trailing space.
+
+-- | The format string for a time_stamp. We keep the leading/trailing
+-- space so that parseTime and formatTime are inverses of one
+-- another, even though there is some confusion as to how these two
+-- functions should behave:
+--
+-- <https://ghc.haskell.org/trac/ghc/ticket/9150>
+--
time_stamp_format :: String
-time_stamp_format = "%B %-d, %Y, at " ++ time_format ++ " ET"
+time_stamp_format = " %B %-d, %Y, at " ++ time_format ++ " ET "
--- | Parse a time stamp from a 'String' (maybe).
---
--- TSN doesn't provide a proper time zone name, so we assume that
--- it's always Eastern Standard Time. EST is UTC-5, so we
--- add five hours to convert to UTC.
+
+-- | Parse a time stamp from a 'String' (maybe). TSN doesn't provide a
+-- proper time zone name, so we parse it as UTC, and maybe our
+-- eventual consumer can figure out a way to deduce the time zone.
--
parse_time_stamp :: String -> Maybe UTCTime
parse_time_stamp =
- fmap add_five . parseTime defaultTimeLocale time_stamp_format
- where
- five_hours :: NominalDiffTime
- five_hours = 5 * 60 * 60
+ parseTime defaultTimeLocale time_stamp_format
- add_five :: UTCTime -> UTCTime
- add_five = addUTCTime five_hours
-- | Extract the \"time_stamp\" element from a document. If we fail
import Data.List.Split ( chunksOf )
import Data.Maybe ( catMaybes, listToMaybe )
import Data.String.Utils ( replace )
-import Data.Time.Clock ( NominalDiffTime, UTCTime, addUTCTime )
+import Data.Time.Clock ( UTCTime )
import Data.Time.Format ( formatTime, parseTime )
import Data.Tree.NTree.TypeDefs ( NTree(..) )
import System.Locale ( TimeLocale( wDays, months ), defaultTimeLocale )
--
-- \<time_stamp\> January 6, 2014, at 10:11 PM ET \</time_stamp\>
--
--- TSN doesn't provide a proper time zone name, so we assume that
--- it's always Eastern Standard Time. EST is UTC-5, so we
--- add/subtract 5 hours to convert to/from UTC.
+-- TSN doesn't provide a proper time zone name, only \"ET\" for
+-- \"Eastern Time\". But the UTC offset of \"Eastern Time\" changes
+-- throughout the year and by location, because of daylight-savings
+-- time. It's really not any more useful to be off by one hour than
+-- it is to be off by 5 hours, so rather than guess at EDT/EST, we
+-- just store the timestamp as UTC.
--
-- Examples:
--
-- >>> let tn = text_node " January 6, 2014, at 10:11 PM ET "
--- >>> unpickleDoc xp_time_stamp tn
--- Just 2014-01-07 03:11:00 UTC
+-- >>> let (Just tstamp) = unpickleDoc xp_time_stamp tn
+-- >>> tstamp
+-- 2014-01-06 22:11:00 UTC
+-- >>> pickleDoc xp_time_stamp tstamp
+-- NTree (XTag "/" []) [NTree (XText " January 6, 2014, at 10:11 PM ET ") []]
--
xp_time_stamp :: PU UTCTime
xp_time_stamp =
(parse_time_stamp, from_time_stamp) `xpWrapMaybe` xpText
where
- five_hours :: NominalDiffTime
- five_hours = 5 * 60 * 60
-
- subtract_five :: UTCTime -> UTCTime
- subtract_five = addUTCTime (-1 * five_hours)
-
from_time_stamp :: UTCTime -> String
from_time_stamp =
- formatTime defaultTimeLocale time_stamp_format . subtract_five
+ formatTime defaultTimeLocale time_stamp_format
+
-- | (Un)pickle an ambiguous 12-hour AM/PM time, which is ambiguous
let a2 = xml_file_id t
let ex2 = 21201550
let a3 = show $ time_stamp t
- let ex3 = "2014-05-31 20:13:00 UTC"
+ let ex3 = "2014-05-31 15:13:00 UTC"
let a4 = take 9 (xml t)
let ex4 = "<message>"
let actual = (a1,a2,a3,a4)
let a2 = xml_file_id t
let ex2 = 2011
let a3 = show $ time_stamp t
- let ex3 = "2009-09-28 00:50:00 UTC"
+ let ex3 = "2009-09-27 19:50:00 UTC"
let a4 = take 9 (xml t)
let ex4 = "<message>"
let actual = (a1,a2,a3,a4)