+{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
+-- | Parse TSN XML for the DTD \"earlylineXML.dtd\". For that DTD,
+-- each \<message\> element contains a bunch of \<date\>s, and those
+-- \<date\>s contain a single \<game\>. In the database, we merge
+-- the date info into the games, and key the games to the messages.
+--
+-- Real life is not so simple, however. There is another module,
+-- "TSN.XML.MLBEarlyLine" that is something of a subclass of this
+-- one. It contains early lines, but only for MLB games. The data
+-- types and XML schema are /almost/ the same, but TSN like to make
+-- things difficult.
+--
+-- A full list of the differences is given in that module. In this
+-- one, we mention where data types have been twerked a little to
+-- support the second document type.
+--
module TSN.XML.EarlyLine (
+ EarlyLine, -- Used in TSN.XML.MLBEarlyLine
+ EarlyLineGame, -- Used in TSN.XML.MLBEarlyLine
dtd,
pickle_message,
+ -- * Tests
+ early_line_tests,
-- * WARNING: these are private but exported to silence warnings
EarlyLineConstructor(..),
EarlyLineGameConstructor(..) )
where
-- System imports.
+import Control.Monad ( join )
import Data.Time ( UTCTime(..) )
import Data.Tuple.Curry ( uncurryN )
+import Database.Groundhog (
+ countAll,
+ deleteAll,
+ insert_,
+ migrate,
+ runMigration,
+ silentMigrationLogger )
import Database.Groundhog.Core ( DefaultKey )
+import Database.Groundhog.Generic ( runDbConn )
+import Database.Groundhog.Sqlite ( withSqliteConn )
import Database.Groundhog.TH (
groundhog,
mkPersist )
+import qualified GHC.Generics as GHC ( Generic )
+import Test.Tasty ( TestTree, testGroup )
+import Test.Tasty.HUnit ( (@?=), testCase )
import Text.XML.HXT.Core (
PU,
xp4Tuple,
+ xp6Tuple,
xp7Tuple,
xpAttr,
xpElem,
xpInt,
xpList,
xpOption,
+ xpPair,
xpText,
- xpTriple,
xpWrap )
-- Local imports.
+import Generics ( Generic(..), to_tuple )
import TSN.Codegen ( tsn_codegen_config )
-import TSN.DbImport ( DbImport(..) )
+import TSN.DbImport ( DbImport(..), ImportResult(..), run_dbmigrate )
import TSN.Picklers (
xp_ambiguous_time,
xp_early_line_date,
import TSN.XmlImport ( XmlImport(..) )
import Xml (
FromXml(..),
- ToDb(..) )
+ ToDb(..),
+ pickle_unpickle,
+ unpickleable,
+ unsafe_unpickle )
-- | The DTD to which this module corresponds. Used to invoke dbimport.
dtd = "earlylineXML.dtd"
--
--- DB/XML data types
+-- * DB/XML data types
--
-- * EarlyLine/Message
xml_category :: String,
xml_sport :: String,
xml_title :: String,
- xml_dates :: [EarlyLineDateXml],
+ xml_dates :: [EarlyLineDate],
xml_time_stamp :: UTCTime }
- deriving (Eq, Show)
+ deriving (Eq, GHC.Generic, Show)
+
+-- | For 'Generics.to_tuple'.
+--
+instance Generic Message
instance ToDb Message where
--- * EarlyLineDateXml
+-- * EarlyLineDate / EarlyLineGameWithNote
+
+-- | This is a very sad data type. It exists so that we can
+-- successfully unpickle/pickle the MLB_earlylineXML.dtd documents
+-- and get back what we started with. In that document type, the
+-- dates all have multiple \<game\>s associated with them (as
+-- children). But the dates also have multiple \<note\>s as
+-- children, and we're supposed to figure out which notes go with
+-- which games based on the order that they appear in the XML
+-- file. Yeah, right.
+--
+-- In any case, instead of expecting the games and notes in some
+-- nice order, we use this data type to expect \"a game and maybe a
+-- note\" multiple times. This will pair the notes with only one
+-- game, rather than all of the games that TSN think it should go
+-- with. But it allows us to pickle and unpickle correctly at least.
+--
+data EarlyLineGameWithNote =
+ EarlyLineGameWithNote
+ (Maybe String) -- date_note, unused
+ EarlyLineGameXml -- date_game
+ deriving (Eq, GHC.Generic, Show)
+
+-- | Accessor for the game within a 'EarlyLineGameWithNote'. We define
+-- this ourselves to avoid an unused field warning for date_note.
+--
+date_game :: EarlyLineGameWithNote -> EarlyLineGameXml
+date_game (EarlyLineGameWithNote _ g) = g
+
+-- | For 'Generics.to_tuple'.
+--
+instance Generic EarlyLineGameWithNote
+
+
-- | XML representation of a \<date\>. It has a \"value\" attribute
-- containing the actual date string. As children it contains a
-- (non-optional) note, and a game. The note and date value are
-- properties of the game as far as I can tell.
--
-data EarlyLineDateXml =
- EarlyLineDateXml {
- xml_date_value :: UTCTime,
- xml_note :: String,
- xml_game :: EarlyLineGameXml }
- deriving (Eq, Show)
+data EarlyLineDate =
+ EarlyLineDate {
+ date_value :: UTCTime,
+ date_games_with_notes :: [EarlyLineGameWithNote] }
+ deriving (Eq, GHC.Generic, Show)
+
+-- | For 'Generics.to_tuple'.
+--
+instance Generic EarlyLineDate
-- * EarlyLineGame / EarlyLineGameXml
+-- | Database representation of a \<game\> in earlylineXML.dtd and
+-- MLB_earlylineXML.dtd. We've had to make a sacrifice here to
+-- support both document types. Since it's not possible to pair the
+-- \<note\>s with \<game\>s reliably in MLB_earlylineXML.dtd, we
+-- have omitted the notes entirely. This is sad, but totally not our
+-- fault.
+--
+-- In earlylineXML.dtd, each \<date\> and thus each \<note\> is
+-- paired with exactly one \<game\>, so if we only cared about that
+-- document type, we could have retained the notes.
+--
+-- In earlylinexml.DTD, the over/under is required, but in
+-- MLB_earlylinexml.DTD it is not. So another compromise is to have
+-- it optional here.
+--
+-- The 'db_game_time' should be the combined date/time using the
+-- date value from the \<game\> element's containing
+-- \<date\>. That's why EarlyLineGame isn't an instance of
+-- 'FromXmlFk': the foreign key isn't enough to construct one, we
+-- also need the date.
+--
data EarlyLineGame =
EarlyLineGame {
db_early_lines_id :: DefaultKey EarlyLine,
db_game_time :: UTCTime, -- ^ Combined date/time
- db_note :: String, -- ^ Taken from the parent \<date\>
db_away_team :: EarlyLineGameTeam,
db_home_team :: EarlyLineGameTeam,
- db_over_under :: String }
+ db_over_under :: Maybe String }
+
+-- | XML representation of a 'EarlyLineGame'. Comparatively, it lacks
+-- only the foreign key to the parent message.
+--
data EarlyLineGameXml =
EarlyLineGameXml {
- xml_game_time :: UTCTime, -- ^ Only an ambiguous time string, e.g. \"8:30\"
- xml_away_team :: EarlyLineGameTeam,
- xml_home_team :: EarlyLineGameTeam,
- xml_over_under :: String }
- deriving (Eq, Show)
+ xml_game_time :: Maybe UTCTime, -- ^ Only an ambiguous time string,
+ -- e.g. \"8:30\". Can be empty.
+ xml_away_team :: EarlyLineGameTeamXml,
+ xml_home_team :: EarlyLineGameTeamXml,
+ xml_over_under :: Maybe String }
+ deriving (Eq, GHC.Generic, Show)
+
+
+-- | For 'Generics.to_tuple'.
+--
+instance Generic EarlyLineGameXml
--- | XML representation of an earlyline team. It doubles as an
+-- * EarlyLineGameTeam / EarlyLineGameTeamXml
+
+-- | Database representation of an EarlyLine team, used in both
+-- earlylineXML.dtd and MLB_earlylineXML.dtd. It doubles as an
-- embedded type within the DB representation 'EarlyLineGame'.
--
+-- The team name is /not/ optional. However, since we're overloading
+-- the XML representation, we're constructing 'db_team_name' name
+-- from two Maybes, 'xml_team_name_attr' and
+-- 'xml_team_name_text'. To ensure type safety (and avoid a runtime
+-- crash), we allow the database field to be optional as well.
+--
data EarlyLineGameTeam =
EarlyLineGameTeam {
- db_rotation_number :: Int,
+ db_rotation_number :: Maybe Int, -- ^ Usually there but sometimes empty.
db_line :: Maybe String, -- ^ Can be blank, a Double, or \"off\".
- db_team_name :: String }
+ db_team_name :: Maybe String, -- ^ NOT optional, see the data type docs.
+ db_pitcher :: Maybe String -- ^ Optional in MLB_earlylineXML.dtd,
+ -- always absent in earlylineXML.dtd.
+ }
+
+
+-- | This here is an abomination. What we've got is an XML
+-- representation, not for either earlylineXML.dtd or
+-- MLB_earlylineXML.dtd, but one that will work for /both/. Even
+-- though they represent the teams totally differently! Argh!
+--
+-- The earlylineXML.dtd teams look like,
+--
+-- \<teamA rotation=\"709\" line=\"\">Miami\</teamA\>
+--
+-- While the MLB_earlylineXML.dtd teams look like,
+--
+-- <teamA rotation="901" name="LOS">
+-- <pitcher>D.Haren</pitcher>
+-- <line>-130</line>
+-- </teamA>
+--
+-- So that's cool. This data type has placeholders that should allow
+-- the name/line to appear either as an attribute or as a text
+-- node. We'll sort it all out in the conversion to
+-- EarlyLineGameTeam.
+--
+data EarlyLineGameTeamXml =
+ EarlyLineGameTeamXml {
+ xml_rotation_number :: Maybe Int,
+ xml_line_attr :: Maybe String,
+ xml_team_name_attr :: Maybe String,
+ xml_team_name_text :: Maybe String,
+ xml_pitcher :: Maybe String,
+ xml_line_elem :: Maybe String }
deriving (Eq, Show)
+
+instance ToDb EarlyLineGameTeamXml where
+ -- | The database analogue of a 'EarlyLineGameTeamXml' is an
+ -- 'EarlyLineGameTeam', although the DB type is merely embedded
+ -- in another type.
+ --
+ type Db EarlyLineGameTeamXml = EarlyLineGameTeam
+
+
+-- | The 'FromXml' instance for 'EarlyLineGameTeamXml' lets us convert
+-- it to a 'EarlyLineGameTeam' easily.
+--
+instance FromXml EarlyLineGameTeamXml where
+ -- | To convert a 'EarlyLineGameTeamXml' to an 'EarlyLineGameTeam',
+ -- we figure how its fields were represented and choose the ones
+ -- that are populated. For example if the \"line\" attribute was
+ -- there, we'll use it, but if now, we'll use the \<line\>
+ -- element.
+ --
+ from_xml EarlyLineGameTeamXml{..} =
+ EarlyLineGameTeam {
+ db_rotation_number = xml_rotation_number,
+ db_line = merge xml_line_attr xml_line_elem,
+ db_team_name = merge xml_team_name_attr xml_team_name_text,
+ db_pitcher = xml_pitcher }
+ where
+ merge :: Maybe String -> Maybe String -> Maybe String
+ merge Nothing y = y
+ merge x Nothing = x
+ merge _ _ = Nothing
+
+
+
+
+-- | Convert an 'EarlyLineDate' into a list of 'EarlyLineGame's. Each
+-- date has one or more games, and the fields that belong to the date
+-- should really be in the game anyway. So the database
+-- representation of a game has the combined fields of the XML
+-- date/game.
+--
+-- This function gets the games out of a date, and then sticks the
+-- date value inside the games. It also adds the foreign key
+-- reference to the games' parent message, and returns the result.
+--
+-- This would convert a single date to a single game if we only
+-- needed to support earlylineXML.dtd and not MLB_earlylineXML.dtd.
+--
+date_to_games :: (DefaultKey EarlyLine) -> EarlyLineDate -> [EarlyLineGame]
+date_to_games fk date =
+ map convert_game games_only
+ where
+ -- | Get the list of games out of a date (i.e. drop the notes).
+ --
+ games_only :: [EarlyLineGameXml]
+ games_only = (map date_game (date_games_with_notes date))
+
+ -- | Stick the date value into the given game. If our
+ -- 'EarlyLineGameXml' has an 'xml_game_time', then we combine it
+ -- with the day portion of the supplied @date@. If not, then we
+ -- just use @date as-is.
+ --
+ combine_date_time :: Maybe UTCTime -> UTCTime
+ combine_date_time (Just t) =
+ UTCTime (utctDay $ date_value date) (utctDayTime t)
+ combine_date_time Nothing = date_value date
+
+ -- | Convert an XML game to a database one.
+ --
+ convert_game :: EarlyLineGameXml -> EarlyLineGame
+ convert_game EarlyLineGameXml{..} =
+ EarlyLineGame {
+ db_early_lines_id = fk,
+ db_game_time = combine_date_time xml_game_time,
+ db_away_team = from_xml xml_away_team,
+ db_home_team = from_xml xml_home_team,
+ db_over_under = xml_over_under }
+
+
--
-- * Database stuff
--
instance DbImport Message where
- dbmigrate = undefined
- dbimport = undefined
+ dbmigrate _ =
+ run_dbmigrate $ do
+ migrate (undefined :: EarlyLine)
+ migrate (undefined :: EarlyLineGame)
+
+ dbimport m = do
+ -- Insert the message and obtain its ID.
+ msg_id <- insert_xml m
+
+ -- Create a function that will turn a list of dates into a list of
+ -- games by converting each date to its own list of games, and
+ -- then concatenating all of the game lists together.
+ let convert_dates_to_games = concatMap (date_to_games msg_id)
+
+ -- Now use it to make dem games.
+ let games = convert_dates_to_games (xml_dates m)
+
+ -- And insert all of them
+ mapM_ insert_ games
+
+ return ImportSucceeded
mkPersist tsn_codegen_config [groundhog|
- {name: rotation_number, dbName: away_team_rotation_number}
- {name: line, dbName: away_team_line}
- {name: team_name, dbName: away_team_name}
+ - {name: pitcher, dbName: away_team_pitcher}
- name: db_home_team
embeddedType:
- {name: rotation_number, dbName: home_team_rotation_number}
- {name: line, dbName: home_team_line}
- {name: team_name, dbName: home_team_name}
+ - {name: pitcher, dbName: home_team_pitcher}
- embedded: EarlyLineGameTeam
fields:
dbName: line
- name: db_team_name
dbName: team_name
-
+ - name: db_pitcher
+ dbName: pitcher
|]
--
-- * Pickling
--
+
+
+-- | Pickler for the top-level 'Message'.
+--
pickle_message :: PU Message
pickle_message =
xpElem "message" $
(xpElem "time_stamp" xp_time_stamp)
where
from_tuple = uncurryN Message
- to_tuple m = (xml_xml_file_id m,
- xml_heading m,
- xml_category m,
- xml_sport m,
- xml_title m,
- xml_dates m,
- xml_time_stamp m)
-
-pickle_date :: PU EarlyLineDateXml
+
+
+
+-- | Pickler for a '\<note\> followed by a \<game\>. We turn them into
+-- a 'EarlyLineGameWithNote'.
+--
+pickle_game_with_note :: PU EarlyLineGameWithNote
+pickle_game_with_note =
+ xpWrap (from_tuple, to_tuple) $
+ xpPair (xpOption $ xpElem "note" xpText)
+ pickle_game
+ where
+ from_tuple = uncurry EarlyLineGameWithNote
+
+
+-- | Pickler for the \<date\> elements within each \<message\>.
+--
+pickle_date :: PU EarlyLineDate
pickle_date =
xpElem "date" $
xpWrap (from_tuple, to_tuple) $
- xpTriple (xpAttr "value" xp_early_line_date)
- (xpElem "note" xpText)
- pickle_game
+ xpPair (xpAttr "value" xp_early_line_date)
+ (xpList pickle_game_with_note)
where
- from_tuple = uncurryN EarlyLineDateXml
- to_tuple m = (xml_date_value m, xml_note m, xml_game m)
+ from_tuple = uncurry EarlyLineDate
+
+-- | Pickler for the \<game\> elements within each \<date\>.
+--
pickle_game :: PU EarlyLineGameXml
pickle_game =
xpElem "game" $
xpWrap (from_tuple, to_tuple) $
- xp4Tuple (xpElem "time" xp_ambiguous_time)
+ xp4Tuple (xpElem "time" (xpOption xp_ambiguous_time))
pickle_away_team
pickle_home_team
- (xpElem "over_under" xpText)
+ (xpElem "over_under" (xpOption xpText))
where
from_tuple = uncurryN EarlyLineGameXml
- to_tuple m = (xml_game_time m,
- xml_away_team m,
- xml_home_team m,
- xml_over_under m)
-pickle_away_team :: PU EarlyLineGameTeam
+-- | Pickle an away team (\<teamA\>) element within a \<game\>. Most
+-- of the work (common with the home team pickler) is done by
+-- 'pickle_team'.
+--
+pickle_away_team :: PU EarlyLineGameTeamXml
pickle_away_team = xpElem "teamA" pickle_team
-pickle_home_team :: PU EarlyLineGameTeam
+
+-- | Pickle a home team (\<teamH\>) element within a \<game\>. Most
+-- of the work (common with theaway team pickler) is done by
+-- 'pickle_team'.
+--
+pickle_home_team :: PU EarlyLineGameTeamXml
pickle_home_team = xpElem "teamH" pickle_team
-pickle_team :: PU EarlyLineGameTeam
+
+-- | Team pickling common to both 'pickle_away_team' and
+-- 'pickle_home_team'. Handles everything inside the \<teamA\> and
+-- \<teamH\> elements. We try to parse the line/name as both an
+-- attribute and an element in order to accomodate
+-- MLB_earlylineXML.dtd.
+--
+-- The \"line\" and \"pitcher\" fields wind up being double-Maybes,
+-- since they can be empty even if they exist.
+--
+pickle_team :: PU EarlyLineGameTeamXml
pickle_team =
- xpWrap (from_tuple, to_tuple) $
- xpTriple (xpAttr "rotation" xpInt)
- (xpAttr "line" (xpOption xpText))
- xpText
+ xpWrap (from_tuple, to_tuple') $
+ xp6Tuple (xpAttr "rotation" (xpOption xpInt))
+ (xpOption $ xpAttr "line" (xpOption xpText))
+ (xpOption $ xpAttr "name" xpText)
+ (xpOption xpText)
+ (xpOption $ xpElem "pitcher" (xpOption xpText))
+ (xpOption $ xpElem "line" (xpOption xpText))
+ where
+ from_tuple (u,v,w,x,y,z) =
+ EarlyLineGameTeamXml u (join v) w x (join y) (join z)
+
+ to_tuple' (EarlyLineGameTeamXml u v w x y z) =
+ (u, double_just v, w, x, double_just y, double_just z)
+ where
+ double_just val = case val of
+ Nothing -> Nothing
+ just_something -> Just just_something
+
+
+
+
+--
+-- * Tasty Tests
+--
+
+-- | A list of all tests for this module.
+--
+early_line_tests :: TestTree
+early_line_tests =
+ testGroup
+ "EarlyLine tests"
+ [ test_on_delete_cascade,
+ test_pickle_of_unpickle_is_identity,
+ test_unpickle_succeeds ]
+
+-- | If we unpickle something and then pickle it, we should wind up
+-- with the same thing we started with. WARNING: success of this
+-- test does not mean that unpickling succeeded.
+--
+test_pickle_of_unpickle_is_identity :: TestTree
+test_pickle_of_unpickle_is_identity = testGroup "pickle-unpickle tests" $
+ [ check "pickle composed with unpickle is the identity"
+ "test/xml/earlylineXML.xml",
+
+ check "pickle composed with unpickle is the identity (empty game time)"
+ "test/xml/earlylineXML-empty-game-time.xml" ]
+ where
+ check desc path = testCase desc $ do
+ (expected, actual) <- pickle_unpickle pickle_message path
+ actual @?= expected
+
+
+
+-- | Make sure we can actually unpickle these things.
+--
+test_unpickle_succeeds :: TestTree
+test_unpickle_succeeds = testGroup "unpickle tests" $
+ [ check "unpickling succeeds"
+ "test/xml/earlylineXML.xml",
+
+ check "unpickling succeeds (empty game time)"
+ "test/xml/earlylineXML-empty-game-time.xml" ]
+ where
+ check desc path = testCase desc $ do
+ actual <- unpickleable path pickle_message
+ let expected = True
+ actual @?= expected
+
+
+
+-- | Make sure everything gets deleted when we delete the top-level
+-- record.
+--
+test_on_delete_cascade :: TestTree
+test_on_delete_cascade = testGroup "cascading delete tests" $
+ [ check "deleting early_lines deletes its children"
+ "test/xml/earlylineXML.xml",
+
+ check "deleting early_lines deletes its children (empty game time)"
+ "test/xml/earlylineXML-empty-game-time.xml" ]
where
- from_tuple = uncurryN EarlyLineGameTeam
- to_tuple m = (db_rotation_number m, db_line m, db_team_name m)
+ check desc path = testCase desc $ do
+ results <- unsafe_unpickle path pickle_message
+ let a = undefined :: EarlyLine
+ let b = undefined :: EarlyLineGame
+
+ actual <- withSqliteConn ":memory:" $ runDbConn $ do
+ runMigration silentMigrationLogger $ do
+ migrate a
+ migrate b
+ _ <- dbimport results
+ deleteAll a
+ count_a <- countAll a
+ count_b <- countAll b
+ return $ sum [count_a, count_b]
+ let expected = 0
+ actual @?= expected