X-Git-Url: http://gitweb.michael.orlitzky.com/?p=dead%2Fhtsn-import.git;a=blobdiff_plain;f=src%2FTSN%2FXML%2FEarlyLine.hs;h=7f5a89048d93afb202237aa03f746e0d41cfaa82;hp=26625e748b2dec6d769ea0be068c5b9b3962b644;hb=f0425854304197ab5ad47293b27b2e0b188cb844;hpb=253d9c8dc02a342aaba98459b6b6cc811ddc7724 diff --git a/src/TSN/XML/EarlyLine.hs b/src/TSN/XML/EarlyLine.hs index 26625e7..7f5a890 100644 --- a/src/TSN/XML/EarlyLine.hs +++ b/src/TSN/XML/EarlyLine.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE QuasiQuotes #-} @@ -5,45 +6,81 @@ {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeFamilies #-} +-- | Parse TSN XML for the DTD \"earlylineXML.dtd\". For that DTD, +-- each \ element contains a bunch of \s, and those +-- \s contain a single \. In the database, we merge +-- the date info into the games, and key the games to the messages. +-- +-- Real life is not so simple, however. There is another module, +-- "TSN.XML.MLBEarlyLine" that is something of a subclass of this +-- one. It contains early lines, but only for MLB games. The data +-- types and XML schema are /almost/ the same, but TSN like to make +-- things difficult. +-- +-- A full list of the differences is given in that module. In this +-- one, we mention where data types have been twerked a little to +-- support the second document type. +-- module TSN.XML.EarlyLine ( + EarlyLine, -- Used in TSN.XML.MLBEarlyLine + EarlyLineGame, -- Used in TSN.XML.MLBEarlyLine dtd, pickle_message, + -- * Tests + early_line_tests, -- * WARNING: these are private but exported to silence warnings EarlyLineConstructor(..), EarlyLineGameConstructor(..) ) where -- System imports. +import Control.Monad ( join ) import Data.Time ( UTCTime(..) ) import Data.Tuple.Curry ( uncurryN ) +import qualified Data.Vector.HFixed as H ( HVector, convert ) +import Database.Groundhog ( + countAll, + deleteAll, + insert_, + migrate ) import Database.Groundhog.Core ( DefaultKey ) +import Database.Groundhog.Generic ( runDbConn, runMigrationSilent ) +import Database.Groundhog.Sqlite ( withSqliteConn ) import Database.Groundhog.TH ( groundhog, mkPersist ) +import qualified GHC.Generics as GHC ( Generic ) +import Test.Tasty ( TestTree, testGroup ) +import Test.Tasty.HUnit ( (@?=), testCase ) import Text.XML.HXT.Core ( PU, xp4Tuple, + xp6Tuple, xp7Tuple, xpAttr, xpElem, xpInt, xpList, xpOption, + xpPair, xpText, - xpTriple, xpWrap ) -- Local imports. import TSN.Codegen ( tsn_codegen_config ) -import TSN.DbImport ( DbImport(..) ) +import TSN.DbImport ( DbImport(..), ImportResult(..), run_dbmigrate ) import TSN.Picklers ( xp_ambiguous_time, + xp_attr_option, xp_early_line_date, xp_time_stamp ) import TSN.XmlImport ( XmlImport(..) ) import Xml ( FromXml(..), - ToDb(..) ) + ToDb(..), + pickle_unpickle, + unpickleable, + unsafe_unpickle ) -- | The DTD to which this module corresponds. Used to invoke dbimport. @@ -52,7 +89,7 @@ dtd :: String dtd = "earlylineXML.dtd" -- --- DB/XML data types +-- * DB/XML data types -- -- * EarlyLine/Message @@ -83,9 +120,13 @@ data Message = xml_category :: String, xml_sport :: String, xml_title :: String, - xml_dates :: [EarlyLineDateXml], + xml_dates :: [EarlyLineDate], xml_time_stamp :: UTCTime } - deriving (Eq, Show) + deriving (Eq, GHC.Generic, Show) + +-- | For 'H.convert'. +-- +instance H.HVector Message instance ToDb Message where @@ -118,60 +159,266 @@ instance XmlImport Message --- * EarlyLineDateXml +-- * EarlyLineDate / EarlyLineGameWithNote + +-- | This is a very sad data type. It exists so that we can +-- successfully unpickle/pickle the MLB_earlylineXML.dtd documents +-- and get back what we started with. In that document type, the +-- dates all have multiple \s associated with them (as +-- children). But the dates also have multiple \s as +-- children, and we're supposed to figure out which notes go with +-- which games based on the order that they appear in the XML +-- file. Yeah, right. +-- +-- In any case, instead of expecting the games and notes in some +-- nice order, we use this data type to expect \"a game and maybe a +-- note\" multiple times. This will pair the notes with only one +-- game, rather than all of the games that TSN think it should go +-- with. But it allows us to pickle and unpickle correctly at least. +-- +data EarlyLineGameWithNote = + EarlyLineGameWithNote + (Maybe String) -- date_note, unused + EarlyLineGameXml -- date_game + deriving (Eq, GHC.Generic, Show) + +-- | Accessor for the game within a 'EarlyLineGameWithNote'. We define +-- this ourselves to avoid an unused field warning for date_note. +-- +date_game :: EarlyLineGameWithNote -> EarlyLineGameXml +date_game (EarlyLineGameWithNote _ g) = g + +-- | For 'H.convert'. +-- +instance H.HVector EarlyLineGameWithNote + + -- | XML representation of a \. It has a \"value\" attribute -- containing the actual date string. As children it contains a -- (non-optional) note, and a game. The note and date value are -- properties of the game as far as I can tell. -- -data EarlyLineDateXml = - EarlyLineDateXml { - xml_date_value :: UTCTime, - xml_note :: String, - xml_game :: EarlyLineGameXml } - deriving (Eq, Show) +data EarlyLineDate = + EarlyLineDate { + date_value :: UTCTime, + date_games_with_notes :: [EarlyLineGameWithNote] } + deriving (Eq, GHC.Generic, Show) + +-- | For 'H.convert'. +-- +instance H.HVector EarlyLineDate -- * EarlyLineGame / EarlyLineGameXml +-- | Database representation of a \ in earlylineXML.dtd and +-- MLB_earlylineXML.dtd. We've had to make a sacrifice here to +-- support both document types. Since it's not possible to pair the +-- \s with \s reliably in MLB_earlylineXML.dtd, we +-- have omitted the notes entirely. This is sad, but totally not our +-- fault. +-- +-- In earlylineXML.dtd, each \ and thus each \ is +-- paired with exactly one \, so if we only cared about that +-- document type, we could have retained the notes. +-- +-- In earlylinexml.DTD, the over/under is required, but in +-- MLB_earlylinexml.DTD it is not. So another compromise is to have +-- it optional here. +-- +-- The 'db_game_time' should be the combined date/time using the +-- date value from the \ element's containing +-- \. That's why EarlyLineGame isn't an instance of +-- 'FromXmlFk': the foreign key isn't enough to construct one, we +-- also need the date. +-- data EarlyLineGame = EarlyLineGame { db_early_lines_id :: DefaultKey EarlyLine, db_game_time :: UTCTime, -- ^ Combined date/time - db_note :: String, -- ^ Taken from the parent \ db_away_team :: EarlyLineGameTeam, db_home_team :: EarlyLineGameTeam, - db_over_under :: String } + db_over_under :: Maybe String } + +-- | XML representation of a 'EarlyLineGame'. Comparatively, it lacks +-- only the foreign key to the parent message. +-- data EarlyLineGameXml = EarlyLineGameXml { - xml_game_time :: UTCTime, -- ^ Only an ambiguous time string, e.g. \"8:30\" - xml_away_team :: EarlyLineGameTeam, - xml_home_team :: EarlyLineGameTeam, - xml_over_under :: String } - deriving (Eq, Show) + xml_game_time :: Maybe UTCTime, -- ^ Only an ambiguous time string, + -- e.g. \"8:30\". Can be empty. + xml_away_team :: EarlyLineGameTeamXml, + xml_home_team :: EarlyLineGameTeamXml, + xml_over_under :: Maybe String } + deriving (Eq, GHC.Generic, Show) --- | XML representation of an earlyline team. It doubles as an +-- | For 'H.convert'. +-- +instance H.HVector EarlyLineGameXml + + +-- * EarlyLineGameTeam / EarlyLineGameTeamXml + +-- | Database representation of an EarlyLine team, used in both +-- earlylineXML.dtd and MLB_earlylineXML.dtd. It doubles as an -- embedded type within the DB representation 'EarlyLineGame'. -- +-- The team name is /not/ optional. However, since we're overloading +-- the XML representation, we're constructing 'db_team_name' name +-- from two Maybes, 'xml_team_name_attr' and +-- 'xml_team_name_text'. To ensure type safety (and avoid a runtime +-- crash), we allow the database field to be optional as well. +-- data EarlyLineGameTeam = EarlyLineGameTeam { - db_rotation_number :: Int, + db_rotation_number :: Maybe Int, -- ^ Usually there but sometimes empty. db_line :: Maybe String, -- ^ Can be blank, a Double, or \"off\". - db_team_name :: String } + db_team_name :: Maybe String, -- ^ NOT optional, see the data type docs. + db_pitcher :: Maybe String -- ^ Optional in MLB_earlylineXML.dtd, + -- always absent in earlylineXML.dtd. + } + + +-- | This here is an abomination. What we've got is an XML +-- representation, not for either earlylineXML.dtd or +-- MLB_earlylineXML.dtd, but one that will work for /both/. Even +-- though they represent the teams totally differently! Argh! +-- +-- The earlylineXML.dtd teams look like, +-- +-- \Miami\ +-- +-- While the MLB_earlylineXML.dtd teams look like, +-- +-- +-- D.Haren +-- -130 +-- +-- +-- So that's cool. This data type has placeholders that should allow +-- the name/line to appear either as an attribute or as a text +-- node. We'll sort it all out in the conversion to +-- EarlyLineGameTeam. +-- +data EarlyLineGameTeamXml = + EarlyLineGameTeamXml { + xml_rotation_number :: Maybe Int, + xml_line_attr :: Maybe String, + xml_team_name_attr :: Maybe String, + xml_team_name_text :: Maybe String, + xml_pitcher :: Maybe String, + xml_line_elem :: Maybe String } deriving (Eq, Show) + +instance ToDb EarlyLineGameTeamXml where + -- | The database analogue of a 'EarlyLineGameTeamXml' is an + -- 'EarlyLineGameTeam', although the DB type is merely embedded + -- in another type. + -- + type Db EarlyLineGameTeamXml = EarlyLineGameTeam + + +-- | The 'FromXml' instance for 'EarlyLineGameTeamXml' lets us convert +-- it to a 'EarlyLineGameTeam' easily. +-- +instance FromXml EarlyLineGameTeamXml where + -- | To convert a 'EarlyLineGameTeamXml' to an 'EarlyLineGameTeam', + -- we figure how its fields were represented and choose the ones + -- that are populated. For example if the \"line\" attribute was + -- there, we'll use it, but if now, we'll use the \ + -- element. + -- + from_xml EarlyLineGameTeamXml{..} = + EarlyLineGameTeam { + db_rotation_number = xml_rotation_number, + db_line = merge xml_line_attr xml_line_elem, + db_team_name = merge xml_team_name_attr xml_team_name_text, + db_pitcher = xml_pitcher } + where + merge :: Maybe String -> Maybe String -> Maybe String + merge Nothing y = y + merge x Nothing = x + merge _ _ = Nothing + + + + +-- | Convert an 'EarlyLineDate' into a list of 'EarlyLineGame's. Each +-- date has one or more games, and the fields that belong to the date +-- should really be in the game anyway. So the database +-- representation of a game has the combined fields of the XML +-- date/game. +-- +-- This function gets the games out of a date, and then sticks the +-- date value inside the games. It also adds the foreign key +-- reference to the games' parent message, and returns the result. +-- +-- This would convert a single date to a single game if we only +-- needed to support earlylineXML.dtd and not MLB_earlylineXML.dtd. +-- +date_to_games :: (DefaultKey EarlyLine) -> EarlyLineDate -> [EarlyLineGame] +date_to_games fk date = + map convert_game games_only + where + -- | Get the list of games out of a date (i.e. drop the notes). + -- + games_only :: [EarlyLineGameXml] + games_only = (map date_game (date_games_with_notes date)) + + -- | Stick the date value into the given game. If our + -- 'EarlyLineGameXml' has an 'xml_game_time', then we combine it + -- with the day portion of the supplied @date@. If not, then we + -- just use @date as-is. + -- + combine_date_time :: Maybe UTCTime -> UTCTime + combine_date_time (Just t) = + UTCTime (utctDay $ date_value date) (utctDayTime t) + combine_date_time Nothing = date_value date + + -- | Convert an XML game to a database one. + -- + convert_game :: EarlyLineGameXml -> EarlyLineGame + convert_game EarlyLineGameXml{..} = + EarlyLineGame { + db_early_lines_id = fk, + db_game_time = combine_date_time xml_game_time, + db_away_team = from_xml xml_away_team, + db_home_team = from_xml xml_home_team, + db_over_under = xml_over_under } + + -- -- * Database stuff -- instance DbImport Message where - dbmigrate = undefined - dbimport = undefined + dbmigrate _ = + run_dbmigrate $ do + migrate (undefined :: EarlyLine) + migrate (undefined :: EarlyLineGame) + + dbimport m = do + -- Insert the message and obtain its ID. + msg_id <- insert_xml m + + -- Create a function that will turn a list of dates into a list of + -- games by converting each date to its own list of games, and + -- then concatenating all of the game lists together. + let convert_dates_to_games = concatMap (date_to_games msg_id) + + -- Now use it to make dem games. + let games = convert_dates_to_games (xml_dates m) + + -- And insert all of them + mapM_ insert_ games + + return ImportSucceeded mkPersist tsn_codegen_config [groundhog| @@ -200,11 +447,13 @@ mkPersist tsn_codegen_config [groundhog| - {name: rotation_number, dbName: away_team_rotation_number} - {name: line, dbName: away_team_line} - {name: team_name, dbName: away_team_name} + - {name: pitcher, dbName: away_team_pitcher} - name: db_home_team embeddedType: - {name: rotation_number, dbName: home_team_rotation_number} - {name: line, dbName: home_team_line} - {name: team_name, dbName: home_team_name} + - {name: pitcher, dbName: home_team_pitcher} - embedded: EarlyLineGameTeam fields: @@ -214,7 +463,8 @@ mkPersist tsn_codegen_config [groundhog| dbName: line - name: db_team_name dbName: team_name - + - name: db_pitcher + dbName: pitcher |] @@ -222,10 +472,14 @@ mkPersist tsn_codegen_config [groundhog| -- -- * Pickling -- + + +-- | Pickler for the top-level 'Message'. +-- pickle_message :: PU Message pickle_message = xpElem "message" $ - xpWrap (from_tuple, to_tuple) $ + xpWrap (from_tuple, H.convert) $ xp7Tuple (xpElem "XML_File_ID" xpInt) (xpElem "heading" xpText) (xpElem "category" xpText) @@ -235,55 +489,170 @@ pickle_message = (xpElem "time_stamp" xp_time_stamp) where from_tuple = uncurryN Message - to_tuple m = (xml_xml_file_id m, - xml_heading m, - xml_category m, - xml_sport m, - xml_title m, - xml_dates m, - xml_time_stamp m) - -pickle_date :: PU EarlyLineDateXml + + + +-- | Pickler for a '\ followed by a \. We turn them into +-- a 'EarlyLineGameWithNote'. +-- +pickle_game_with_note :: PU EarlyLineGameWithNote +pickle_game_with_note = + xpWrap (from_tuple, H.convert) $ + xpPair (xpOption $ xpElem "note" xpText) + pickle_game + where + from_tuple = uncurry EarlyLineGameWithNote + + +-- | Pickler for the \ elements within each \. +-- +pickle_date :: PU EarlyLineDate pickle_date = xpElem "date" $ - xpWrap (from_tuple, to_tuple) $ - xpTriple (xpAttr "value" xp_early_line_date) - (xpElem "note" xpText) - pickle_game + xpWrap (from_tuple, H.convert) $ + xpPair (xpAttr "value" xp_early_line_date) + (xpList pickle_game_with_note) where - from_tuple = uncurryN EarlyLineDateXml - to_tuple m = (xml_date_value m, xml_note m, xml_game m) + from_tuple = uncurry EarlyLineDate + +-- | Pickler for the \ elements within each \. +-- pickle_game :: PU EarlyLineGameXml pickle_game = xpElem "game" $ - xpWrap (from_tuple, to_tuple) $ - xp4Tuple (xpElem "time" xp_ambiguous_time) + xpWrap (from_tuple, H.convert) $ + xp4Tuple (xpElem "time" (xpOption xp_ambiguous_time)) pickle_away_team pickle_home_team - (xpElem "over_under" xpText) + (xpElem "over_under" (xpOption xpText)) where from_tuple = uncurryN EarlyLineGameXml - to_tuple m = (xml_game_time m, - xml_away_team m, - xml_home_team m, - xml_over_under m) -pickle_away_team :: PU EarlyLineGameTeam +-- | Pickle an away team (\) element within a \. Most +-- of the work (common with the home team pickler) is done by +-- 'pickle_team'. +-- +pickle_away_team :: PU EarlyLineGameTeamXml pickle_away_team = xpElem "teamA" pickle_team -pickle_home_team :: PU EarlyLineGameTeam + +-- | Pickle a home team (\) element within a \. Most +-- of the work (common with theaway team pickler) is done by +-- 'pickle_team'. +-- +pickle_home_team :: PU EarlyLineGameTeamXml pickle_home_team = xpElem "teamH" pickle_team -pickle_team :: PU EarlyLineGameTeam + +-- | Team pickling common to both 'pickle_away_team' and +-- 'pickle_home_team'. Handles everything inside the \ and +-- \ elements. We try to parse the line/name as both an +-- attribute and an element in order to accomodate +-- MLB_earlylineXML.dtd. +-- +-- The \"line\" and \"pitcher\" fields wind up being double-Maybes, +-- since they can be empty even if they exist. +-- +pickle_team :: PU EarlyLineGameTeamXml pickle_team = - xpWrap (from_tuple, to_tuple) $ - xpTriple (xpAttr "rotation" xpInt) - (xpAttr "line" (xpOption xpText)) - xpText + xpWrap (from_tuple, to_tuple') $ + xp6Tuple (xpAttr "rotation" xp_attr_option) + (xpOption $ xpAttr "line" (xpOption xpText)) + (xpOption $ xpAttr "name" xpText) + (xpOption xpText) + (xpOption $ xpElem "pitcher" (xpOption xpText)) + (xpOption $ xpElem "line" (xpOption xpText)) + where + from_tuple (u,v,w,x,y,z) = + EarlyLineGameTeamXml u (join v) w x (join y) (join z) + + to_tuple' (EarlyLineGameTeamXml u v w x y z) = + (u, double_just v, w, x, double_just y, double_just z) + where + double_just val = case val of + Nothing -> Nothing + just_something -> Just just_something + + + + +-- +-- * Tasty Tests +-- + +-- | A list of all tests for this module. +-- +early_line_tests :: TestTree +early_line_tests = + testGroup + "EarlyLine tests" + [ test_on_delete_cascade, + test_pickle_of_unpickle_is_identity, + test_unpickle_succeeds ] + +-- | If we unpickle something and then pickle it, we should wind up +-- with the same thing we started with. WARNING: success of this +-- test does not mean that unpickling succeeded. +-- +test_pickle_of_unpickle_is_identity :: TestTree +test_pickle_of_unpickle_is_identity = testGroup "pickle-unpickle tests" + [ check "pickle composed with unpickle is the identity" + "test/xml/earlylineXML.xml", + + check "pickle composed with unpickle is the identity (empty game time)" + "test/xml/earlylineXML-empty-game-time.xml" ] + where + check desc path = testCase desc $ do + (expected, actual) <- pickle_unpickle pickle_message path + actual @?= expected + + + +-- | Make sure we can actually unpickle these things. +-- +test_unpickle_succeeds :: TestTree +test_unpickle_succeeds = testGroup "unpickle tests" + [ check "unpickling succeeds" + "test/xml/earlylineXML.xml", + + check "unpickling succeeds (empty game time)" + "test/xml/earlylineXML-empty-game-time.xml" ] + where + check desc path = testCase desc $ do + actual <- unpickleable path pickle_message + let expected = True + actual @?= expected + + + +-- | Make sure everything gets deleted when we delete the top-level +-- record. +-- +test_on_delete_cascade :: TestTree +test_on_delete_cascade = testGroup "cascading delete tests" + [ check "deleting early_lines deletes its children" + "test/xml/earlylineXML.xml", + + check "deleting early_lines deletes its children (empty game time)" + "test/xml/earlylineXML-empty-game-time.xml" ] where - from_tuple = uncurryN EarlyLineGameTeam - to_tuple m = (db_rotation_number m, db_line m, db_team_name m) + check desc path = testCase desc $ do + results <- unsafe_unpickle path pickle_message + let a = undefined :: EarlyLine + let b = undefined :: EarlyLineGame + + actual <- withSqliteConn ":memory:" $ runDbConn $ do + runMigrationSilent $ do + migrate a + migrate b + _ <- dbimport results + deleteAll a + count_a <- countAll a + count_b <- countAll b + return $ sum [count_a, count_b] + let expected = 0 + actual @?= expected