X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FTSN%2FXML%2FEarlyLine.hs;h=b0d118d046f0905134ad9c91324188b3d55d3103;hb=fcc8c66fa433134d374f9347329ffa122ad0e4be;hp=627238f92bfbf280eb02b764be45f20c6b0fff5d;hpb=1c101735be50ddbef397bcb67882562ab2c506e4;p=dead%2Fhtsn-import.git diff --git a/src/TSN/XML/EarlyLine.hs b/src/TSN/XML/EarlyLine.hs index 627238f..b0d118d 100644 --- a/src/TSN/XML/EarlyLine.hs +++ b/src/TSN/XML/EarlyLine.hs @@ -5,42 +5,80 @@ {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeFamilies #-} +-- | Parse TSN XML for the DTD \"earlylineXML.dtd\". For that DTD, +-- each \ element contains a bunch of \s, and those +-- \s contain a single \. In the database, we merge +-- the date info into the games, and key the games to the messages. +-- +-- Real life is not so simple, however. There is another module, +-- "TSN.XML.MLBEarlyLine" that is something of a subclass of this +-- one. It contains early lines, but only for MLB games. The data +-- types and XML schema are /almost/ the same, but TSN like to make +-- things difficult. +-- +-- A full list of the differences is given in that module. In this +-- one, we mention where data types have been twerked a little to +-- support the second document type. +-- module TSN.XML.EarlyLine ( + EarlyLine, -- Used in TSN.XML.MLBEarlyLine + EarlyLineGame, -- Used in TSN.XML.MLBEarlyLine dtd, pickle_message, + -- * Tests + early_line_tests, -- * WARNING: these are private but exported to silence warnings EarlyLineConstructor(..), EarlyLineGameConstructor(..) ) where -- System imports. +import Control.Monad ( join ) import Data.Time ( UTCTime(..) ) import Data.Tuple.Curry ( uncurryN ) +import Database.Groundhog ( + countAll, + deleteAll, + insert_, + migrate, + runMigration, + silentMigrationLogger ) import Database.Groundhog.Core ( DefaultKey ) +import Database.Groundhog.Generic ( runDbConn ) +import Database.Groundhog.Sqlite ( withSqliteConn ) import Database.Groundhog.TH ( groundhog, mkPersist ) +import Test.Tasty ( TestTree, testGroup ) +import Test.Tasty.HUnit ( (@?=), testCase ) import Text.XML.HXT.Core ( PU, xp4Tuple, + xp6Tuple, xp7Tuple, xpAttr, xpElem, xpInt, xpList, xpOption, + xpPair, xpText, - xpTriple, xpWrap ) -- Local imports. import TSN.Codegen ( tsn_codegen_config ) -import TSN.DbImport ( DbImport(..) ) -import TSN.Picklers ( xp_time_stamp ) +import TSN.DbImport ( DbImport(..), ImportResult(..), run_dbmigrate ) +import TSN.Picklers ( + xp_ambiguous_time, + xp_early_line_date, + xp_time_stamp ) import TSN.XmlImport ( XmlImport(..) ) import Xml ( FromXml(..), - ToDb(..) ) + ToDb(..), + pickle_unpickle, + unpickleable, + unsafe_unpickle ) -- | The DTD to which this module corresponds. Used to invoke dbimport. @@ -49,7 +87,7 @@ dtd :: String dtd = "earlylineXML.dtd" -- --- DB/XML data types +-- * DB/XML data types -- -- * EarlyLine/Message @@ -80,7 +118,7 @@ data Message = xml_category :: String, xml_sport :: String, xml_title :: String, - xml_dates :: [EarlyLineDateXml], + xml_dates :: [EarlyLineDate], xml_time_stamp :: UTCTime } deriving (Eq, Show) @@ -115,60 +153,247 @@ instance XmlImport Message --- * EarlyLineDateXml +-- * EarlyLineDate / EarlyLineGameWithNote + +-- | This is a very sad data type. It exists so that we can +-- successfully unpickle/pickle the MLB_earlylineXML.dtd documents +-- and get back what we started with. In that document type, the +-- dates all have multiple \s associated with them (as +-- children). But the dates also have multiple \s as +-- children, and we're supposed to figure out which notes go with +-- which games based on the order that they appear in the XML +-- file. Yeah, right. +-- +-- In any case, instead of expecting the games and notes in some +-- nice order, we use this data type to expect \"a game and maybe a +-- note\" multiple times. This will pair the notes with only one +-- game, rather than all of the games that TSN think it should go +-- with. But it allows us to pickle and unpickle correctly at least. +-- +data EarlyLineGameWithNote = + EarlyLineGameWithNote { + date_note :: Maybe String, + date_game :: EarlyLineGameXml } + deriving (Eq, Show) + -- | XML representation of a \. It has a \"value\" attribute -- containing the actual date string. As children it contains a -- (non-optional) note, and a game. The note and date value are -- properties of the game as far as I can tell. -- -data EarlyLineDateXml = - EarlyLineDateXml { - xml_date_value :: UTCTime, - xml_note :: String, - xml_game :: EarlyLineGameXml } +data EarlyLineDate = + EarlyLineDate { + date_value :: UTCTime, + date_games_with_notes :: [EarlyLineGameWithNote] } deriving (Eq, Show) -- * EarlyLineGame / EarlyLineGameXml +-- | Database representation of a \ in earlylineXML.dtd and +-- MLB_earlylineXML.dtd. We've had to make a sacrifice here to +-- support both document types. Since it's not possible to pair the +-- \s with \s reliably in MLB_earlylineXML.dtd, we +-- have omitted the notes entirely. This is sad, but totally not our +-- fault. +-- +-- In earlylineXML.dtd, each \ and thus each \ is +-- paired with exactly one \, so if we only cared about that +-- document type, we could have retained the notes. +-- +-- In earlylinexml.DTD, the over/under is required, but in +-- MLB_earlylinexml.DTD it is not. So another compromise is to have +-- it optional here. +-- +-- The 'db_game_time' should be the combined date/time using the +-- date value from the \ element's containing +-- \. That's why EarlyLineGame isn't an instance of +-- 'FromXmlFk': the foreign key isn't enough to construct one, we +-- also need the date. +-- data EarlyLineGame = EarlyLineGame { db_early_lines_id :: DefaultKey EarlyLine, db_game_time :: UTCTime, -- ^ Combined date/time - db_note :: String, -- ^ Taken from the parent \ db_away_team :: EarlyLineGameTeam, db_home_team :: EarlyLineGameTeam, - db_over_under :: String } + db_over_under :: Maybe String } + +-- | XML representation of a 'EarlyLineGame'. Comparatively, it lacks +-- only the foreign key to the parent message. +-- data EarlyLineGameXml = EarlyLineGameXml { - xml_game_time :: UTCTime, -- ^ Only an ambiguous time string, e.g. \"8:30\" - xml_away_team :: EarlyLineGameTeam, - xml_home_team :: EarlyLineGameTeam, - xml_over_under :: String } + xml_game_time :: Maybe UTCTime, -- ^ Only an ambiguous time string, + -- e.g. \"8:30\". Can be empty. + xml_away_team :: EarlyLineGameTeamXml, + xml_home_team :: EarlyLineGameTeamXml, + xml_over_under :: Maybe String } deriving (Eq, Show) --- | XML representation of an earlyline team. It doubles as an + +-- * EarlyLineGameTeam / EarlyLineGameTeamXml + +-- | Database representation of an EarlyLine team, used in both +-- earlylineXML.dtd and MLB_earlylineXML.dtd. It doubles as an -- embedded type within the DB representation 'EarlyLineGame'. -- +-- The team name is /not/ optional. However, since we're overloading +-- the XML representation, we're constructing 'db_team_name' name +-- from two Maybes, 'xml_team_name_attr' and +-- 'xml_team_name_text'. To ensure type safety (and avoid a runtime +-- crash), we allow the database field to be optional as well. +-- data EarlyLineGameTeam = EarlyLineGameTeam { db_rotation_number :: Int, db_line :: Maybe String, -- ^ Can be blank, a Double, or \"off\". - db_team_name :: String } + db_team_name :: Maybe String, -- ^ NOT optional, see the data type docs. + db_pitcher :: Maybe String -- ^ Optional in MLB_earlylineXML.dtd, + -- always absent in earlylineXML.dtd. + } + + +-- | This here is an abomination. What we've got is an XML +-- representation, not for either earlylineXML.dtd or +-- MLB_earlylineXML.dtd, but one that will work for /both/. Even +-- though they represent the teams totally differently! Argh! +-- +-- The earlylineXML.dtd teams look like, +-- +-- \Miami\ +-- +-- While the MLB_earlylineXML.dtd teams look like, +-- +-- +-- D.Haren +-- -130 +-- +-- +-- So that's cool. This data type has placeholders that should allow +-- the name/line to appear either as an attribute or as a text +-- node. We'll sort it all out in the conversion to +-- EarlyLineGameTeam. +-- +data EarlyLineGameTeamXml = + EarlyLineGameTeamXml { + xml_rotation_number :: Int, + xml_line_attr :: Maybe String, + xml_team_name_attr :: Maybe String, + xml_team_name_text :: Maybe String, + xml_pitcher :: Maybe String, + xml_line_elem :: Maybe String } deriving (Eq, Show) + +instance ToDb EarlyLineGameTeamXml where + -- | The database analogue of a 'EarlyLineGameTeamXml' is an + -- 'EarlyLineGameTeam', although the DB type is merely embedded + -- in another type. + -- + type Db EarlyLineGameTeamXml = EarlyLineGameTeam + + +-- | The 'FromXml' instance for 'EarlyLineGameTeamXml' lets us convert +-- it to a 'EarlyLineGameTeam' easily. +-- +instance FromXml EarlyLineGameTeamXml where + -- | To convert a 'EarlyLineGameTeamXml' to an 'EarlyLineGameTeam', + -- we figure how its fields were represented and choose the ones + -- that are populated. For example if the \"line\" attribute was + -- there, we'll use it, but if now, we'll use the \ + -- element. + -- + from_xml EarlyLineGameTeamXml{..} = + EarlyLineGameTeam { + db_rotation_number = xml_rotation_number, + db_line = merge xml_line_attr xml_line_elem, + db_team_name = merge xml_team_name_attr xml_team_name_text, + db_pitcher = xml_pitcher } + where + merge :: Maybe String -> Maybe String -> Maybe String + merge Nothing y = y + merge x Nothing = x + merge _ _ = Nothing + + + + +-- | Convert an 'EarlyLineDate' into a list of 'EarlyLineGame's. Each +-- date has one or more games, and the fields that belong to the date +-- should really be in the game anyway. So the database +-- representation of a game has the combined fields of the XML +-- date/game. +-- +-- This function gets the games out of a date, and then sticks the +-- date value inside the games. It also adds the foreign key +-- reference to the games' parent message, and returns the result. +-- +-- This would convert a single date to a single game if we only +-- needed to support earlylineXML.dtd and not MLB_earlylineXML.dtd. +-- +date_to_games :: (DefaultKey EarlyLine) -> EarlyLineDate -> [EarlyLineGame] +date_to_games fk date = + map convert_game games_only + where + -- | Get the list of games out of a date (i.e. drop the notes). + -- + games_only :: [EarlyLineGameXml] + games_only = (map date_game (date_games_with_notes date)) + + -- | Stick the date value into the given game. If our + -- 'EarlyLineGameXml' has an 'xml_game_time', then we combine it + -- with the day portion of the supplied @date@. If not, then we + -- just use @date as-is. + -- + combine_date_time :: EarlyLineGameXml -> UTCTime + combine_date_time (EarlyLineGameXml (Just t) _ _ _) = + UTCTime (utctDay $ date_value date) (utctDayTime t) + combine_date_time (EarlyLineGameXml Nothing _ _ _ ) = date_value date + + -- | Convert an XML game to a database one. + -- + convert_game :: EarlyLineGameXml -> EarlyLineGame + convert_game gx = + EarlyLineGame { + db_early_lines_id = fk, + db_game_time = combine_date_time gx, + db_away_team = from_xml (xml_away_team gx), + db_home_team = from_xml (xml_home_team gx), + db_over_under = xml_over_under gx } + + -- -- * Database stuff -- instance DbImport Message where - dbmigrate = undefined - dbimport = undefined + dbmigrate _ = + run_dbmigrate $ do + migrate (undefined :: EarlyLine) + migrate (undefined :: EarlyLineGame) + + dbimport m = do + -- Insert the message and obtain its ID. + msg_id <- insert_xml m + + -- Create a function that will turn a list of dates into a list of + -- games by converting each date to its own list of games, and + -- then concatenating all of the game lists together. + let convert_dates_to_games = concatMap (date_to_games msg_id) + + -- Now use it to make dem games. + let games = convert_dates_to_games (xml_dates m) + + -- And insert all of them + mapM_ insert_ games + + return ImportSucceeded mkPersist tsn_codegen_config [groundhog| @@ -197,11 +422,13 @@ mkPersist tsn_codegen_config [groundhog| - {name: rotation_number, dbName: away_team_rotation_number} - {name: line, dbName: away_team_line} - {name: team_name, dbName: away_team_name} + - {name: pitcher, dbName: away_team_pitcher} - name: db_home_team embeddedType: - {name: rotation_number, dbName: home_team_rotation_number} - {name: line, dbName: home_team_line} - {name: team_name, dbName: home_team_name} + - {name: pitcher, dbName: home_team_pitcher} - embedded: EarlyLineGameTeam fields: @@ -211,7 +438,8 @@ mkPersist tsn_codegen_config [groundhog| dbName: line - name: db_team_name dbName: team_name - + - name: db_pitcher + dbName: pitcher |] @@ -219,6 +447,10 @@ mkPersist tsn_codegen_config [groundhog| -- -- * Pickling -- + + +-- | Pickler for the top-level 'Message'. +-- pickle_message :: PU Message pickle_message = xpElem "message" $ @@ -240,26 +472,45 @@ pickle_message = xml_dates m, xml_time_stamp m) -pickle_date :: PU EarlyLineDateXml + + +-- | Pickler for a '\ followed by a \. We turn them into +-- a 'EarlyLineGameWithNote'. +-- +pickle_game_with_note :: PU EarlyLineGameWithNote +pickle_game_with_note = + xpWrap (from_tuple, to_tuple) $ + xpPair (xpOption $ xpElem "note" xpText) + pickle_game + where + from_tuple = uncurry EarlyLineGameWithNote + to_tuple m = (date_note m, date_game m) + + +-- | Pickler for the \ elements within each \. +-- +pickle_date :: PU EarlyLineDate pickle_date = xpElem "date" $ xpWrap (from_tuple, to_tuple) $ - xpTriple (xpAttr "value" undefined) - (xpElem "note" xpText) - pickle_game + xpPair (xpAttr "value" xp_early_line_date) + (xpList pickle_game_with_note) where - from_tuple = uncurryN EarlyLineDateXml - to_tuple m = (xml_date_value m, xml_note m, xml_game m) + from_tuple = uncurry EarlyLineDate + to_tuple m = (date_value m, date_games_with_notes m) + +-- | Pickler for the \ elements within each \. +-- pickle_game :: PU EarlyLineGameXml pickle_game = xpElem "game" $ xpWrap (from_tuple, to_tuple) $ - xp4Tuple (xpElem "time" undefined) + xp4Tuple (xpElem "time" (xpOption xp_ambiguous_time)) pickle_away_team pickle_home_team - (xpElem "over_under" xpText) + (xpElem "over_under" (xpOption xpText)) where from_tuple = uncurryN EarlyLineGameXml to_tuple m = (xml_game_time m, @@ -269,18 +520,127 @@ pickle_game = -pickle_away_team :: PU EarlyLineGameTeam +-- | Pickle an away team (\) element within a \. Most +-- of the work (common with the home team pickler) is done by +-- 'pickle_team'. +-- +pickle_away_team :: PU EarlyLineGameTeamXml pickle_away_team = xpElem "teamA" pickle_team -pickle_home_team :: PU EarlyLineGameTeam + +-- | Pickle a home team (\) element within a \. Most +-- of the work (common with theaway team pickler) is done by +-- 'pickle_team'. +-- +pickle_home_team :: PU EarlyLineGameTeamXml pickle_home_team = xpElem "teamH" pickle_team -pickle_team :: PU EarlyLineGameTeam + +-- | Team pickling common to both 'pickle_away_team' and +-- 'pickle_home_team'. Handles everything inside the \ and +-- \ elements. We try to parse the line/name as both an +-- attribute and an element in order to accomodate +-- MLB_earlylineXML.dtd. +-- +-- The \"line\" and \"pitcher\" fields wind up being double-Maybes, +-- since they can be empty even if they exist. +-- +pickle_team :: PU EarlyLineGameTeamXml pickle_team = xpWrap (from_tuple, to_tuple) $ - xpTriple (xpAttr "rotation" xpInt) - (xpAttr "line" (xpOption xpText)) - xpText + xp6Tuple (xpAttr "rotation" xpInt) + (xpOption $ xpAttr "line" (xpOption xpText)) + (xpOption $ xpAttr "name" xpText) + (xpOption xpText) + (xpOption $ xpElem "pitcher" (xpOption xpText)) + (xpOption $ xpElem "line" (xpOption xpText)) + where + from_tuple (u,v,w,x,y,z) = + EarlyLineGameTeamXml u (join v) w x (join y) (join z) + + to_tuple (EarlyLineGameTeamXml u v w x y z) = + (u, double_just v, w, x, double_just y, double_just z) + where + double_just val = case val of + Nothing -> Nothing + just_something -> Just just_something + + + + +-- +-- * Tasty Tests +-- + +-- | A list of all tests for this module. +-- +early_line_tests :: TestTree +early_line_tests = + testGroup + "EarlyLine tests" + [ test_on_delete_cascade, + test_pickle_of_unpickle_is_identity, + test_unpickle_succeeds ] + +-- | If we unpickle something and then pickle it, we should wind up +-- with the same thing we started with. WARNING: success of this +-- test does not mean that unpickling succeeded. +-- +test_pickle_of_unpickle_is_identity :: TestTree +test_pickle_of_unpickle_is_identity = testGroup "pickle-unpickle tests" $ + [ check "pickle composed with unpickle is the identity" + "test/xml/earlylineXML.xml", + + check "pickle composed with unpickle is the identity (empty game time)" + "test/xml/earlylineXML-empty-game-time.xml" ] + where + check desc path = testCase desc $ do + (expected, actual) <- pickle_unpickle pickle_message path + actual @?= expected + + + +-- | Make sure we can actually unpickle these things. +-- +test_unpickle_succeeds :: TestTree +test_unpickle_succeeds = testGroup "unpickle tests" $ + [ check "unpickling succeeds" + "test/xml/earlylineXML.xml", + + check "unpickling succeeds (empty game time)" + "test/xml/earlylineXML-empty-game-time.xml" ] + where + check desc path = testCase desc $ do + actual <- unpickleable path pickle_message + let expected = True + actual @?= expected + + + +-- | Make sure everything gets deleted when we delete the top-level +-- record. +-- +test_on_delete_cascade :: TestTree +test_on_delete_cascade = testGroup "cascading delete tests" $ + [ check "deleting early_lines deletes its children" + "test/xml/earlylineXML.xml", + + check "deleting early_lines deletes its children (empty game time)" + "test/xml/earlylineXML-empty-game-time.xml" ] where - from_tuple = uncurryN EarlyLineGameTeam - to_tuple m = (db_rotation_number m, db_line m, db_team_name m) + check desc path = testCase desc $ do + results <- unsafe_unpickle path pickle_message + let a = undefined :: EarlyLine + let b = undefined :: EarlyLineGame + + actual <- withSqliteConn ":memory:" $ runDbConn $ do + runMigration silentMigrationLogger $ do + migrate a + migrate b + _ <- dbimport results + deleteAll a + count_a <- countAll a + count_b <- countAll b + return $ sum [count_a, count_b] + let expected = 0 + actual @?= expected