X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FTSN%2FXML%2FEarlyLine.hs;h=fe11b980f5503a9a0bf5373fe04ec2638b3721a9;hb=fdd85d5ed7944e6a6373c99c2e341f370cd931f8;hp=4ad9ae2ca0a5f1bc59c9d0b3bb60b71f13f8fa5d;hpb=16d86e7a3c1eda08b91752f92510a1de0b952a17;p=dead%2Fhtsn-import.git diff --git a/src/TSN/XML/EarlyLine.hs b/src/TSN/XML/EarlyLine.hs index 4ad9ae2..fe11b98 100644 --- a/src/TSN/XML/EarlyLine.hs +++ b/src/TSN/XML/EarlyLine.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE QuasiQuotes #-} @@ -5,12 +6,24 @@ {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeFamilies #-} --- | Parse TSN XML for the DTD \"earlylineXML.dtd\". Each \ --- element contains a bunch of \s, and those \s --- contain a single \. In the database, we merge the date --- info into the games, and key the games to the messages. +-- | Parse TSN XML for the DTD \"earlylineXML.dtd\". For that DTD, +-- each \ element contains a bunch of \s, and those +-- \s contain a single \. In the database, we merge +-- the date info into the games, and key the games to the messages. +-- +-- Real life is not so simple, however. There is another module, +-- "TSN.XML.MLBEarlyLine" that is something of a subclass of this +-- one. It contains early lines, but only for MLB games. The data +-- types and XML schema are /almost/ the same, but TSN like to make +-- things difficult. +-- +-- A full list of the differences is given in that module. In this +-- one, we mention where data types have been twerked a little to +-- support the second document type. -- module TSN.XML.EarlyLine ( + EarlyLine, -- Used in TSN.XML.MLBEarlyLine + EarlyLineGame, -- Used in TSN.XML.MLBEarlyLine dtd, pickle_message, -- * Tests @@ -21,42 +34,45 @@ module TSN.XML.EarlyLine ( where -- System imports. -import Control.Monad ( forM_ ) +import Control.Monad ( join ) import Data.Time ( UTCTime(..) ) import Data.Tuple.Curry ( uncurryN ) +import qualified Data.Vector.HFixed as H ( HVector, convert ) import Database.Groundhog ( countAll, deleteAll, insert_, - migrate, - runMigration, - silentMigrationLogger ) + migrate ) import Database.Groundhog.Core ( DefaultKey ) -import Database.Groundhog.Generic ( runDbConn ) +import Database.Groundhog.Generic ( runDbConn, runMigrationSilent ) import Database.Groundhog.Sqlite ( withSqliteConn ) import Database.Groundhog.TH ( groundhog, mkPersist ) +import qualified GHC.Generics as GHC ( Generic ) import Test.Tasty ( TestTree, testGroup ) import Test.Tasty.HUnit ( (@?=), testCase ) import Text.XML.HXT.Core ( PU, xp4Tuple, + xp6Tuple, xp7Tuple, xpAttr, xpElem, xpInt, xpList, xpOption, + xpPair, xpText, - xpTriple, xpWrap ) -- Local imports. +import Misc ( double_just ) import TSN.Codegen ( tsn_codegen_config ) import TSN.DbImport ( DbImport(..), ImportResult(..), run_dbmigrate ) import TSN.Picklers ( xp_ambiguous_time, + xp_attr_option, xp_early_line_date, xp_time_stamp ) import TSN.XmlImport ( XmlImport(..) ) @@ -74,7 +90,7 @@ dtd :: String dtd = "earlylineXML.dtd" -- --- DB/XML data types +-- * DB/XML data types -- -- * EarlyLine/Message @@ -107,7 +123,11 @@ data Message = xml_title :: String, xml_dates :: [EarlyLineDate], xml_time_stamp :: UTCTime } - deriving (Eq, Show) + deriving (Eq, GHC.Generic, Show) + +-- | For 'H.convert'. +-- +instance H.HVector Message instance ToDb Message where @@ -140,7 +160,40 @@ instance XmlImport Message --- * EarlyLineDate +-- * EarlyLineDate / EarlyLineGameWithNote + +-- | This is a very sad data type. It exists so that we can +-- successfully unpickle/pickle the MLB_earlylineXML.dtd documents +-- and get back what we started with. In that document type, the +-- dates all have multiple \s associated with them (as +-- children). But the dates also have multiple \s as +-- children, and we're supposed to figure out which notes go with +-- which games based on the order that they appear in the XML +-- file. Yeah, right. +-- +-- In any case, instead of expecting the games and notes in some +-- nice order, we use this data type to expect \"a game and maybe a +-- note\" multiple times. This will pair the notes with only one +-- game, rather than all of the games that TSN think it should go +-- with. But it allows us to pickle and unpickle correctly at least. +-- +data EarlyLineGameWithNote = + EarlyLineGameWithNote + (Maybe String) -- date_note, unused + EarlyLineGameXml -- date_game + deriving (Eq, GHC.Generic, Show) + +-- | Accessor for the game within a 'EarlyLineGameWithNote'. We define +-- this ourselves to avoid an unused field warning for date_note. +-- +date_game :: EarlyLineGameWithNote -> EarlyLineGameXml +date_game (EarlyLineGameWithNote _ g) = g + +-- | For 'H.convert'. +-- +instance H.HVector EarlyLineGameWithNote + + -- | XML representation of a \. It has a \"value\" attribute -- containing the actual date string. As children it contains a @@ -150,65 +203,196 @@ instance XmlImport Message data EarlyLineDate = EarlyLineDate { date_value :: UTCTime, - date_note :: String, - date_game :: EarlyLineGameXml } - deriving (Eq, Show) + date_games_with_notes :: [EarlyLineGameWithNote] } + deriving (Eq, GHC.Generic, Show) + +-- | For 'H.convert'. +-- +instance H.HVector EarlyLineDate -- * EarlyLineGame / EarlyLineGameXml +-- | Database representation of a \ in earlylineXML.dtd and +-- MLB_earlylineXML.dtd. We've had to make a sacrifice here to +-- support both document types. Since it's not possible to pair the +-- \s with \s reliably in MLB_earlylineXML.dtd, we +-- have omitted the notes entirely. This is sad, but totally not our +-- fault. +-- +-- In earlylineXML.dtd, each \ and thus each \ is +-- paired with exactly one \, so if we only cared about that +-- document type, we could have retained the notes. +-- +-- In earlylinexml.DTD, the over/under is required, but in +-- MLB_earlylinexml.DTD it is not. So another compromise is to have +-- it optional here. +-- +-- The 'db_game_time' should be the combined date/time using the +-- date value from the \ element's containing +-- \. That's why EarlyLineGame isn't an instance of +-- 'FromXmlFk': the foreign key isn't enough to construct one, we +-- also need the date. +-- data EarlyLineGame = EarlyLineGame { db_early_lines_id :: DefaultKey EarlyLine, db_game_time :: UTCTime, -- ^ Combined date/time - db_note :: String, -- ^ Taken from the parent \ db_away_team :: EarlyLineGameTeam, db_home_team :: EarlyLineGameTeam, - db_over_under :: String } + db_over_under :: Maybe String } + +-- | XML representation of a 'EarlyLineGame'. Comparatively, it lacks +-- only the foreign key to the parent message. +-- data EarlyLineGameXml = EarlyLineGameXml { - xml_game_time :: UTCTime, -- ^ Only an ambiguous time string, e.g. \"8:30\" - xml_away_team :: EarlyLineGameTeam, - xml_home_team :: EarlyLineGameTeam, - xml_over_under :: String } - deriving (Eq, Show) + xml_game_time :: Maybe UTCTime, -- ^ Only an ambiguous time string, + -- e.g. \"8:30\". Can be empty. + xml_away_team :: EarlyLineGameTeamXml, + xml_home_team :: EarlyLineGameTeamXml, + xml_over_under :: Maybe String } + deriving (Eq, GHC.Generic, Show) --- | XML representation of an earlyline team. It doubles as an +-- | For 'H.convert'. +-- +instance H.HVector EarlyLineGameXml + + +-- * EarlyLineGameTeam / EarlyLineGameTeamXml + +-- | Database representation of an EarlyLine team, used in both +-- earlylineXML.dtd and MLB_earlylineXML.dtd. It doubles as an -- embedded type within the DB representation 'EarlyLineGame'. -- +-- The team name is /not/ optional. However, since we're overloading +-- the XML representation, we're constructing 'db_team_name' name +-- from two Maybes, 'xml_team_name_attr' and +-- 'xml_team_name_text'. To ensure type safety (and avoid a runtime +-- crash), we allow the database field to be optional as well. +-- data EarlyLineGameTeam = EarlyLineGameTeam { - db_rotation_number :: Int, + db_rotation_number :: Maybe Int, -- ^ Usually there but sometimes empty. db_line :: Maybe String, -- ^ Can be blank, a Double, or \"off\". - db_team_name :: String } + db_team_name :: Maybe String, -- ^ NOT optional, see the data type docs. + db_pitcher :: Maybe String -- ^ Optional in MLB_earlylineXML.dtd, + -- always absent in earlylineXML.dtd. + } + + +-- | This here is an abomination. What we've got is an XML +-- representation, not for either earlylineXML.dtd or +-- MLB_earlylineXML.dtd, but one that will work for /both/. Even +-- though they represent the teams totally differently! Argh! +-- +-- The earlylineXML.dtd teams look like, +-- +-- \Miami\ +-- +-- While the MLB_earlylineXML.dtd teams look like, +-- +-- +-- D.Haren +-- -130 +-- +-- +-- So that's cool. This data type has placeholders that should allow +-- the name/line to appear either as an attribute or as a text +-- node. We'll sort it all out in the conversion to +-- EarlyLineGameTeam. +-- +data EarlyLineGameTeamXml = + EarlyLineGameTeamXml { + xml_rotation_number :: Maybe Int, + xml_line_attr :: Maybe String, + xml_team_name_attr :: Maybe String, + xml_team_name_text :: Maybe String, + xml_pitcher :: Maybe String, + xml_line_elem :: Maybe String } deriving (Eq, Show) --- | Convert an 'EarlyLineDate' into an 'EarlyLineGame'. Each date has --- exactly one game, and the fields that belong to the date should --- really be in the game anyway. So the database representation of a --- game has the combined fields of the XML date/game. + +instance ToDb EarlyLineGameTeamXml where + -- | The database analogue of a 'EarlyLineGameTeamXml' is an + -- 'EarlyLineGameTeam', although the DB type is merely embedded + -- in another type. + -- + type Db EarlyLineGameTeamXml = EarlyLineGameTeam + + +-- | The 'FromXml' instance for 'EarlyLineGameTeamXml' lets us convert +-- it to a 'EarlyLineGameTeam' easily. +-- +instance FromXml EarlyLineGameTeamXml where + -- | To convert a 'EarlyLineGameTeamXml' to an 'EarlyLineGameTeam', + -- we figure how its fields were represented and choose the ones + -- that are populated. For example if the \"line\" attribute was + -- there, we'll use it, but if now, we'll use the \ + -- element. + -- + from_xml EarlyLineGameTeamXml{..} = + EarlyLineGameTeam { + db_rotation_number = xml_rotation_number, + db_line = merge xml_line_attr xml_line_elem, + db_team_name = merge xml_team_name_attr xml_team_name_text, + db_pitcher = xml_pitcher } + where + merge :: Maybe String -> Maybe String -> Maybe String + merge Nothing y = y + merge x Nothing = x + merge _ _ = Nothing + + + + +-- | Convert an 'EarlyLineDate' into a list of 'EarlyLineGame's. Each +-- date has one or more games, and the fields that belong to the date +-- should really be in the game anyway. So the database +-- representation of a game has the combined fields of the XML +-- date/game. -- --- This function gets the game out of a date, and then sticks the --- date value and note inside the game. It also adds the foreign key --- reference to the game's parent message, and returns the result. +-- This function gets the games out of a date, and then sticks the +-- date value inside the games. It also adds the foreign key +-- reference to the games' parent message, and returns the result. -- -date_to_game :: (DefaultKey EarlyLine) -> EarlyLineDate -> EarlyLineGame -date_to_game fk date = - EarlyLineGame { - db_early_lines_id = fk, - db_game_time = combined_date_time, - db_note = (date_note date), - db_away_team = xml_away_team (date_game date), - db_home_team = xml_home_team (date_game date), - db_over_under = xml_over_under (date_game date) } +-- This would convert a single date to a single game if we only +-- needed to support earlylineXML.dtd and not MLB_earlylineXML.dtd. +-- +date_to_games :: (DefaultKey EarlyLine) -> EarlyLineDate -> [EarlyLineGame] +date_to_games fk date = + map convert_game games_only where - date_part = date_value date - time_part = xml_game_time (date_game date) - combined_date_time = UTCTime (utctDay date_part) (utctDayTime time_part) + -- | Get the list of games out of a date (i.e. drop the notes). + -- + games_only :: [EarlyLineGameXml] + games_only = (map date_game (date_games_with_notes date)) + + -- | Stick the date value into the given game. If our + -- 'EarlyLineGameXml' has an 'xml_game_time', then we combine it + -- with the day portion of the supplied @date@. If not, then we + -- just use @date as-is. + -- + combine_date_time :: Maybe UTCTime -> UTCTime + combine_date_time (Just t) = + UTCTime (utctDay $ date_value date) (utctDayTime t) + combine_date_time Nothing = date_value date + + -- | Convert an XML game to a database one. + -- + convert_game :: EarlyLineGameXml -> EarlyLineGame + convert_game EarlyLineGameXml{..} = + EarlyLineGame { + db_early_lines_id = fk, + db_game_time = combine_date_time xml_game_time, + db_away_team = from_xml xml_away_team, + db_home_team = from_xml xml_home_team, + db_over_under = xml_over_under } + -- -- * Database stuff @@ -224,12 +408,16 @@ instance DbImport Message where -- Insert the message and obtain its ID. msg_id <- insert_xml m - -- Now loop through the message's s. - forM_ (xml_dates m) $ \date -> do - -- Each date only contains one game, so we convert the date to a - -- game and then insert the game (keyed to the message). - let game = date_to_game msg_id date - insert_ game + -- Create a function that will turn a list of dates into a list of + -- games by converting each date to its own list of games, and + -- then concatenating all of the game lists together. + let convert_dates_to_games = concatMap (date_to_games msg_id) + + -- Now use it to make dem games. + let games = convert_dates_to_games (xml_dates m) + + -- And insert all of them + mapM_ insert_ games return ImportSucceeded @@ -260,11 +448,13 @@ mkPersist tsn_codegen_config [groundhog| - {name: rotation_number, dbName: away_team_rotation_number} - {name: line, dbName: away_team_line} - {name: team_name, dbName: away_team_name} + - {name: pitcher, dbName: away_team_pitcher} - name: db_home_team embeddedType: - {name: rotation_number, dbName: home_team_rotation_number} - {name: line, dbName: home_team_line} - {name: team_name, dbName: home_team_name} + - {name: pitcher, dbName: home_team_pitcher} - embedded: EarlyLineGameTeam fields: @@ -274,7 +464,8 @@ mkPersist tsn_codegen_config [groundhog| dbName: line - name: db_team_name dbName: team_name - + - name: db_pitcher + dbName: pitcher |] @@ -289,7 +480,7 @@ mkPersist tsn_codegen_config [groundhog| pickle_message :: PU Message pickle_message = xpElem "message" $ - xpWrap (from_tuple, to_tuple) $ + xpWrap (from_tuple, H.convert) $ xp7Tuple (xpElem "XML_File_ID" xpInt) (xpElem "heading" xpText) (xpElem "category" xpText) @@ -299,13 +490,19 @@ pickle_message = (xpElem "time_stamp" xp_time_stamp) where from_tuple = uncurryN Message - to_tuple m = (xml_xml_file_id m, - xml_heading m, - xml_category m, - xml_sport m, - xml_title m, - xml_dates m, - xml_time_stamp m) + + + +-- | Pickler for a '\ followed by a \. We turn them into +-- a 'EarlyLineGameWithNote'. +-- +pickle_game_with_note :: PU EarlyLineGameWithNote +pickle_game_with_note = + xpWrap (from_tuple, H.convert) $ + xpPair (xpOption $ xpElem "note" xpText) + pickle_game + where + from_tuple = uncurry EarlyLineGameWithNote -- | Pickler for the \ elements within each \. @@ -313,32 +510,26 @@ pickle_message = pickle_date :: PU EarlyLineDate pickle_date = xpElem "date" $ - xpWrap (from_tuple, to_tuple) $ - xpTriple (xpAttr "value" xp_early_line_date) - (xpElem "note" xpText) - pickle_game + xpWrap (from_tuple, H.convert) $ + xpPair (xpAttr "value" xp_early_line_date) + (xpList pickle_game_with_note) where - from_tuple = uncurryN EarlyLineDate - to_tuple m = (date_value m, date_note m, date_game m) + from_tuple = uncurry EarlyLineDate --- | Pickler for the \ element within each \. +-- | Pickler for the \ elements within each \. -- pickle_game :: PU EarlyLineGameXml pickle_game = xpElem "game" $ - xpWrap (from_tuple, to_tuple) $ - xp4Tuple (xpElem "time" xp_ambiguous_time) + xpWrap (from_tuple, H.convert) $ + xp4Tuple (xpElem "time" (xpOption xp_ambiguous_time)) pickle_away_team pickle_home_team - (xpElem "over_under" xpText) + (xpElem "over_under" (xpOption xpText)) where from_tuple = uncurryN EarlyLineGameXml - to_tuple m = (xml_game_time m, - xml_away_team m, - xml_home_team m, - xml_over_under m) @@ -346,7 +537,7 @@ pickle_game = -- of the work (common with the home team pickler) is done by -- 'pickle_team'. -- -pickle_away_team :: PU EarlyLineGameTeam +pickle_away_team :: PU EarlyLineGameTeamXml pickle_away_team = xpElem "teamA" pickle_team @@ -354,23 +545,35 @@ pickle_away_team = xpElem "teamA" pickle_team -- of the work (common with theaway team pickler) is done by -- 'pickle_team'. -- -pickle_home_team :: PU EarlyLineGameTeam +pickle_home_team :: PU EarlyLineGameTeamXml pickle_home_team = xpElem "teamH" pickle_team -- | Team pickling common to both 'pickle_away_team' and -- 'pickle_home_team'. Handles everything inside the \ and --- \ elements. +-- \ elements. We try to parse the line/name as both an +-- attribute and an element in order to accomodate +-- MLB_earlylineXML.dtd. +-- +-- The \"line\" and \"pitcher\" fields wind up being double-Maybes, +-- since they can be empty even if they exist. -- -pickle_team :: PU EarlyLineGameTeam +pickle_team :: PU EarlyLineGameTeamXml pickle_team = - xpWrap (from_tuple, to_tuple) $ - xpTriple (xpAttr "rotation" xpInt) - (xpAttr "line" (xpOption xpText)) - xpText + xpWrap (from_tuple, to_tuple') $ + xp6Tuple (xpAttr "rotation" xp_attr_option) + (xpOption $ xpAttr "line" (xpOption xpText)) + (xpOption $ xpAttr "name" xpText) + (xpOption xpText) + (xpOption $ xpElem "pitcher" (xpOption xpText)) + (xpOption $ xpElem "line" (xpOption xpText)) where - from_tuple = uncurryN EarlyLineGameTeam - to_tuple m = (db_rotation_number m, db_line m, db_team_name m) + from_tuple (u,v,w,x,y,z) = + EarlyLineGameTeamXml u (join v) w x (join y) (join z) + + to_tuple' (EarlyLineGameTeamXml u v w x y z) = + (u, double_just v, w, x, double_just y, double_just z) + @@ -393,24 +596,33 @@ early_line_tests = -- test does not mean that unpickling succeeded. -- test_pickle_of_unpickle_is_identity :: TestTree -test_pickle_of_unpickle_is_identity = - testCase "pickle composed with unpickle is the identity" $ do - let path = "test/xml/earlylineXML.xml" - (expected, actual) <- pickle_unpickle pickle_message path - actual @?= expected +test_pickle_of_unpickle_is_identity = testGroup "pickle-unpickle tests" + [ check "pickle composed with unpickle is the identity" + "test/xml/earlylineXML.xml", + + check "pickle composed with unpickle is the identity (empty game time)" + "test/xml/earlylineXML-empty-game-time.xml" ] + where + check desc path = testCase desc $ do + (expected, actual) <- pickle_unpickle pickle_message path + actual @?= expected -- | Make sure we can actually unpickle these things. -- test_unpickle_succeeds :: TestTree -test_unpickle_succeeds = - testCase "unpickling succeeds" $ do - let path = "test/xml/earlylineXML.xml" - actual <- unpickleable path pickle_message +test_unpickle_succeeds = testGroup "unpickle tests" + [ check "unpickling succeeds" + "test/xml/earlylineXML.xml", - let expected = True - actual @?= expected + check "unpickling succeeds (empty game time)" + "test/xml/earlylineXML-empty-game-time.xml" ] + where + check desc path = testCase desc $ do + actual <- unpickleable path pickle_message + let expected = True + actual @?= expected @@ -418,21 +630,26 @@ test_unpickle_succeeds = -- record. -- test_on_delete_cascade :: TestTree -test_on_delete_cascade = - testCase "deleting early_lines deletes its children" $ do - let path = "test/xml/earlylineXML.xml" - results <- unsafe_unpickle path pickle_message - let a = undefined :: EarlyLine - let b = undefined :: EarlyLineGame - - actual <- withSqliteConn ":memory:" $ runDbConn $ do - runMigration silentMigrationLogger $ do - migrate a - migrate b - _ <- dbimport results - deleteAll a - count_a <- countAll a - count_b <- countAll b - return $ sum [count_a, count_b] - let expected = 0 - actual @?= expected +test_on_delete_cascade = testGroup "cascading delete tests" + [ check "deleting early_lines deletes its children" + "test/xml/earlylineXML.xml", + + check "deleting early_lines deletes its children (empty game time)" + "test/xml/earlylineXML-empty-game-time.xml" ] + where + check desc path = testCase desc $ do + results <- unsafe_unpickle path pickle_message + let a = undefined :: EarlyLine + let b = undefined :: EarlyLineGame + + actual <- withSqliteConn ":memory:" $ runDbConn $ do + runMigrationSilent $ do + migrate a + migrate b + _ <- dbimport results + deleteAll a + count_a <- countAll a + count_b <- countAll b + return $ sum [count_a, count_b] + let expected = 0 + actual @?= expected