X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FTSN%2FXML%2FSportInfo.hs;h=115fe105c58a5941b5479568d8cfaf166d2cb868;hb=ea92a52cc5f044d63a6e9ed348bf85ec9917f969;hp=b7e269ee792f91df09925dde854fcb3590d40016;hpb=dcaa338a8e638ff20890f949fd116fab0228e050;p=dead%2Fhtsn-import.git diff --git a/src/TSN/XML/SportInfo.hs b/src/TSN/XML/SportInfo.hs index b7e269e..115fe10 100644 --- a/src/TSN/XML/SportInfo.hs +++ b/src/TSN/XML/SportInfo.hs @@ -1,14 +1,64 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TypeFamilies #-} + -- | SportInfo represents a collection of DTDs that we don't really -- handle but want to make available. The raw XML gets stored in the -- database along with the XML_File_ID, but we don't parse any of it. -- --- See also: TSN.XML.GameInfo +-- This is almost completely redundant with "TSN.XML.GameInfo", but +-- the redundancy is necessary: we need separate 'Message' types so +-- that we can have separate 'DbImport' instances. It would take +-- more code/work to abstract (if it's even possible) than to +-- duplicate. -- module TSN.XML.SportInfo ( - dtds ) + dtds, + parse_xml, + sport_info_tests, + -- * WARNING: these are private but exported to silence warnings + SportInfoConstructor(..) ) where --- | The DTDs for everything that we consider "Sport Info." +-- System imports. +import Data.Either ( rights ) +import Data.Time.Clock ( UTCTime ) +import Database.Groundhog ( + countAll, + migrate, + runMigration, + silentMigrationLogger ) +import Database.Groundhog.Generic ( runDbConn ) +import Database.Groundhog.Sqlite ( withSqliteConn ) +import Database.Groundhog.TH ( + groundhog, + mkPersist ) +import Test.Tasty ( TestTree, testGroup ) +import Test.Tasty.HUnit ( (@?=), testCase ) +import Text.XML.HXT.Core ( XmlTree ) +import Text.XML.HXT.DOM.ShowXml ( xshow ) + +-- Local imports. +import TSN.Codegen ( tsn_codegen_config ) +import TSN.DbImport ( + DbImport(..), + ImportResult(..), + run_dbmigrate ) +import TSN.Parse ( + parse_message, + parse_xmlfid, + parse_xml_time_stamp ) +import TSN.XmlImport ( XmlImport(..) ) +import Xml ( + FromXml(..), + ToDb(..), + unsafe_read_document ) + + +-- | The DTDs for everything that we consider \"Sport Info.\" -- -- TODO: This is the list from the old implementation. We need to -- make sure that we are really receiving XML for these DTDs @@ -21,26 +71,24 @@ dtds = "CBASK_AssistsXML.dtd", "Cbask_Awards_XML.dtd", "CBASK_BlocksXML.dtd", - "CBask_BlocksXML.dtd", "Cbask_Conf_Standings_XML.dtd", "Cbask_DivII_III_Indv_Stats_XML.dtd", "Cbask_DivII_Team_Stats_XML.dtd", "Cbask_DivIII_Team_Stats_XML.dtd", "CBASK_FGPctXML.dtd", - "CBask_FGPctXML.dtd", "CBASK_FoulsXML.dtd", "CBASK_FTPctXML.dtd", - "Cbask_Indv_No_Avg_XML.dtd", + "Cbask_Indv_No_Avg_XML.dtd", -- no dtd "Cbask_Indv_Scoring_XML.dtd", - "Cbask_Indv_Shooting_XML.dtd", + "Cbask_Indv_Shooting_XML.dtd", -- no dtd "CBASK_MinutesXML.dtd", "Cbask_Polls_XML.dtd", "CBASK_ReboundsXML.dtd", "CBASK_ScoringLeadersXML.dtd", - "CBASK_StealsXML.dtd", - "Cbask_Team_Scoring_Rebound_Margin_XML.dtd", - "Cbask_Team_Scoring_XML.dtd", - "Cbask_Team_Shooting_Pct_XML.dtd", + "CBASK_StealsXML.dtd", -- no dtd + "Cbask_Team_Scoring_Rebound_Margin_XML.dtd", -- no dtd + "Cbask_Team_Scoring_XML.dtd", -- no dtd + "Cbask_Team_Shooting_Pct_XML.dtd", -- no dtd "Cbask_Team_ThreePT_Made_XML.dtd", "Cbask_Team_ThreePT_PCT_XML.dtd", "Cbask_Team_Win_Pct_XML.dtd", @@ -85,83 +133,287 @@ dtds = "mlbwalksleadersxml.dtd", "MLBXtraBaseHitsXML.dtd", "MLB_ERA_Leaders.dtd", - "MLB_Fielding_XML.dtd", + "MLB_Fielding_XML.dtd", -- no dtd "MLB_Pitching_Appearances_Leaders.dtd", - "MLB_Pitching_Balks_Leaders.dtd", - "MLB_Pitching_CG_Leaders.dtd", - "MLB_Pitching_ER_Allowed_Leaders.dtd", - "MLB_Pitching_Hits_Allowed_Leaders.dtd", - "MLB_Pitching_Hit_Batters_Leaders.dtd", - "MLB_Pitching_HR_Allowed_Leaders.dtd", - "MLB_Pitching_IP_Leaders.dtd", - "MLB_Pitching_Runs_Allowed_Leaders.dtd", - "MLB_Pitching_Saves_Leaders.dtd", - "MLB_Pitching_Shut_Outs_Leaders.dtd", - "MLB_Pitching_Starts_Leaders.dtd", - "MLB_Pitching_Strike_Outs_Leaders.dtd", - "MLB_Pitching_Walks_Leaders.dtd", - "MLB_Pitching_WHIP_Leaders.dtd", - "MLB_Pitching_Wild_Pitches_Leaders.dtd", - "MLB_Pitching_Win_Percentage_Leaders.dtd", - "MLB_Pitching_WL_Leaders.dtd", - "NBA_Team_Stats_XML.dtd", - "NBA3PPctXML.dtd", - "NBAAssistsXML.dtd", - "NBABlocksXML.dtd", - "nbaconfrecxml.dtd", - "nbadaysxml.dtd", - "nbadivisionsxml.dtd", - "NBAFGPctXML.dtd", - "NBAFoulsXML.dtd", - "NBAFTPctXML.dtd", - "NBAMinutesXML.dtd", - "NBAReboundsXML.dtd", - "NBAScorersXML.dtd", - "nbastandxml.dtd", - "NBAStealsXML.dtd", - "nbateamleadersxml.dtd", - "nbatripledoublexml.dtd", - "NBATurnoversXML.dtd", - "NCAA_Conference_Schedule_XML.dtd", - "nflfirstdownxml.dtd", - "NFLFumbleLeaderXML.dtd", - "NFLGiveTakeXML.dtd", - "NFLGrassTurfDomeOutsideXML.dtd", - "NFLInside20XML.dtd", - "NFLInterceptionLeadersXML.dtd", - "NFLKickoffsXML.dtd", - "NFLMondayNightXML.dtd", - "NFLPassingLeadersXML.dtd", - "NFLPassLeadXML.dtd", - "NFLQBStartsXML.dtd", - "NFLReceivingLeadersXML.dtd", - "NFLRushingLeadersXML.dtd", - "NFLSackLeadersXML.dtd", - "nflstandxml.dtd", - "NFLTackleFFLeadersXML.dtd", - "NFLTeamRankingsXML.dtd", - "NFLTopKickoffReturnXML.dtd", - "NFLTopPerformanceXML.dtd", - "NFLTopPuntReturnXML.dtd", - "NFLTotalYardageXML.dtd", - "NFLYardsXML.dtd", - "NFL_KickingLeaders_XML.dtd", - "NFL_NBA_Draft_XML.dtd", - "NFL_PuntingLeaders_XML.dtd", - "NFL_Roster_XML.dtd", - "NFL_Team_Stats_XML.dtd", - "Transactions_XML.dtd", - "Weekly_Sched_XML.dtd", - "WNBA_Team_Leaders_XML.dtd", - "WNBA3PPctXML.dtd", - "WNBAAssistsXML.dtd", - "WNBABlocksXML.dtd", - "WNBAFGPctXML.dtd", - "WNBAFoulsXML.dtd", - "WNBAFTPctXML.dtd", - "WNBAMinutesXML.dtd", - "WNBAReboundsXML.dtd", - "WNBAScorersXML.dtd", - "wnbastandxml.dtd", - "WNBAStealsXML.dtd", - "WNBATurnoversXML.dtd" ] + "MLB_Pitching_Balks_Leaders.dtd", -- no dtd + "MLB_Pitching_CG_Leaders.dtd", -- no dtd + "MLB_Pitching_ER_Allowed_Leaders.dtd", -- no dtd + "MLB_Pitching_Hits_Allowed_Leaders.dtd", -- no dtd + "MLB_Pitching_Hit_Batters_Leaders.dtd", -- no dtd + "MLB_Pitching_HR_Allowed_Leaders.dtd", -- no dtd + "MLB_Pitching_IP_Leaders.dtd", -- no dtd + "MLB_Pitching_Runs_Allowed_Leaders.dtd", -- no dtd + "MLB_Pitching_Saves_Leaders.dtd", -- no dtd + "MLB_Pitching_Shut_Outs_Leaders.dtd", -- no dtd + "MLB_Pitching_Starts_Leaders.dtd", -- no dtd + "MLB_Pitching_Strike_Outs_Leaders.dtd", -- no dtd + "MLB_Pitching_Walks_Leaders.dtd", -- no dtd + "MLB_Pitching_WHIP_Leaders.dtd", -- no dtd + "MLB_Pitching_Wild_Pitches_Leaders.dtd", -- no dtd + "MLB_Pitching_Win_Percentage_Leaders.dtd", -- no dtd + "MLB_Pitching_WL_Leaders.dtd", -- no dtd + "NBA_Team_Stats_XML.dtd", -- no dtd + "NBA3PPctXML.dtd", -- no dtd + "NBAAssistsXML.dtd", -- no dtd + "NBABlocksXML.dtd", -- no dtd + "nbaconfrecxml.dtd", -- no dtd + "nbadaysxml.dtd", -- no dtd + "nbadivisionsxml.dtd", -- no dtd + "NBAFGPctXML.dtd", -- no dtd + "NBAFoulsXML.dtd", -- no dtd + "NBAFTPctXML.dtd", -- no dtd + "NBAMinutesXML.dtd", -- no dtd + "NBAReboundsXML.dtd", -- no dtd + "NBAScorersXML.dtd", -- no dtd + "nbastandxml.dtd", -- no dtd + "NBAStealsXML.dtd", -- no dtd + "nbateamleadersxml.dtd", -- no dtd + "nbatripledoublexml.dtd", -- no dtd + "NBATurnoversXML.dtd", -- no dtd + "NCAA_Conference_Schedule_XML.dtd", -- no dtd + "nflfirstdownxml.dtd", -- no dtd + "NFLFumbleLeaderXML.dtd", -- no dtd + "NFLGiveTakeXML.dtd", -- no dtd + "NFLGrassTurfDomeOutsideXML.dtd", -- no dtd + "NFLInside20XML.dtd", -- no dtd + "NFLInterceptionLeadersXML.dtd", -- no dtd + "NFLKickoffsXML.dtd", -- no dtd + "NFLMondayNightXML.dtd", -- no dtd + "NFLPassingLeadersXML.dtd", -- no dtd + "NFLPassLeadXML.dtd", -- no dtd + "NFLQBStartsXML.dtd", -- no dtd + "NFLReceivingLeadersXML.dtd", -- no dtd + "NFLRushingLeadersXML.dtd", -- no dtd + "NFLSackLeadersXML.dtd", -- no dtd + "nflstandxml.dtd", -- no dtd + "NFLTackleFFLeadersXML.dtd", -- no dtd + "NFLTeamRankingsXML.dtd", -- no dtd + "NFLTopKickoffReturnXML.dtd", -- no dtd + "NFLTopPerformanceXML.dtd", -- no dtd + "NFLTopPuntReturnXML.dtd", -- no dtd + "NFLTotalYardageXML.dtd", -- no dtd + "NFLYardsXML.dtd", -- no dtd + "NFL_KickingLeaders_XML.dtd", -- no dtd + "NFL_NBA_Draft_XML.dtd", -- no dtd + "NFL_PuntingLeaders_XML.dtd", -- no dtd + "NFL_Roster_XML.dtd", -- no dtd + "NFL_Team_Stats_XML.dtd", -- no dtd + "Transactions_XML.dtd", -- no dtd + "Weekly_Sched_XML.dtd", -- no dtd + "WNBA_Team_Leaders_XML.dtd", -- no dtd + "WNBA3PPctXML.dtd", -- no dtd + "WNBAAssistsXML.dtd", -- no dtd + "WNBABlocksXML.dtd", -- no dtd + "WNBAFGPctXML.dtd", -- no dtd + "WNBAFoulsXML.dtd", -- no dtd + "WNBAFTPctXML.dtd", -- no dtd + "WNBAMinutesXML.dtd", -- no dtd + "WNBAReboundsXML.dtd", -- no dtd + "WNBAScorersXML.dtd", -- no dtd + "wnbastandxml.dtd", -- no dtd + "WNBAStealsXML.dtd", -- no dtd + "WNBATurnoversXML.dtd" -- no dtd + ] + + +-- | XML representation of a SportInfo \. +-- +data Message = + Message { + xml_dtd :: String, + xml_xml_file_id :: Int, + xml_time_stamp :: UTCTime, + xml_xml :: String } + deriving (Eq, Show) + + +-- | Attempt to parse a 'Message' from an 'XmlTree'. If we cannot, +-- we fail with an error message. +-- +parse_xml :: String -> XmlTree -> Either String Message +parse_xml dtdname xmltree = do + xmlfid <- parse_xmlfid xmltree + timestamp <- parse_xml_time_stamp xmltree + message <- parse_message xmltree + return $ Message dtdname (fromInteger xmlfid) timestamp (xshow [message]) + + +-- | Database representation of a 'Message'. +-- +data SportInfo = + SportInfo { + db_dtd :: String, + db_xml_file_id :: Int, + db_time_stamp :: UTCTime, + db_xml :: String } + + +instance ToDb Message where + -- | The database analogue of a 'Message' is an 'SportInfo'. + type Db Message = SportInfo + +instance FromXml Message where + -- | The XML to DB conversion is trivial here. + -- + from_xml Message{..} = SportInfo { + db_dtd = xml_dtd, + db_xml_file_id = xml_xml_file_id, + db_time_stamp = xml_time_stamp, + db_xml = xml_xml } + + +-- | This allows us to insert the XML representation 'Message' +-- directly. +-- +instance XmlImport Message + + +-- +-- Database code +-- + +instance DbImport Message where + dbmigrate _ = + run_dbmigrate $ migrate (undefined :: SportInfo) + + -- | We import a 'Message' by inserting the whole thing at + -- once. Nothing fancy going on here. + dbimport msg = do + insert_xml_ msg + return ImportSucceeded + + +-- | The database schema for SportInfo is trivial; all we need is for +-- the XML_File_ID to be unique. +-- +mkPersist tsn_codegen_config [groundhog| +- entity: SportInfo + constructors: + - name: SportInfo + uniques: + - name: unique_sport_info + type: constraint + # Prevent multiple imports of the same message. + fields: [db_xml_file_id] +|] + + +-- +-- Tasty Tests +-- + +-- | A list of all tests for this module. +-- +sport_info_tests :: TestTree +sport_info_tests = + testGroup + "SportInfo tests" + [ test_parse_xml_succeeds, + test_dbimport_succeeds ] + + +-- | Sample XML documents for SportInfo types. +-- +sport_info_test_files :: [FilePath] +sport_info_test_files = + map ("test/xml/sportinfo/" ++) [ + "CBASK_3PPctXML.xml", + "Cbask_All_Tourn_Teams_XML.xml", + "CBASK_AssistsXML.xml", + "Cbask_Awards_XML.xml", + "CBASK_BlocksXML.xml", + "Cbask_Conf_Standings_XML.xml", + "Cbask_DivII_III_Indv_Stats_XML.xml", + "Cbask_DivII_Team_Stats_XML.xml", + "Cbask_DivIII_Team_Stats_XML.xml", + "CBASK_FGPctXML.xml", + "CBASK_FoulsXML.xml", + "CBASK_FTPctXML.xml", + "Cbask_Indv_Scoring_XML.xml", + "CBASK_MinutesXML.xml", + "Cbask_Polls_XML.xml", + "CBASK_ReboundsXML.xml", + "CBASK_ScoringLeadersXML.xml", + "Cbask_Team_ThreePT_Made_XML.xml", + "Cbask_Team_ThreePT_PCT_XML.xml", + "Cbask_Team_Win_Pct_XML.xml", + "Cbask_Top_Twenty_Five_XML.xml", + "CBASK_TopTwentyFiveResult_XML.xml", + "Cbask_Tourn_Awards_XML.xml", + "Cbask_Tourn_Champs_XML.xml", + "Cbask_Tourn_Indiv_XML.xml", + "Cbask_Tourn_Leaders_XML.xml", + "Cbask_Tourn_MVP_XML.xml", + "Cbask_Tourn_Records_XML.xml", + "LeagueScheduleXML.xml", + "minorscoresxml.xml", + "Minor_Baseball_League_Leaders_XML.xml", + "Minor_Baseball_Standings_XML.xml", + "Minor_Baseball_Transactions_XML.xml", + "mlbbattingavgxml.xml", + "mlbdoublesleadersxml.xml", + "MLBGamesPlayedXML.xml", + "MLBGIDPXML.xml", + "MLBHitByPitchXML.xml", + "mlbhitsleadersxml.xml", + "mlbhomerunsxml.xml", + "MLBHRFreqXML.xml", + "MLBIntWalksXML.xml", + "MLBKORateXML.xml", + "mlbonbasepctxml.xml", + "MLBOPSXML.xml", + "MLBPlateAppsXML.xml", + "mlbrbisxml.xml", + "mlbrunsleadersxml.xml", + "MLBSacFliesXML.xml", + "MLBSacrificesXML.xml", + "MLBSBSuccessXML.xml", + "mlbsluggingpctxml.xml", + "mlbstandxml.xml", + "mlbstandxml_preseason.xml", + "mlbstolenbasexml.xml", + "mlbtotalbasesleadersxml.xml", + "mlbtriplesleadersxml.xml", + "MLBWalkRateXML.xml", + "mlbwalksleadersxml.xml", + "MLBXtraBaseHitsXML.xml", + "MLB_Pitching_Appearances_Leaders.xml", + "MLB_ERA_Leaders.xml" + ] + + + +-- | Make sure we can parse every element of 'sport_info_test_files'. +-- +test_parse_xml_succeeds :: TestTree +test_parse_xml_succeeds = + testGroup "parse_xml" $ map check sport_info_test_files + where + check t = testCase t $ do + x <- unsafe_read_document t + let result = parse_xml "dummy" x + let actual = case result of -- isRight appears in base-4.7 + Left _ -> False + Right _ -> True + let expected = True + actual @?= expected + + +-- | Ensure that each element of 'sport_info_test_files' can be imported +-- by counting the total number of database records (after +-- importing) and comparing it against the length of +-- 'sport_info_test_files'. +-- +test_dbimport_succeeds :: TestTree +test_dbimport_succeeds = testCase "dbimport succeeds" $ do + xmltrees <- mapM unsafe_read_document sport_info_test_files + let msgs = rights $ map (parse_xml "dummy") xmltrees + actual <- withSqliteConn ":memory:" $ runDbConn $ do + runMigration silentMigrationLogger $ do + migrate (undefined :: SportInfo) + mapM_ dbimport msgs + countAll (undefined :: SportInfo) + + actual @?= expected + where + expected = length sport_info_test_files