X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FTSN%2FXML%2FSportInfo.hs;h=99a5705c54c5d331a4269238f18301c643871d0d;hb=631c83d02fda46726b8712ff345dbdbac26eebfc;hp=b7e269ee792f91df09925dde854fcb3590d40016;hpb=dcaa338a8e638ff20890f949fd116fab0228e050;p=dead%2Fhtsn-import.git diff --git a/src/TSN/XML/SportInfo.hs b/src/TSN/XML/SportInfo.hs index b7e269e..99a5705 100644 --- a/src/TSN/XML/SportInfo.hs +++ b/src/TSN/XML/SportInfo.hs @@ -1,13 +1,63 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TypeFamilies #-} + -- | SportInfo represents a collection of DTDs that we don't really -- handle but want to make available. The raw XML gets stored in the -- database along with the XML_File_ID, but we don't parse any of it. -- --- See also: TSN.XML.GameInfo +-- This is almost completely redundant with "TSN.XML.GameInfo", but +-- the redundancy is necessary: we need separate 'Message' types so +-- that we can have separate 'DbImport' instances. It would take +-- more code/work to abstract (if it's even possible) than to +-- duplicate. -- module TSN.XML.SportInfo ( - dtds ) + dtds, + parse_xml, + sportinfo_tests, + -- * WARNING: these are private but exported to silence warnings + SportInfoConstructor(..) ) where +-- System imports. +import Data.Either ( rights ) +import Data.Time.Clock ( UTCTime ) +import Database.Groundhog ( + countAll, + migrate, + runMigration, + silentMigrationLogger ) +import Database.Groundhog.Generic ( runDbConn ) +import Database.Groundhog.Sqlite ( withSqliteConn ) +import Database.Groundhog.TH ( + groundhog, + mkPersist ) +import Test.Tasty ( TestTree, testGroup ) +import Test.Tasty.HUnit ( (@?=), testCase ) +import Text.XML.HXT.Core ( XmlTree ) +import Text.XML.HXT.DOM.ShowXml ( xshow ) + +-- Local imports. +import TSN.Codegen ( tsn_codegen_config ) +import TSN.DbImport ( + DbImport(..), + ImportResult(..), + run_dbmigrate ) +import TSN.Parse ( + parse_message, + parse_xmlfid, + parse_xml_time_stamp ) +import TSN.XmlImport ( XmlImport(..) ) +import Xml ( + FromXml(..), + ToDb(..), + unsafe_read_document ) + + -- | The DTDs for everything that we consider "Sport Info." -- -- TODO: This is the list from the old implementation. We need to @@ -17,151 +67,292 @@ where dtds :: [String] dtds = [ "CBASK_3PPctXML.dtd", - "Cbask_All_Tourn_Teams_XML.dtd", - "CBASK_AssistsXML.dtd", - "Cbask_Awards_XML.dtd", - "CBASK_BlocksXML.dtd", - "CBask_BlocksXML.dtd", - "Cbask_Conf_Standings_XML.dtd", - "Cbask_DivII_III_Indv_Stats_XML.dtd", - "Cbask_DivII_Team_Stats_XML.dtd", - "Cbask_DivIII_Team_Stats_XML.dtd", - "CBASK_FGPctXML.dtd", - "CBask_FGPctXML.dtd", - "CBASK_FoulsXML.dtd", - "CBASK_FTPctXML.dtd", - "Cbask_Indv_No_Avg_XML.dtd", - "Cbask_Indv_Scoring_XML.dtd", - "Cbask_Indv_Shooting_XML.dtd", - "CBASK_MinutesXML.dtd", - "Cbask_Polls_XML.dtd", - "CBASK_ReboundsXML.dtd", - "CBASK_ScoringLeadersXML.dtd", - "CBASK_StealsXML.dtd", - "Cbask_Team_Scoring_Rebound_Margin_XML.dtd", - "Cbask_Team_Scoring_XML.dtd", - "Cbask_Team_Shooting_Pct_XML.dtd", - "Cbask_Team_ThreePT_Made_XML.dtd", - "Cbask_Team_ThreePT_PCT_XML.dtd", - "Cbask_Team_Win_Pct_XML.dtd", - "Cbask_Top_Twenty_Five_XML.dtd", - "CBASK_TopTwentyFiveResult_XML.dtd", - "Cbask_Tourn_Awards_XML.dtd", - "Cbask_Tourn_Champs_XML.dtd", - "Cbask_Tourn_Indiv_XML.dtd", - "Cbask_Tourn_Leaders_XML.dtd", - "Cbask_Tourn_MVP_XML.dtd", - "Cbask_Tourn_Records_XML.dtd", - "LeagueScheduleXML.dtd", - "minorscoresxml.dtd", - "Minor_Baseball_League_Leaders_XML.dtd", - "Minor_Baseball_Standings_XML.dtd", - "Minor_Baseball_Transactions_XML.dtd", - "mlbbattingavgxml.dtd", - "mlbdoublesleadersxml.dtd", - "MLBGamesPlayedXML.dtd", - "MLBGIDPXML.dtd", - "MLBHitByPitchXML.dtd", - "mlbhitsleadersxml.dtd", - "mlbhomerunsxml.dtd", - "MLBHRFreqXML.dtd", - "MLBIntWalksXML.dtd", - "MLBKORateXML.dtd", - "mlbonbasepctxml.dtd", - "MLBOPSXML.dtd", - "MLBPlateAppsXML.dtd", - "mlbrbisxml.dtd", - "mlbrunsleadersxml.dtd", - "MLBSacFliesXML.dtd", - "MLBSacrificesXML.dtd", - "MLBSBSuccessXML.dtd", - "mlbsluggingpctxml.dtd", - "mlbstandxml.dtd", - "mlbstandxml_preseason.dtd", - "mlbstolenbasexml.dtd", - "mlbtotalbasesleadersxml.dtd", - "mlbtriplesleadersxml.dtd", - "MLBWalkRateXML.dtd", - "mlbwalksleadersxml.dtd", - "MLBXtraBaseHitsXML.dtd", - "MLB_ERA_Leaders.dtd", - "MLB_Fielding_XML.dtd", - "MLB_Pitching_Appearances_Leaders.dtd", - "MLB_Pitching_Balks_Leaders.dtd", - "MLB_Pitching_CG_Leaders.dtd", - "MLB_Pitching_ER_Allowed_Leaders.dtd", - "MLB_Pitching_Hits_Allowed_Leaders.dtd", - "MLB_Pitching_Hit_Batters_Leaders.dtd", - "MLB_Pitching_HR_Allowed_Leaders.dtd", - "MLB_Pitching_IP_Leaders.dtd", - "MLB_Pitching_Runs_Allowed_Leaders.dtd", - "MLB_Pitching_Saves_Leaders.dtd", - "MLB_Pitching_Shut_Outs_Leaders.dtd", - "MLB_Pitching_Starts_Leaders.dtd", - "MLB_Pitching_Strike_Outs_Leaders.dtd", - "MLB_Pitching_Walks_Leaders.dtd", - "MLB_Pitching_WHIP_Leaders.dtd", - "MLB_Pitching_Wild_Pitches_Leaders.dtd", - "MLB_Pitching_Win_Percentage_Leaders.dtd", - "MLB_Pitching_WL_Leaders.dtd", - "NBA_Team_Stats_XML.dtd", - "NBA3PPctXML.dtd", - "NBAAssistsXML.dtd", - "NBABlocksXML.dtd", - "nbaconfrecxml.dtd", - "nbadaysxml.dtd", - "nbadivisionsxml.dtd", - "NBAFGPctXML.dtd", - "NBAFoulsXML.dtd", - "NBAFTPctXML.dtd", - "NBAMinutesXML.dtd", - "NBAReboundsXML.dtd", - "NBAScorersXML.dtd", - "nbastandxml.dtd", - "NBAStealsXML.dtd", - "nbateamleadersxml.dtd", - "nbatripledoublexml.dtd", - "NBATurnoversXML.dtd", - "NCAA_Conference_Schedule_XML.dtd", - "nflfirstdownxml.dtd", - "NFLFumbleLeaderXML.dtd", - "NFLGiveTakeXML.dtd", - "NFLGrassTurfDomeOutsideXML.dtd", - "NFLInside20XML.dtd", - "NFLInterceptionLeadersXML.dtd", - "NFLKickoffsXML.dtd", - "NFLMondayNightXML.dtd", - "NFLPassingLeadersXML.dtd", - "NFLPassLeadXML.dtd", - "NFLQBStartsXML.dtd", - "NFLReceivingLeadersXML.dtd", - "NFLRushingLeadersXML.dtd", - "NFLSackLeadersXML.dtd", - "nflstandxml.dtd", - "NFLTackleFFLeadersXML.dtd", - "NFLTeamRankingsXML.dtd", - "NFLTopKickoffReturnXML.dtd", - "NFLTopPerformanceXML.dtd", - "NFLTopPuntReturnXML.dtd", - "NFLTotalYardageXML.dtd", - "NFLYardsXML.dtd", - "NFL_KickingLeaders_XML.dtd", - "NFL_NBA_Draft_XML.dtd", - "NFL_PuntingLeaders_XML.dtd", - "NFL_Roster_XML.dtd", - "NFL_Team_Stats_XML.dtd", - "Transactions_XML.dtd", - "Weekly_Sched_XML.dtd", - "WNBA_Team_Leaders_XML.dtd", - "WNBA3PPctXML.dtd", - "WNBAAssistsXML.dtd", - "WNBABlocksXML.dtd", - "WNBAFGPctXML.dtd", - "WNBAFoulsXML.dtd", - "WNBAFTPctXML.dtd", - "WNBAMinutesXML.dtd", - "WNBAReboundsXML.dtd", - "WNBAScorersXML.dtd", - "wnbastandxml.dtd", - "WNBAStealsXML.dtd", - "WNBATurnoversXML.dtd" ] + "Cbask_All_Tourn_Teams_XML.dtd", -- no dtd + "CBASK_AssistsXML.dtd", -- no dtd + "Cbask_Awards_XML.dtd", -- no dtd + "CBASK_BlocksXML.dtd", -- no dtd + "CBask_BlocksXML.dtd", -- no dtd + "Cbask_Conf_Standings_XML.dtd", -- no dtd + "Cbask_DivII_III_Indv_Stats_XML.dtd", -- no dtd + "Cbask_DivII_Team_Stats_XML.dtd", -- no dtd + "Cbask_DivIII_Team_Stats_XML.dtd", -- no dtd + "CBASK_FGPctXML.dtd", -- no dtd + "CBask_FGPctXML.dtd", -- no dtd + "CBASK_FoulsXML.dtd", -- no dtd + "CBASK_FTPctXML.dtd", -- no dtd + "Cbask_Indv_No_Avg_XML.dtd", -- no dtd + "Cbask_Indv_Scoring_XML.dtd", -- no dtd + "Cbask_Indv_Shooting_XML.dtd", -- no dtd + "CBASK_MinutesXML.dtd", -- no dtd + "Cbask_Polls_XML.dtd", -- no dtd + "CBASK_ReboundsXML.dtd", -- no dtd + "CBASK_ScoringLeadersXML.dtd", -- no dtd + "CBASK_StealsXML.dtd", -- no dtd + "Cbask_Team_Scoring_Rebound_Margin_XML.dtd", -- no dtd + "Cbask_Team_Scoring_XML.dtd", -- no dtd + "Cbask_Team_Shooting_Pct_XML.dtd", -- no dtd + "Cbask_Team_ThreePT_Made_XML.dtd", -- no dtd + "Cbask_Team_ThreePT_PCT_XML.dtd", -- no dtd + "Cbask_Team_Win_Pct_XML.dtd", -- no dtd + "Cbask_Top_Twenty_Five_XML.dtd", -- no dtd + "CBASK_TopTwentyFiveResult_XML.dtd", -- no dtd + "Cbask_Tourn_Awards_XML.dtd", -- no dtd + "Cbask_Tourn_Champs_XML.dtd", -- no dtd + "Cbask_Tourn_Indiv_XML.dtd", -- no dtd + "Cbask_Tourn_Leaders_XML.dtd", -- no dtd + "Cbask_Tourn_MVP_XML.dtd", -- no dtd + "Cbask_Tourn_Records_XML.dtd", -- no dtd + "LeagueScheduleXML.dtd", -- no dtd + "minorscoresxml.dtd", -- no dtd + "Minor_Baseball_League_Leaders_XML.dtd", -- no dtd + "Minor_Baseball_Standings_XML.dtd", -- no dtd + "Minor_Baseball_Transactions_XML.dtd", -- no dtd + "mlbbattingavgxml.dtd", -- no dtd + "mlbdoublesleadersxml.dtd", -- no dtd + "MLBGamesPlayedXML.dtd", -- no dtd + "MLBGIDPXML.dtd", -- no dtd + "MLBHitByPitchXML.dtd", -- no dtd + "mlbhitsleadersxml.dtd", -- no dtd + "mlbhomerunsxml.dtd", -- no dtd + "MLBHRFreqXML.dtd", -- no dtd + "MLBIntWalksXML.dtd", -- no dtd + "MLBKORateXML.dtd", -- no dtd + "mlbonbasepctxml.dtd", -- no dtd + "MLBOPSXML.dtd", -- no dtd + "MLBPlateAppsXML.dtd", -- no dtd + "mlbrbisxml.dtd", -- no dtd + "mlbrunsleadersxml.dtd", -- no dtd + "MLBSacFliesXML.dtd", -- no dtd + "MLBSacrificesXML.dtd", -- no dtd + "MLBSBSuccessXML.dtd", -- no dtd + "mlbsluggingpctxml.dtd", -- no dtd + "mlbstandxml.dtd", -- no dtd + "mlbstandxml_preseason.dtd", -- no dtd + "mlbstolenbasexml.dtd", -- no dtd + "mlbtotalbasesleadersxml.dtd", -- no dtd + "mlbtriplesleadersxml.dtd", -- no dtd + "MLBWalkRateXML.dtd", -- no dtd + "mlbwalksleadersxml.dtd", -- no dtd + "MLBXtraBaseHitsXML.dtd", -- no dtd + "MLB_ERA_Leaders.dtd", -- no dtd + "MLB_Fielding_XML.dtd", -- no dtd + "MLB_Pitching_Appearances_Leaders.dtd", -- no dtd + "MLB_Pitching_Balks_Leaders.dtd", -- no dtd + "MLB_Pitching_CG_Leaders.dtd", -- no dtd + "MLB_Pitching_ER_Allowed_Leaders.dtd", -- no dtd + "MLB_Pitching_Hits_Allowed_Leaders.dtd", -- no dtd + "MLB_Pitching_Hit_Batters_Leaders.dtd", -- no dtd + "MLB_Pitching_HR_Allowed_Leaders.dtd", -- no dtd + "MLB_Pitching_IP_Leaders.dtd", -- no dtd + "MLB_Pitching_Runs_Allowed_Leaders.dtd", -- no dtd + "MLB_Pitching_Saves_Leaders.dtd", -- no dtd + "MLB_Pitching_Shut_Outs_Leaders.dtd", -- no dtd + "MLB_Pitching_Starts_Leaders.dtd", -- no dtd + "MLB_Pitching_Strike_Outs_Leaders.dtd", -- no dtd + "MLB_Pitching_Walks_Leaders.dtd", -- no dtd + "MLB_Pitching_WHIP_Leaders.dtd", -- no dtd + "MLB_Pitching_Wild_Pitches_Leaders.dtd", -- no dtd + "MLB_Pitching_Win_Percentage_Leaders.dtd", -- no dtd + "MLB_Pitching_WL_Leaders.dtd", -- no dtd + "NBA_Team_Stats_XML.dtd", -- no dtd + "NBA3PPctXML.dtd", -- no dtd + "NBAAssistsXML.dtd", -- no dtd + "NBABlocksXML.dtd", -- no dtd + "nbaconfrecxml.dtd", -- no dtd + "nbadaysxml.dtd", -- no dtd + "nbadivisionsxml.dtd", -- no dtd + "NBAFGPctXML.dtd", -- no dtd + "NBAFoulsXML.dtd", -- no dtd + "NBAFTPctXML.dtd", -- no dtd + "NBAMinutesXML.dtd", -- no dtd + "NBAReboundsXML.dtd", -- no dtd + "NBAScorersXML.dtd", -- no dtd + "nbastandxml.dtd", -- no dtd + "NBAStealsXML.dtd", -- no dtd + "nbateamleadersxml.dtd", -- no dtd + "nbatripledoublexml.dtd", -- no dtd + "NBATurnoversXML.dtd", -- no dtd + "NCAA_Conference_Schedule_XML.dtd", -- no dtd + "nflfirstdownxml.dtd", -- no dtd + "NFLFumbleLeaderXML.dtd", -- no dtd + "NFLGiveTakeXML.dtd", -- no dtd + "NFLGrassTurfDomeOutsideXML.dtd", -- no dtd + "NFLInside20XML.dtd", -- no dtd + "NFLInterceptionLeadersXML.dtd", -- no dtd + "NFLKickoffsXML.dtd", -- no dtd + "NFLMondayNightXML.dtd", -- no dtd + "NFLPassingLeadersXML.dtd", -- no dtd + "NFLPassLeadXML.dtd", -- no dtd + "NFLQBStartsXML.dtd", -- no dtd + "NFLReceivingLeadersXML.dtd", -- no dtd + "NFLRushingLeadersXML.dtd", -- no dtd + "NFLSackLeadersXML.dtd", -- no dtd + "nflstandxml.dtd", -- no dtd + "NFLTackleFFLeadersXML.dtd", -- no dtd + "NFLTeamRankingsXML.dtd", -- no dtd + "NFLTopKickoffReturnXML.dtd", -- no dtd + "NFLTopPerformanceXML.dtd", -- no dtd + "NFLTopPuntReturnXML.dtd", -- no dtd + "NFLTotalYardageXML.dtd", -- no dtd + "NFLYardsXML.dtd", -- no dtd + "NFL_KickingLeaders_XML.dtd", -- no dtd + "NFL_NBA_Draft_XML.dtd", -- no dtd + "NFL_PuntingLeaders_XML.dtd", -- no dtd + "NFL_Roster_XML.dtd", -- no dtd + "NFL_Team_Stats_XML.dtd", -- no dtd + "Transactions_XML.dtd", -- no dtd + "Weekly_Sched_XML.dtd", -- no dtd + "WNBA_Team_Leaders_XML.dtd", -- no dtd + "WNBA3PPctXML.dtd", -- no dtd + "WNBAAssistsXML.dtd", -- no dtd + "WNBABlocksXML.dtd", -- no dtd + "WNBAFGPctXML.dtd", -- no dtd + "WNBAFoulsXML.dtd", -- no dtd + "WNBAFTPctXML.dtd", -- no dtd + "WNBAMinutesXML.dtd", -- no dtd + "WNBAReboundsXML.dtd", -- no dtd + "WNBAScorersXML.dtd", -- no dtd + "wnbastandxml.dtd", -- no dtd + "WNBAStealsXML.dtd", -- no dtd + "WNBATurnoversXML.dtd" -- no dtd + ] + + +-- | XML representation of a SportInfo \. +-- +data Message = + Message { + xml_dtd :: String, + xml_xml_file_id :: Int, + xml_time_stamp :: UTCTime, + xml_xml :: String } + deriving (Eq, Show) + + +-- | Attempt to parse a 'Message' from an 'XmlTree'. If we cannot, +-- we fail with an error message. +-- +parse_xml :: String -> XmlTree -> Either String Message +parse_xml dtdname xmltree = do + xmlfid <- parse_xmlfid xmltree + timestamp <- parse_xml_time_stamp xmltree + message <- parse_message xmltree + return $ Message dtdname (fromInteger xmlfid) timestamp (xshow [message]) + + +-- | Database representation of a 'Message'. +-- +data SportInfo = + SportInfo { + db_dtd :: String, + db_xml_file_id :: Int, + db_time_stamp :: UTCTime, + db_xml :: String } + + +instance ToDb Message where + -- | The database analogue of a 'Message' is an 'SportInfo'. + type Db Message = SportInfo + +instance FromXml Message where + -- | The XML to DB conversion is trivial here. + -- + from_xml Message{..} = SportInfo { + db_dtd = xml_dtd, + db_xml_file_id = xml_xml_file_id, + db_time_stamp = xml_time_stamp, + db_xml = xml_xml } + + +-- | This allows us to insert the XML representation 'Message' +-- directly. +-- +instance XmlImport Message + + +-- +-- Database code +-- + +instance DbImport Message where + dbmigrate _ = + run_dbmigrate $ migrate (undefined :: SportInfo) + + -- | We import a 'Message' by inserting the whole thing at + -- once. Nothing fancy going on here. + dbimport msg = do + insert_xml_ msg + return ImportSucceeded + + +-- | The database schema for SportInfo is trivial; all we need is for +-- the XML_File_ID to be unique. +-- +mkPersist tsn_codegen_config [groundhog| +- entity: SportInfo + constructors: + - name: SportInfo + uniques: + - name: unique_sport_info + type: constraint + # Prevent multiple imports of the same message. + fields: [db_xml_file_id] +|] + + +-- +-- Tasty Tests +-- + +-- | A list of all tests for this module. +-- +sportinfo_tests :: TestTree +sportinfo_tests = + testGroup + "SportInfo tests" + [ test_parse_xml_succeeds, + test_dbimport_succeeds ] + + +-- | Sample XML documents for SportInfo types. +-- +sportinfo_test_files :: [FilePath] +sportinfo_test_files = + [ "test/xml/sportinfo/CBASK_3PPctXML.xml" ] + + + +-- | Make sure we can parse every element of 'sportinfo_test_files'. +-- +test_parse_xml_succeeds :: TestTree +test_parse_xml_succeeds = + testGroup "parse_xml" $ map check sportinfo_test_files + where + check t = testCase t $ do + x <- unsafe_read_document t + let result = parse_xml "dummy" x + let actual = case result of -- isRight appears in base-4.7 + Left _ -> False + Right _ -> True + let expected = True + actual @?= expected + + +-- | Ensure that each element of 'sportinfo_test_files' can be imported +-- by counting the total number of database records (after +-- importing) and comparing it against the length of +-- 'sportinfo_test_files'. +-- +test_dbimport_succeeds :: TestTree +test_dbimport_succeeds = testCase "dbimport succeeds" $ do + xmltrees <- mapM unsafe_read_document sportinfo_test_files + let msgs = rights $ map (parse_xml "dummy") xmltrees + actual <- withSqliteConn ":memory:" $ runDbConn $ do + runMigration silentMigrationLogger $ do + migrate (undefined :: SportInfo) + mapM_ dbimport msgs + countAll (undefined :: SportInfo) + + actual @?= expected + where + expected = length sportinfo_test_files