X-Git-Url: http://gitweb.michael.orlitzky.com/?p=dead%2Fhtsn-import.git;a=blobdiff_plain;f=src%2FTSN%2FXML%2FSportInfo.hs;h=76a7c94a4910b261e740eb1135b16fb839fb5af0;hp=ef19804bf16618535a79ddac95dc0921af7a03a6;hb=a13dffb2bed8ca56164430e1b11731f4ab1e7d5b;hpb=93187157206341167da29536186f28653091dd55 diff --git a/src/TSN/XML/SportInfo.hs b/src/TSN/XML/SportInfo.hs index ef19804..76a7c94 100644 --- a/src/TSN/XML/SportInfo.hs +++ b/src/TSN/XML/SportInfo.hs @@ -10,7 +10,7 @@ -- database along with the XML_File_ID, but we don't parse any of it. -- -- This is almost completely redundant with "TSN.XML.GameInfo", but --- the redundancy is necessary: we need separate 'Message' types so +-- the redundancy is necessary: we need separate message types so -- that we can have separate 'DbImport' instances. It would take -- more code/work to abstract (if it's even possible) than to -- duplicate. @@ -25,15 +25,18 @@ where -- System imports. import Data.Either ( rights ) +import Data.String.Utils ( replace ) import Data.Time.Clock ( UTCTime ) import Database.Groundhog ( countAll, + insert_, migrate, runMigration, silentMigrationLogger ) import Database.Groundhog.Generic ( runDbConn ) import Database.Groundhog.Sqlite ( withSqliteConn ) import Database.Groundhog.TH ( + defaultCodegenConfig, groundhog, mkPersist ) import Test.Tasty ( TestTree, testGroup ) @@ -42,28 +45,20 @@ import Text.XML.HXT.Core ( XmlTree ) import Text.XML.HXT.DOM.ShowXml ( xshow ) -- Local imports. -import TSN.Codegen ( tsn_codegen_config ) import TSN.DbImport ( DbImport(..), ImportResult(..), run_dbmigrate ) import TSN.Parse ( + ParseError, parse_message, parse_xmlfid, parse_xml_time_stamp ) -import TSN.XmlImport ( XmlImport(..) ) -import Xml ( - FromXml(..), - ToDb(..), - unsafe_read_document ) +import Xml ( unsafe_read_document ) -- | The DTDs for everything that we consider \"Sport Info.\" -- --- TODO: This is the list from the old implementation. We need to --- make sure that we are really receiving XML for these DTDs --- (i.e. the names are correct). --- dtds :: [String] dtds = [ "CBASK_3PPctXML.dtd", @@ -78,17 +73,11 @@ dtds = "CBASK_FGPctXML.dtd", "CBASK_FoulsXML.dtd", "CBASK_FTPctXML.dtd", - "Cbask_Indv_No_Avg_XML.dtd", -- no dtd "Cbask_Indv_Scoring_XML.dtd", - "Cbask_Indv_Shooting_XML.dtd", -- no dtd "CBASK_MinutesXML.dtd", "Cbask_Polls_XML.dtd", "CBASK_ReboundsXML.dtd", "CBASK_ScoringLeadersXML.dtd", - "CBASK_StealsXML.dtd", -- no dtd - "Cbask_Team_Scoring_Rebound_Margin_XML.dtd", -- no dtd - "Cbask_Team_Scoring_XML.dtd", -- no dtd - "Cbask_Team_Shooting_Pct_XML.dtd", -- no dtd "Cbask_Team_ThreePT_Made_XML.dtd", "Cbask_Team_ThreePT_PCT_XML.dtd", "Cbask_Team_Win_Pct_XML.dtd", @@ -163,138 +152,107 @@ dtds = "NBAFoulsXML.dtd", "NBAFTPctXML.dtd", "NBAMinutesXML.dtd", - "NBAReboundsXML.dtd", -- no dtd - "NBAScorersXML.dtd", -- no dtd - "nbastandxml.dtd", -- no dtd - "NBAStealsXML.dtd", -- no dtd - "nbateamleadersxml.dtd", -- no dtd - "nbatripledoublexml.dtd", -- no dtd - "NBATurnoversXML.dtd", -- no dtd - "NCAA_Conference_Schedule_XML.dtd", -- no dtd - "nflfirstdownxml.dtd", -- no dtd - "NFLFumbleLeaderXML.dtd", -- no dtd - "NFLGiveTakeXML.dtd", -- no dtd - "NFLGrassTurfDomeOutsideXML.dtd", -- no dtd - "NFLInside20XML.dtd", -- no dtd - "NFLInterceptionLeadersXML.dtd", -- no dtd - "NFLKickoffsXML.dtd", -- no dtd - "NFLMondayNightXML.dtd", -- no dtd - "NFLPassingLeadersXML.dtd", -- no dtd - "NFLPassLeadXML.dtd", -- no dtd - "NFLQBStartsXML.dtd", -- no dtd - "NFLReceivingLeadersXML.dtd", -- no dtd - "NFLRushingLeadersXML.dtd", -- no dtd - "NFLSackLeadersXML.dtd", -- no dtd - "nflstandxml.dtd", -- no dtd - "NFLTackleFFLeadersXML.dtd", -- no dtd - "NFLTeamRankingsXML.dtd", -- no dtd - "NFLTopKickoffReturnXML.dtd", -- no dtd - "NFLTopPerformanceXML.dtd", -- no dtd - "NFLTopPuntReturnXML.dtd", -- no dtd - "NFLTotalYardageXML.dtd", -- no dtd - "NFLYardsXML.dtd", -- no dtd - "NFL_KickingLeaders_XML.dtd", -- no dtd - "NFL_NBA_Draft_XML.dtd", -- no dtd - "NFL_PuntingLeaders_XML.dtd", -- no dtd - "NFL_Roster_XML.dtd", -- no dtd - "NFL_Team_Stats_XML.dtd", -- no dtd - "Transactions_XML.dtd", -- no dtd - "Weekly_Sched_XML.dtd", -- no dtd - "WNBA_Team_Leaders_XML.dtd", -- no dtd - "WNBA3PPctXML.dtd", -- no dtd - "WNBAAssistsXML.dtd", -- no dtd - "WNBABlocksXML.dtd", -- no dtd - "WNBAFGPctXML.dtd", -- no dtd - "WNBAFoulsXML.dtd", -- no dtd - "WNBAFTPctXML.dtd", -- no dtd - "WNBAMinutesXML.dtd", -- no dtd - "WNBAReboundsXML.dtd", -- no dtd - "WNBAScorersXML.dtd", -- no dtd - "wnbastandxml.dtd", -- no dtd - "WNBAStealsXML.dtd", -- no dtd - "WNBATurnoversXML.dtd" -- no dtd - ] - - --- | XML representation of a SportInfo \. + "NBAReboundsXML.dtd", + "NBAScorersXML.dtd", + "nbastandxml.dtd", + "NBAStealsXML.dtd", + "nbateamleadersxml.dtd", + "nbatripledoublexml.dtd", + "NBATurnoversXML.dtd", + "NCAA_Conference_Schedule_XML.dtd", + "nflfirstdownxml.dtd", + "NFLFumbleLeaderXML.dtd", + "NFLGrassTurfDomeOutsideXML.dtd", + "NFLGiveTakeXML.dtd", + "NFLInside20XML.dtd", + "NFLInterceptionLeadersXML.dtd", + "NFLKickoffsXML.dtd", + "NFLMondayNightXML.dtd", + "NFLPassingLeadersXML.dtd", + "NFLPassLeadXML.dtd", + "NFLQBStartsXML.dtd", + "NFLReceivingLeadersXML.dtd", + "NFLRushingLeadersXML.dtd", + "NFLSackLeadersXML.dtd", + "nflstandxml.dtd", + "NFLTeamRankingsXML.dtd", + "NFLTopKickoffReturnXML.dtd", + "NFLTopPerformanceXML.dtd", + "NFLTopPuntReturnXML.dtd", + "NFLTotalYardageXML.dtd", + "NFL_KickingLeaders_XML.dtd", + "NFL_NBA_Draft_XML.dtd", + "NFL_Roster_XML.dtd", + "NFLTackleFFLeadersXML.dtd", + "NFL_Team_Stats_XML.dtd", + "Transactions_XML.dtd", + "Weekly_Sched_XML.dtd", + "WNBA_Team_Leaders_XML.dtd", + "WNBA3PPctXML.dtd", + "WNBAAssistsXML.dtd", + "WNBABlocksXML.dtd", + "WNBAFGPctXML.dtd", + "WNBAFoulsXML.dtd", + "WNBAFTPctXML.dtd", + "WNBAMinutesXML.dtd", + "WNBAReboundsXML.dtd", + "WNBAScorersXML.dtd", + "wnbastandxml.dtd", + "WNBAStealsXML.dtd", + "WNBATurnoversXML.dtd" ] + + +-- | This serves as both the database and XML representation of a +-- SportInfo \. -- -data Message = - Message { - xml_dtd :: String, - xml_xml_file_id :: Int, - xml_time_stamp :: UTCTime, - xml_xml :: String } +data SportInfo = + SportInfo { + dtd :: String, + xml_file_id :: Int, + time_stamp :: UTCTime, + xml :: String } deriving (Eq, Show) --- | Attempt to parse a 'Message' from an 'XmlTree'. If we cannot, +-- | Attempt to parse a 'SportInfo' from an 'XmlTree'. If we cannot, -- we fail with an error message. -- -parse_xml :: String -> XmlTree -> Either String Message +parse_xml :: String -> XmlTree -> Either ParseError SportInfo parse_xml dtdname xmltree = do xmlfid <- parse_xmlfid xmltree timestamp <- parse_xml_time_stamp xmltree message <- parse_message xmltree - return $ Message dtdname (fromInteger xmlfid) timestamp (xshow [message]) - - --- | Database representation of a 'Message'. --- -data SportInfo = - SportInfo { - db_dtd :: String, - db_xml_file_id :: Int, - db_time_stamp :: UTCTime, - db_xml :: String } - - -instance ToDb Message where - -- | The database analogue of a 'Message' is an 'SportInfo'. - type Db Message = SportInfo - -instance FromXml Message where - -- | The XML to DB conversion is trivial here. - -- - from_xml Message{..} = SportInfo { - db_dtd = xml_dtd, - db_xml_file_id = xml_xml_file_id, - db_time_stamp = xml_time_stamp, - db_xml = xml_xml } - - --- | This allows us to insert the XML representation 'Message' --- directly. --- -instance XmlImport Message + return $ SportInfo dtdname xmlfid timestamp (xshow [message]) -- -- Database code -- -instance DbImport Message where +instance DbImport SportInfo where dbmigrate _ = run_dbmigrate $ migrate (undefined :: SportInfo) - -- | We import a 'Message' by inserting the whole thing at + -- | We import a 'SportInfo' by inserting the whole thing at -- once. Nothing fancy going on here. dbimport msg = do - insert_xml_ msg + insert_ msg return ImportSucceeded -- | The database schema for SportInfo is trivial; all we need is for -- the XML_File_ID to be unique. -- -mkPersist tsn_codegen_config [groundhog| +mkPersist defaultCodegenConfig [groundhog| - entity: SportInfo + dbName: sport_info constructors: - name: SportInfo uniques: - name: unique_sport_info type: constraint # Prevent multiple imports of the same message. - fields: [db_xml_file_id] + fields: [xml_file_id] |] @@ -308,107 +266,40 @@ sport_info_tests :: TestTree sport_info_tests = testGroup "SportInfo tests" - [ test_parse_xml_succeeds, + [ test_accessors, + test_parse_xml_succeeds, test_dbimport_succeeds ] +-- | Make sure the accessors work and that we can parse one file. Ok, +-- so the real point of this is to make the unused fields (dtd, xml, +-- ...) warning go away without having to mangle the groundhog code. +-- +test_accessors :: TestTree +test_accessors = testCase "we can access a parsed sport_info" $ do + xmltree <- unsafe_read_document "test/xml/sportinfo/wnbastandxml.xml" + let Right t = parse_xml "wnbastandxml.dtd" xmltree + let a1 = dtd t + let ex1 = "wnbastandxml.dtd" + let a2 = xml_file_id t + let ex2 = 2011 + let a3 = show $ time_stamp t + let ex3 = "2009-09-27 19:50:00 UTC" + let a4 = take 9 (xml t) + let ex4 = "" + let actual = (a1,a2,a3,a4) + let expected = (ex1,ex2,ex3,ex4) + actual @?= expected + + -- | Sample XML documents for SportInfo types. -- sport_info_test_files :: [FilePath] sport_info_test_files = - map ("test/xml/sportinfo/" ++) [ - "CBASK_3PPctXML.xml", - "Cbask_All_Tourn_Teams_XML.xml", - "CBASK_AssistsXML.xml", - "Cbask_Awards_XML.xml", - "CBASK_BlocksXML.xml", - "Cbask_Conf_Standings_XML.xml", - "Cbask_DivII_III_Indv_Stats_XML.xml", - "Cbask_DivII_Team_Stats_XML.xml", - "Cbask_DivIII_Team_Stats_XML.xml", - "CBASK_FGPctXML.xml", - "CBASK_FoulsXML.xml", - "CBASK_FTPctXML.xml", - "Cbask_Indv_Scoring_XML.xml", - "CBASK_MinutesXML.xml", - "Cbask_Polls_XML.xml", - "CBASK_ReboundsXML.xml", - "CBASK_ScoringLeadersXML.xml", - "Cbask_Team_ThreePT_Made_XML.xml", - "Cbask_Team_ThreePT_PCT_XML.xml", - "Cbask_Team_Win_Pct_XML.xml", - "Cbask_Top_Twenty_Five_XML.xml", - "CBASK_TopTwentyFiveResult_XML.xml", - "Cbask_Tourn_Awards_XML.xml", - "Cbask_Tourn_Champs_XML.xml", - "Cbask_Tourn_Indiv_XML.xml", - "Cbask_Tourn_Leaders_XML.xml", - "Cbask_Tourn_MVP_XML.xml", - "Cbask_Tourn_Records_XML.xml", - "LeagueScheduleXML.xml", - "minorscoresxml.xml", - "Minor_Baseball_League_Leaders_XML.xml", - "Minor_Baseball_Standings_XML.xml", - "Minor_Baseball_Transactions_XML.xml", - "mlbbattingavgxml.xml", - "mlbdoublesleadersxml.xml", - "MLBGamesPlayedXML.xml", - "MLBGIDPXML.xml", - "MLBHitByPitchXML.xml", - "mlbhitsleadersxml.xml", - "mlbhomerunsxml.xml", - "MLBHRFreqXML.xml", - "MLBIntWalksXML.xml", - "MLBKORateXML.xml", - "mlbonbasepctxml.xml", - "MLBOPSXML.xml", - "MLBPlateAppsXML.xml", - "mlbrbisxml.xml", - "mlbrunsleadersxml.xml", - "MLBSacFliesXML.xml", - "MLBSacrificesXML.xml", - "MLBSBSuccessXML.xml", - "mlbsluggingpctxml.xml", - "mlbstandxml.xml", - "mlbstandxml_preseason.xml", - "mlbstolenbasexml.xml", - "mlbtotalbasesleadersxml.xml", - "mlbtriplesleadersxml.xml", - "MLBWalkRateXML.xml", - "mlbwalksleadersxml.xml", - "MLBXtraBaseHitsXML.xml", - "MLB_ERA_Leaders.xml", - "MLB_Pitching_Appearances_Leaders.xml", - "MLB_Pitching_Balks_Leaders.xml", - "MLB_Pitching_CG_Leaders.xml", - "MLB_Pitching_ER_Allowed_Leaders.xml", - "MLB_Pitching_Hits_Allowed_Leaders.xml", - "MLB_Pitching_Hit_Batters_Leaders.xml", - "MLB_Pitching_HR_Allowed_Leaders.xml", - "MLB_Pitching_IP_Leaders.xml", - "MLB_Pitching_Runs_Allowed_Leaders.xml", - "MLB_Pitching_Saves_Leaders.xml", - "MLB_Pitching_Shut_Outs_Leaders.xml", - "MLB_Pitching_Starts_Leaders.xml", - "MLB_Pitching_Strike_Outs_Leaders.xml", - "MLB_Pitching_Walks_Leaders.xml", - "MLB_Pitching_WHIP_Leaders.xml", - "MLB_Pitching_Wild_Pitches_Leaders.xml", - "MLB_Pitching_Win_Percentage_Leaders.xml", - "MLB_Pitching_WL_Leaders.xml", - "NBA_Team_Stats_XML.xml", - "NBA3PPctXML.xml", - "NBAAssistsXML.xml", - "NBABlocksXML.xml", - "nbaconfrecxml.xml", - "nbadaysxml.xml", - "nbadivisionsxml.xml", - "NBAFGPctXML.xml", - "NBAFoulsXML.xml", - "NBAFTPctXML.xml", - "NBAMinutesXML.xml" - ] - + map (change_suffix . add_path) dtds + where + add_path = ("test/xml/sportinfo/" ++ ) + change_suffix = replace ".dtd" ".xml" -- | Make sure we can parse every element of 'sport_info_test_files'. @@ -437,7 +328,7 @@ test_dbimport_succeeds = testCase "dbimport succeeds" $ do xmltrees <- mapM unsafe_read_document sport_info_test_files let msgs = rights $ map (parse_xml "dummy") xmltrees actual <- withSqliteConn ":memory:" $ runDbConn $ do - runMigration silentMigrationLogger $ do + runMigration silentMigrationLogger $ migrate (undefined :: SportInfo) mapM_ dbimport msgs countAll (undefined :: SportInfo)