]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blobdiff - src/TSN/XML/SportInfo.hs
Add SportInfo support for MLB_ERA_Leaders.dtd.
[dead/htsn-import.git] / src / TSN / XML / SportInfo.hs
index b7e269ee792f91df09925dde854fcb3590d40016..115fe105c58a5941b5479568d8cfaf166d2cb868 100644 (file)
@@ -1,14 +1,64 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE QuasiQuotes #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE TemplateHaskell #-}
+{-# LANGUAGE TypeFamilies #-}
+
 -- | SportInfo represents a collection of DTDs that we don't really
 --   handle but want to make available. The raw XML gets stored in the
 --   database along with the XML_File_ID, but we don't parse any of it.
 --
---   See also: TSN.XML.GameInfo
+--   This is almost completely redundant with "TSN.XML.GameInfo", but
+--   the redundancy is necessary: we need separate 'Message' types so
+--   that we can have separate 'DbImport' instances. It would take
+--   more code/work to abstract (if it's even possible) than to
+--   duplicate.
 --
 module TSN.XML.SportInfo (
-  dtds )
+  dtds,
+  parse_xml,
+  sport_info_tests,
+  -- * WARNING: these are private but exported to silence warnings
+  SportInfoConstructor(..) )
 where
 
--- | The DTDs for everything that we consider "Sport Info."
+-- System imports.
+import Data.Either ( rights )
+import Data.Time.Clock ( UTCTime )
+import Database.Groundhog (
+  countAll,
+  migrate,
+  runMigration,
+  silentMigrationLogger )
+import Database.Groundhog.Generic ( runDbConn )
+import Database.Groundhog.Sqlite ( withSqliteConn )
+import Database.Groundhog.TH (
+  groundhog,
+  mkPersist )
+import Test.Tasty ( TestTree, testGroup )
+import Test.Tasty.HUnit ( (@?=), testCase )
+import Text.XML.HXT.Core ( XmlTree )
+import Text.XML.HXT.DOM.ShowXml ( xshow )
+
+-- Local imports.
+import TSN.Codegen ( tsn_codegen_config )
+import TSN.DbImport (
+  DbImport(..),
+  ImportResult(..),
+  run_dbmigrate )
+import TSN.Parse (
+  parse_message,
+  parse_xmlfid,
+  parse_xml_time_stamp )
+import TSN.XmlImport ( XmlImport(..) )
+import Xml (
+  FromXml(..),
+  ToDb(..),
+  unsafe_read_document )
+
+
+-- | The DTDs for everything that we consider \"Sport Info.\"
 --
 --   TODO: This is the list from the old implementation. We need to
 --   make sure that we are really receiving XML for these DTDs
@@ -21,26 +71,24 @@ dtds =
     "CBASK_AssistsXML.dtd",
     "Cbask_Awards_XML.dtd",
     "CBASK_BlocksXML.dtd",
-    "CBask_BlocksXML.dtd",
     "Cbask_Conf_Standings_XML.dtd",
     "Cbask_DivII_III_Indv_Stats_XML.dtd",
     "Cbask_DivII_Team_Stats_XML.dtd",
     "Cbask_DivIII_Team_Stats_XML.dtd",
     "CBASK_FGPctXML.dtd",
-    "CBask_FGPctXML.dtd",
     "CBASK_FoulsXML.dtd",
     "CBASK_FTPctXML.dtd",
-    "Cbask_Indv_No_Avg_XML.dtd",
+    "Cbask_Indv_No_Avg_XML.dtd", -- no dtd
     "Cbask_Indv_Scoring_XML.dtd",
-    "Cbask_Indv_Shooting_XML.dtd",
+    "Cbask_Indv_Shooting_XML.dtd", -- no dtd
     "CBASK_MinutesXML.dtd",
     "Cbask_Polls_XML.dtd",
     "CBASK_ReboundsXML.dtd",
     "CBASK_ScoringLeadersXML.dtd",
-    "CBASK_StealsXML.dtd",
-    "Cbask_Team_Scoring_Rebound_Margin_XML.dtd",
-    "Cbask_Team_Scoring_XML.dtd",
-    "Cbask_Team_Shooting_Pct_XML.dtd",
+    "CBASK_StealsXML.dtd", -- no dtd
+    "Cbask_Team_Scoring_Rebound_Margin_XML.dtd", -- no dtd
+    "Cbask_Team_Scoring_XML.dtd", -- no dtd
+    "Cbask_Team_Shooting_Pct_XML.dtd", -- no dtd
     "Cbask_Team_ThreePT_Made_XML.dtd",
     "Cbask_Team_ThreePT_PCT_XML.dtd",
     "Cbask_Team_Win_Pct_XML.dtd",
@@ -85,83 +133,287 @@ dtds =
     "mlbwalksleadersxml.dtd",
     "MLBXtraBaseHitsXML.dtd",
     "MLB_ERA_Leaders.dtd",
-    "MLB_Fielding_XML.dtd",
+    "MLB_Fielding_XML.dtd", -- no dtd
     "MLB_Pitching_Appearances_Leaders.dtd",
-    "MLB_Pitching_Balks_Leaders.dtd",
-    "MLB_Pitching_CG_Leaders.dtd",
-    "MLB_Pitching_ER_Allowed_Leaders.dtd",
-    "MLB_Pitching_Hits_Allowed_Leaders.dtd",
-    "MLB_Pitching_Hit_Batters_Leaders.dtd",
-    "MLB_Pitching_HR_Allowed_Leaders.dtd",
-    "MLB_Pitching_IP_Leaders.dtd",
-    "MLB_Pitching_Runs_Allowed_Leaders.dtd",
-    "MLB_Pitching_Saves_Leaders.dtd",
-    "MLB_Pitching_Shut_Outs_Leaders.dtd",
-    "MLB_Pitching_Starts_Leaders.dtd",
-    "MLB_Pitching_Strike_Outs_Leaders.dtd",
-    "MLB_Pitching_Walks_Leaders.dtd",
-    "MLB_Pitching_WHIP_Leaders.dtd",
-    "MLB_Pitching_Wild_Pitches_Leaders.dtd",
-    "MLB_Pitching_Win_Percentage_Leaders.dtd",
-    "MLB_Pitching_WL_Leaders.dtd",
-    "NBA_Team_Stats_XML.dtd",
-    "NBA3PPctXML.dtd",
-    "NBAAssistsXML.dtd",
-    "NBABlocksXML.dtd",
-    "nbaconfrecxml.dtd",
-    "nbadaysxml.dtd",
-    "nbadivisionsxml.dtd",
-    "NBAFGPctXML.dtd",
-    "NBAFoulsXML.dtd",
-    "NBAFTPctXML.dtd",
-    "NBAMinutesXML.dtd",
-    "NBAReboundsXML.dtd",
-    "NBAScorersXML.dtd",
-    "nbastandxml.dtd",
-    "NBAStealsXML.dtd",
-    "nbateamleadersxml.dtd",
-    "nbatripledoublexml.dtd",
-    "NBATurnoversXML.dtd",
-    "NCAA_Conference_Schedule_XML.dtd",
-    "nflfirstdownxml.dtd",
-    "NFLFumbleLeaderXML.dtd",
-    "NFLGiveTakeXML.dtd",
-    "NFLGrassTurfDomeOutsideXML.dtd",
-    "NFLInside20XML.dtd",
-    "NFLInterceptionLeadersXML.dtd",
-    "NFLKickoffsXML.dtd",
-    "NFLMondayNightXML.dtd",
-    "NFLPassingLeadersXML.dtd",
-    "NFLPassLeadXML.dtd",
-    "NFLQBStartsXML.dtd",
-    "NFLReceivingLeadersXML.dtd",
-    "NFLRushingLeadersXML.dtd",
-    "NFLSackLeadersXML.dtd",
-    "nflstandxml.dtd",
-    "NFLTackleFFLeadersXML.dtd",
-    "NFLTeamRankingsXML.dtd",
-    "NFLTopKickoffReturnXML.dtd",
-    "NFLTopPerformanceXML.dtd",
-    "NFLTopPuntReturnXML.dtd",
-    "NFLTotalYardageXML.dtd",
-    "NFLYardsXML.dtd",
-    "NFL_KickingLeaders_XML.dtd",
-    "NFL_NBA_Draft_XML.dtd",
-    "NFL_PuntingLeaders_XML.dtd",
-    "NFL_Roster_XML.dtd",
-    "NFL_Team_Stats_XML.dtd",
-    "Transactions_XML.dtd",
-    "Weekly_Sched_XML.dtd",
-    "WNBA_Team_Leaders_XML.dtd",
-    "WNBA3PPctXML.dtd",
-    "WNBAAssistsXML.dtd",
-    "WNBABlocksXML.dtd",
-    "WNBAFGPctXML.dtd",
-    "WNBAFoulsXML.dtd",
-    "WNBAFTPctXML.dtd",
-    "WNBAMinutesXML.dtd",
-    "WNBAReboundsXML.dtd",
-    "WNBAScorersXML.dtd",
-    "wnbastandxml.dtd",
-    "WNBAStealsXML.dtd",
-    "WNBATurnoversXML.dtd" ]
+    "MLB_Pitching_Balks_Leaders.dtd", -- no dtd
+    "MLB_Pitching_CG_Leaders.dtd", -- no dtd
+    "MLB_Pitching_ER_Allowed_Leaders.dtd", -- no dtd
+    "MLB_Pitching_Hits_Allowed_Leaders.dtd", -- no dtd
+    "MLB_Pitching_Hit_Batters_Leaders.dtd", -- no dtd
+    "MLB_Pitching_HR_Allowed_Leaders.dtd", -- no dtd
+    "MLB_Pitching_IP_Leaders.dtd", -- no dtd
+    "MLB_Pitching_Runs_Allowed_Leaders.dtd", -- no dtd
+    "MLB_Pitching_Saves_Leaders.dtd", -- no dtd
+    "MLB_Pitching_Shut_Outs_Leaders.dtd", -- no dtd
+    "MLB_Pitching_Starts_Leaders.dtd", -- no dtd
+    "MLB_Pitching_Strike_Outs_Leaders.dtd", -- no dtd
+    "MLB_Pitching_Walks_Leaders.dtd", -- no dtd
+    "MLB_Pitching_WHIP_Leaders.dtd", -- no dtd
+    "MLB_Pitching_Wild_Pitches_Leaders.dtd", -- no dtd
+    "MLB_Pitching_Win_Percentage_Leaders.dtd", -- no dtd
+    "MLB_Pitching_WL_Leaders.dtd", -- no dtd
+    "NBA_Team_Stats_XML.dtd", -- no dtd
+    "NBA3PPctXML.dtd", -- no dtd
+    "NBAAssistsXML.dtd", -- no dtd
+    "NBABlocksXML.dtd", -- no dtd
+    "nbaconfrecxml.dtd", -- no dtd
+    "nbadaysxml.dtd", -- no dtd
+    "nbadivisionsxml.dtd", -- no dtd
+    "NBAFGPctXML.dtd", -- no dtd
+    "NBAFoulsXML.dtd", -- no dtd
+    "NBAFTPctXML.dtd", -- no dtd
+    "NBAMinutesXML.dtd", -- no dtd
+    "NBAReboundsXML.dtd", -- no dtd
+    "NBAScorersXML.dtd", -- no dtd
+    "nbastandxml.dtd", -- no dtd
+    "NBAStealsXML.dtd", -- no dtd
+    "nbateamleadersxml.dtd", -- no dtd
+    "nbatripledoublexml.dtd", -- no dtd
+    "NBATurnoversXML.dtd", -- no dtd
+    "NCAA_Conference_Schedule_XML.dtd", -- no dtd
+    "nflfirstdownxml.dtd", -- no dtd
+    "NFLFumbleLeaderXML.dtd", -- no dtd
+    "NFLGiveTakeXML.dtd", -- no dtd
+    "NFLGrassTurfDomeOutsideXML.dtd", -- no dtd
+    "NFLInside20XML.dtd", -- no dtd
+    "NFLInterceptionLeadersXML.dtd", -- no dtd
+    "NFLKickoffsXML.dtd", -- no dtd
+    "NFLMondayNightXML.dtd", -- no dtd
+    "NFLPassingLeadersXML.dtd", -- no dtd
+    "NFLPassLeadXML.dtd", -- no dtd
+    "NFLQBStartsXML.dtd", -- no dtd
+    "NFLReceivingLeadersXML.dtd", -- no dtd
+    "NFLRushingLeadersXML.dtd", -- no dtd
+    "NFLSackLeadersXML.dtd", -- no dtd
+    "nflstandxml.dtd", -- no dtd
+    "NFLTackleFFLeadersXML.dtd", -- no dtd
+    "NFLTeamRankingsXML.dtd", -- no dtd
+    "NFLTopKickoffReturnXML.dtd", -- no dtd
+    "NFLTopPerformanceXML.dtd", -- no dtd
+    "NFLTopPuntReturnXML.dtd", -- no dtd
+    "NFLTotalYardageXML.dtd", -- no dtd
+    "NFLYardsXML.dtd", -- no dtd
+    "NFL_KickingLeaders_XML.dtd", -- no dtd
+    "NFL_NBA_Draft_XML.dtd", -- no dtd
+    "NFL_PuntingLeaders_XML.dtd", -- no dtd
+    "NFL_Roster_XML.dtd", -- no dtd
+    "NFL_Team_Stats_XML.dtd", -- no dtd
+    "Transactions_XML.dtd", -- no dtd
+    "Weekly_Sched_XML.dtd", -- no dtd
+    "WNBA_Team_Leaders_XML.dtd", -- no dtd
+    "WNBA3PPctXML.dtd", -- no dtd
+    "WNBAAssistsXML.dtd", -- no dtd
+    "WNBABlocksXML.dtd", -- no dtd
+    "WNBAFGPctXML.dtd", -- no dtd
+    "WNBAFoulsXML.dtd", -- no dtd
+    "WNBAFTPctXML.dtd", -- no dtd
+    "WNBAMinutesXML.dtd", -- no dtd
+    "WNBAReboundsXML.dtd", -- no dtd
+    "WNBAScorersXML.dtd", -- no dtd
+    "wnbastandxml.dtd", -- no dtd
+    "WNBAStealsXML.dtd", -- no dtd
+    "WNBATurnoversXML.dtd" -- no dtd
+  ]
+
+
+-- | XML representation of a SportInfo \<message\>.
+--
+data Message =
+  Message {
+    xml_dtd :: String,
+    xml_xml_file_id :: Int,
+    xml_time_stamp :: UTCTime,
+    xml_xml :: String }
+  deriving (Eq, Show)
+
+
+-- | Attempt to parse a 'Message' from an 'XmlTree'. If we cannot,
+--   we fail with an error message.
+--
+parse_xml :: String -> XmlTree -> Either String Message
+parse_xml dtdname xmltree = do
+  xmlfid <- parse_xmlfid xmltree
+  timestamp <- parse_xml_time_stamp xmltree
+  message <- parse_message xmltree
+  return $ Message dtdname (fromInteger xmlfid) timestamp (xshow [message])
+
+
+-- | Database representation of a 'Message'.
+--
+data SportInfo =
+  SportInfo {
+    db_dtd :: String,
+    db_xml_file_id :: Int,
+    db_time_stamp :: UTCTime,
+    db_xml :: String }
+
+
+instance ToDb Message where
+  -- | The database analogue of a 'Message' is an 'SportInfo'.
+  type Db Message = SportInfo
+
+instance FromXml Message where
+  -- | The XML to DB conversion is trivial here.
+  --
+  from_xml Message{..} = SportInfo {
+                           db_dtd = xml_dtd,
+                           db_xml_file_id = xml_xml_file_id,
+                           db_time_stamp = xml_time_stamp,
+                           db_xml = xml_xml }
+
+
+-- | This allows us to insert the XML representation 'Message'
+--   directly.
+--
+instance XmlImport Message
+
+
+--
+-- Database code
+--
+
+instance DbImport Message where
+  dbmigrate _ =
+    run_dbmigrate $ migrate (undefined :: SportInfo)
+
+  -- | We import a 'Message' by inserting the whole thing at
+  --   once. Nothing fancy going on here.
+  dbimport msg = do
+    insert_xml_ msg
+    return ImportSucceeded
+
+
+-- | The database schema for SportInfo is trivial; all we need is for
+--   the XML_File_ID to be unique.
+--
+mkPersist tsn_codegen_config [groundhog|
+- entity: SportInfo
+  constructors:
+    - name: SportInfo
+      uniques:
+        - name: unique_sport_info
+          type: constraint
+          # Prevent multiple imports of the same message.
+          fields: [db_xml_file_id]
+|]
+
+
+--
+-- Tasty Tests
+--
+
+-- | A list of all tests for this module.
+--
+sport_info_tests :: TestTree
+sport_info_tests =
+  testGroup
+    "SportInfo tests"
+    [ test_parse_xml_succeeds,
+      test_dbimport_succeeds ]
+
+
+-- | Sample XML documents for SportInfo types.
+--
+sport_info_test_files :: [FilePath]
+sport_info_test_files =
+  map ("test/xml/sportinfo/" ++) [
+    "CBASK_3PPctXML.xml",
+    "Cbask_All_Tourn_Teams_XML.xml",
+    "CBASK_AssistsXML.xml",
+    "Cbask_Awards_XML.xml",
+    "CBASK_BlocksXML.xml",
+    "Cbask_Conf_Standings_XML.xml",
+    "Cbask_DivII_III_Indv_Stats_XML.xml",
+    "Cbask_DivII_Team_Stats_XML.xml",
+    "Cbask_DivIII_Team_Stats_XML.xml",
+    "CBASK_FGPctXML.xml",
+    "CBASK_FoulsXML.xml",
+    "CBASK_FTPctXML.xml",
+    "Cbask_Indv_Scoring_XML.xml",
+    "CBASK_MinutesXML.xml",
+    "Cbask_Polls_XML.xml",
+    "CBASK_ReboundsXML.xml",
+    "CBASK_ScoringLeadersXML.xml",
+    "Cbask_Team_ThreePT_Made_XML.xml",
+    "Cbask_Team_ThreePT_PCT_XML.xml",
+    "Cbask_Team_Win_Pct_XML.xml",
+    "Cbask_Top_Twenty_Five_XML.xml",
+    "CBASK_TopTwentyFiveResult_XML.xml",
+    "Cbask_Tourn_Awards_XML.xml",
+    "Cbask_Tourn_Champs_XML.xml",
+    "Cbask_Tourn_Indiv_XML.xml",
+    "Cbask_Tourn_Leaders_XML.xml",
+    "Cbask_Tourn_MVP_XML.xml",
+    "Cbask_Tourn_Records_XML.xml",
+    "LeagueScheduleXML.xml",
+    "minorscoresxml.xml",
+    "Minor_Baseball_League_Leaders_XML.xml",
+    "Minor_Baseball_Standings_XML.xml",
+    "Minor_Baseball_Transactions_XML.xml",
+    "mlbbattingavgxml.xml",
+    "mlbdoublesleadersxml.xml",
+    "MLBGamesPlayedXML.xml",
+    "MLBGIDPXML.xml",
+    "MLBHitByPitchXML.xml",
+    "mlbhitsleadersxml.xml",
+    "mlbhomerunsxml.xml",
+    "MLBHRFreqXML.xml",
+    "MLBIntWalksXML.xml",
+    "MLBKORateXML.xml",
+    "mlbonbasepctxml.xml",
+    "MLBOPSXML.xml",
+    "MLBPlateAppsXML.xml",
+    "mlbrbisxml.xml",
+    "mlbrunsleadersxml.xml",
+    "MLBSacFliesXML.xml",
+    "MLBSacrificesXML.xml",
+    "MLBSBSuccessXML.xml",
+    "mlbsluggingpctxml.xml",
+    "mlbstandxml.xml",
+    "mlbstandxml_preseason.xml",
+    "mlbstolenbasexml.xml",
+    "mlbtotalbasesleadersxml.xml",
+    "mlbtriplesleadersxml.xml",
+    "MLBWalkRateXML.xml",
+    "mlbwalksleadersxml.xml",
+    "MLBXtraBaseHitsXML.xml",
+    "MLB_Pitching_Appearances_Leaders.xml",
+    "MLB_ERA_Leaders.xml"
+  ]
+
+
+
+-- | Make sure we can parse every element of 'sport_info_test_files'.
+--
+test_parse_xml_succeeds :: TestTree
+test_parse_xml_succeeds =
+  testGroup "parse_xml" $ map check sport_info_test_files
+  where
+    check t = testCase t $ do
+      x <- unsafe_read_document t
+      let result = parse_xml "dummy" x
+      let actual = case result of -- isRight appears in base-4.7
+                     Left _  -> False
+                     Right _ -> True
+      let expected = True
+      actual @?= expected
+
+
+-- | Ensure that each element of 'sport_info_test_files' can be imported
+--   by counting the total number of database records (after
+--   importing) and comparing it against the length of
+--   'sport_info_test_files'.
+--
+test_dbimport_succeeds :: TestTree
+test_dbimport_succeeds = testCase "dbimport succeeds" $ do
+  xmltrees <- mapM unsafe_read_document sport_info_test_files
+  let msgs = rights $ map (parse_xml "dummy") xmltrees
+  actual <- withSqliteConn ":memory:" $ runDbConn $ do
+                runMigration silentMigrationLogger $ do
+                  migrate (undefined :: SportInfo)
+                mapM_ dbimport msgs
+                countAll (undefined :: SportInfo)
+
+  actual @?= expected
+  where
+    expected = length sport_info_test_files