]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blobdiff - src/TSN/XML/SportInfo.hs
Add SportInfo support for NFLTopPuntReturnXML.dtd.
[dead/htsn-import.git] / src / TSN / XML / SportInfo.hs
index b1f42ced07c3f35e2873cd6f2d87b57d53a0fc36..76a7c94a4910b261e740eb1135b16fb839fb5af0 100644 (file)
@@ -10,7 +10,7 @@
 --   database along with the XML_File_ID, but we don't parse any of it.
 --
 --   This is almost completely redundant with "TSN.XML.GameInfo", but
---   the redundancy is necessary: we need separate 'Message' types so
+--   the redundancy is necessary: we need separate message types so
 --   that we can have separate 'DbImport' instances. It would take
 --   more code/work to abstract (if it's even possible) than to
 --   duplicate.
@@ -25,15 +25,18 @@ where
 
 -- System imports.
 import Data.Either ( rights )
+import Data.String.Utils ( replace )
 import Data.Time.Clock ( UTCTime )
 import Database.Groundhog (
   countAll,
+  insert_,
   migrate,
   runMigration,
   silentMigrationLogger )
 import Database.Groundhog.Generic ( runDbConn )
 import Database.Groundhog.Sqlite ( withSqliteConn )
 import Database.Groundhog.TH (
+  defaultCodegenConfig,
   groundhog,
   mkPersist )
 import Test.Tasty ( TestTree, testGroup )
@@ -42,28 +45,20 @@ import Text.XML.HXT.Core ( XmlTree )
 import Text.XML.HXT.DOM.ShowXml ( xshow )
 
 -- Local imports.
-import TSN.Codegen ( tsn_codegen_config )
 import TSN.DbImport (
   DbImport(..),
   ImportResult(..),
   run_dbmigrate )
 import TSN.Parse (
+  ParseError,
   parse_message,
   parse_xmlfid,
   parse_xml_time_stamp )
-import TSN.XmlImport ( XmlImport(..) )
-import Xml (
-  FromXml(..),
-  ToDb(..),
-  unsafe_read_document )
+import Xml ( unsafe_read_document )
 
 
 -- | The DTDs for everything that we consider \"Sport Info.\"
 --
---   TODO: This is the list from the old implementation. We need to
---   make sure that we are really receiving XML for these DTDs
---   (i.e. the names are correct).
---
 dtds :: [String]
 dtds =
   [ "CBASK_3PPctXML.dtd",
@@ -78,17 +73,11 @@ dtds =
     "CBASK_FGPctXML.dtd",
     "CBASK_FoulsXML.dtd",
     "CBASK_FTPctXML.dtd",
-    "Cbask_Indv_No_Avg_XML.dtd", -- no dtd
     "Cbask_Indv_Scoring_XML.dtd",
-    "Cbask_Indv_Shooting_XML.dtd", -- no dtd
     "CBASK_MinutesXML.dtd",
     "Cbask_Polls_XML.dtd",
     "CBASK_ReboundsXML.dtd",
     "CBASK_ScoringLeadersXML.dtd",
-    "CBASK_StealsXML.dtd", -- no dtd
-    "Cbask_Team_Scoring_Rebound_Margin_XML.dtd", -- no dtd
-    "Cbask_Team_Scoring_XML.dtd", -- no dtd
-    "Cbask_Team_Shooting_Pct_XML.dtd", -- no dtd
     "Cbask_Team_ThreePT_Made_XML.dtd",
     "Cbask_Team_ThreePT_PCT_XML.dtd",
     "Cbask_Team_Win_Pct_XML.dtd",
@@ -173,128 +162,97 @@ dtds =
     "NCAA_Conference_Schedule_XML.dtd",
     "nflfirstdownxml.dtd",
     "NFLFumbleLeaderXML.dtd",
-    "NFLGiveTakeXML.dtd", -- no dtd
-    "NFLGrassTurfDomeOutsideXML.dtd", -- no dtd
-    "NFLInside20XML.dtd", -- no dtd
-    "NFLInterceptionLeadersXML.dtd", -- no dtd
-    "NFLKickoffsXML.dtd", -- no dtd
-    "NFLMondayNightXML.dtd", -- no dtd
-    "NFLPassingLeadersXML.dtd", -- no dtd
-    "NFLPassLeadXML.dtd", -- no dtd
-    "NFLQBStartsXML.dtd", -- no dtd
-    "NFLReceivingLeadersXML.dtd", -- no dtd
-    "NFLRushingLeadersXML.dtd", -- no dtd
-    "NFLSackLeadersXML.dtd", -- no dtd
-    "nflstandxml.dtd", -- no dtd
-    "NFLTackleFFLeadersXML.dtd", -- no dtd
-    "NFLTeamRankingsXML.dtd", -- no dtd
-    "NFLTopKickoffReturnXML.dtd", -- no dtd
-    "NFLTopPerformanceXML.dtd", -- no dtd
-    "NFLTopPuntReturnXML.dtd", -- no dtd
-    "NFLTotalYardageXML.dtd", -- no dtd
-    "NFLYardsXML.dtd", -- no dtd
-    "NFL_KickingLeaders_XML.dtd", -- no dtd
-    "NFL_NBA_Draft_XML.dtd", -- no dtd
-    "NFL_PuntingLeaders_XML.dtd", -- no dtd
-    "NFL_Roster_XML.dtd", -- no dtd
-    "NFL_Team_Stats_XML.dtd", -- no dtd
-    "Transactions_XML.dtd", -- no dtd
-    "Weekly_Sched_XML.dtd", -- no dtd
-    "WNBA_Team_Leaders_XML.dtd", -- no dtd
-    "WNBA3PPctXML.dtd", -- no dtd
-    "WNBAAssistsXML.dtd", -- no dtd
-    "WNBABlocksXML.dtd", -- no dtd
-    "WNBAFGPctXML.dtd", -- no dtd
-    "WNBAFoulsXML.dtd", -- no dtd
-    "WNBAFTPctXML.dtd", -- no dtd
-    "WNBAMinutesXML.dtd", -- no dtd
-    "WNBAReboundsXML.dtd", -- no dtd
-    "WNBAScorersXML.dtd", -- no dtd
-    "wnbastandxml.dtd", -- no dtd
-    "WNBAStealsXML.dtd", -- no dtd
-    "WNBATurnoversXML.dtd" -- no dtd
-  ]
-
-
--- | XML representation of a SportInfo \<message\>.
+    "NFLGrassTurfDomeOutsideXML.dtd",
+    "NFLGiveTakeXML.dtd",
+    "NFLInside20XML.dtd",
+    "NFLInterceptionLeadersXML.dtd",
+    "NFLKickoffsXML.dtd",
+    "NFLMondayNightXML.dtd",
+    "NFLPassingLeadersXML.dtd",
+    "NFLPassLeadXML.dtd",
+    "NFLQBStartsXML.dtd",
+    "NFLReceivingLeadersXML.dtd",
+    "NFLRushingLeadersXML.dtd",
+    "NFLSackLeadersXML.dtd",
+    "nflstandxml.dtd",
+    "NFLTeamRankingsXML.dtd",
+    "NFLTopKickoffReturnXML.dtd",
+    "NFLTopPerformanceXML.dtd",
+    "NFLTopPuntReturnXML.dtd",
+    "NFLTotalYardageXML.dtd",
+    "NFL_KickingLeaders_XML.dtd",
+    "NFL_NBA_Draft_XML.dtd",
+    "NFL_Roster_XML.dtd",
+    "NFLTackleFFLeadersXML.dtd",
+    "NFL_Team_Stats_XML.dtd",
+    "Transactions_XML.dtd",
+    "Weekly_Sched_XML.dtd",
+    "WNBA_Team_Leaders_XML.dtd",
+    "WNBA3PPctXML.dtd",
+    "WNBAAssistsXML.dtd",
+    "WNBABlocksXML.dtd",
+    "WNBAFGPctXML.dtd",
+    "WNBAFoulsXML.dtd",
+    "WNBAFTPctXML.dtd",
+    "WNBAMinutesXML.dtd",
+    "WNBAReboundsXML.dtd",
+    "WNBAScorersXML.dtd",
+    "wnbastandxml.dtd",
+    "WNBAStealsXML.dtd",
+    "WNBATurnoversXML.dtd" ]
+
+
+-- | This serves as both the database and XML representation of a
+--   SportInfo \<message\>.
 --
-data Message =
-  Message {
-    xml_dtd :: String,
-    xml_xml_file_id :: Int,
-    xml_time_stamp :: UTCTime,
-    xml_xml :: String }
+data SportInfo =
+  SportInfo {
+    dtd :: String,
+    xml_file_id :: Int,
+    time_stamp :: UTCTime,
+    xml :: String }
   deriving (Eq, Show)
 
 
--- | Attempt to parse a 'Message' from an 'XmlTree'. If we cannot,
+-- | Attempt to parse a 'SportInfo' from an 'XmlTree'. If we cannot,
 --   we fail with an error message.
 --
-parse_xml :: String -> XmlTree -> Either String Message
+parse_xml :: String -> XmlTree -> Either ParseError SportInfo
 parse_xml dtdname xmltree = do
   xmlfid <- parse_xmlfid xmltree
   timestamp <- parse_xml_time_stamp xmltree
   message <- parse_message xmltree
-  return $ Message dtdname (fromInteger xmlfid) timestamp (xshow [message])
-
-
--- | Database representation of a 'Message'.
---
-data SportInfo =
-  SportInfo {
-    db_dtd :: String,
-    db_xml_file_id :: Int,
-    db_time_stamp :: UTCTime,
-    db_xml :: String }
-
-
-instance ToDb Message where
-  -- | The database analogue of a 'Message' is an 'SportInfo'.
-  type Db Message = SportInfo
-
-instance FromXml Message where
-  -- | The XML to DB conversion is trivial here.
-  --
-  from_xml Message{..} = SportInfo {
-                           db_dtd = xml_dtd,
-                           db_xml_file_id = xml_xml_file_id,
-                           db_time_stamp = xml_time_stamp,
-                           db_xml = xml_xml }
-
-
--- | This allows us to insert the XML representation 'Message'
---   directly.
---
-instance XmlImport Message
+  return $ SportInfo dtdname xmlfid timestamp (xshow [message])
 
 
 --
 -- Database code
 --
 
-instance DbImport Message where
+instance DbImport SportInfo where
   dbmigrate _ =
     run_dbmigrate $ migrate (undefined :: SportInfo)
 
-  -- | We import a 'Message' by inserting the whole thing at
+  -- | We import a 'SportInfo' by inserting the whole thing at
   --   once. Nothing fancy going on here.
   dbimport msg = do
-    insert_xml_ msg
+    insert_ msg
     return ImportSucceeded
 
 
 -- | The database schema for SportInfo is trivial; all we need is for
 --   the XML_File_ID to be unique.
 --
-mkPersist tsn_codegen_config [groundhog|
+mkPersist defaultCodegenConfig [groundhog|
 - entity: SportInfo
+  dbName: sport_info
   constructors:
     - name: SportInfo
       uniques:
         - name: unique_sport_info
           type: constraint
           # Prevent multiple imports of the same message.
-          fields: [db_xml_file_id]
+          fields: [xml_file_id]
 |]
 
 
@@ -308,117 +266,40 @@ sport_info_tests :: TestTree
 sport_info_tests =
   testGroup
     "SportInfo tests"
-    [ test_parse_xml_succeeds,
+    [ test_accessors,
+      test_parse_xml_succeeds,
       test_dbimport_succeeds ]
 
 
+-- | Make sure the accessors work and that we can parse one file. Ok,
+--   so the real point of this is to make the unused fields (dtd, xml,
+--   ...) warning go away without having to mangle the groundhog code.
+--
+test_accessors :: TestTree
+test_accessors = testCase "we can access a parsed sport_info" $ do
+  xmltree <- unsafe_read_document "test/xml/sportinfo/wnbastandxml.xml"
+  let Right t = parse_xml "wnbastandxml.dtd" xmltree
+  let a1  = dtd t
+  let ex1 = "wnbastandxml.dtd"
+  let a2  = xml_file_id t
+  let ex2 = 2011
+  let a3  = show $ time_stamp t
+  let ex3 = "2009-09-27 19:50:00 UTC"
+  let a4  = take 9 (xml t)
+  let ex4 = "<message>"
+  let actual = (a1,a2,a3,a4)
+  let expected = (ex1,ex2,ex3,ex4)
+  actual @?= expected
+
+
 -- | Sample XML documents for SportInfo types.
 --
 sport_info_test_files :: [FilePath]
 sport_info_test_files =
-  map ("test/xml/sportinfo/" ++) [
-    "CBASK_3PPctXML.xml",
-    "Cbask_All_Tourn_Teams_XML.xml",
-    "CBASK_AssistsXML.xml",
-    "Cbask_Awards_XML.xml",
-    "CBASK_BlocksXML.xml",
-    "Cbask_Conf_Standings_XML.xml",
-    "Cbask_DivII_III_Indv_Stats_XML.xml",
-    "Cbask_DivII_Team_Stats_XML.xml",
-    "Cbask_DivIII_Team_Stats_XML.xml",
-    "CBASK_FGPctXML.xml",
-    "CBASK_FoulsXML.xml",
-    "CBASK_FTPctXML.xml",
-    "Cbask_Indv_Scoring_XML.xml",
-    "CBASK_MinutesXML.xml",
-    "Cbask_Polls_XML.xml",
-    "CBASK_ReboundsXML.xml",
-    "CBASK_ScoringLeadersXML.xml",
-    "Cbask_Team_ThreePT_Made_XML.xml",
-    "Cbask_Team_ThreePT_PCT_XML.xml",
-    "Cbask_Team_Win_Pct_XML.xml",
-    "Cbask_Top_Twenty_Five_XML.xml",
-    "CBASK_TopTwentyFiveResult_XML.xml",
-    "Cbask_Tourn_Awards_XML.xml",
-    "Cbask_Tourn_Champs_XML.xml",
-    "Cbask_Tourn_Indiv_XML.xml",
-    "Cbask_Tourn_Leaders_XML.xml",
-    "Cbask_Tourn_MVP_XML.xml",
-    "Cbask_Tourn_Records_XML.xml",
-    "LeagueScheduleXML.xml",
-    "minorscoresxml.xml",
-    "Minor_Baseball_League_Leaders_XML.xml",
-    "Minor_Baseball_Standings_XML.xml",
-    "Minor_Baseball_Transactions_XML.xml",
-    "mlbbattingavgxml.xml",
-    "mlbdoublesleadersxml.xml",
-    "MLBGamesPlayedXML.xml",
-    "MLBGIDPXML.xml",
-    "MLBHitByPitchXML.xml",
-    "mlbhitsleadersxml.xml",
-    "mlbhomerunsxml.xml",
-    "MLBHRFreqXML.xml",
-    "MLBIntWalksXML.xml",
-    "MLBKORateXML.xml",
-    "mlbonbasepctxml.xml",
-    "MLBOPSXML.xml",
-    "MLBPlateAppsXML.xml",
-    "mlbrbisxml.xml",
-    "mlbrunsleadersxml.xml",
-    "MLBSacFliesXML.xml",
-    "MLBSacrificesXML.xml",
-    "MLBSBSuccessXML.xml",
-    "mlbsluggingpctxml.xml",
-    "mlbstandxml.xml",
-    "mlbstandxml_preseason.xml",
-    "mlbstolenbasexml.xml",
-    "mlbtotalbasesleadersxml.xml",
-    "mlbtriplesleadersxml.xml",
-    "MLBWalkRateXML.xml",
-    "mlbwalksleadersxml.xml",
-    "MLBXtraBaseHitsXML.xml",
-    "MLB_ERA_Leaders.xml",
-    "MLB_Pitching_Appearances_Leaders.xml",
-    "MLB_Pitching_Balks_Leaders.xml",
-    "MLB_Pitching_CG_Leaders.xml",
-    "MLB_Pitching_ER_Allowed_Leaders.xml",
-    "MLB_Pitching_Hits_Allowed_Leaders.xml",
-    "MLB_Pitching_Hit_Batters_Leaders.xml",
-    "MLB_Pitching_HR_Allowed_Leaders.xml",
-    "MLB_Pitching_IP_Leaders.xml",
-    "MLB_Pitching_Runs_Allowed_Leaders.xml",
-    "MLB_Pitching_Saves_Leaders.xml",
-    "MLB_Pitching_Shut_Outs_Leaders.xml",
-    "MLB_Pitching_Starts_Leaders.xml",
-    "MLB_Pitching_Strike_Outs_Leaders.xml",
-    "MLB_Pitching_Walks_Leaders.xml",
-    "MLB_Pitching_WHIP_Leaders.xml",
-    "MLB_Pitching_Wild_Pitches_Leaders.xml",
-    "MLB_Pitching_Win_Percentage_Leaders.xml",
-    "MLB_Pitching_WL_Leaders.xml",
-    "NBA_Team_Stats_XML.xml",
-    "NBA3PPctXML.xml",
-    "NBAAssistsXML.xml",
-    "NBABlocksXML.xml",
-    "nbaconfrecxml.xml",
-    "nbadaysxml.xml",
-    "nbadivisionsxml.xml",
-    "NBAFGPctXML.xml",
-    "NBAFoulsXML.xml",
-    "NBAFTPctXML.xml",
-    "NBAMinutesXML.xml",
-    "NBAReboundsXML.xml",
-    "NBAScorersXML.xml",
-    "nbastandxml.xml",
-    "NBAStealsXML.xml",
-    "nbateamleadersxml.xml",
-    "nbatripledoublexml.xml",
-    "NBATurnoversXML.xml",
-    "NCAA_Conference_Schedule_XML.xml",
-    "nflfirstdownxml.xml",
-    "NFLFumbleLeaderXML.xml"
-  ]
-
+  map (change_suffix . add_path) dtds
+  where
+     add_path = ("test/xml/sportinfo/" ++ )
+     change_suffix = replace ".dtd" ".xml"
 
 
 -- | Make sure we can parse every element of 'sport_info_test_files'.
@@ -447,7 +328,7 @@ test_dbimport_succeeds = testCase "dbimport succeeds" $ do
   xmltrees <- mapM unsafe_read_document sport_info_test_files
   let msgs = rights $ map (parse_xml "dummy") xmltrees
   actual <- withSqliteConn ":memory:" $ runDbConn $ do
-                runMigration silentMigrationLogger $ do
+                runMigration silentMigrationLogger $
                   migrate (undefined :: SportInfo)
                 mapM_ dbimport msgs
                 countAll (undefined :: SportInfo)