X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FTSN%2FXML%2FSportInfo.hs;h=44688317fa9b04414d2bfa16d9546a4f9392f817;hb=a13dffb2bed8ca56164430e1b11731f4ab1e7d5b;hp=b7e269ee792f91df09925dde854fcb3590d40016;hpb=dcaa338a8e638ff20890f949fd116fab0228e050;p=dead%2Fhtsn-import.git diff --git a/src/TSN/XML/SportInfo.hs b/src/TSN/XML/SportInfo.hs index b7e269e..76a7c94 100644 --- a/src/TSN/XML/SportInfo.hs +++ b/src/TSN/XML/SportInfo.hs @@ -1,18 +1,63 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TypeFamilies #-} + -- | SportInfo represents a collection of DTDs that we don't really -- handle but want to make available. The raw XML gets stored in the -- database along with the XML_File_ID, but we don't parse any of it. -- --- See also: TSN.XML.GameInfo +-- This is almost completely redundant with "TSN.XML.GameInfo", but +-- the redundancy is necessary: we need separate message types so +-- that we can have separate 'DbImport' instances. It would take +-- more code/work to abstract (if it's even possible) than to +-- duplicate. -- module TSN.XML.SportInfo ( - dtds ) + dtds, + parse_xml, + sport_info_tests, + -- * WARNING: these are private but exported to silence warnings + SportInfoConstructor(..) ) where --- | The DTDs for everything that we consider "Sport Info." --- --- TODO: This is the list from the old implementation. We need to --- make sure that we are really receiving XML for these DTDs --- (i.e. the names are correct). +-- System imports. +import Data.Either ( rights ) +import Data.String.Utils ( replace ) +import Data.Time.Clock ( UTCTime ) +import Database.Groundhog ( + countAll, + insert_, + migrate, + runMigration, + silentMigrationLogger ) +import Database.Groundhog.Generic ( runDbConn ) +import Database.Groundhog.Sqlite ( withSqliteConn ) +import Database.Groundhog.TH ( + defaultCodegenConfig, + groundhog, + mkPersist ) +import Test.Tasty ( TestTree, testGroup ) +import Test.Tasty.HUnit ( (@?=), testCase ) +import Text.XML.HXT.Core ( XmlTree ) +import Text.XML.HXT.DOM.ShowXml ( xshow ) + +-- Local imports. +import TSN.DbImport ( + DbImport(..), + ImportResult(..), + run_dbmigrate ) +import TSN.Parse ( + ParseError, + parse_message, + parse_xmlfid, + parse_xml_time_stamp ) +import Xml ( unsafe_read_document ) + + +-- | The DTDs for everything that we consider \"Sport Info.\" -- dtds :: [String] dtds = @@ -21,26 +66,18 @@ dtds = "CBASK_AssistsXML.dtd", "Cbask_Awards_XML.dtd", "CBASK_BlocksXML.dtd", - "CBask_BlocksXML.dtd", "Cbask_Conf_Standings_XML.dtd", "Cbask_DivII_III_Indv_Stats_XML.dtd", "Cbask_DivII_Team_Stats_XML.dtd", "Cbask_DivIII_Team_Stats_XML.dtd", "CBASK_FGPctXML.dtd", - "CBask_FGPctXML.dtd", "CBASK_FoulsXML.dtd", "CBASK_FTPctXML.dtd", - "Cbask_Indv_No_Avg_XML.dtd", "Cbask_Indv_Scoring_XML.dtd", - "Cbask_Indv_Shooting_XML.dtd", "CBASK_MinutesXML.dtd", "Cbask_Polls_XML.dtd", "CBASK_ReboundsXML.dtd", "CBASK_ScoringLeadersXML.dtd", - "CBASK_StealsXML.dtd", - "Cbask_Team_Scoring_Rebound_Margin_XML.dtd", - "Cbask_Team_Scoring_XML.dtd", - "Cbask_Team_Shooting_Pct_XML.dtd", "Cbask_Team_ThreePT_Made_XML.dtd", "Cbask_Team_ThreePT_PCT_XML.dtd", "Cbask_Team_Win_Pct_XML.dtd", @@ -125,8 +162,8 @@ dtds = "NCAA_Conference_Schedule_XML.dtd", "nflfirstdownxml.dtd", "NFLFumbleLeaderXML.dtd", - "NFLGiveTakeXML.dtd", "NFLGrassTurfDomeOutsideXML.dtd", + "NFLGiveTakeXML.dtd", "NFLInside20XML.dtd", "NFLInterceptionLeadersXML.dtd", "NFLKickoffsXML.dtd", @@ -138,17 +175,15 @@ dtds = "NFLRushingLeadersXML.dtd", "NFLSackLeadersXML.dtd", "nflstandxml.dtd", - "NFLTackleFFLeadersXML.dtd", "NFLTeamRankingsXML.dtd", "NFLTopKickoffReturnXML.dtd", "NFLTopPerformanceXML.dtd", "NFLTopPuntReturnXML.dtd", "NFLTotalYardageXML.dtd", - "NFLYardsXML.dtd", "NFL_KickingLeaders_XML.dtd", "NFL_NBA_Draft_XML.dtd", - "NFL_PuntingLeaders_XML.dtd", "NFL_Roster_XML.dtd", + "NFLTackleFFLeadersXML.dtd", "NFL_Team_Stats_XML.dtd", "Transactions_XML.dtd", "Weekly_Sched_XML.dtd", @@ -165,3 +200,139 @@ dtds = "wnbastandxml.dtd", "WNBAStealsXML.dtd", "WNBATurnoversXML.dtd" ] + + +-- | This serves as both the database and XML representation of a +-- SportInfo \. +-- +data SportInfo = + SportInfo { + dtd :: String, + xml_file_id :: Int, + time_stamp :: UTCTime, + xml :: String } + deriving (Eq, Show) + + +-- | Attempt to parse a 'SportInfo' from an 'XmlTree'. If we cannot, +-- we fail with an error message. +-- +parse_xml :: String -> XmlTree -> Either ParseError SportInfo +parse_xml dtdname xmltree = do + xmlfid <- parse_xmlfid xmltree + timestamp <- parse_xml_time_stamp xmltree + message <- parse_message xmltree + return $ SportInfo dtdname xmlfid timestamp (xshow [message]) + + +-- +-- Database code +-- + +instance DbImport SportInfo where + dbmigrate _ = + run_dbmigrate $ migrate (undefined :: SportInfo) + + -- | We import a 'SportInfo' by inserting the whole thing at + -- once. Nothing fancy going on here. + dbimport msg = do + insert_ msg + return ImportSucceeded + + +-- | The database schema for SportInfo is trivial; all we need is for +-- the XML_File_ID to be unique. +-- +mkPersist defaultCodegenConfig [groundhog| +- entity: SportInfo + dbName: sport_info + constructors: + - name: SportInfo + uniques: + - name: unique_sport_info + type: constraint + # Prevent multiple imports of the same message. + fields: [xml_file_id] +|] + + +-- +-- Tasty Tests +-- + +-- | A list of all tests for this module. +-- +sport_info_tests :: TestTree +sport_info_tests = + testGroup + "SportInfo tests" + [ test_accessors, + test_parse_xml_succeeds, + test_dbimport_succeeds ] + + +-- | Make sure the accessors work and that we can parse one file. Ok, +-- so the real point of this is to make the unused fields (dtd, xml, +-- ...) warning go away without having to mangle the groundhog code. +-- +test_accessors :: TestTree +test_accessors = testCase "we can access a parsed sport_info" $ do + xmltree <- unsafe_read_document "test/xml/sportinfo/wnbastandxml.xml" + let Right t = parse_xml "wnbastandxml.dtd" xmltree + let a1 = dtd t + let ex1 = "wnbastandxml.dtd" + let a2 = xml_file_id t + let ex2 = 2011 + let a3 = show $ time_stamp t + let ex3 = "2009-09-27 19:50:00 UTC" + let a4 = take 9 (xml t) + let ex4 = "" + let actual = (a1,a2,a3,a4) + let expected = (ex1,ex2,ex3,ex4) + actual @?= expected + + +-- | Sample XML documents for SportInfo types. +-- +sport_info_test_files :: [FilePath] +sport_info_test_files = + map (change_suffix . add_path) dtds + where + add_path = ("test/xml/sportinfo/" ++ ) + change_suffix = replace ".dtd" ".xml" + + +-- | Make sure we can parse every element of 'sport_info_test_files'. +-- +test_parse_xml_succeeds :: TestTree +test_parse_xml_succeeds = + testGroup "parse_xml" $ map check sport_info_test_files + where + check t = testCase t $ do + x <- unsafe_read_document t + let result = parse_xml "dummy" x + let actual = case result of -- isRight appears in base-4.7 + Left _ -> False + Right _ -> True + let expected = True + actual @?= expected + + +-- | Ensure that each element of 'sport_info_test_files' can be imported +-- by counting the total number of database records (after +-- importing) and comparing it against the length of +-- 'sport_info_test_files'. +-- +test_dbimport_succeeds :: TestTree +test_dbimport_succeeds = testCase "dbimport succeeds" $ do + xmltrees <- mapM unsafe_read_document sport_info_test_files + let msgs = rights $ map (parse_xml "dummy") xmltrees + actual <- withSqliteConn ":memory:" $ runDbConn $ do + runMigration silentMigrationLogger $ + migrate (undefined :: SportInfo) + mapM_ dbimport msgs + countAll (undefined :: SportInfo) + + actual @?= expected + where + expected = length sport_info_test_files