From a9a8667246a544705d85698ec967437e4770be2c Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Fri, 4 Jul 2014 00:28:00 -0400 Subject: [PATCH] Add the initial implementation of TSN.XML.ScheduleChanges. --- .ghci | 2 + doc/man1/htsn-import.1 | 2 + htsn-import.cabal | 1 + src/Main.hs | 5 + src/TSN/XML/ScheduleChanges.hs | 586 ++++++++++++++++++++++++++++++ test/TestSuite.hs | 2 + test/shell/import-duplicates.test | 6 +- 7 files changed, 601 insertions(+), 3 deletions(-) create mode 100644 src/TSN/XML/ScheduleChanges.hs diff --git a/.ghci b/.ghci index d02fc64..e03be18 100644 --- a/.ghci +++ b/.ghci @@ -25,6 +25,7 @@ src/TSN/XML/JFile.hs src/TSN/XML/News.hs src/TSN/XML/Odds.hs + src/TSN/XML/ScheduleChanges.hs src/TSN/XML/Scores.hs src/TSN/XML/SportInfo.hs src/TSN/XML/Weather.hs @@ -53,6 +54,7 @@ import TSN.XML.InjuriesDetail import TSN.XML.JFile import TSN.XML.News import TSN.XML.Odds +import TSN.XML.ScheduleChanges import TSN.XML.Scores import TSN.XML.SportInfo import TSN.XML.Weather diff --git a/doc/man1/htsn-import.1 b/doc/man1/htsn-import.1 index 3dcf2be..352eb2b 100644 --- a/doc/man1/htsn-import.1 +++ b/doc/man1/htsn-import.1 @@ -386,6 +386,8 @@ newsxml.dtd .IP \[bu] Odds_XML.dtd .IP \[bu] +Schedule_Changes_XML.dtd +.IP \[bu] scoresxml.dtd .IP \[bu] weatherxml.dtd diff --git a/htsn-import.cabal b/htsn-import.cabal index 2d267fb..928ff54 100644 --- a/htsn-import.cabal +++ b/htsn-import.cabal @@ -275,6 +275,7 @@ executable htsn-import TSN.XML.JFile TSN.XML.News TSN.XML.Odds + TSN.XML.ScheduleChanges TSN.XML.Scores TSN.XML.SportInfo TSN.XML.Weather diff --git a/src/Main.hs b/src/Main.hs index 38f3930..90f5f91 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -60,6 +60,9 @@ import qualified TSN.XML.InjuriesDetail as InjuriesDetail ( import qualified TSN.XML.JFile as JFile ( dtd, pickle_message ) import qualified TSN.XML.News as News ( dtd, pickle_message ) import qualified TSN.XML.Odds as Odds ( dtd, pickle_message ) +import qualified TSN.XML.ScheduleChanges as ScheduleChanges ( + dtd, + pickle_message ) import qualified TSN.XML.Scores as Scores ( dtd, pickle_message ) import qualified TSN.XML.SportInfo as SportInfo ( dtds, parse_xml ) import qualified TSN.XML.Weather as Weather ( dtd, pickle_message ) @@ -195,6 +198,8 @@ import_file cfg path = do | dtd == Odds.dtd = go Odds.pickle_message + | dtd == ScheduleChanges.dtd = go ScheduleChanges.pickle_message + | dtd == Scores.dtd = go Scores.pickle_message -- SportInfo and GameInfo appear last in the guards diff --git a/src/TSN/XML/ScheduleChanges.hs b/src/TSN/XML/ScheduleChanges.hs new file mode 100644 index 0000000..b3d134e --- /dev/null +++ b/src/TSN/XML/ScheduleChanges.hs @@ -0,0 +1,586 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TypeFamilies #-} + +-- | Parse TSN XML for the DTD \"Schedule_Changes_XML.dtd\". Each +-- \ element contains zero or more \ +-- which are just a wrapper around zero or more \s. +-- +-- The teams appear to use the shared "TSN.Team" representation. +-- +module TSN.XML.ScheduleChanges ( + dtd, + pickle_message, + -- * Tests + schedule_changes_tests, + -- * WARNING: these are private but exported to silence warnings + ScheduleChangesConstructor(..), + ScheduleChangesListingConstructor(..), + ScheduleChangesListing_TeamConstructor(..) ) +where + +-- System imports. +import Control.Monad ( forM_ ) +import Data.Time ( UTCTime(..) ) +import Data.Tuple.Curry ( uncurryN ) +import Database.Groundhog ( + countAll, + deleteAll, + insert, + insert_, + migrate, + runMigration, + silentMigrationLogger ) +import Database.Groundhog.Core ( DefaultKey ) +import Database.Groundhog.Generic ( runDbConn ) +import Database.Groundhog.Sqlite ( withSqliteConn ) +import Database.Groundhog.TH ( + groundhog, + mkPersist ) +import Test.Tasty ( TestTree, testGroup ) +import Test.Tasty.HUnit ( (@?=), testCase ) +import Text.XML.HXT.Core ( + PU, + xp6Tuple, + xp11Tuple, + xpAttr, + xpElem, + xpInt, + xpList, + xpOption, + xpPair, + xpText, + xpWrap ) + +-- Local imports. +import TSN.Codegen ( + tsn_codegen_config ) +import TSN.DbImport ( DbImport(..), ImportResult(..), run_dbmigrate ) +import TSN.Picklers ( xp_date_padded, xp_time, xp_time_stamp ) +import TSN.Team ( Team(..) ) +import TSN.XmlImport ( XmlImport(..) ) +import Xml ( + FromXml(..), + ToDb(..), + pickle_unpickle, + unpickleable, + unsafe_unpickle ) + + + +-- | The DTD to which this module corresponds. Used to invoke dbimport. +-- +dtd :: String +dtd = "Schedule_Changes_XML.dtd" + + +-- +-- DB/XML data types +-- + +-- * ScheduleChanges/Message + +-- | Database representation of a 'Message'. Comparatively, it lacks +-- the listings since they are linked via a foreign key. +-- +data ScheduleChanges = + ScheduleChanges { + db_xml_file_id :: Int, + db_heading :: String, + db_category :: String, + db_sport :: String, + db_time_stamp :: UTCTime } + deriving (Eq, Show) + + +-- | XML representation of a \ within a +-- \. These are wrappers around a bunch of +-- \s, but they also contain the sport name for all of +-- the contained listings. +-- +data ScheduleChangeXml = + ScheduleChangeXml { + xml_sc_sport :: String, + xml_sc_listings :: [ScheduleChangesListingXml] } + deriving (Eq, Show) + + +-- | XML representation of a 'ScheduleChanges'. It has the same +-- fields, but in addition contains the 'xml_listings'. +-- +data Message = + Message { + xml_xml_file_id :: Int, + xml_heading :: String, + xml_category :: String, + xml_sport :: String, + xml_schedule_changes :: [ScheduleChangeXml], + xml_time_stamp :: UTCTime } + deriving (Eq, Show) + + + +instance ToDb Message where + -- | The database analogue of a 'Message' is a 'ScheduleChanges'. + -- + type Db Message = ScheduleChanges + + +-- | The 'FromXml' instance for 'Message' is required for the +-- 'XmlImport' instance. +-- +instance FromXml Message where + -- | To convert a 'Message' to an 'ScheduleChanges', we just drop + -- the 'xml_schedule_changes'. + -- + from_xml Message{..} = + ScheduleChanges { + db_xml_file_id = xml_xml_file_id, + db_heading = xml_heading, + db_category = xml_category, + db_sport = xml_sport, + db_time_stamp = xml_time_stamp } + + +-- | This allows us to insert the XML representation 'Message' +-- directly. +-- +instance XmlImport Message + + + +-- * ScheduleChangesListing/ScheduleChangesListingXml + +-- | An embedded type within 'ScheduleChangesListing'. These look +-- like, \FINAL\ within the XML, +-- but they're in one-to-one correspondence with the listings. +-- +data ScheduleChangesListingStatus = + ScheduleChangesListingStatus { + db_status_numeral :: Int, + db_status :: Maybe String } + deriving (Eq, Show) + + +-- | Database representation of a \ contained within a +-- \, within a \. During the transition +-- to the database, we drop the intermediate \ +-- leaving the listing keyed to the 'ScheduleChanges' itself. +-- +-- The home/away teams reuse the 'Team' representation and are +-- connected via 'ScheduleChangesListing_Team'. +-- +-- The sport name (sc_sport) is pulled out of the containing +-- \ and embedded into the listings themselves. +-- +data ScheduleChangesListing = + ScheduleChangesListing { + db_schedule_changes_id :: DefaultKey ScheduleChanges, + db_type :: String, + db_sc_sport :: String, + db_schedule_id :: Int, + db_game_time :: UTCTime, + db_location :: String, + db_vscore :: Int, + db_hscore :: Int, + db_listing_status :: ScheduleChangesListingStatus, + db_notes :: Maybe String } + + +-- | XML representation of a \ contained within a +-- \, within a \. +-- +data ScheduleChangesListingXml = + ScheduleChangesListingXml { + xml_type :: String, + xml_schedule_id :: Int, + xml_game_date :: UTCTime, + xml_game_time :: UTCTime, + xml_location :: String, + xml_away_team :: ScheduleChangesListingAwayTeamXml, + xml_home_team :: ScheduleChangesListingHomeTeamXml, + xml_vscore :: Int, + xml_hscore :: Int, + xml_listing_status :: ScheduleChangesListingStatus, + xml_notes :: Maybe String } + deriving (Eq, Show) + + +instance ToDb ScheduleChangesListingXml where + -- | The database analogue of an 'ScheduleChangesListingXml' is + -- an 'ScheduleChangesListing'. + -- + type Db ScheduleChangesListingXml = ScheduleChangesListing + + + +-- | We don't make 'ScheduleChangesListingXml' an instance of +-- 'FromXmlFk' or 'XmlImportFk' because it needs some additional +-- information, namely the sport name from its containing +-- \. +-- +from_xml_fk_sport :: (DefaultKey ScheduleChanges) + -> String + -> ScheduleChangesListingXml + -> ScheduleChangesListing +from_xml_fk_sport fk sport ScheduleChangesListingXml{..} = + ScheduleChangesListing { + db_schedule_changes_id = fk, + db_type = xml_type, + db_sc_sport = sport, + db_schedule_id = xml_schedule_id, + db_game_time = make_game_time xml_game_date xml_game_time, + db_location = xml_location, + db_vscore = xml_vscore, + db_hscore = xml_hscore, + db_listing_status = xml_listing_status, + db_notes = xml_notes } + + where + -- | Make the database \"game time\" from the XML + -- date/time. Simply take the day part from one and the time + -- from the other. + -- + make_game_time d t = UTCTime (utctDay d) (utctDayTime t) + + + +-- * ScheduleChangesListing_Team + +-- | Database mapping between listings and their home/away teams. +-- +data ScheduleChangesListing_Team = + ScheduleChangesListing_Team { + sclt_schedule_changes_listings_id :: DefaultKey ScheduleChangesListing, + sclt_away_team_id :: DefaultKey Team, + sclt_home_team_id :: DefaultKey Team } + + +-- * ScheduleChangesListingAwayTeamXml / ScheduleChangesListingHomeTeamXml + +-- | The XML representation of a 'ScheduleChangesListing' away +-- team. Its corresponding database representation (along with that +-- of the home team) is a "TSN.Team", but their XML representations +-- are different. +data ScheduleChangesListingAwayTeamXml = + ScheduleChangesListingAwayTeamXml { + away_team_id :: String, + away_team_name :: String } + deriving (Eq, Show) + +instance ToDb ScheduleChangesListingAwayTeamXml where + -- | The database analogue of an 'ScheduleChangesListingAwayTeamXml' is + -- a 'Team'. + -- + type Db ScheduleChangesListingAwayTeamXml = Team + +instance FromXml ScheduleChangesListingAwayTeamXml where + -- | To convert a 'ScheduleChangesListingAwayTeamXml' to a 'Team', + -- we set the non-existent abbreviation to \"Nothing\" and wrap + -- the always-present name field in \"Just\". + -- + from_xml ScheduleChangesListingAwayTeamXml{..} = + Team { + team_id = away_team_id, + team_abbreviation = Nothing, + team_name = Just away_team_name } + +-- | Allow us to import ScheduleChangesListingAwayTeamXml directly. +instance XmlImport ScheduleChangesListingAwayTeamXml + + +-- | The XML representation of a 'ScheduleChangesListing' home +-- team. Its corresponding database representation (along with that +-- of the away team) is a "TSN.Team", but their XML representations +-- are different. +-- +data ScheduleChangesListingHomeTeamXml = + ScheduleChangesListingHomeTeamXml { + home_team_id :: String, + home_team_name :: String } + deriving (Eq, Show) + +instance ToDb ScheduleChangesListingHomeTeamXml where + -- | The database analogue of an 'ScheduleChangesListingHomeTeamXml' + -- is a 'Team'. + -- + type Db ScheduleChangesListingHomeTeamXml = Team + +instance FromXml ScheduleChangesListingHomeTeamXml where + -- | To convert a 'ScheduleChangesListingHomeTeamXml' to a 'Team', + -- we set the non-existent abbreviation to \"Nothing\" and wrap + -- the always-present name field in \"Just\". + -- + from_xml ScheduleChangesListingHomeTeamXml{..} = + Team { + team_id = home_team_id, + team_abbreviation = Nothing, + team_name = Just home_team_name } + +-- | Allow us to import ScheduleChangesListingHomeTeamXml directly. +instance XmlImport ScheduleChangesListingHomeTeamXml + + +-- +-- * Database stuff. +-- + +instance DbImport Message where + dbmigrate _ = + run_dbmigrate $ do + migrate (undefined :: Team) + migrate (undefined :: ScheduleChanges) + migrate (undefined :: ScheduleChangesListing) + migrate (undefined :: ScheduleChangesListing_Team) + + dbimport m = do + -- Insert the top-level message + msg_id <- insert_xml m + + -- Now loop through the message's schedule changes + forM_ (xml_schedule_changes m) $ \sc -> do + -- Construct the function that will turn an XML listing into a DB one. + let listing_xml_to_db = from_xml_fk_sport msg_id (xml_sc_sport sc) + + -- Now loop through the listings so that we can handle the teams + -- one listing at a time. + forM_ (xml_sc_listings sc) $ \listing -> do + let db_listing = listing_xml_to_db listing + listing_id <- insert db_listing + + away_team_id <- insert_xml_or_select (xml_away_team listing) + home_team_id <- insert_xml_or_select (xml_home_team listing) + + -- Insert a record into schedule_changes_listings__teams + -- mapping the home/away teams to this game. Use the full + -- record syntax because the types would let us mix up the + -- home/away teams. + insert_ ScheduleChangesListing_Team { + sclt_schedule_changes_listings_id = listing_id, + sclt_away_team_id = away_team_id, + sclt_home_team_id = home_team_id } + + return ImportSucceeded + + +mkPersist tsn_codegen_config [groundhog| +- entity: ScheduleChanges + dbName: schedule_changes + constructors: + - name: ScheduleChanges + uniques: + - name: unique_schedule_changes + type: constraint + # Prevent multiple imports of the same message. + fields: [db_xml_file_id] + + +- entity: ScheduleChangesListing + dbName: schedule_changes_listings + constructors: + - name: ScheduleChangesListing + fields: + - name: db_schedule_changes_id + reference: + onDelete: cascade + +- embedded: ScheduleChangesListingStatus + fields: + - name: db_status_numeral + dbName: status_numeral + - name: db_status + dbName: status + + +- entity: ScheduleChangesListing_Team + dbName: schedule_changes_listings__teams + constructors: + - name: ScheduleChangesListing_Team + fields: + - name: sclt_schedule_changes_listings_id + reference: + onDelete: cascade + - name: sclt_away_team_id + reference: + onDelete: cascade + - name: sclt_home_team_id + reference: + onDelete: cascade +|] + + + +-- +-- * Pickling +-- + +pickle_away_team :: PU ScheduleChangesListingAwayTeamXml +pickle_away_team = + xpElem "Away_Team" $ + xpWrap (from_tuple, to_tuple) $ + xpPair (xpAttr "AT_ID" xpText) + xpText + where + from_tuple = uncurry ScheduleChangesListingAwayTeamXml + to_tuple t = (away_team_id t, + away_team_name t) + +pickle_home_team :: PU ScheduleChangesListingHomeTeamXml +pickle_home_team = + xpElem "Home_Team" $ + xpWrap (from_tuple, to_tuple) $ + xpPair (xpAttr "HT_ID" xpText) + xpText + where + from_tuple = uncurry ScheduleChangesListingHomeTeamXml + to_tuple t = (home_team_id t, + home_team_name t) + + +pickle_status :: PU ScheduleChangesListingStatus +pickle_status = + xpElem "status" $ + xpWrap (from_tuple, to_tuple) $ + xpPair (xpAttr "numeral" xpInt) + (xpOption xpText) + where + from_tuple = uncurry ScheduleChangesListingStatus + to_tuple s = (db_status_numeral s, + db_status s) + + +pickle_listing :: PU ScheduleChangesListingXml +pickle_listing = + xpElem "SC_Listing" $ + xpWrap (from_tuple, to_tuple) $ + xp11Tuple (xpAttr "type" xpText) + (xpElem "Schedule_ID" xpInt) + (xpElem "Game_Date" xp_date_padded) + (xpElem "Game_Time" xp_time) + (xpElem "Location" xpText) + pickle_away_team + pickle_home_team + (xpElem "vscore" xpInt) + (xpElem "hscore" xpInt) + pickle_status + (xpElem "notes" (xpOption xpText)) + where + from_tuple = uncurryN ScheduleChangesListingXml + to_tuple l = (xml_type l, + xml_schedule_id l, + xml_game_date l, + xml_game_time l, + xml_location l, + xml_away_team l, + xml_home_team l, + xml_vscore l, + xml_hscore l, + xml_listing_status l, + xml_notes l) + +pickle_schedule_change :: PU ScheduleChangeXml +pickle_schedule_change = + xpElem "Schedule_Change" $ + xpWrap (from_tuple, to_tuple) $ + xpPair (xpAttr "Sport" xpText) + (xpList pickle_listing) + where + from_tuple = uncurry ScheduleChangeXml + to_tuple sc = (xml_sc_sport sc, + xml_sc_listings sc) + + +-- | Pickler for the top-level 'Message'. +-- +pickle_message :: PU Message +pickle_message = + xpElem "message" $ + xpWrap (from_tuple, to_tuple) $ + xp6Tuple (xpElem "XML_File_ID" xpInt) + (xpElem "heading" xpText) + (xpElem "category" xpText) + (xpElem "sport" xpText) + (xpList pickle_schedule_change) + (xpElem "time_stamp" xp_time_stamp) + where + from_tuple = uncurryN Message + to_tuple m = (xml_xml_file_id m, + xml_heading m, + xml_category m, + xml_sport m, + xml_schedule_changes m, + xml_time_stamp m) + + + +-- +-- * Tests +-- +-- | A list of all tests for this module. +-- +schedule_changes_tests :: TestTree +schedule_changes_tests = + testGroup + "ScheduleChanges tests" + [ test_on_delete_cascade, + test_pickle_of_unpickle_is_identity, + test_unpickle_succeeds ] + +-- | If we unpickle something and then pickle it, we should wind up +-- with the same thing we started with. WARNING: success of this +-- test does not mean that unpickling succeeded. +-- +test_pickle_of_unpickle_is_identity :: TestTree +test_pickle_of_unpickle_is_identity = + testCase "pickle composed with unpickle is the identity" $ do + let path = "test/xml/Schedule_Changes_XML.xml" + (expected, actual) <- pickle_unpickle pickle_message path + actual @?= expected + + + +-- | Make sure we can actually unpickle these things. +-- +test_unpickle_succeeds :: TestTree +test_unpickle_succeeds = + testCase "unpickling succeeds" $ do + let path = "test/xml/Schedule_Changes_XML.xml" + actual <- unpickleable path pickle_message + + let expected = True + actual @?= expected + + + +-- | Make sure everything gets deleted when we delete the top-level +-- record. +-- +test_on_delete_cascade :: TestTree +test_on_delete_cascade = + testCase "deleting auto_racing_results deletes its children" $ do + let path = "test/xml/Schedule_Changes_XML.xml" + results <- unsafe_unpickle path pickle_message + let a = undefined :: Team + let b = undefined :: ScheduleChanges + let c = undefined :: ScheduleChangesListing + let d = undefined :: ScheduleChangesListing_Team + + actual <- withSqliteConn ":memory:" $ runDbConn $ do + runMigration silentMigrationLogger $ do + migrate a + migrate b + migrate c + migrate d + _ <- dbimport results + deleteAll b + count_a <- countAll a + count_b <- countAll b + count_c <- countAll c + count_d <- countAll d + return $ sum [count_a, count_b, count_c, count_d] + let expected = 12 -- There are 16 team elements, but 4 are dupes, + -- so 12 unique teams should be left over. + actual @?= expected diff --git a/test/TestSuite.hs b/test/TestSuite.hs index a67ee2a..e153076 100644 --- a/test/TestSuite.hs +++ b/test/TestSuite.hs @@ -9,6 +9,7 @@ import TSN.XML.InjuriesDetail ( injuries_detail_tests ) import TSN.XML.JFile ( jfile_tests ) import TSN.XML.News ( news_tests ) import TSN.XML.Odds ( odds_tests ) +import TSN.XML.ScheduleChanges ( schedule_changes_tests ) import TSN.XML.Scores ( scores_tests ) import TSN.XML.SportInfo ( sport_info_tests ) import TSN.XML.Weather ( weather_tests ) @@ -25,6 +26,7 @@ tests = testGroup jfile_tests, news_tests, odds_tests, + schedule_changes_tests, scores_tests, sport_info_tests, weather_tests ] diff --git a/test/shell/import-duplicates.test b/test/shell/import-duplicates.test index a496248..1cefc53 100644 --- a/test/shell/import-duplicates.test +++ b/test/shell/import-duplicates.test @@ -15,15 +15,15 @@ rm -f shelltest.sqlite3 # Heartbeat.xml that doesn't really count. find ./test/xml -maxdepth 1 -name '*.xml' | wc -l >>> -20 +21 >>>= 0 # Run the imports again; we should get complaints about the duplicate -# xml_file_ids. There are 2 errors for each violation, so we expect 2*19 +# xml_file_ids. There are 2 errors for each violation, so we expect 2*20 # occurrences of the string 'ERROR'. ./dist/build/htsn-import/htsn-import -c 'shelltest.sqlite3' test/xml/*.xml 2>&1 | grep ERROR | wc -l >>> -38 +40 >>>= 0 # Finally, clean up after ourselves. -- 2.43.2