From ce9fabd584f2e8844b8b1ede9b29bb573e2033f7 Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Fri, 10 Jan 2014 23:10:24 -0500 Subject: [PATCH] Rewrite everything to use XmlImport/DbImport classes making things much more easy to understand. The docs need an update now. --- src/Main.hs | 70 ++++++++++-------- src/TSN/DbImport.hs | 57 ++++----------- src/TSN/XML/Injuries.hs | 28 +++++--- src/TSN/XML/InjuriesDetail.hs | 31 +++++--- src/TSN/XML/News.hs | 129 +++++++++++++++------------------- src/Xml.hs | 37 +++------- 6 files changed, 162 insertions(+), 190 deletions(-) diff --git a/src/Main.hs b/src/Main.hs index ed28f91..6a03800 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -28,7 +28,9 @@ import Text.XML.HXT.Core ( getText, hasName, readDocument, - runX ) + runX, + unpickleDoc, + xpickle) import Backend ( Backend(..) ) import CommandLine ( get_args ) @@ -43,10 +45,10 @@ import Network.Services.TSN.Report ( report_error ) import TSN.DbImport ( DbImport(..), ImportResult(..) ) import qualified TSN.XML.Heartbeat as Heartbeat ( verify ) -import qualified TSN.XML.Injuries as Injuries ( Listing ) -import qualified TSN.XML.InjuriesDetail as InjuriesDetail ( PlayerListing ) -import qualified TSN.XML.News as News ( News ) -import qualified TSN.XML.Odds as Odds ( Odds ) +import qualified TSN.XML.Injuries as Injuries ( Message ) +import qualified TSN.XML.InjuriesDetail as InjuriesDetail ( Message ) +import qualified TSN.XML.News as News ( Message ) +--import qualified TSN.XML.Odds as Odds ( Odds ) import Xml ( DtdName(..), parse_opts ) @@ -68,32 +70,30 @@ import_file :: Configuration -- ^ A configuration object needed for the -> FilePath -- ^ The path of the XML file to import. - -> IO (Maybe Int) -- ^ If we processed the file, Just the number - -- of records imported. Otherwise, Nothing. + -> IO Bool -- ^ True if we processed the file, False otherwise. import_file cfg path = do results <- parse_and_import `catch` exception_handler case results of [] -> do -- One of the arrows returned "nothing." report_error $ "Unable to determine DTD for file " ++ path ++ "." - return Nothing + return False (ImportFailed errmsg:_) -> do report_error errmsg - return Nothing + return False (ImportSkipped infomsg:_) -> do -- We processed the message but didn't import anything. Return -- "success" so that the XML file is deleted. report_info infomsg - return $ Just 0 - (ImportSucceeded count:_) -> do - report_info $ "Successfully imported " ++ (show count) ++ - " records from " ++ path ++ "." - return $ Just count + return True + (ImportSucceeded:_) -> do + report_info $ "Successfully imported " ++ path ++ "." + return True (ImportUnsupported infomsg:_) -> do -- For now we return "success" for these too, since we know we don't -- support a bunch of DTDs and we want them to get deleted. report_info infomsg - return $ Just 0 + return True where -- | This will catch *any* exception, even the ones thrown by -- Haskell's 'error' (which should never occur under normal @@ -135,29 +135,39 @@ import_file cfg path = do -- We special-case the heartbeat so it doesn't have to run in -- the database monad. | dtd == "Heartbeat.dtd" = Heartbeat.verify xml - | otherwise = + | otherwise = do -- We need NoMonomorphismRestriction here. if backend cfg == Postgres - then withPostgresqlConn cs $ runDbConn $ importer xml - else withSqliteConn cs $ runDbConn $ importer xml + then withPostgresqlConn cs $ runDbConn $ importer + else withSqliteConn cs $ runDbConn $ importer where -- | Pull the real connection String out of the configuration. cs :: String cs = get_connection_string $ connection_string cfg + -- | Convenience; we use this everywhere below in 'importer'. + migrate_and_import m = dbmigrate m >> dbimport m + importer - | dtd == "injuriesxml.dtd" = - dbimport (undefined :: Injuries.Listing) + | dtd == "injuriesxml.dtd" = do + let m = unpickleDoc xpickle xml :: Maybe Injuries.Message + let errmsg = "Could not unpickle injuriesxml." + maybe (return $ ImportFailed errmsg) migrate_and_import m + + | dtd == "Injuries_Detail_XML.dtd" = do + let m = unpickleDoc xpickle xml :: Maybe InjuriesDetail.Message + let errmsg = "Could not unpickle Injuries_Detail_XML." + maybe (return $ ImportFailed errmsg) migrate_and_import m - | dtd == "Injuries_Detail_XML.dtd" = - dbimport (undefined :: InjuriesDetail.PlayerListing) - | dtd == "newsxml.dtd" = - dbimport (undefined :: News.News) + | dtd == "newsxml.dtd" = do + let m = unpickleDoc xpickle xml :: Maybe News.Message + let errmsg = "Could not unpickle newsxml." + maybe (return $ ImportFailed errmsg) migrate_and_import m - | dtd == "Odds_XML.dtd" = undefined + -- | dtd == "Odds_XML.dtd" = undefined - | otherwise = \_ -> do -- Dummy arg simplifies the other cases. + | otherwise = do let infomsg = "Unrecognized DTD in " ++ path ++ ": " ++ dtd ++ "." return $ ImportUnsupported infomsg @@ -197,10 +207,10 @@ main = do -- Zip the results with the files list to find out which ones can be -- deleted. let result_pairs = zip (OC.xml_files opt_config) results - let victims = [ (p,c) | (p, Just c) <- result_pairs ] - let imported_count = sum $ map snd victims - report_info $ "Imported " ++ (show imported_count) ++ " records total." - mapM_ ((kill True) . fst) victims + let victims = [ p | (p, True) <- result_pairs ] + let imported_count = length victims + report_info $ "Imported " ++ (show imported_count) ++ " document(s) total." + mapM_ (kill True) victims where -- | Wrap these two actions into one function so that we don't diff --git a/src/TSN/DbImport.hs b/src/TSN/DbImport.hs index 3aeb29f..e0dd134 100644 --- a/src/TSN/DbImport.hs +++ b/src/TSN/DbImport.hs @@ -3,61 +3,32 @@ module TSN.DbImport where import Control.Monad.IO.Class ( MonadIO ) -import Database.Groundhog ( - defaultMigrationLogger, - insert, - migrate, - runMigration ) -import Database.Groundhog.Core ( PersistBackend, PersistEntity ) -import Text.XML.HXT.Core ( - XmlPickler, - XmlTree, - unpickleDoc, - xpickle ) +import Database.Groundhog.Core ( PersistBackend ) +import TSN.XmlImport ( XmlImport(..) ) --- | The type that will be returned from every file import attempt. If --- there was an error, its description will be wrapped in an Err. If --- we successfully imported records, the number of records imported --- will be wrapped in a Succ. --- --- Anything else will be wrapped in a "Info" constructor; --- i.e. somewhere between success and failure. This is like an --- 'Either' with three choices. A "Info" return value means that --- the XML document *was* processed, so it should be removed. +-- | The type that will be returned from every file import attempt. -- data ImportResult = ImportFailed String -- ^ Failure with an error message. | ImportSkipped String -- ^ We processed the file, but didn't import it. -- The reason is contained in the second field. - | ImportSucceeded Int -- ^ We did import records, and here's how many. + | ImportSucceeded -- ^ We did import records. | ImportUnsupported String -- ^ We didn't know how to process this file. -- The second field should contain info. -- | Instances of this type know how to insert themselves into a -- Groundhog database. +-- class DbImport a where - dbimport :: (MonadIO m, PersistBackend m) - => a - -> XmlTree - -> m ImportResult + -- | Import an instance of type @a@. + dbimport :: (PersistBackend m) => a -> m ImportResult + -- | This must migrate *all* stuffs that can potentially be + -- created/used by the type @a@. + dbmigrate :: (MonadIO m, PersistBackend m) => a -> m () --- | We put the 'Configuration' and 'XmlTree' arguments last so that --- it's easy to eta reduce all of the import_foo functions that call --- this. --- -import_generic :: (XmlPickler a, MonadIO m, PersistEntity b, PersistBackend m) - => (a -> [b]) -- ^ listings getter - -> b -- ^ Dummy Listing instance needed for 'migrate' - -> XmlTree - -> m ImportResult -- ^ Return the number of records inserted. -import_generic g dummy xml = do - runMigration defaultMigrationLogger $ migrate dummy - let root_element = unpickleDoc xpickle xml - case root_element of - Nothing -> return $ - ImportFailed "Could not unpickle document in import_generic." - Just elt -> do - ids <- mapM insert (g elt) - return $ ImportSucceeded (length ids) +dbimport_generic :: (XmlImport a, MonadIO m, PersistBackend m) + => a + -> m ImportResult +dbimport_generic x = insert_xml x >> return ImportSucceeded diff --git a/src/TSN/XML/Injuries.hs b/src/TSN/XML/Injuries.hs index f425f89..7ec0a4d 100644 --- a/src/TSN/XML/Injuries.hs +++ b/src/TSN/XML/Injuries.hs @@ -15,14 +15,16 @@ -- automatically. The root message is not retained. -- module TSN.XML.Injuries ( - Listing, - Message( listings ), + Message, injuries_tests ) where import Data.Data ( Data ) import Data.Typeable ( Typeable ) -import Database.Groundhog() +import Database.Groundhog ( + defaultMigrationLogger, + migrate, + runMigration ) import Database.Groundhog.TH ( defaultCodegenConfig, groundhog, @@ -46,9 +48,9 @@ import Text.XML.HXT.Core ( xpWrap ) -import TSN.DbImport ( DbImport(..), import_generic ) -import Xml ( pickle_unpickle, unpickleable ) - +import TSN.DbImport ( DbImport(..), ImportResult(..) ) +import TSN.XmlImport ( XmlImport(..) ) +import Xml ( FromXml(..), pickle_unpickle, unpickleable ) data InjuriesTeam = InjuriesTeam { @@ -64,6 +66,12 @@ data Listing = updated :: Maybe Bool } deriving (Eq, Show) +instance FromXml Listing where + type Db Listing = Listing + from_xml = id + +instance XmlImport Listing + data Message = Message { xml_file_id :: Int, @@ -74,6 +82,11 @@ data Message = time_stamp :: String } deriving (Eq, Show) +instance DbImport Message where + dbimport msg = mapM_ insert_xml (listings msg) >> return ImportSucceeded + + dbmigrate _ = + runMigration defaultMigrationLogger $ migrate (undefined :: Listing) mkPersist defaultCodegenConfig [groundhog| - entity: Listing @@ -144,9 +157,6 @@ instance XmlPickler Message where -instance DbImport Listing where - dbimport = import_generic listings - -- * Tasty Tests injuries_tests :: TestTree injuries_tests = diff --git a/src/TSN/XML/InjuriesDetail.hs b/src/TSN/XML/InjuriesDetail.hs index bb529d0..a7a7da3 100644 --- a/src/TSN/XML/InjuriesDetail.hs +++ b/src/TSN/XML/InjuriesDetail.hs @@ -18,15 +18,16 @@ -- are not retained. -- module TSN.XML.InjuriesDetail ( - Listing ( player_listings ), - Message ( listings ), - PlayerListing, + Message, injuries_detail_tests ) where import Data.Time ( UTCTime ) import Data.Tuple.Curry ( uncurryN ) -import Database.Groundhog() +import Database.Groundhog ( + defaultMigrationLogger, + migrate, + runMigration ) import Database.Groundhog.TH ( defaultCodegenConfig, groundhog, @@ -47,9 +48,10 @@ import Text.XML.HXT.Core ( xpText0, xpWrap ) -import TSN.DbImport ( DbImport(..), import_generic ) import TSN.Picklers( xp_date, xp_team_id ) -import Xml ( pickle_unpickle, unpickleable ) +import TSN.DbImport ( DbImport(..), ImportResult(..) ) +import TSN.XmlImport ( XmlImport(..) ) +import Xml ( FromXml(..), pickle_unpickle, unpickleable ) data PlayerListing = @@ -67,6 +69,12 @@ data PlayerListing = } deriving (Eq, Show) +instance FromXml PlayerListing where + type Db PlayerListing = PlayerListing + from_xml = id + +instance XmlImport PlayerListing + data Listing = Listing { listing_team_id :: Int -- ^ Avoid conflict with PlayerListing's team_id @@ -74,6 +82,7 @@ data Listing = player_listings :: [PlayerListing] } deriving (Eq, Show) + data Message = Message { xml_file_id :: Int, @@ -84,6 +93,13 @@ data Message = time_stamp :: String } deriving (Eq, Show) +instance DbImport Message where + dbimport msg = do + mapM_ insert_xml (concatMap player_listings $ listings msg) + return ImportSucceeded + + dbmigrate _ = + runMigration defaultMigrationLogger $ migrate (undefined :: PlayerListing) mkPersist defaultCodegenConfig [groundhog| - entity: PlayerListing @@ -158,9 +174,6 @@ pickle_message = instance XmlPickler Message where xpickle = pickle_message -instance DbImport PlayerListing where - dbimport = import_generic ( (concatMap player_listings) . listings) - -- * Tasty Tests injuries_detail_tests :: TestTree diff --git a/src/TSN/XML/News.hs b/src/TSN/XML/News.hs index 550801b..8da4329 100644 --- a/src/TSN/XML/News.hs +++ b/src/TSN/XML/News.hs @@ -12,7 +12,7 @@ -- root element \ that contains an entire news item. -- module TSN.XML.News ( - News, + Message, news_tests ) where @@ -22,8 +22,7 @@ import Data.Tuple.Curry ( uncurryN ) import Data.Typeable ( Typeable ) import Database.Groundhog ( defaultMigrationLogger, - insert, - insertByAll, + insert_, migrate, runMigration ) import Database.Groundhog.Core ( DefaultKey ) @@ -31,13 +30,11 @@ import Database.Groundhog.TH ( defaultCodegenConfig, groundhog, mkPersist ) -import System.Console.CmdArgs.Default ( Default(..) ) import Test.Tasty ( TestTree, testGroup ) import Test.Tasty.HUnit ( (@?=), testCase ) import Text.XML.HXT.Core ( PU, XmlPickler(..), - unpickleDoc, xp13Tuple, xpAttr, xpElem, @@ -53,7 +50,8 @@ import TSN.Codegen ( tsn_codegen_config, tsn_db_field_namer ) -- Used in a test import TSN.DbImport ( DbImport(..), ImportResult(..) ) -import Xml ( ToFromXml(..), pickle_unpickle, unpickleable ) +import TSN.XmlImport ( XmlImport(..) ) +import Xml ( FromXml(..), pickle_unpickle, unpickleable ) @@ -62,6 +60,13 @@ data NewsTeam = NewsTeam { team_name :: String } deriving (Eq, Show) +instance FromXml NewsTeam where + type Db NewsTeam = NewsTeam + from_xml = id + +instance XmlImport NewsTeam + + -- | Mapping between News records and NewsTeam records in the -- database. We name the fields (even though they're never used) for -- Groundhog's benefit. @@ -78,6 +83,13 @@ data NewsLocation = country :: String } deriving (Eq, Show) +instance FromXml NewsLocation where + type Db NewsLocation = NewsLocation + from_xml = id + +instance XmlImport NewsLocation + + -- | Mapping between News records and NewsLocation records in the -- database. We name the fields (even though they're never used) for -- Groundhog's benefit. @@ -126,33 +138,51 @@ data News = db_continue :: Maybe String } deriving (Data, Eq, Show, Typeable) -instance ToFromXml News where - type Xml News = Message - type Container News = () - - -- Use a record wildcard here so GHC doesn't complain that we never - -- used our named fields. - to_xml (News {..}) = - Message - def - def - db_mid - def - db_sport - db_url - def - def - db_sms - db_editor - db_text - db_continue - def +instance FromXml Message where + type Db Message = News -- We don't need the key argument (from_xml_fk) since the XML type -- contains more information in this case. from_xml (Message _ _ c _ e f _ _ i j k l _) = News c e f i j k l +instance XmlImport Message + +instance DbImport Message where + dbmigrate _ = + runMigration defaultMigrationLogger $ do + migrate (undefined :: NewsTeam) + migrate (undefined :: NewsLocation) + migrate (undefined :: News) + migrate (undefined :: News_NewsTeam) + migrate (undefined :: News_NewsLocation) + + dbimport message = do + -- Insert the message and acquire its primary key (unique ID) + news_id <- insert_xml message + + -- And insert each one into its own table. We use insertByAll_xml + -- because we know that most teams will already exist, and we + -- want to get back a Left (id) for the existing team when + -- there's a collision. In fact, if the insert succeeds, we'll + -- get a Right (id) back, so we can disregard the Either + -- constructor entirely. That's what the (either id id) does. + either_nt_ids <- mapM insertByAll_xml (xml_teams message) + let nt_ids = map (either id id) either_nt_ids + + -- Now that the teams have been inserted, create + -- news__news_team records mapping beween the two. + let news_news_teams = map (News_NewsTeam news_id) nt_ids + mapM_ insert_ news_news_teams + + -- Do all of that over again for the NewsLocations. + either_loc_ids <- mapM insertByAll_xml (xml_locations message) + let loc_ids = map (either id id) either_loc_ids + let news_news_locations = map (News_NewsLocation news_id) loc_ids + mapM_ insert_ news_news_locations + + return ImportSucceeded + -- These types don't have special XML representations or field name -- collisions so we use the defaultCodegenConfig and give their fields @@ -296,51 +326,6 @@ instance XmlPickler Message where -instance DbImport News where - dbimport _ xml = do - runMigration defaultMigrationLogger $ do - migrate (undefined :: News) - migrate (undefined :: NewsTeam) - migrate (undefined :: NewsLocation) - migrate (undefined :: News_NewsTeam) - migrate (undefined :: News_NewsLocation) - let root_element = unpickleDoc xpickle xml :: Maybe Message - case root_element of - Nothing -> do - let errmsg = "Could not unpickle News message in dbimport." - return $ ImportFailed errmsg - Just message -> do - -- Insert the message and acquire its primary key (unique ID) - news_id <- insert (from_xml message :: News) - - -- And insert each one into its own table. We use insertByAll - -- because we know that most teams will already exist, and we - -- want to get back a Left (id) for the existing team when - -- there's a collision. In fact, if the insert succeeds, we'll - -- get a Right (id) back, so we can disregard the Either - -- constructor entirely. That's what the (either id id) does. - either_nt_ids <- mapM insertByAll (xml_teams message) - let nt_ids = map (either id id) either_nt_ids - - -- Now that the teams have been inserted, create - -- news__news_team records mapping beween the two. - let news_news_teams = map (News_NewsTeam news_id) nt_ids - nnt_ids <- mapM insert news_news_teams - - - -- Do all of that over again for the NewsLocations. - either_loc_ids <- mapM insertByAll (xml_locations message) - let loc_ids = map (either id id) either_loc_ids - let news_news_locations = map (News_NewsLocation news_id) loc_ids - nnl_ids <- mapM insertByAll news_news_locations - - return $ ImportSucceeded (1 + -- for the News - (length nt_ids) + - (length loc_ids) + - (length nnt_ids) + - (length nnl_ids)) - - -- * Tasty Tests news_tests :: TestTree news_tests = diff --git a/src/Xml.hs b/src/Xml.hs index c4d4049..95eddc0 100644 --- a/src/Xml.hs +++ b/src/Xml.hs @@ -4,14 +4,13 @@ -- module Xml ( DtdName(..), - ToFromXml(..), + FromXml(..), parse_opts, pickle_unpickle, unpickleable ) where import Control.Exception ( SomeException(..), catch ) -import Database.Groundhog ( AutoKey ) import Text.XML.HXT.Core ( (>>>), (/>), @@ -27,37 +26,21 @@ import Text.XML.HXT.Core ( yes ) --- | A typeclass for types which can be converted into an associated --- XML type. The story behind this is long, but basically, we need +-- | A typeclass for XML types that can be converted into an associated +-- database type. The story behind this is long, but basically, we need -- to different types for each XML thingie we're going to import: a -- database type and an XML type. Both Groundhog and HXT are very -- particular about the types that they can use, and there's no way -- to reuse e.g. a type that HXT can pickle in Groundhog. So this --- typeclass gives us a way to get the XML type from the Groundhog --- type. +-- typeclass gives us a way to get the database type from the XML +-- type that we have to define for HXT. -- --- At first there appears to be an equally-valid approach, getting the --- Groundhog type from the XML one. But Groundhog won't use type family --- instances, so here we are. --- -class ToFromXml a where - -- | Each instance a must declare its associated XML type (Xml a) - type Xml a :: * - type Container a :: * - - -- | And provide a function for getting an (Xml a) out of an "a." - to_xml :: a -> Xml a - - -- | And provide a function for getting an "a" out of an (Xml a). - from_xml :: Xml a -> a +class FromXml a where + -- | Each instance a must declare its associated database type (Db a) + type Db a :: * - -- | Often we need to provide a key to use as a foreign key into - -- some container. If the instance "belongs" to some other object, - -- then it might need to be passed a key before it can un-XML - -- itself. For example, the XML version of 'NewsTeam' doesn't - -- contain a message ID which is part of its database type. - from_xml_fk :: AutoKey (Container a) -> Xml a -> a - from_xml_fk _ = from_xml + -- | And provide a function for getting a (Db a) out of an "a". + from_xml :: a -> Db a -- | Represents the DTD filename ("SYSTEM") part of the DOCTYPE -- 2.44.2