getText,
hasName,
readDocument,
- runX )
+ runX,
+ unpickleDoc,
+ xpickle)
import Backend ( Backend(..) )
import CommandLine ( get_args )
report_error )
import TSN.DbImport ( DbImport(..), ImportResult(..) )
import qualified TSN.XML.Heartbeat as Heartbeat ( verify )
-import qualified TSN.XML.Injuries as Injuries ( Listing )
-import qualified TSN.XML.InjuriesDetail as InjuriesDetail ( PlayerListing )
-import qualified TSN.XML.News as News ( News )
-import qualified TSN.XML.Odds as Odds ( Odds )
+import qualified TSN.XML.Injuries as Injuries ( Message )
+import qualified TSN.XML.InjuriesDetail as InjuriesDetail ( Message )
+import qualified TSN.XML.News as News ( Message )
+--import qualified TSN.XML.Odds as Odds ( Odds )
import Xml ( DtdName(..), parse_opts )
-> FilePath -- ^ The path of the XML file to import.
- -> IO (Maybe Int) -- ^ If we processed the file, Just the number
- -- of records imported. Otherwise, Nothing.
+ -> IO Bool -- ^ True if we processed the file, False otherwise.
import_file cfg path = do
results <- parse_and_import `catch` exception_handler
case results of
[] -> do
-- One of the arrows returned "nothing."
report_error $ "Unable to determine DTD for file " ++ path ++ "."
- return Nothing
+ return False
(ImportFailed errmsg:_) -> do
report_error errmsg
- return Nothing
+ return False
(ImportSkipped infomsg:_) -> do
-- We processed the message but didn't import anything. Return
-- "success" so that the XML file is deleted.
report_info infomsg
- return $ Just 0
- (ImportSucceeded count:_) -> do
- report_info $ "Successfully imported " ++ (show count) ++
- " records from " ++ path ++ "."
- return $ Just count
+ return True
+ (ImportSucceeded:_) -> do
+ report_info $ "Successfully imported " ++ path ++ "."
+ return True
(ImportUnsupported infomsg:_) -> do
-- For now we return "success" for these too, since we know we don't
-- support a bunch of DTDs and we want them to get deleted.
report_info infomsg
- return $ Just 0
+ return True
where
-- | This will catch *any* exception, even the ones thrown by
-- Haskell's 'error' (which should never occur under normal
-- We special-case the heartbeat so it doesn't have to run in
-- the database monad.
| dtd == "Heartbeat.dtd" = Heartbeat.verify xml
- | otherwise =
+ | otherwise = do
-- We need NoMonomorphismRestriction here.
if backend cfg == Postgres
- then withPostgresqlConn cs $ runDbConn $ importer xml
- else withSqliteConn cs $ runDbConn $ importer xml
+ then withPostgresqlConn cs $ runDbConn $ importer
+ else withSqliteConn cs $ runDbConn $ importer
where
-- | Pull the real connection String out of the configuration.
cs :: String
cs = get_connection_string $ connection_string cfg
+ -- | Convenience; we use this everywhere below in 'importer'.
+ migrate_and_import m = dbmigrate m >> dbimport m
+
importer
- | dtd == "injuriesxml.dtd" =
- dbimport (undefined :: Injuries.Listing)
+ | dtd == "injuriesxml.dtd" = do
+ let m = unpickleDoc xpickle xml :: Maybe Injuries.Message
+ let errmsg = "Could not unpickle injuriesxml."
+ maybe (return $ ImportFailed errmsg) migrate_and_import m
+
+ | dtd == "Injuries_Detail_XML.dtd" = do
+ let m = unpickleDoc xpickle xml :: Maybe InjuriesDetail.Message
+ let errmsg = "Could not unpickle Injuries_Detail_XML."
+ maybe (return $ ImportFailed errmsg) migrate_and_import m
- | dtd == "Injuries_Detail_XML.dtd" =
- dbimport (undefined :: InjuriesDetail.PlayerListing)
- | dtd == "newsxml.dtd" =
- dbimport (undefined :: News.News)
+ | dtd == "newsxml.dtd" = do
+ let m = unpickleDoc xpickle xml :: Maybe News.Message
+ let errmsg = "Could not unpickle newsxml."
+ maybe (return $ ImportFailed errmsg) migrate_and_import m
- | dtd == "Odds_XML.dtd" = undefined
+ -- | dtd == "Odds_XML.dtd" = undefined
- | otherwise = \_ -> do -- Dummy arg simplifies the other cases.
+ | otherwise = do
let infomsg =
"Unrecognized DTD in " ++ path ++ ": " ++ dtd ++ "."
return $ ImportUnsupported infomsg
-- Zip the results with the files list to find out which ones can be
-- deleted.
let result_pairs = zip (OC.xml_files opt_config) results
- let victims = [ (p,c) | (p, Just c) <- result_pairs ]
- let imported_count = sum $ map snd victims
- report_info $ "Imported " ++ (show imported_count) ++ " records total."
- mapM_ ((kill True) . fst) victims
+ let victims = [ p | (p, True) <- result_pairs ]
+ let imported_count = length victims
+ report_info $ "Imported " ++ (show imported_count) ++ " document(s) total."
+ mapM_ (kill True) victims
where
-- | Wrap these two actions into one function so that we don't
where
import Control.Monad.IO.Class ( MonadIO )
-import Database.Groundhog (
- defaultMigrationLogger,
- insert,
- migrate,
- runMigration )
-import Database.Groundhog.Core ( PersistBackend, PersistEntity )
-import Text.XML.HXT.Core (
- XmlPickler,
- XmlTree,
- unpickleDoc,
- xpickle )
+import Database.Groundhog.Core ( PersistBackend )
+import TSN.XmlImport ( XmlImport(..) )
--- | The type that will be returned from every file import attempt. If
--- there was an error, its description will be wrapped in an Err. If
--- we successfully imported records, the number of records imported
--- will be wrapped in a Succ.
---
--- Anything else will be wrapped in a "Info" constructor;
--- i.e. somewhere between success and failure. This is like an
--- 'Either' with three choices. A "Info" return value means that
--- the XML document *was* processed, so it should be removed.
+-- | The type that will be returned from every file import attempt.
--
data ImportResult =
ImportFailed String -- ^ Failure with an error message.
| ImportSkipped String -- ^ We processed the file, but didn't import it.
-- The reason is contained in the second field.
- | ImportSucceeded Int -- ^ We did import records, and here's how many.
+ | ImportSucceeded -- ^ We did import records.
| ImportUnsupported String -- ^ We didn't know how to process this file.
-- The second field should contain info.
-- | Instances of this type know how to insert themselves into a
-- Groundhog database.
+--
class DbImport a where
- dbimport :: (MonadIO m, PersistBackend m)
- => a
- -> XmlTree
- -> m ImportResult
+ -- | Import an instance of type @a@.
+ dbimport :: (PersistBackend m) => a -> m ImportResult
+ -- | This must migrate *all* stuffs that can potentially be
+ -- created/used by the type @a@.
+ dbmigrate :: (MonadIO m, PersistBackend m) => a -> m ()
--- | We put the 'Configuration' and 'XmlTree' arguments last so that
--- it's easy to eta reduce all of the import_foo functions that call
--- this.
---
-import_generic :: (XmlPickler a, MonadIO m, PersistEntity b, PersistBackend m)
- => (a -> [b]) -- ^ listings getter
- -> b -- ^ Dummy Listing instance needed for 'migrate'
- -> XmlTree
- -> m ImportResult -- ^ Return the number of records inserted.
-import_generic g dummy xml = do
- runMigration defaultMigrationLogger $ migrate dummy
- let root_element = unpickleDoc xpickle xml
- case root_element of
- Nothing -> return $
- ImportFailed "Could not unpickle document in import_generic."
- Just elt -> do
- ids <- mapM insert (g elt)
- return $ ImportSucceeded (length ids)
+dbimport_generic :: (XmlImport a, MonadIO m, PersistBackend m)
+ => a
+ -> m ImportResult
+dbimport_generic x = insert_xml x >> return ImportSucceeded
-- automatically. The root message is not retained.
--
module TSN.XML.Injuries (
- Listing,
- Message( listings ),
+ Message,
injuries_tests )
where
import Data.Data ( Data )
import Data.Typeable ( Typeable )
-import Database.Groundhog()
+import Database.Groundhog (
+ defaultMigrationLogger,
+ migrate,
+ runMigration )
import Database.Groundhog.TH (
defaultCodegenConfig,
groundhog,
xpWrap )
-import TSN.DbImport ( DbImport(..), import_generic )
-import Xml ( pickle_unpickle, unpickleable )
-
+import TSN.DbImport ( DbImport(..), ImportResult(..) )
+import TSN.XmlImport ( XmlImport(..) )
+import Xml ( FromXml(..), pickle_unpickle, unpickleable )
data InjuriesTeam =
InjuriesTeam {
updated :: Maybe Bool }
deriving (Eq, Show)
+instance FromXml Listing where
+ type Db Listing = Listing
+ from_xml = id
+
+instance XmlImport Listing
+
data Message =
Message {
xml_file_id :: Int,
time_stamp :: String }
deriving (Eq, Show)
+instance DbImport Message where
+ dbimport msg = mapM_ insert_xml (listings msg) >> return ImportSucceeded
+
+ dbmigrate _ =
+ runMigration defaultMigrationLogger $ migrate (undefined :: Listing)
mkPersist defaultCodegenConfig [groundhog|
- entity: Listing
-instance DbImport Listing where
- dbimport = import_generic listings
-
-- * Tasty Tests
injuries_tests :: TestTree
injuries_tests =
-- are not retained.
--
module TSN.XML.InjuriesDetail (
- Listing ( player_listings ),
- Message ( listings ),
- PlayerListing,
+ Message,
injuries_detail_tests )
where
import Data.Time ( UTCTime )
import Data.Tuple.Curry ( uncurryN )
-import Database.Groundhog()
+import Database.Groundhog (
+ defaultMigrationLogger,
+ migrate,
+ runMigration )
import Database.Groundhog.TH (
defaultCodegenConfig,
groundhog,
xpText0,
xpWrap )
-import TSN.DbImport ( DbImport(..), import_generic )
import TSN.Picklers( xp_date, xp_team_id )
-import Xml ( pickle_unpickle, unpickleable )
+import TSN.DbImport ( DbImport(..), ImportResult(..) )
+import TSN.XmlImport ( XmlImport(..) )
+import Xml ( FromXml(..), pickle_unpickle, unpickleable )
data PlayerListing =
}
deriving (Eq, Show)
+instance FromXml PlayerListing where
+ type Db PlayerListing = PlayerListing
+ from_xml = id
+
+instance XmlImport PlayerListing
+
data Listing =
Listing {
listing_team_id :: Int -- ^ Avoid conflict with PlayerListing's team_id
player_listings :: [PlayerListing] }
deriving (Eq, Show)
+
data Message =
Message {
xml_file_id :: Int,
time_stamp :: String }
deriving (Eq, Show)
+instance DbImport Message where
+ dbimport msg = do
+ mapM_ insert_xml (concatMap player_listings $ listings msg)
+ return ImportSucceeded
+
+ dbmigrate _ =
+ runMigration defaultMigrationLogger $ migrate (undefined :: PlayerListing)
mkPersist defaultCodegenConfig [groundhog|
- entity: PlayerListing
instance XmlPickler Message where
xpickle = pickle_message
-instance DbImport PlayerListing where
- dbimport = import_generic ( (concatMap player_listings) . listings)
-
-- * Tasty Tests
injuries_detail_tests :: TestTree
-- root element \<message\> that contains an entire news item.
--
module TSN.XML.News (
- News,
+ Message,
news_tests )
where
import Data.Typeable ( Typeable )
import Database.Groundhog (
defaultMigrationLogger,
- insert,
- insertByAll,
+ insert_,
migrate,
runMigration )
import Database.Groundhog.Core ( DefaultKey )
defaultCodegenConfig,
groundhog,
mkPersist )
-import System.Console.CmdArgs.Default ( Default(..) )
import Test.Tasty ( TestTree, testGroup )
import Test.Tasty.HUnit ( (@?=), testCase )
import Text.XML.HXT.Core (
PU,
XmlPickler(..),
- unpickleDoc,
xp13Tuple,
xpAttr,
xpElem,
tsn_codegen_config,
tsn_db_field_namer ) -- Used in a test
import TSN.DbImport ( DbImport(..), ImportResult(..) )
-import Xml ( ToFromXml(..), pickle_unpickle, unpickleable )
+import TSN.XmlImport ( XmlImport(..) )
+import Xml ( FromXml(..), pickle_unpickle, unpickleable )
NewsTeam { team_name :: String }
deriving (Eq, Show)
+instance FromXml NewsTeam where
+ type Db NewsTeam = NewsTeam
+ from_xml = id
+
+instance XmlImport NewsTeam
+
+
-- | Mapping between News records and NewsTeam records in the
-- database. We name the fields (even though they're never used) for
-- Groundhog's benefit.
country :: String }
deriving (Eq, Show)
+instance FromXml NewsLocation where
+ type Db NewsLocation = NewsLocation
+ from_xml = id
+
+instance XmlImport NewsLocation
+
+
-- | Mapping between News records and NewsLocation records in the
-- database. We name the fields (even though they're never used) for
-- Groundhog's benefit.
db_continue :: Maybe String }
deriving (Data, Eq, Show, Typeable)
-instance ToFromXml News where
- type Xml News = Message
- type Container News = ()
-
- -- Use a record wildcard here so GHC doesn't complain that we never
- -- used our named fields.
- to_xml (News {..}) =
- Message
- def
- def
- db_mid
- def
- db_sport
- db_url
- def
- def
- db_sms
- db_editor
- db_text
- db_continue
- def
+instance FromXml Message where
+ type Db Message = News
-- We don't need the key argument (from_xml_fk) since the XML type
-- contains more information in this case.
from_xml (Message _ _ c _ e f _ _ i j k l _) =
News c e f i j k l
+instance XmlImport Message
+
+instance DbImport Message where
+ dbmigrate _ =
+ runMigration defaultMigrationLogger $ do
+ migrate (undefined :: NewsTeam)
+ migrate (undefined :: NewsLocation)
+ migrate (undefined :: News)
+ migrate (undefined :: News_NewsTeam)
+ migrate (undefined :: News_NewsLocation)
+
+ dbimport message = do
+ -- Insert the message and acquire its primary key (unique ID)
+ news_id <- insert_xml message
+
+ -- And insert each one into its own table. We use insertByAll_xml
+ -- because we know that most teams will already exist, and we
+ -- want to get back a Left (id) for the existing team when
+ -- there's a collision. In fact, if the insert succeeds, we'll
+ -- get a Right (id) back, so we can disregard the Either
+ -- constructor entirely. That's what the (either id id) does.
+ either_nt_ids <- mapM insertByAll_xml (xml_teams message)
+ let nt_ids = map (either id id) either_nt_ids
+
+ -- Now that the teams have been inserted, create
+ -- news__news_team records mapping beween the two.
+ let news_news_teams = map (News_NewsTeam news_id) nt_ids
+ mapM_ insert_ news_news_teams
+
+ -- Do all of that over again for the NewsLocations.
+ either_loc_ids <- mapM insertByAll_xml (xml_locations message)
+ let loc_ids = map (either id id) either_loc_ids
+ let news_news_locations = map (News_NewsLocation news_id) loc_ids
+ mapM_ insert_ news_news_locations
+
+ return ImportSucceeded
+
-- These types don't have special XML representations or field name
-- collisions so we use the defaultCodegenConfig and give their fields
-instance DbImport News where
- dbimport _ xml = do
- runMigration defaultMigrationLogger $ do
- migrate (undefined :: News)
- migrate (undefined :: NewsTeam)
- migrate (undefined :: NewsLocation)
- migrate (undefined :: News_NewsTeam)
- migrate (undefined :: News_NewsLocation)
- let root_element = unpickleDoc xpickle xml :: Maybe Message
- case root_element of
- Nothing -> do
- let errmsg = "Could not unpickle News message in dbimport."
- return $ ImportFailed errmsg
- Just message -> do
- -- Insert the message and acquire its primary key (unique ID)
- news_id <- insert (from_xml message :: News)
-
- -- And insert each one into its own table. We use insertByAll
- -- because we know that most teams will already exist, and we
- -- want to get back a Left (id) for the existing team when
- -- there's a collision. In fact, if the insert succeeds, we'll
- -- get a Right (id) back, so we can disregard the Either
- -- constructor entirely. That's what the (either id id) does.
- either_nt_ids <- mapM insertByAll (xml_teams message)
- let nt_ids = map (either id id) either_nt_ids
-
- -- Now that the teams have been inserted, create
- -- news__news_team records mapping beween the two.
- let news_news_teams = map (News_NewsTeam news_id) nt_ids
- nnt_ids <- mapM insert news_news_teams
-
-
- -- Do all of that over again for the NewsLocations.
- either_loc_ids <- mapM insertByAll (xml_locations message)
- let loc_ids = map (either id id) either_loc_ids
- let news_news_locations = map (News_NewsLocation news_id) loc_ids
- nnl_ids <- mapM insertByAll news_news_locations
-
- return $ ImportSucceeded (1 + -- for the News
- (length nt_ids) +
- (length loc_ids) +
- (length nnt_ids) +
- (length nnl_ids))
-
-
-- * Tasty Tests
news_tests :: TestTree
news_tests =
--
module Xml (
DtdName(..),
- ToFromXml(..),
+ FromXml(..),
parse_opts,
pickle_unpickle,
unpickleable )
where
import Control.Exception ( SomeException(..), catch )
-import Database.Groundhog ( AutoKey )
import Text.XML.HXT.Core (
(>>>),
(/>),
yes )
--- | A typeclass for types which can be converted into an associated
--- XML type. The story behind this is long, but basically, we need
+-- | A typeclass for XML types that can be converted into an associated
+-- database type. The story behind this is long, but basically, we need
-- to different types for each XML thingie we're going to import: a
-- database type and an XML type. Both Groundhog and HXT are very
-- particular about the types that they can use, and there's no way
-- to reuse e.g. a type that HXT can pickle in Groundhog. So this
--- typeclass gives us a way to get the XML type from the Groundhog
--- type.
+-- typeclass gives us a way to get the database type from the XML
+-- type that we have to define for HXT.
--
--- At first there appears to be an equally-valid approach, getting the
--- Groundhog type from the XML one. But Groundhog won't use type family
--- instances, so here we are.
---
-class ToFromXml a where
- -- | Each instance a must declare its associated XML type (Xml a)
- type Xml a :: *
- type Container a :: *
-
- -- | And provide a function for getting an (Xml a) out of an "a."
- to_xml :: a -> Xml a
-
- -- | And provide a function for getting an "a" out of an (Xml a).
- from_xml :: Xml a -> a
+class FromXml a where
+ -- | Each instance a must declare its associated database type (Db a)
+ type Db a :: *
- -- | Often we need to provide a key to use as a foreign key into
- -- some container. If the instance "belongs" to some other object,
- -- then it might need to be passed a key before it can un-XML
- -- itself. For example, the XML version of 'NewsTeam' doesn't
- -- contain a message ID which is part of its database type.
- from_xml_fk :: AutoKey (Container a) -> Xml a -> a
- from_xml_fk _ = from_xml
+ -- | And provide a function for getting a (Db a) out of an "a".
+ from_xml :: a -> Db a
-- | Represents the DTD filename ("SYSTEM") part of the DOCTYPE