From: Michael Orlitzky Date: Sat, 28 Dec 2013 03:14:29 +0000 (-0500) Subject: Implement database insertion (generically!). X-Git-Tag: 0.0.1~157 X-Git-Url: https://gitweb.michael.orlitzky.com/?a=commitdiff_plain;h=767dc8dbacaf6dcdefd268c7c8c08cd0c23c3391;p=dead%2Fhtsn-import.git Implement database insertion (generically!). --- diff --git a/src/Main.hs b/src/Main.hs index 2ca3a83..11cbfae 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -1,60 +1,119 @@ -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE OverloadedStrings #-} - module Main where ---import Control.Monad.IO.Class ( liftIO ) -import Data.Maybe ( listToMaybe ) ---import Database.Groundhog.TH ---import Database.Groundhog.Sqlite -import Text.Show.Pretty ( ppShow ) +import Control.Arrow ( (&&&), arr, returnA ) +import Database.Groundhog.Core ( PersistEntity ) +import Database.Groundhog.Sqlite ( + defaultMigrationLogger, + insert, + migrate, + runDbConn, + runMigration, + withSqliteConn ) import Text.XML.HXT.Core ( + ArrowXml, + IOStateArrow, SysConfigList, XmlPickler, + XmlTree, + (>>>), + (/>), + getAttrl, + getText, + hasName, no, + readDocument, runX, + unpickleDoc, withPreserveComment, withRemoveWS, + withSubstDTDEntities, withValidate, xpickle, - xunpickleDocument, yes ) +import System.Environment ( getArgs ) +import qualified TSN.Injuries as Injuries ( + Listing, + Message ( listings ) ) +import qualified TSN.InjuriesDetail as InjuriesDetail ( + Listing ( player_listings ), + Message ( listings ), + PlayerListing ) -import qualified TSN.Injuries as Injuries ( Message ) -import qualified TSN.InjuriesDetail as InjuriesDetail ( Message ) +-- | A list of options passed to 'readDocument' when we parse an XML +-- document. We don't validate because the DTDs from TSN are +-- wrong. As a result, we don't want to keep useless DTDs +-- areound. Thus we disable 'withSubstDTDEntities' which, when +-- combined with "withValidate no", prevents HXT from trying to read +-- the DTD at all. +-- parse_opts :: SysConfigList parse_opts = [ withPreserveComment no, withRemoveWS yes, + withSubstDTDEntities no, withValidate no ] -parse_file :: XmlPickler a => FilePath -> IO (Maybe a) -parse_file path = - fmap listToMaybe $ - runX ( xunpickleDocument xpickle parse_opts path ) - --- main_sql :: IO () --- main_sql = --- withSqliteConn "foo.sqlite3" $ runDbConn $ do --- runMigration defaultMigrationLogger $ do --- migrate (undefined :: Injuries.Message) --- migrate (undefined :: Injuries.Listing) - --- msg :: Maybe Injuries.Message <- liftIO $ parse_file --- "test/xml/injuriesxml.xml" --- case msg of --- Nothing -> return () --- Just m -> do --- msg_id <- insert m --- return () + +-- | We put the 'XmlTree' argument last so that it's easy to eta +-- reduce all of the import_foo functions that call this. +-- +import_generic :: (XmlPickler a, PersistEntity b) + => b -- ^ Dummy Listing instance needed for 'migrate' + -> (a -> [b]) -- ^ listings getter + -> XmlTree + -> IO () +import_generic dummy g xml = + withSqliteConn "foo.sqlite3" $ runDbConn $ do + runMigration defaultMigrationLogger $ do + migrate dummy + let msg = unpickleDoc xpickle xml + case msg of + Nothing -> error "Should unpickle!" + Just m -> mapM_ (\l -> insert l) (g m) + +-- | Import TSN.Injuries from an 'XmlTree'. +import_injuries :: XmlTree -> IO () +import_injuries = + import_generic + (undefined :: Injuries.Listing) + Injuries.listings + +-- | Import TSN.InjuriesDetail from an 'XmlTree'. +import_injuries_detail :: XmlTree -> IO () +import_injuries_detail = + import_generic + (undefined :: InjuriesDetail.PlayerListing) + ( (concatMap InjuriesDetail.player_listings) . InjuriesDetail.listings) + +import_file :: FilePath -> IO () +import_file path = do + results <- runX $ readA >>> (doctypeA &&& returnA) >>> (arr import_with_dtd) + case results of + [] -> error "ERROR: Unable to determine DOCTYPE." + (r:_) -> r -- Need to do something with the result or it gets GCed? + -- We do only expect one result fortunately. + where + -- | An arrow that reads a document into an 'XmlTree'. + readA :: IOStateArrow s a XmlTree + readA = readDocument parse_opts path + + -- | An arrow which parses the doctype "SYSTEM" of an 'XmlTree'. + -- We use these to determine the parser to use. + doctypeA :: ArrowXml a => a XmlTree String + doctypeA = getAttrl >>> hasName "doctype-SYSTEM" /> getText + + -- | Takes a 'Doctype', 'XmlTree' pair and uses the 'Doctype' to + -- determine which function to call on the 'XmlTree'. + import_with_dtd :: (String, XmlTree) -> IO () + import_with_dtd (dtd,xml) + | dtd == "injuriesxml.dtd" = import_injuries xml + | dtd == "Injuries_Detail_XML.dtd" = import_injuries_detail xml + | otherwise = error "ERROR: Unrecognized DTD." + main :: IO () main = do - msg1 :: Maybe Injuries.Message <- parse_file "test/xml/injuriesxml.xml" - putStr $ ppShow msg1 - - msg2 :: Maybe InjuriesDetail.Message <- parse_file - "test/xml/Injuries_Detail_XML.xml" - putStr $ ppShow msg2 + args <- getArgs + import_file (args !! 0) diff --git a/src/TSN/Injuries.hs b/src/TSN/Injuries.hs index 4809786..3877c12 100644 --- a/src/TSN/Injuries.hs +++ b/src/TSN/Injuries.hs @@ -13,7 +13,8 @@ -- automatically. The root message is not retained. -- module TSN.Injuries ( - Message ) + Listing, + Message( listings ) ) where import Data.Tuple.Curry ( uncurryN ) @@ -31,7 +32,6 @@ import Text.XML.HXT.Core ( xpWrap ) - data Listing = Listing { team :: String, diff --git a/src/TSN/InjuriesDetail.hs b/src/TSN/InjuriesDetail.hs index 178b8d1..aa4d7c0 100644 --- a/src/TSN/InjuriesDetail.hs +++ b/src/TSN/InjuriesDetail.hs @@ -17,7 +17,9 @@ -- are not retained. -- module TSN.InjuriesDetail ( - Message ) + Listing ( player_listings ), + Message ( listings ), + PlayerListing ) where import Data.Time ( UTCTime ) @@ -77,9 +79,6 @@ mkPersist defaultCodegenConfig [groundhog| dbName: injuries_detail |] ---TODO see if it works witout this ---xpBool :: PU Bool ---xpBool = xpickle pickle_player_listing :: PU PlayerListing pickle_player_listing =