X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FMain.hs;h=11cbfae1305c7d446eca0b00168a0c3cc14431f3;hb=2fe07315388ff9d0d6b548bba27ddf25dd692a40;hp=d2e51727f9468b58417c0ecbb7ba71cdefeef324;hpb=3f7a07312dfbdae0d71b2c0d181c93df9ffe53b4;p=dead%2Fhtsn-import.git diff --git a/src/Main.hs b/src/Main.hs index d2e5172..11cbfae 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -1,18 +1,119 @@ module Main where -import Text.Show.Pretty ( ppShow ) -import Text.XML.HXT.Core +import Control.Arrow ( (&&&), arr, returnA ) +import Database.Groundhog.Core ( PersistEntity ) +import Database.Groundhog.Sqlite ( + defaultMigrationLogger, + insert, + migrate, + runDbConn, + runMigration, + withSqliteConn ) +import Text.XML.HXT.Core ( + ArrowXml, + IOStateArrow, + SysConfigList, + XmlPickler, + XmlTree, + (>>>), + (/>), + getAttrl, + getText, + hasName, + no, + readDocument, + runX, + unpickleDoc, + withPreserveComment, + withRemoveWS, + withSubstDTDEntities, + withValidate, + xpickle, + yes ) +import System.Environment ( getArgs ) +import qualified TSN.Injuries as Injuries ( + Listing, + Message ( listings ) ) +import qualified TSN.InjuriesDetail as InjuriesDetail ( + Listing ( player_listings ), + Message ( listings ), + PlayerListing ) -import qualified TSN.Injuries as Injuries + +-- | A list of options passed to 'readDocument' when we parse an XML +-- document. We don't validate because the DTDs from TSN are +-- wrong. As a result, we don't want to keep useless DTDs +-- areound. Thus we disable 'withSubstDTDEntities' which, when +-- combined with "withValidate no", prevents HXT from trying to read +-- the DTD at all. +-- +parse_opts :: SysConfigList +parse_opts = + [ withPreserveComment no, + withRemoveWS yes, + withSubstDTDEntities no, + withValidate no ] + + +-- | We put the 'XmlTree' argument last so that it's easy to eta +-- reduce all of the import_foo functions that call this. +-- +import_generic :: (XmlPickler a, PersistEntity b) + => b -- ^ Dummy Listing instance needed for 'migrate' + -> (a -> [b]) -- ^ listings getter + -> XmlTree + -> IO () +import_generic dummy g xml = + withSqliteConn "foo.sqlite3" $ runDbConn $ do + runMigration defaultMigrationLogger $ do + migrate dummy + let msg = unpickleDoc xpickle xml + case msg of + Nothing -> error "Should unpickle!" + Just m -> mapM_ (\l -> insert l) (g m) + +-- | Import TSN.Injuries from an 'XmlTree'. +import_injuries :: XmlTree -> IO () +import_injuries = + import_generic + (undefined :: Injuries.Listing) + Injuries.listings + +-- | Import TSN.InjuriesDetail from an 'XmlTree'. +import_injuries_detail :: XmlTree -> IO () +import_injuries_detail = + import_generic + (undefined :: InjuriesDetail.PlayerListing) + ( (concatMap InjuriesDetail.player_listings) . InjuriesDetail.listings) + +import_file :: FilePath -> IO () +import_file path = do + results <- runX $ readA >>> (doctypeA &&& returnA) >>> (arr import_with_dtd) + case results of + [] -> error "ERROR: Unable to determine DOCTYPE." + (r:_) -> r -- Need to do something with the result or it gets GCed? + -- We do only expect one result fortunately. + where + -- | An arrow that reads a document into an 'XmlTree'. + readA :: IOStateArrow s a XmlTree + readA = readDocument parse_opts path + + -- | An arrow which parses the doctype "SYSTEM" of an 'XmlTree'. + -- We use these to determine the parser to use. + doctypeA :: ArrowXml a => a XmlTree String + doctypeA = getAttrl >>> hasName "doctype-SYSTEM" /> getText + + -- | Takes a 'Doctype', 'XmlTree' pair and uses the 'Doctype' to + -- determine which function to call on the 'XmlTree'. + import_with_dtd :: (String, XmlTree) -> IO () + import_with_dtd (dtd,xml) + | dtd == "injuriesxml.dtd" = import_injuries xml + | dtd == "Injuries_Detail_XML.dtd" = import_injuries_detail xml + | otherwise = error "ERROR: Unrecognized DTD." main :: IO () main = do - res <- runX ( xunpickleDocument Injuries.pickle_message - [ withValidate no - , withTrace 1 - , withRemoveWS yes - , withPreserveComment no - ] "test/xml/injuriesxml.xml" ) - putStr $ ppShow res + args <- getArgs + import_file (args !! 0)