From 9fff5c185dd7a2c8655815f36b72736d61401e41 Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Wed, 1 Jan 2014 14:19:35 -0500 Subject: [PATCH] Rename the ImportResult constructors. Add some code documentation. Wrap the DTD name (String) in a newtype, DtdName. --- src/Main.hs | 99 +++++++++++++++++++++++++++++----------- src/TSN/DbImport.hs | 13 ++++-- src/TSN/XML/Heartbeat.hs | 6 +-- src/TSN/XML/News.hs | 4 +- src/Xml.hs | 6 +++ 5 files changed, 94 insertions(+), 34 deletions(-) diff --git a/src/Main.hs b/src/Main.hs index 7932bf3..06cb22e 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -1,9 +1,9 @@ -{-# LANGUAGE NoMonomorphismRestriction #-} {-# LANGUAGE DoAndIfThenElse #-} +{-# LANGUAGE NoMonomorphismRestriction #-} module Main where -import Control.Arrow ( (&&&), arr, returnA ) +import Control.Arrow ( (&&&), (>>^), arr, returnA ) import Control.Concurrent ( threadDelay ) import Control.Exception ( SomeException, catch ) import Control.Monad ( when ) @@ -46,36 +46,64 @@ import qualified TSN.XML.Heartbeat as Heartbeat ( verify ) import qualified TSN.XML.Injuries as Injuries ( Listing ) import qualified TSN.XML.InjuriesDetail as InjuriesDetail ( PlayerListing ) import qualified TSN.XML.News as News ( Message ) -import Xml ( parse_opts ) - - - -import_file :: Configuration -> FilePath -> IO Bool +import Xml ( DtdName(..), parse_opts ) + + +-- | This is where most of the work happens. This function is called +-- on every file that we would like to import. It determines which +-- importer to use based on the DTD, processes the file, and then +-- returns whether or not any records were imported. If the file was +-- processed, the number of records imported is returned (wrapped in +-- a Just). Otherwise, if the file was not processed, 'Nothing' is +-- returned. +-- +-- Since we are already in arrow world with HXT, the +-- 'import_with_dtd' function is lifted to an 'Arrow' as well with +-- 'arr'. This prevents us from having to do a bunch of unwrapping +-- and rewrapping with the associated error checking. +-- +import_file :: Configuration -- ^ A configuration object needed for the + -- 'backend' and 'connection_string'. + + -> FilePath -- ^ The path of the XML file to import. + + -> IO (Maybe Int) -- ^ If we processed the file, Just the number + -- of records imported. Otherwise, Nothing. import_file cfg path = do results <- parse_and_import `catch` exception_handler case results of - -- If results' is empty, one of the arrows return "nothing." [] -> do + -- One of the arrows returned "nothing." report_error $ "Unable to determine DTD for file " ++ path ++ "." - return False - (Err errmsg:_) -> do + return Nothing + (ImportFailed errmsg:_) -> do report_error errmsg - return False - (Info infomsg:_) -> do + return Nothing + (ImportSkipped infomsg:_) -> do + -- We processed the message but didn't import anything. Return + -- "success" so that the XML file is deleted. report_info infomsg - return True - (Succ count:_) -> do + return $ Just 0 + (ImportSucceeded count:_) -> do report_info $ "Successfully imported " ++ (show count) ++ " records from " ++ path ++ "." - return True + return $ Just count + (ImportUnsupported infomsg:_) -> do + -- For now we return "success" for these too, since we know we don't + -- support a bunch of DTDs and we want them to get deleted. + report_info infomsg + return $ Just 0 where + -- | This will catch *any* exception, even the ones thrown by + -- Haskell's 'error' (which should never occur under normal + -- circumstances). exception_handler :: SomeException -> IO [ImportResult] exception_handler e = do report_error (show e) let errdesc = "Failed to import file " ++ path ++ "." -- Return a nonempty list so we don't claim incorrectly that -- we couldn't parse the DTD. - return [Err errdesc] + return [ImportFailed errdesc] -- | An arrow that reads a document into an 'XmlTree'. readA :: IOStateArrow s a XmlTree @@ -83,8 +111,8 @@ import_file cfg path = do -- | An arrow which parses the doctype "SYSTEM" of an 'XmlTree'. -- We use these to determine the parser to use. - doctypeA :: ArrowXml a => a XmlTree String - doctypeA = getAttrl >>> hasName "doctype-SYSTEM" /> getText + doctypeA :: ArrowXml a => a XmlTree DtdName + doctypeA = getAttrl >>> hasName "doctype-SYSTEM" /> getText >>^ DtdName -- | Combine the arrows above as well as the function below -- (arrowized with 'arr') into an IO action that does everything @@ -99,10 +127,12 @@ import_file cfg path = do >>= sequence - -- | Takes a 'Doctype', 'XmlTree' pair and uses the 'Doctype' to - -- determine which function to call on the 'XmlTree'. - import_with_dtd :: (String, XmlTree) -> IO ImportResult - import_with_dtd (dtd,xml) + -- | Takes a ('DtdName', 'XmlTree') pair and uses the 'DtdName' + -- to determine which function to call on the 'XmlTree'. + import_with_dtd :: (DtdName, XmlTree) -> IO ImportResult + import_with_dtd (DtdName dtd,xml) + -- We special-case the heartbeat so it doesn't have to run in + -- the database monad. | dtd == "Heartbeat.dtd" = Heartbeat.verify xml | otherwise = -- We need NoMonomorphismRestriction here. @@ -127,9 +157,15 @@ import_file cfg path = do | otherwise = \_ -> do -- Dummy arg simplifies the other cases. let infomsg = "Unrecognized DTD in " ++ path ++ ": " ++ dtd ++ "." - return $ Info infomsg + return $ ImportUnsupported infomsg +-- | Entry point of the program. It twiddles some knobs for +-- configuration options and then calls 'import_file' on each XML file +-- given on the command-line. +-- +-- Any file successfully processed is then removed, and we're done. +-- main :: IO () main = do rc_cfg <- OC.from_rc @@ -158,13 +194,22 @@ main = do -- Zip the results with the files list to find out which ones can be -- deleted. let result_pairs = zip (OC.xml_files opt_config) results - let victims = filter (\(_,result) -> result) result_pairs + let victims = [ (p,c) | (p, Just c) <- result_pairs ] + let imported_count = sum $ map snd victims + report_info $ "Imported " ++ (show imported_count) ++ " records total." mapM_ ((kill True) . fst) victims where + -- | Wrap these two actions into one function so that we don't + -- report that the file was removed if the exception handler is + -- run. + remove_and_report path = do + removeFile path + report_info $ "Removed processed file " ++ path ++ "." + + -- | Try to remove @path@ and potentially try again. kill try_again path = do - removeFile path `catchIOError` exception_handler - report_info $ "Removed imported file " ++ path ++ "." + (remove_and_report path) `catchIOError` exception_handler where -- | A wrapper around threadDelay which takes seconds instead of -- microseconds as its argument. @@ -173,6 +218,8 @@ main = do let microseconds = seconds * (10 ^ (6 :: Int)) threadDelay microseconds + -- | If we can't remove the file, report that, and try once + -- more after waiting a few seconds. exception_handler :: IOError -> IO () exception_handler e = do report_error (show e) diff --git a/src/TSN/DbImport.hs b/src/TSN/DbImport.hs index 3646d7c..3aeb29f 100644 --- a/src/TSN/DbImport.hs +++ b/src/TSN/DbImport.hs @@ -26,7 +26,13 @@ import Text.XML.HXT.Core ( -- 'Either' with three choices. A "Info" return value means that -- the XML document *was* processed, so it should be removed. -- -data ImportResult = Err String | Info String | Succ Int +data ImportResult = + ImportFailed String -- ^ Failure with an error message. + | ImportSkipped String -- ^ We processed the file, but didn't import it. + -- The reason is contained in the second field. + | ImportSucceeded Int -- ^ We did import records, and here's how many. + | ImportUnsupported String -- ^ We didn't know how to process this file. + -- The second field should contain info. -- | Instances of this type know how to insert themselves into a -- Groundhog database. @@ -50,7 +56,8 @@ import_generic g dummy xml = do runMigration defaultMigrationLogger $ migrate dummy let root_element = unpickleDoc xpickle xml case root_element of - Nothing -> return $ Err "Could not unpickle document in import_generic." + Nothing -> return $ + ImportFailed "Could not unpickle document in import_generic." Just elt -> do ids <- mapM insert (g elt) - return $ Succ (length ids) + return $ ImportSucceeded (length ids) diff --git a/src/TSN/XML/Heartbeat.hs b/src/TSN/XML/Heartbeat.hs index c408483..6cc4931 100644 --- a/src/TSN/XML/Heartbeat.hs +++ b/src/TSN/XML/Heartbeat.hs @@ -52,9 +52,9 @@ instance XmlPickler Message where verify :: XmlTree -> IO ImportResult verify xml = do let root_element = unpickleDoc xpickle xml :: Maybe Message - case root_element of - Nothing -> return $ Err "Could not unpickle document in import_generic." - Just _ -> return $ Info "Heartbeat received." + return $ case root_element of + Nothing -> ImportFailed "Could not unpickle document in import_generic." + Just _ -> ImportSkipped "Heartbeat received. Thump." -- * Tasty Tests heartbeat_tests :: TestTree diff --git a/src/TSN/XML/News.hs b/src/TSN/XML/News.hs index bc443d2..aa1a01c 100644 --- a/src/TSN/XML/News.hs +++ b/src/TSN/XML/News.hs @@ -315,7 +315,7 @@ instance DbImport Message where case root_element of Nothing -> do let errmsg = "Could not unpickle News message in dbimport." - return $ Err errmsg + return $ ImportFailed errmsg Just message -> do news_id <- insert (from_xml message :: Message) let nts :: [NewsTeam] = map (from_xml_fk news_id) @@ -325,7 +325,7 @@ instance DbImport Message where nt_ids <- mapM insert nts loc_ids <- mapM insert nlocs - return $ Succ (1 + (length nt_ids) + (length loc_ids)) + return $ ImportSucceeded (1 + (length nt_ids) + (length loc_ids)) -- * Tasty Tests diff --git a/src/Xml.hs b/src/Xml.hs index dfb6d05..96ba011 100644 --- a/src/Xml.hs +++ b/src/Xml.hs @@ -3,6 +3,7 @@ -- | General XML stuff. -- module Xml ( + DtdName(..), ToFromXml(..), parse_opts, pickle_unpickle ) @@ -59,6 +60,11 @@ class ToFromXml a where from_xml_fk :: AutoKey (Container a) -> Xml a -> a from_xml_fk _ = from_xml + +-- | Represents the DTD filename ("SYSTEM") part of the DOCTYPE +-- definition. +newtype DtdName = DtdName String + -- | A list of options passed to 'readDocument' when we parse an XML -- document. We don't validate because the DTDs from TSN are -- wrong. As a result, we don't want to keep useless DTDs -- 2.43.2