X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FXml.hs;h=e8b7c4de311e0e90203dbda322e4bae2d2767676;hb=88b80555c3df3649799c8caa4de8d9c87c50be45;hp=96ba0116e184986b7b0a1942a5d96c002eb860db;hpb=9fff5c185dd7a2c8655815f36b72736d61401e41;p=dead%2Fhtsn-import.git diff --git a/src/Xml.hs b/src/Xml.hs index 96ba011..e8b7c4d 100644 --- a/src/Xml.hs +++ b/src/Xml.hs @@ -4,93 +4,75 @@ -- module Xml ( DtdName(..), - ToFromXml(..), + FromXml(..), parse_opts, - pickle_unpickle ) + pickle_unpickle, + unpickleable ) where -import Database.Groundhog ( AutoKey ) +import Control.Exception ( SomeException(..), catch ) import Text.XML.HXT.Core ( (>>>), (/>), + PU, SysConfigList, XmlPickler(..), hasName, - no, readDocument, runX, - withPreserveComment, withRemoveWS, - withSubstDTDEntities, - withValidate, xpickleVal, xunpickleVal, yes ) --- | A typeclass for types which can be converted into an associated --- XML type. The story behind this is long, but basically, we need --- to different types for each XML thingie we're going to import: a --- database type and an XML type. Both Groundhog and HXT are very --- particular about the types that they can use, and there's no way --- to reuse e.g. a type that HXT can pickle in Groundhog. So this --- typeclass gives us a way to get the XML type from the Groundhog --- type. +-- | A typeclass for XML types that can be converted into an +-- associated database type. The story behind this is long, but +-- basically, we need two different types for most XML thingies we're +-- going to import: a database type and an XML type. -- --- At first there appears to be an equally-valid approach, getting the --- Groundhog type from the XML one. But Groundhog won't use type family --- instances, so here we are. +-- Both Groundhog and HXT are very particular about the types that +-- they can use, and there's no way to reuse e.g. a type that HXT +-- can pickle in Groundhog. 
This typeclass gives us a standard way +-- to get the database type from the XML type that we have to define +-- for HXT. -- -class ToFromXml a where - -- | Each instance a must declare its associated XML type (Xml a) - type Xml a :: * - type Container a :: * +class FromXml a where + -- | Each instance @a@ must declare its associated database type @Db a@. + type Db a :: * - -- | And provide a function for getting an (Xml a) out of an "a." - to_xml :: a -> Xml a + -- | And provide a function for getting a @Db a@ out of an @a@. + from_xml :: a -> Db a - -- | And provide a function for getting an "a" out of an (Xml a). - from_xml :: Xml a -> a - -- | Often we need to provide a key to use as a foreign key into - -- some container. If the instance "belongs" to some other object, - -- then it might need to be passed a key before it can un-XML - -- itself. For example, the XML version of 'NewsTeam' doesn't - -- contain a message ID which is part of its database type. - from_xml_fk :: AutoKey (Container a) -> Xml a -> a - from_xml_fk _ = from_xml - - --- | Represents the DTD filename ("SYSTEM") part of the DOCTYPE +-- | Represents the DTD filename (\"SYSTEM\") part of the DOCTYPE -- definition. newtype DtdName = DtdName String -- | A list of options passed to 'readDocument' when we parse an XML --- document. We don't validate because the DTDs from TSN are --- wrong. As a result, we don't want to keep useless DTDs --- areound. Thus we disable 'withSubstDTDEntities' which, when --- combined with "withValidate no", prevents HXT from trying to read --- the DTD at all. +-- document. All cosmetic whitespace should be removed, otherwise we +-- would have to parse whitespace in each (un)pickler. 
-- parse_opts :: SysConfigList -parse_opts = - [ withPreserveComment no, - withRemoveWS yes, - withSubstDTDEntities no, - withValidate no ] +parse_opts = [ withRemoveWS yes ] -- | Given a root element name and a file path, return both the --- original unpickled root "object" and the one that was constructed --- by pickled and unpickling the original. This is used in a number --- of XML tests which pickle/unpickle and then make sure that the --- output is the same as the input. +-- original unpickled root \"object\" and the one that was +-- constructed by pickling and unpickling the original. This is used +-- in a number of XML tests which pickle/unpickle and then make sure +-- that the output is the same as the input. -- -- We return the object instead of an XmlTree (which would save us -- an unpickle call) because otherwise the type of @a@ in the call -- to 'xpickle' would be ambiguous. By returning some @a@s, we allow -- the caller to annotate its type. -- +-- Note that this will happily pickle nothing to nothing and then +-- unpickle it back to more nothing. So the fact that the +-- before/after results from this function agree does not mean that +-- the document was successfully unpickled! +-- pickle_unpickle :: XmlPickler a => String -> FilePath @@ -98,7 +80,7 @@ pickle_unpickle :: XmlPickler a pickle_unpickle root_element filepath = do -- We need to check only the root message element since -- readDocument produces a bunch of other junk. - expected <- runX $ arr_getobj + expected <- runX arr_getobj actual <- runX $ arr_getobj >>> xpickleVal xpickle @@ -112,3 +94,23 @@ pickle_unpickle root_element filepath = do hasName root_element >>> xunpickleVal xpickle + + + +-- | Is the given XML file unpickleable? Unpickling will be attempted +-- using the @unpickler@ argument. If we unilaterally used the +-- generic 'xpickle' function for our unpickler, a type ambiguity +-- would result. 
By taking the unpickler as an argument, we allow +-- the caller to indirectly specify a concrete type. +-- +-- Apologies for the name; unpickleable means \"we can unpickle +-- it\", not \"not pickleable.\" +-- +unpickleable :: XmlPickler a => FilePath -> PU a -> IO Bool +unpickleable filepath unpickler = do + xmldoc <- try_unpickle `catch` (\(SomeException _) -> return []) + return $ (not . null) xmldoc + where + try_unpickle = runX $ readDocument parse_opts filepath + >>> + xunpickleVal unpickler