{-# LANGUAGE TypeFamilies #-} -- | General XML stuff. -- module Xml ( DtdName(..), ToFromXml(..), parse_opts, pickle_unpickle, unpickleable ) where import Control.Exception ( SomeException(..), catch ) import Database.Groundhog ( AutoKey ) import Text.XML.HXT.Core ( (>>>), (/>), PU, SysConfigList, XmlPickler(..), hasName, no, readDocument, runX, withPreserveComment, withRemoveWS, withSubstDTDEntities, withValidate, xpickleVal, xunpickleVal, yes ) -- | A typeclass for types which can be converted into an associated -- XML type. The story behind this is long, but basically, we need -- to different types for each XML thingie we're going to import: a -- database type and an XML type. Both Groundhog and HXT are very -- particular about the types that they can use, and there's no way -- to reuse e.g. a type that HXT can pickle in Groundhog. So this -- typeclass gives us a way to get the XML type from the Groundhog -- type. -- -- At first there appears to be an equally-valid approach, getting the -- Groundhog type from the XML one. But Groundhog won't use type family -- instances, so here we are. -- class ToFromXml a where -- | Each instance a must declare its associated XML type (Xml a) type Xml a :: * type Container a :: * -- | And provide a function for getting an (Xml a) out of an "a." to_xml :: a -> Xml a -- | And provide a function for getting an "a" out of an (Xml a). from_xml :: Xml a -> a -- | Often we need to provide a key to use as a foreign key into -- some container. If the instance "belongs" to some other object, -- then it might need to be passed a key before it can un-XML -- itself. For example, the XML version of 'NewsTeam' doesn't -- contain a message ID which is part of its database type. from_xml_fk :: AutoKey (Container a) -> Xml a -> a from_xml_fk _ = from_xml -- | Represents the DTD filename ("SYSTEM") part of the DOCTYPE -- definition. newtype DtdName = DtdName String -- | A list of options passed to 'readDocument' when we parse an XML -- document. We don't validate because the DTDs from TSN are -- wrong. As a result, we don't want to keep useless DTDs -- areound. Thus we disable 'withSubstDTDEntities' which, when -- combined with "withValidate no", prevents HXT from trying to read -- the DTD at all. -- parse_opts :: SysConfigList parse_opts = [ withPreserveComment no, withRemoveWS yes, withSubstDTDEntities no, withValidate no ] -- | Given a root element name and a file path, return both the -- original unpickled root "object" and the one that was constructed -- by pickled and unpickling the original. This is used in a number -- of XML tests which pickle/unpickle and then make sure that the -- output is the same as the input. -- -- We return the object instead of an XmlTree (which would save us -- an unpickle call) because otherwise the type of @a@ in the call -- to 'xpickle' would be ambiguous. By returning some @a@s, we allow -- the caller to annotate its type. -- -- Note that this will happily pickle nothing to nothing and then -- unpickle it back to more nothing. So the fact that the -- before/after results from this function agree does not mean that -- the document was successfully unpickled! -- pickle_unpickle :: XmlPickler a => String -> FilePath -> IO ([a], [a]) pickle_unpickle root_element filepath = do -- We need to check only the root message element since -- readDocument produces a bunch of other junk. expected <- runX $ arr_getobj actual <- runX $ arr_getobj >>> xpickleVal xpickle >>> xunpickleVal xpickle return (expected, actual) where arr_getobj = readDocument parse_opts filepath /> hasName root_element >>> xunpickleVal xpickle -- | Is the given XML file unpickleable? Unpickling will be attempted -- using the @unpickler@ argument. If we unilaterally used the -- generic 'xpickle' function for our unpickler, a type ambiguity -- would result. By taking the unpickler as an argument, we allow -- the caller to indirectly specify a concrete type. -- unpickleable :: XmlPickler a => FilePath -> PU a -> IO Bool unpickleable filepath unpickler = do xmldoc <- try_unpickle `catch` (\(SomeException _) -> return []) return $ (not . null) xmldoc where try_unpickle = runX $ readDocument parse_opts filepath >>> xunpickleVal unpickler