X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FXml.hs;h=d4ea4679cef1bafc974e7d9eede7249b39957491;hb=HEAD;hp=7f641b2fb54d1847215b7733331312c46f5b11b0;hpb=3b91dab2cc716aac332368e707f4ed27f059838a;p=dead%2Fhtsn-import.git diff --git a/src/Xml.hs b/src/Xml.hs index 7f641b2..d4ea467 100644 --- a/src/Xml.hs +++ b/src/Xml.hs @@ -1,92 +1,120 @@ +{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TypeFamilies #-} -- | General XML stuff. -- module Xml ( + Child(..), DtdName(..), - ToFromXml(..), + FromXml(..), + FromXmlFk(..), + ToDb(..), parse_opts, + parse_opts_novalidate, pickle_unpickle, - unpickleable ) + unpickleable, + unsafe_read_document, + unsafe_read_invalid_document, + unsafe_unpickle ) where +-- System imports. import Control.Exception ( SomeException(..), catch ) -import Database.Groundhog ( AutoKey ) +import Database.Groundhog.Core ( PersistEntity(..) ) import Text.XML.HXT.Core ( (>>>), (/>), PU, SysConfigList, - XmlPickler(..), - hasName, + XmlTree, + isElem, no, readDocument, runX, - withPreserveComment, withRemoveWS, withSubstDTDEntities, withValidate, xpickleVal, + xunpickleDocument, xunpickleVal, yes ) --- | A typeclass for types which can be converted into an associated --- XML type. The story behind this is long, but basically, we need --- to different types for each XML thingie we're going to import: a --- database type and an XML type. Both Groundhog and HXT are very --- particular about the types that they can use, and there's no way --- to reuse e.g. a type that HXT can pickle in Groundhog. So this --- typeclass gives us a way to get the XML type from the Groundhog --- type. + +-- | Common associated type shared by 'FromXml' and 'FromXmlFk'. This +-- basically just forces the client to define the \"database +-- version\" of his type. +-- +class ToDb a where + -- | Each instance @a@ must declare its associated database type @Db a@. 
+ type Db a :: * + +-- | A typeclass for XML types that can be converted into an +-- associated database type. The story behind this is long, but +-- basically, we need two different types for most XML thingies we're +-- going to import: a database type and an XML type. -- --- At first there appears to be an equally-valid approach, getting the --- Groundhog type from the XML one. But Groundhog won't use type family --- instances, so here we are. +-- Both Groundhog and HXT are very particular about the types that +-- they can use, and there's no way to reuse e.g. a type that HXT +-- can pickle in Groundhog. This typeclass gives us a standard way +-- to get the database type from the XML type that we have to define +-- for HXT. -- -class ToFromXml a where - -- | Each instance a must declare its associated XML type (Xml a) - type Xml a :: * - type Container a :: * +class (ToDb a) => FromXml a where + -- | A function for getting a @Db a@ out of an @a@. + from_xml :: a -> Db a - -- | And provide a function for getting an (Xml a) out of an "a." - to_xml :: a -> Xml a - -- | And provide a function for getting an "a" out of an (Xml a). - from_xml :: Xml a -> a +-- | A class for XML representations which are children of other +-- elements. The goal is to associate a child XML element with its +-- parent element's database type. This is required to construct the +-- database analogue of @a@ in 'FromXmlFk'. +-- +class Child a where + -- | The type of our parent object, i.e. the type to which our + -- foreign key will point. + type Parent a :: * - -- | Often we need to provide a key to use as a foreign key into - -- some container. If the instance "belongs" to some other object, - -- then it might need to be passed a key before it can un-XML - -- itself. For example, the XML version of 'NewsTeam' doesn't - -- contain a message ID which is part of its database type. 
- from_xml_fk :: AutoKey (Container a) -> Xml a -> a - from_xml_fk _ = from_xml + +-- | Some database types cannot be constructed from the XML type +-- alone; they must be supplied a foreign key to a parent object +-- first. Members of this class can be converted from an XML +-- representation to a database representation in this manner. +-- +class (Child a, ToDb a) => FromXmlFk a where + -- | The function that produces a @Db a@ out of a foreign key and an + -- @a@. The parameter order makes it easier to map this function + -- over a bunch of things. + from_xml_fk :: DefaultKey (Parent a) -> a -> Db a --- | Represents the DTD filename ("SYSTEM") part of the DOCTYPE + +-- | Represents the DTD filename (\"SYSTEM\") part of the DOCTYPE -- definition. newtype DtdName = DtdName String -- | A list of options passed to 'readDocument' when we parse an XML --- document. We don't validate because the DTDs from TSN are --- wrong. As a result, we don't want to keep useless DTDs --- areound. Thus we disable 'withSubstDTDEntities' which, when --- combined with "withValidate no", prevents HXT from trying to read --- the DTD at all. +-- document. All cosmetic whitespace should be removed, otherwise we +-- would have to parse whitespace in each (un)pickler. -- parse_opts :: SysConfigList -parse_opts = - [ withPreserveComment no, - withRemoveWS yes, - withSubstDTDEntities no ] +parse_opts = [ withRemoveWS yes, + withSubstDTDEntities no ] + +-- | Like 'parse_opts' except we don't validate the document against +-- its DTD. This is useful when we need to parse a document that we +-- /know/ is invalid so that we can deliver a better error message. +-- +parse_opts_novalidate :: SysConfigList +parse_opts_novalidate = (withValidate no) : parse_opts --- | Given a root element name and a file path, return both the --- original unpickled root "object" and the one that was constructed --- by pickled and unpickling the original. 
This is used in a number --- of XML tests which pickle/unpickle and then make sure that the --- output is the same as the input. +-- | Given an @unpickler@ and a @filepath@, attempt to unpickle the +-- root element of @filepath@ using @unpickler@ and return both the +-- original unpickled object and one constructed by pickling and +-- unpickling that original. This is used in a number of XML tests +-- which pickle/unpickle and then make sure that the output is the +-- same as the input. -- -- We return the object instead of an XmlTree (which would save us -- an unpickle call) because otherwise the type of @a@ in the call @@ -98,27 +126,26 @@ parse_opts = -- before/after results from this function agree does not mean that -- the document was successfully unpickled! -- -pickle_unpickle :: XmlPickler a - => String - -> FilePath +pickle_unpickle :: PU a -- ^ @unpickler@ returning an @a@ + -> FilePath -- ^ Path to the document to unpickle. -> IO ([a], [a]) -pickle_unpickle root_element filepath = do +pickle_unpickle unpickler filepath = do -- We need to check only the root message element since -- readDocument produces a bunch of other junk. expected <- runX arr_getobj actual <- runX $ arr_getobj >>> - xpickleVal xpickle + xpickleVal unpickler >>> - xunpickleVal xpickle + xunpickleVal unpickler return (expected, actual) where arr_getobj = readDocument parse_opts filepath /> - hasName root_element + isElem -- Drop the extra junk readDocument pulls in. >>> - xunpickleVal xpickle + xunpickleVal unpickler @@ -128,7 +155,10 @@ pickle_unpickle root_element filepath = do -- would result. By taking the unpickler as an argument, we allow -- the caller to indirectly specify a concrete type. 
-- -unpickleable :: XmlPickler a => FilePath -> PU a -> IO Bool +-- Apologies for the name; unpickleable means \"we can unpickle +-- it\", not \"not pickleable.\" +-- +unpickleable :: FilePath -> PU a -> IO Bool unpickleable filepath unpickler = do xmldoc <- try_unpickle `catch` (\(SomeException _) -> return []) return $ (not . null) xmldoc @@ -136,3 +166,26 @@ unpickleable filepath unpickler = do try_unpickle = runX $ readDocument parse_opts filepath >>> xunpickleVal unpickler + + +-- | Unpickle from a 'FilePath' using the given pickler. Explode if it +-- doesn't work. +-- +unsafe_unpickle :: FilePath -> PU a -> IO a +unsafe_unpickle filepath unpickler = + fmap head $ runX $ xunpickleDocument unpickler parse_opts filepath + + +-- | Read an XML document from a 'FilePath' into an XmlTree. Explode if it +-- doesn't work. +-- +unsafe_read_document :: FilePath -> IO XmlTree +unsafe_read_document filepath = + fmap head $ runX $ readDocument parse_opts filepath + +-- | The same as 'unsafe_read_document', except it allows you to read +-- documents which don't validate against their DTDs. +-- +unsafe_read_invalid_document :: FilePath -> IO XmlTree +unsafe_read_invalid_document filepath = + fmap head $ runX $ readDocument parse_opts_novalidate filepath