]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/Xml.hs
Set "withSubstDTDEntities no" for now.
[dead/htsn-import.git] / src / Xml.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE TypeFamilies #-}
3
4 -- | General XML stuff.
5 --
6 module Xml (
7 DtdName(..),
8 FromXml(..),
9 FromXmlFk(..),
10 ToDb(..),
11 parse_opts,
12 pickle_unpickle,
13 unpickleable )
14 where
15
16 -- System imports.
17 import Control.Exception ( SomeException(..), catch )
18 import Database.Groundhog.Core ( PersistEntity(..) )
19 import Text.XML.HXT.Core (
20 (>>>),
21 (/>),
22 PU,
23 SysConfigList,
24 isElem,
25 no,
26 readDocument,
27 runX,
28 withRemoveWS,
29 withSubstDTDEntities,
30 withValidate,
31 xpickleVal,
32 xunpickleVal,
33 yes )
34
35 -- | Common associated type shared by 'FromXml' and 'FromXmlFk'. This
36 -- basically just forces the client to define the \"database
37 -- version\" of his type.
38 --
39 class ToDb a where
40 -- | Each instance @a@ must declare its associated database type @Db a@.
41 type Db a :: *
42
43 -- | A typeclass for XML types that can be converted into an
44 -- associated database type. The story behind this is long, but
45 -- basically, we need to different types most XML thingies we're
46 -- going to import: a database type and an XML type.
47 --
48 -- Both Groundhog and HXT are very particular about the types that
49 -- they can use, and there's no way to reuse e.g. a type that HXT
50 -- can pickle in Groundhog. This typeclass gives us a standard way
51 -- to get the database type from the XML type that we have to define
52 -- for HXT.
53 --
54 class (ToDb a) => FromXml a where
55 -- | A function for getting a @Db a@ out of an @a@.
56 from_xml :: a -> Db a
57
58
59 -- | Some database types cannot be constructed from the XML type
60 -- alone; they must be supplied a foreign key to a parent object
61 -- first. Members of this class can be converted from an XML
62 -- representation to a database representation in this manner.
63 --
64 class (ToDb a) => FromXmlFk a where
65 -- | The type of our parent object, i.e. to the type to whom our
66 -- foreign key will point.
67 type Parent a :: *
68
69 -- | The function that produces a @Db a@ out of a foreign key and an
70 -- @a@. The parameter order makes it easier to map this function
71 -- over a bunch of things.
72 from_xml_fk :: DefaultKey (Parent a) -> a -> Db a
73
74
75 -- | Represents the DTD filename (\"SYSTEM\") part of the DOCTYPE
76 -- definition.
77 newtype DtdName = DtdName String
78
79 -- | A list of options passed to 'readDocument' when we parse an XML
80 -- document. All cosmetic whitespace should be removed, otherwise we
81 -- would have to parse whitespace in each (un)pickler.
82 --
83 parse_opts :: SysConfigList
84 parse_opts = [ withRemoveWS yes,
85 withSubstDTDEntities no,
86 withValidate no ]
87
88
89 -- | Given an @unpickler@ and a @filepath@, attempt to unpickle the
90 -- root element of @filepath@ using @unpickler@ and return both the
91 -- original unpickled object and one constructed by pickling and
92 -- unpickling that original. This is used in a number of XML tests
93 -- which pickle/unpickle and then make sure that the output is the
94 -- same as the input.
95 --
96 -- We return the object instead of an XmlTree (which would save us
97 -- an unpickle call) because otherwise the type of @a@ in the call
98 -- to 'xpickle' would be ambiguous. By returning some @a@s, we allow
99 -- the caller to annotate its type.
100 --
101 -- Note that this will happily pickle nothing to nothing and then
102 -- unpickle it back to more nothing. So the fact that the
103 -- before/after results from this function agree does not mean that
104 -- the document was successfully unpickled!
105 --
106 pickle_unpickle :: PU a -- ^ @unpickler@ returning an @a@
107 -> FilePath -- ^ Path to the document to unpickle.
108 -> IO ([a], [a])
109 pickle_unpickle unpickler filepath = do
110 -- We need to check only the root message element since
111 -- readDocument produces a bunch of other junk.
112 expected <- runX arr_getobj
113 actual <- runX $ arr_getobj
114 >>>
115 xpickleVal unpickler
116 >>>
117 xunpickleVal unpickler
118
119 return (expected, actual)
120 where
121 arr_getobj = readDocument parse_opts filepath
122 />
123 isElem -- Drop the extra junk readDocument pulls in.
124 >>>
125 xunpickleVal unpickler
126
127
128
129 -- | Is the given XML file unpickleable? Unpickling will be attempted
130 -- using the @unpickler@ argument. If we unilaterally used the
131 -- generic 'xpickle' function for our unpickler, a type ambiguity
132 -- would result. By taking the unpickler as an argument, we allow
133 -- the caller to indirectly specify a concrete type.
134 --
135 -- Apologies the the name; unpickleable means \"we can unpickle
136 -- it\", not \"not pickleable.\"
137 --
138 unpickleable :: FilePath -> PU a -> IO Bool
139 unpickleable filepath unpickler = do
140 xmldoc <- try_unpickle `catch` (\(SomeException _) -> return [])
141 return $ (not . null) xmldoc
142 where
143 try_unpickle = runX $ readDocument parse_opts filepath
144 >>>
145 xunpickleVal unpickler