]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/Xml.hs
Simplify parse_opts now that we can validate against the DTDs.
[dead/htsn-import.git] / src / Xml.hs
1 {-# LANGUAGE TypeFamilies #-}
2
3 -- | General XML stuff.
4 --
5 module Xml (
6 DtdName(..),
7 ToFromXml(..),
8 parse_opts,
9 pickle_unpickle,
10 unpickleable )
11 where
12
13 import Control.Exception ( SomeException(..), catch )
14 import Database.Groundhog ( AutoKey )
15 import Text.XML.HXT.Core (
16 (>>>),
17 (/>),
18 PU,
19 SysConfigList,
20 XmlPickler(..),
21 hasName,
22 readDocument,
23 runX,
24 withRemoveWS,
25 xpickleVal,
26 xunpickleVal,
27 yes )
28
29
-- | Types that have an XML-side twin. Every XML thingie that we
--   import needs two different types: a database type and an XML
--   type. Both Groundhog and HXT are very particular about the types
--   that they accept, and a type that HXT can pickle cannot simply
--   be reused in Groundhog. This class lets us get from the
--   Groundhog (database) type @a@ to its XML type.
--
--   Going in the other direction -- from the XML type to the
--   Groundhog one -- looks equally valid at first, but Groundhog
--   won't use type family instances, so the database type has to be
--   the class parameter.
--
class ToFromXml a where
  -- | The XML type associated with @a@. Every instance must
  --   declare one.
  type Xml a

  -- | The type whose 'AutoKey' is handed to 'from_xml_fk' as the
  --   foreign key.
  type Container a

  -- | Turn an @a@ into its XML representation, an @Xml a@.
  to_xml :: a -> Xml a

  -- | Turn an @Xml a@ back into an @a@.
  from_xml :: Xml a -> a

  -- | A variant of 'from_xml' that is also given a key to use as a
  --   foreign key into some container. An instance that \"belongs\"
  --   to some other object may need that key before it can un-XML
  --   itself; for example, the XML version of 'NewsTeam' doesn't
  --   contain the message ID which is part of its database type.
  --
  --   The default implementation discards the key and defers to
  --   'from_xml'.
  from_xml_fk :: AutoKey (Container a) -> Xml a -> a
  from_xml_fk = const from_xml
61
62
-- | Represents the DTD filename (\"SYSTEM\") part of the DOCTYPE
--   definition. This is a newtype wrapper around a plain 'String'.
newtype DtdName = DtdName String
66
-- | The options that we hand to 'readDocument' whenever an XML
--   document is parsed. Whitespace that is purely cosmetic gets
--   stripped here, once; otherwise every pickler would have to parse
--   it on its own.
--
parse_opts :: SysConfigList
parse_opts = [ strip_whitespace ]
  where
    strip_whitespace = withRemoveWS yes
73
74
-- | Round-trip the root element of an XML file through its pickler.
--   Given the name of the root element and the path to the file,
--   return a pair: first the root \"object\" as unpickled straight
--   from the file, and second the object obtained by pickling that
--   one and unpickling it again. A number of XML tests use this to
--   check that the output of a pickle/unpickle cycle is the same as
--   its input.
--
--   Objects are returned rather than an XmlTree (which would save an
--   unpickle call) because the type of @a@ in the call to 'xpickle'
--   would otherwise be ambiguous. Returning some @a@s lets the
--   caller annotate the type.
--
--   Beware: this will happily pickle nothing to nothing and then
--   unpickle it back to more nothing. Agreement between the two
--   halves of the result therefore does not mean that the document
--   was successfully unpickled!
--
pickle_unpickle :: XmlPickler a
                => String
                -> FilePath
                -> IO ([a], [a])
pickle_unpickle root_element filepath = do
  before <- runX unpickle_root
  after  <- runX (unpickle_root >>> xpickleVal xpickle >>> xunpickleVal xpickle)
  return (before, after)
  where
    -- Only the root message element is of interest; readDocument
    -- produces a bunch of other junk around it.
    select_root = readDocument parse_opts filepath /> hasName root_element

    -- The root element, unpickled into an object.
    unpickle_root = select_root >>> xunpickleVal xpickle
112
113
114
-- | Is the given XML file unpickleable? Unpickling is attempted
--   using the @unpickler@ argument. If we unilaterally used the
--   generic 'xpickle' function as our unpickler, a type ambiguity
--   would result; taking the unpickler as an argument lets the
--   caller indirectly specify a concrete type.
--
--   The result is 'True' when unpickling produces at least one
--   value, and 'False' when it produces none or when an exception is
--   thrown along the way.
--
unpickleable :: XmlPickler a => FilePath -> PU a -> IO Bool
unpickleable filepath unpickler =
  -- Any exception at all is taken to mean \"not unpickleable\".
  -- NOTE(review): matching on SomeException also swallows
  -- asynchronous exceptions; narrow this if that ever matters.
  try_unpickle `catch` (\(SomeException _) -> return False)
  where
    try_unpickle = do
      results <- runX $ readDocument parse_opts filepath
                        >>>
                        xunpickleVal unpickler
      -- The result list may be produced lazily, so force the
      -- emptiness test right here, while the exception handler is
      -- still installed. With a plain @return $ ...@ the test would
      -- only be evaluated by the caller, outside of the 'catch', and
      -- an exception raised during that evaluation would escape.
      return $! not (null results)
127 >>>
128 xunpickleVal unpickler