]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/Xml.hs
Add and update documentation.
[dead/htsn-import.git] / src / Xml.hs
1 {-# LANGUAGE TypeFamilies #-}
2
3 -- | General XML stuff.
4 --
5 module Xml (
6 DtdName(..),
7 FromXml(..),
8 parse_opts,
9 pickle_unpickle,
10 unpickleable )
11 where
12
13 import Control.Exception ( SomeException(..), catch )
14 import Text.XML.HXT.Core (
15 (>>>),
16 (/>),
17 PU,
18 SysConfigList,
19 XmlPickler(..),
20 hasName,
21 readDocument,
22 runX,
23 withRemoveWS,
24 xpickleVal,
25 xunpickleVal,
26 yes )
27
28
29 -- | A typeclass for XML types that can be converted into an
30 -- associated database type. The story behind this is long, but
31 -- basically, we need to different types most XML thingies we're
32 -- going to import: a database type and an XML type.
33 --
34 -- Both Groundhog and HXT are very particular about the types that
35 -- they can use, and there's no way to reuse e.g. a type that HXT
36 -- can pickle in Groundhog. This typeclass gives us a standard way
37 -- to get the database type from the XML type that we have to define
38 -- for HXT.
39 --
40 class FromXml a where
41 -- | Each instance @a@ must declare its associated database type @Db a@.
42 type Db a :: *
43
44 -- | And provide a function for getting a @Db a@ out of an @a@.
45 from_xml :: a -> Db a
46
47
48 -- | Represents the DTD filename (\"SYSTEM\") part of the DOCTYPE
49 -- definition.
50 newtype DtdName = DtdName String
51
52 -- | A list of options passed to 'readDocument' when we parse an XML
53 -- document. All cosmetic whitespace should be removed, otherwise we
54 -- would have to parse whitespace in each (un)pickler.
55 --
56 parse_opts :: SysConfigList
57 parse_opts = [ withRemoveWS yes ]
58
59
60 -- | Given a root element name and a file path, return both the
61 -- original unpickled root \"object\" and the one that was
62 -- constructed by pickled and unpickling the original. This is used
63 -- in a number of XML tests which pickle/unpickle and then make sure
64 -- that the output is the same as the input.
65 --
66 -- We return the object instead of an XmlTree (which would save us
67 -- an unpickle call) because otherwise the type of @a@ in the call
68 -- to 'xpickle' would be ambiguous. By returning some @a@s, we allow
69 -- the caller to annotate its type.
70 --
71 -- Note that this will happily pickle nothing to nothing and then
72 -- unpickle it back to more nothing. So the fact that the
73 -- before/after results from this function agree does not mean that
74 -- the document was successfully unpickled!
75 --
76 pickle_unpickle :: XmlPickler a
77 => String
78 -> FilePath
79 -> IO ([a], [a])
80 pickle_unpickle root_element filepath = do
81 -- We need to check only the root message element since
82 -- readDocument produces a bunch of other junk.
83 expected <- runX arr_getobj
84 actual <- runX $ arr_getobj
85 >>>
86 xpickleVal xpickle
87 >>>
88 xunpickleVal xpickle
89
90 return (expected, actual)
91 where
92 arr_getobj = readDocument parse_opts filepath
93 />
94 hasName root_element
95 >>>
96 xunpickleVal xpickle
97
98
99
100 -- | Is the given XML file unpickleable? Unpickling will be attempted
101 -- using the @unpickler@ argument. If we unilaterally used the
102 -- generic 'xpickle' function for our unpickler, a type ambiguity
103 -- would result. By taking the unpickler as an argument, we allow
104 -- the caller to indirectly specify a concrete type.
105 --
106 -- Apologies the the name; unpickleable means \"we can unpickle
107 -- it\", not \"not pickleable.\"
108 --
109 unpickleable :: XmlPickler a => FilePath -> PU a -> IO Bool
110 unpickleable filepath unpickler = do
111 xmldoc <- try_unpickle `catch` (\(SomeException _) -> return [])
112 return $ (not . null) xmldoc
113 where
114 try_unpickle = runX $ readDocument parse_opts filepath
115 >>>
116 xunpickleVal unpickler