]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/Xml.hs
Rewrite everything to use XmlImport/DbImport classes making things much more easy...
[dead/htsn-import.git] / src / Xml.hs
1 {-# LANGUAGE TypeFamilies #-}
2
3 -- | General XML stuff.
4 --
5 module Xml (
6 DtdName(..),
7 FromXml(..),
8 parse_opts,
9 pickle_unpickle,
10 unpickleable )
11 where
12
13 import Control.Exception ( SomeException(..), catch )
14 import Text.XML.HXT.Core (
15 (>>>),
16 (/>),
17 PU,
18 SysConfigList,
19 XmlPickler(..),
20 hasName,
21 readDocument,
22 runX,
23 withRemoveWS,
24 xpickleVal,
25 xunpickleVal,
26 yes )
27
28
29 -- | A typeclass for XML types that can be converted into an associated
30 -- database type. The story behind this is long, but basically, we need
31 -- to different types for each XML thingie we're going to import: a
32 -- database type and an XML type. Both Groundhog and HXT are very
33 -- particular about the types that they can use, and there's no way
34 -- to reuse e.g. a type that HXT can pickle in Groundhog. So this
35 -- typeclass gives us a way to get the database type from the XML
36 -- type that we have to define for HXT.
37 --
38 class FromXml a where
39 -- | Each instance a must declare its associated database type (Db a)
40 type Db a :: *
41
42 -- | And provide a function for getting a (Db a) out of an "a".
43 from_xml :: a -> Db a
44
45
46 -- | Represents the DTD filename ("SYSTEM") part of the DOCTYPE
47 -- definition.
48 newtype DtdName = DtdName String
49
50 -- | A list of options passed to 'readDocument' when we parse an XML
51 -- document. All cosmetic whitespace should be removed, otherwise we
52 -- have to parse it in each pickler.
53 --
54 parse_opts :: SysConfigList
55 parse_opts = [ withRemoveWS yes ]
56
57
58 -- | Given a root element name and a file path, return both the
59 -- original unpickled root "object" and the one that was constructed
60 -- by pickled and unpickling the original. This is used in a number
61 -- of XML tests which pickle/unpickle and then make sure that the
62 -- output is the same as the input.
63 --
64 -- We return the object instead of an XmlTree (which would save us
65 -- an unpickle call) because otherwise the type of @a@ in the call
66 -- to 'xpickle' would be ambiguous. By returning some @a@s, we allow
67 -- the caller to annotate its type.
68 --
69 -- Note that this will happily pickle nothing to nothing and then
70 -- unpickle it back to more nothing. So the fact that the
71 -- before/after results from this function agree does not mean that
72 -- the document was successfully unpickled!
73 --
74 pickle_unpickle :: XmlPickler a
75 => String
76 -> FilePath
77 -> IO ([a], [a])
78 pickle_unpickle root_element filepath = do
79 -- We need to check only the root message element since
80 -- readDocument produces a bunch of other junk.
81 expected <- runX arr_getobj
82 actual <- runX $ arr_getobj
83 >>>
84 xpickleVal xpickle
85 >>>
86 xunpickleVal xpickle
87
88 return (expected, actual)
89 where
90 arr_getobj = readDocument parse_opts filepath
91 />
92 hasName root_element
93 >>>
94 xunpickleVal xpickle
95
96
97
98 -- | Is the given XML file unpickleable? Unpickling will be attempted
99 -- using the @unpickler@ argument. If we unilaterally used the
100 -- generic 'xpickle' function for our unpickler, a type ambiguity
101 -- would result. By taking the unpickler as an argument, we allow
102 -- the caller to indirectly specify a concrete type.
103 --
104 unpickleable :: XmlPickler a => FilePath -> PU a -> IO Bool
105 unpickleable filepath unpickler = do
106 xmldoc <- try_unpickle `catch` (\(SomeException _) -> return [])
107 return $ (not . null) xmldoc
108 where
109 try_unpickle = runX $ readDocument parse_opts filepath
110 >>>
111 xunpickleVal unpickler