]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/Xml.hs
Rename the ImportResult constructors.
[dead/htsn-import.git] / src / Xml.hs
1 {-# LANGUAGE TypeFamilies #-}
2
3 -- | General XML stuff.
4 --
5 module Xml (
6 DtdName(..),
7 ToFromXml(..),
8 parse_opts,
9 pickle_unpickle )
10 where
11
12 import Database.Groundhog ( AutoKey )
13 import Text.XML.HXT.Core (
14 (>>>),
15 (/>),
16 SysConfigList,
17 XmlPickler(..),
18 hasName,
19 no,
20 readDocument,
21 runX,
22 withPreserveComment,
23 withRemoveWS,
24 withSubstDTDEntities,
25 withValidate,
26 xpickleVal,
27 xunpickleVal,
28 yes )
29
30
-- | Types that have a companion XML representation.
--
--   Every XML thingie we import needs two distinct types: one for the
--   database and one for the XML. Groundhog and HXT are both very
--   particular about the types they will work with, and there is no
--   way to reuse e.g. a type that HXT can pickle in Groundhog. This
--   class is how we get from the Groundhog type to the XML type.
--
--   Going in the other direction -- obtaining the Groundhog type from
--   the XML one -- looks equally valid at first, but Groundhog won't
--   use type family instances, so this is the direction we are left
--   with.
--
class ToFromXml a where
  -- | The XML type associated with the instance type @a@. Every
  --   instance must declare it.
  type Xml a :: *

  -- | The type whose 'AutoKey' is passed to 'from_xml_fk'; that is,
  --   the container that an @a@ \"belongs\" to.
  type Container a :: *

  -- | Produce the XML representation of an @a@.
  to_xml :: a -> Xml a

  -- | Recover an @a@ from its XML representation.
  from_xml :: Xml a -> a

  -- | A variant of 'from_xml' that is also given a key, to be used as
  --   a foreign key into some container. An instance that \"belongs\"
  --   to another object may need that key before it can un-XML
  --   itself. For example, the XML version of 'NewsTeam' doesn't
  --   contain a message ID which is part of its database type.
  --
  --   The default implementation discards the key and simply defers
  --   to 'from_xml'.
  from_xml_fk :: AutoKey (Container a) -> Xml a -> a
  from_xml_fk = const from_xml
62
63
64 -- | A wrapper around the DTD filename, i.e. the \"SYSTEM\" part of
65 -- an XML document's DOCTYPE definition, stored as a plain 'String'.
66 newtype DtdName = DtdName String
67
-- | The options that we hand to 'readDocument' whenever we parse an
--   XML document. Validation is turned off because the DTDs from TSN
--   are wrong, and as a result we don't want to keep useless DTDs
--   around. So we also disable 'withSubstDTDEntities' which, when
--   combined with @withValidate no@, prevents HXT from trying to read
--   the DTD at all.
--
parse_opts :: SysConfigList
parse_opts =
  [ withPreserveComment no
  , withRemoveWS yes
  , withSubstDTDEntities no
  , withValidate no
  ]
81
82
-- | Unpickle the root \"object\" of an XML document, and also send it
--   through a pickle/unpickle round trip.
--
--   Takes the name of the root element and the path of the file to
--   read. The first component of the result holds what was unpickled
--   directly from the document; the second holds what comes out
--   after pickling those values and unpickling them again. A number
--   of XML tests use this to make sure that the output of a
--   pickle/unpickle cycle is the same as its input.
--
--   We return objects rather than an XmlTree (which would save us an
--   unpickle call) because otherwise the type of @a@ in the call to
--   'xpickle' would be ambiguous. By returning some @a@s, we allow
--   the caller to annotate its type.
--
pickle_unpickle :: XmlPickler a
                => String
                -> FilePath
                -> IO ([a], [a])
pickle_unpickle root_element filepath = do
  expected <- runX unpickled_root
  actual <- runX (unpickled_root >>> round_trip)
  return (expected, actual)
  where
    -- Read the document and keep only the element named
    -- root_element, since readDocument produces a bunch of other
    -- junk; then unpickle what is left.
    unpickled_root =
      (readDocument parse_opts filepath /> hasName root_element)
        >>> xunpickleVal xpickle

    -- Pickle a value back into XML and immediately unpickle it again.
    round_trip = xpickleVal xpickle >>> xunpickleVal xpickle
112 hasName root_element
113 >>>
114 xunpickleVal xpickle