]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/News.hs
Finish documenting TSN.XML.News and fix all compiler warnings therein.
[dead/htsn-import.git] / src / TSN / XML / News.hs
1 {-# LANGUAGE DeriveDataTypeable #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE GADTs #-}
4 {-# LANGUAGE QuasiQuotes #-}
5 {-# LANGUAGE RecordWildCards #-}
6 {-# LANGUAGE StandaloneDeriving #-}
7 {-# LANGUAGE TemplateHaskell #-}
8 {-# LANGUAGE TypeFamilies #-}
9
10 -- | Parse TSN XML for the DTD "newsxml.dtd". Each document contains a
11 -- root element \<message\> that contains an entire news item.
12 --
13 module TSN.XML.News (
14 news_tests,
15 pickle_message,
16 -- * WARNING: these are private but exported to silence warnings
17 News_NewsLocationConstructor(..),
18 News_NewsTeamConstructor(..),
19 NewsConstructor(..),
20 NewsLocationConstructor(..),
21 NewsTeamConstructor(..) )
22 where
23
24 -- System imports.
25 import Data.Data ( Data, constrFields, dataTypeConstrs, dataTypeOf )
26 import Data.List.Utils ( join, split )
27 import Data.Tuple.Curry ( uncurryN )
28 import Data.Typeable ( Typeable )
29 import Database.Groundhog (
30 insert_,
31 migrate )
32 import Database.Groundhog.Core ( DefaultKey )
33 import Database.Groundhog.TH (
34 defaultCodegenConfig,
35 groundhog,
36 mkPersist )
37 import Test.Tasty ( TestTree, testGroup )
38 import Test.Tasty.HUnit ( (@?=), testCase )
39 import Text.XML.HXT.Core (
40 PU,
41 xp13Tuple,
42 xpAttr,
43 xpElem,
44 xpInt,
45 xpList,
46 xpOption,
47 xpPair,
48 xpText,
49 xpTriple,
50 xpWrap )
51
52 -- Local imports.
53 import TSN.Codegen (
54 tsn_codegen_config,
55 tsn_db_field_namer ) -- Used in a test
56 import TSN.DbImport ( DbImport(..), ImportResult(..), run_dbmigrate )
57 import TSN.XmlImport ( XmlImport(..) )
58 import Xml ( FromXml(..), pickle_unpickle, unpickleable )
59
60
61
62 -- | The database type for teams as they show up in the news.
63 --
64 data NewsTeam =
65 NewsTeam { team_name :: String }
66 deriving (Eq, Show)
67
68 -- | This is needed to define the XmlImport instance for NewsTeam; it
69 -- basically says that the DB representation is the same as the XML
70 -- representation.
71 --
72 instance FromXml NewsTeam where
73 type Db NewsTeam = NewsTeam
74 from_xml = id
75
76 -- | Allow us to call 'insert_xml' on the XML representation of
77 -- NewsTeams.
78 --
79 instance XmlImport NewsTeam
80
81
82 -- | Mapping between News records and NewsTeam records in the
83 -- database.
84 --
85 data News_NewsTeam = News_NewsTeam
86 (DefaultKey News)
87 (DefaultKey NewsTeam)
88
89
90 -- | The database type for locations as they show up in the news.
91 --
92 data NewsLocation =
93 NewsLocation {
94 city :: Maybe String,
95 state :: Maybe String,
96 country :: String }
97 deriving (Eq, Show)
98
99 -- | This is needed to define the XmlImport instance for NewsLocation; it
100 -- basically says that the DB representation is the same as the XML
101 -- representation.
102 --
103 instance FromXml NewsLocation where
104 type Db NewsLocation = NewsLocation
105 from_xml = id
106
107 -- | Allow us to call 'insert_xml' on the XML representation of
108 -- NewsLocations.
109 --
110 instance XmlImport NewsLocation
111
112
113 -- | Mapping between News records and NewsLocation records in the
114 -- database.
115 --
116 data News_NewsLocation = News_NewsLocation
117 (DefaultKey News)
118 (DefaultKey NewsLocation)
119
120
121 -- | The msg_id child of <message> contains an event_id attribute; we
122 -- embed it into the 'News' type. We (pointlessly) use the "db_"
123 -- prefix here so that the two names don't collide on "id" when
124 -- Groundhog is creating its fields using our field namer.
125 data MsgId =
126 MsgId {
127 db_msg_id :: Int,
128 db_event_id :: Maybe Int }
129 deriving (Data, Eq, Show, Typeable)
130
131
132 -- | The XML representation of a news item (message).
133 --
134 data Message =
135 Message {
136 xml_xml_file_id :: Int,
137 xml_heading :: String,
138 xml_mid :: MsgId,
139 xml_category :: String,
140 xml_sport :: String,
141 xml_url :: Maybe String,
142 xml_teams :: [NewsTeam],
143 xml_locations :: [NewsLocation],
144 xml_sms :: String,
145 xml_editor :: Maybe String,
146 xml_text :: Maybe String, -- Text and continue seem to show up in pairs,
147 xml_continue :: Maybe String, -- either both present or both missing.
148 xml_time_stamp :: String }
149 deriving (Eq, Show)
150
151
152 -- | The database representation of a news item. We drop several
153 -- uninteresting fields from 'Message', and omit the list fields which
154 -- will be represented as join tables.
155 --
156 data News =
157 News {
158 db_mid :: MsgId,
159 db_sport :: String,
160 db_url :: Maybe String,
161 db_sms :: String,
162 db_editor :: Maybe String,
163 db_text :: Maybe String,
164 db_continue :: Maybe String }
165 deriving (Data, Eq, Show, Typeable)
166
167
168 -- | Convert the XML representation 'Message' to the database
169 -- representation 'News'.
170 --
171 instance FromXml Message where
172 type Db Message = News
173
174 -- | We use a record wildcard so GHC doesn't complain that we never
175 -- used the field names.
176 --
177 from_xml Message{..} = News { db_mid = xml_mid,
178 db_sport = xml_sport,
179 db_url = xml_url,
180 db_sms = xml_sms,
181 db_editor = xml_editor,
182 db_text = xml_text,
183 db_continue = xml_continue }
184
185 -- | This lets us call 'insert_xml' on a 'Message'.
186 --
187 instance XmlImport Message
188
189 -- | Define 'dbmigrate' and 'dbimport' for 'Message's. The import is
190 -- slightly non-generic because of our 'News_NewsTeam' and
191 -- 'News_NewsLocation' join tables.
192 --
193 instance DbImport Message where
194 dbmigrate _ =
195 run_dbmigrate $ do
196 migrate (undefined :: NewsTeam)
197 migrate (undefined :: NewsLocation)
198 migrate (undefined :: News)
199 migrate (undefined :: News_NewsTeam)
200 migrate (undefined :: News_NewsLocation)
201
202 dbimport message = do
203 -- Insert the message and acquire its primary key (unique ID)
204 news_id <- insert_xml message
205
206 -- And insert each one into its own table. We use insert_xml_or_select
207 -- because we know that most teams will already exist, and we
208 -- want to get back the id for the existing team when
209 -- there's a collision.
210 nt_ids <- mapM insert_xml_or_select (xml_teams message)
211
212 -- Now that the teams have been inserted, create
213 -- news__news_team records mapping beween the two.
214 let news_news_teams = map (News_NewsTeam news_id) nt_ids
215 mapM_ insert_ news_news_teams
216
217 -- Do all of that over again for the NewsLocations.
218 loc_ids <- mapM insert_xml_or_select (xml_locations message)
219 let news_news_locations = map (News_NewsLocation news_id) loc_ids
220 mapM_ insert_ news_news_locations
221
222 return ImportSucceeded
223
224
225 -- | These types don't have special XML representations or field name
226 -- collisions so we use the defaultCodegenConfig and give their
227 -- fields nice simple names.
228 mkPersist defaultCodegenConfig [groundhog|
229 - entity: NewsTeam
230 dbName: news_teams
231 constructors:
232 - name: NewsTeam
233 uniques:
234 - name: unique_news_team
235 type: constraint
236 fields: [team_name]
237
238 - entity: NewsLocation
239 dbName: news_locations
240 constructors:
241 - name: NewsLocation
242 uniques:
243 - name: unique_news_location
244 type: constraint
245 fields: [city, state, country]
246
247 |]
248
249
250 -- | These types have fields with e.g. db_ and xml_ prefixes, so we
251 -- use our own codegen to peel those off before naming the columns.
252 --
253 mkPersist tsn_codegen_config [groundhog|
254 - entity: News
255 dbName: news
256 constructors:
257 - name: News
258 fields:
259 - name: db_mid
260 embeddedType:
261 - {name: msg_id, dbName: msg_id}
262 - {name: event_id, dbName: event_id}
263 - embedded: MsgId
264 fields:
265 - name: db_msg_id
266 dbName: msg_id
267 - name: db_event_id
268 dbName: event_id
269
270
271 - entity: News_NewsTeam
272 dbName: news__news_teams
273 constructors:
274 - name: News_NewsTeam
275 fields:
276 - name: news_NewsTeam0
277 dbName: news_id
278 - name: news_NewsTeam1
279 dbName: news_teams_id
280
281 - entity: News_NewsLocation
282 dbName: news__news_locations
283 constructors:
284 - name: News_NewsLocation
285 fields:
286 - name: news_NewsLocation0
287 dbName: news_id
288 - name: news_NewsLocation1
289 dbName: news_locations_id
290 |]
291
292
293 -- | Convert a 'NewsTeam' to/from XML.
294 --
295 pickle_news_team :: PU NewsTeam
296 pickle_news_team =
297 xpElem "team" $
298 xpWrap (from_string, to_string) xpText
299 where
300 to_string :: NewsTeam -> String
301 to_string = team_name
302
303 from_string :: String -> NewsTeam
304 from_string = NewsTeam
305
306
307 -- | Convert a 'MsgId' to/from XML.
308 --
309 pickle_msg_id :: PU MsgId
310 pickle_msg_id =
311 xpElem "msg_id" $
312 xpWrap (from_tuple, to_tuple) $
313 xpPair xpInt (xpAttr "EventId" (xpOption xpInt))
314 where
315 from_tuple = uncurryN MsgId
316 to_tuple m = (db_msg_id m, db_event_id m)
317
318
319 -- | Convert a 'NewsLocation' to/from XML.
320 --
321 pickle_location :: PU NewsLocation
322 pickle_location =
323 xpElem "location" $
324 xpWrap (from_tuple, to_tuple) $
325 xpTriple (xpOption (xpElem "city" xpText))
326 (xpOption (xpElem "state" xpText))
327 (xpElem "country" xpText)
328 where
329 from_tuple =
330 uncurryN NewsLocation
331 to_tuple l = (city l, state l, country l)
332
333
334 -- | Convert a 'Message' to/from XML.
335 --
336 pickle_message :: PU Message
337 pickle_message =
338 xpElem "message" $
339 xpWrap (from_tuple, to_tuple) $
340 xp13Tuple (xpElem "XML_File_ID" xpInt)
341 (xpElem "heading" xpText)
342 pickle_msg_id
343 (xpElem "category" xpText)
344 (xpElem "sport" xpText)
345 (xpElem "url" $ xpOption xpText)
346 (xpList pickle_news_team)
347 (xpList pickle_location)
348 (xpElem "SMS" xpText)
349 (xpOption (xpElem "Editor" xpText))
350 (xpOption (xpElem "text" xpText))
351 pickle_continue
352 (xpElem "time_stamp" xpText)
353 where
354 from_tuple = uncurryN Message
355 to_tuple m = (xml_xml_file_id m, -- Verbose,
356 xml_heading m, -- but
357 xml_mid m, -- eliminates
358 xml_category m, -- GHC
359 xml_sport m, -- warnings
360 xml_url m, -- .
361 xml_teams m, -- .
362 xml_locations m, -- .
363 xml_sms m,
364 xml_editor m,
365 xml_text m,
366 xml_continue m,
367 xml_time_stamp m)
368
369 -- | We combine all of the \<continue\> elements into one 'String'
370 -- while unpickling and do the reverse while pickling.
371 --
372 pickle_continue :: PU (Maybe String)
373 pickle_continue =
374 xpOption $
375 xpWrap (to_string, from_string) $
376 xpElem "continue" $
377 xpList (xpElem "P" xpText)
378 where
379 from_string :: String -> [String]
380 from_string = split "\n"
381
382 to_string :: [String] -> String
383 to_string = join "\n"
384
385
386
387 -- * Tasty Tests
388 news_tests :: TestTree
389 news_tests =
390 testGroup
391 "News tests"
392 [ test_news_fields_have_correct_names,
393 test_pickle_of_unpickle_is_identity,
394 test_unpickle_succeeds ]
395
396
397 test_news_fields_have_correct_names :: TestTree
398 test_news_fields_have_correct_names =
399 testCase "news fields get correct database names" $
400 mapM_ check (zip actual expected)
401 where
402 -- This is cool, it uses the (derived) Data instance of
403 -- News.News to get its constructor names.
404 field_names :: [String]
405 field_names =
406 constrFields . head $ dataTypeConstrs $ dataTypeOf (undefined :: News)
407
408 expected :: [String]
409 expected =
410 map (\x -> tsn_db_field_namer "herp" "derp" 8675309 x 90210) field_names
411
412 actual :: [String]
413 actual = ["mid", "sport", "url", "sms", "editor", "text", "continue"]
414
415 check (x,y) = (x @?= y)
416
417
418 -- | Warning, succeess of this test does not mean that unpickling
419 -- succeeded.
420 test_pickle_of_unpickle_is_identity :: TestTree
421 test_pickle_of_unpickle_is_identity = testGroup "pickle-unpickle tests"
422 [ check "pickle composed with unpickle is the identity"
423 "test/xml/newsxml.xml",
424
425 check "pickle composed with unpickle is the identity (with Editor)"
426 "test/xml/newsxml-with-editor.xml" ]
427 where
428 check desc path = testCase desc $ do
429 (expected, actual) <- pickle_unpickle pickle_message path
430 actual @?= expected
431
432
433 test_unpickle_succeeds :: TestTree
434 test_unpickle_succeeds = testGroup "unpickle tests"
435 [ check "unpickling succeeds"
436 "test/xml/newsxml.xml",
437
438 check "unpickling succeeds (with Editor)"
439 "test/xml/newsxml-with-editor.xml" ]
440 where
441 check desc path = testCase desc $ do
442 actual <- unpickleable path pickle_message
443 let expected = True
444 actual @?= expected