1 {-# LANGUAGE DeriveDataTypeable #-}
2 {-# LANGUAGE FlexibleInstances #-}
4 {-# LANGUAGE QuasiQuotes #-}
5 {-# LANGUAGE RecordWildCards #-}
6 {-# LANGUAGE StandaloneDeriving #-}
7 {-# LANGUAGE TemplateHaskell #-}
8 {-# LANGUAGE TypeFamilies #-}
10 -- | Parse TSN XML for the DTD "newsxml.dtd". Each document contains a
11 -- root element \<message\> that contains an entire news item.
16 -- * WARNING: these are private but exported to silence warnings
17 News_NewsLocationConstructor(..),
18 News_NewsTeamConstructor(..),
20 NewsLocationConstructor(..),
21 NewsTeamConstructor(..) )
25 import Data.Data ( Data, constrFields, dataTypeConstrs, dataTypeOf )
26 import Data.List.Utils ( join, split )
27 import Data.Tuple.Curry ( uncurryN )
28 import Data.Typeable ( Typeable )
29 import Database.Groundhog (
32 import Database.Groundhog.Core ( DefaultKey )
33 import Database.Groundhog.TH (
37 import Test.Tasty ( TestTree, testGroup )
38 import Test.Tasty.HUnit ( (@?=), testCase )
39 import Text.XML.HXT.Core (
55 tsn_db_field_namer ) -- Used in a test
56 import TSN.DbImport ( DbImport(..), ImportResult(..), run_dbmigrate )
57 import TSN.XmlImport ( XmlImport(..) )
58 import Xml ( FromXml(..), pickle_unpickle, unpickleable )
62 -- | The database type for teams as they show up in the news.
65 NewsTeam { team_name :: String }
68 -- | This is needed to define the XmlImport instance for NewsTeam; it
69 -- basically says that the DB representation is the same as the XML
72 instance FromXml NewsTeam where
73 type Db NewsTeam = NewsTeam
76 -- | Allow us to call 'insert_xml' on the XML representation of
79 instance XmlImport NewsTeam
82 -- | Mapping between News records and NewsTeam records in the
85 data News_NewsTeam = News_NewsTeam
90 -- | The database type for locations as they show up in the news.
95 state :: Maybe String,
99 -- | This is needed to define the XmlImport instance for NewsLocation; it
100 -- basically says that the DB representation is the same as the XML
103 instance FromXml NewsLocation where
104 type Db NewsLocation = NewsLocation
107 -- | Allow us to call 'insert_xml' on the XML representation of
110 instance XmlImport NewsLocation
113 -- | Mapping between News records and NewsLocation records in the
116 data News_NewsLocation = News_NewsLocation
118 (DefaultKey NewsLocation)
121 -- | The msg_id child of \<message\> contains an event_id attribute; we
122 -- embed it into the 'News' type. We (pointlessly) use the "db_"
123 -- prefix here so that the two names don't collide on "id" when
124 -- Groundhog is creating its fields using our field namer.
128 db_event_id :: Maybe Int }
129 deriving (Data, Eq, Show, Typeable)
132 -- | The XML representation of a news item (message).
136 xml_xml_file_id :: Int,
137 xml_heading :: String,
139 xml_category :: String,
141 xml_url :: Maybe String,
142 xml_teams :: [NewsTeam],
143 xml_locations :: [NewsLocation],
145 xml_editor :: Maybe String,
146 xml_text :: Maybe String, -- Text and continue seem to show up in pairs,
147 xml_continue :: Maybe String, -- either both present or both missing.
148 xml_time_stamp :: String }
152 -- | The database representation of a news item. We drop several
153 -- uninteresting fields from 'Message', and omit the list fields which
154 -- will be represented as join tables.
160 db_url :: Maybe String,
162 db_editor :: Maybe String,
163 db_text :: Maybe String,
164 db_continue :: Maybe String }
165 deriving (Data, Eq, Show, Typeable)
168 -- | Convert the XML representation 'Message' to the database
169 -- representation 'News'.
171 instance FromXml Message where
172 type Db Message = News
174 -- | We use a record wildcard so GHC doesn't complain that we never
175 -- used the field names.
177 from_xml Message{..} = News { db_mid = xml_mid,
178 db_sport = xml_sport,
181 db_editor = xml_editor,
183 db_continue = xml_continue }
185 -- | This lets us call 'insert_xml' on a 'Message'.
187 instance XmlImport Message
189 -- | Define 'dbmigrate' and 'dbimport' for 'Message's. The import is
190 -- slightly non-generic because of our 'News_NewsTeam' and
191 -- 'News_NewsLocation' join tables.
193 instance DbImport Message where
196 migrate (undefined :: NewsTeam)
197 migrate (undefined :: NewsLocation)
198 migrate (undefined :: News)
199 migrate (undefined :: News_NewsTeam)
200 migrate (undefined :: News_NewsLocation)
202 dbimport message = do
203 -- Insert the message and acquire its primary key (unique ID)
204 news_id <- insert_xml message
206 -- And insert each one into its own table. We use insert_xml_or_select
207 -- because we know that most teams will already exist, and we
208 -- want to get back the id for the existing team when
209 -- there's a collision.
210 nt_ids <- mapM insert_xml_or_select (xml_teams message)
212 -- Now that the teams have been inserted, create
213 -- news__news_team records mapping between the two.
214 let news_news_teams = map (News_NewsTeam news_id) nt_ids
215 mapM_ insert_ news_news_teams
217 -- Do all of that over again for the NewsLocations.
218 loc_ids <- mapM insert_xml_or_select (xml_locations message)
219 let news_news_locations = map (News_NewsLocation news_id) loc_ids
220 mapM_ insert_ news_news_locations
222 return ImportSucceeded
225 -- | These types don't have special XML representations or field name
226 -- collisions so we use the defaultCodegenConfig and give their
227 -- fields nice simple names.
228 mkPersist defaultCodegenConfig [groundhog|
234 - name: unique_news_team
238 - entity: NewsLocation
239 dbName: news_locations
243 - name: unique_news_location
245 fields: [city, state, country]
250 -- | These types have fields with e.g. db_ and xml_ prefixes, so we
251 -- use our own codegen to peel those off before naming the columns.
253 mkPersist tsn_codegen_config [groundhog|
261 - {name: msg_id, dbName: msg_id}
262 - {name: event_id, dbName: event_id}
271 - entity: News_NewsTeam
272 dbName: news__news_teams
274 - name: News_NewsTeam
276 - name: news_NewsTeam0
278 - name: news_NewsTeam1
279 dbName: news_teams_id
281 - entity: News_NewsLocation
282 dbName: news__news_locations
284 - name: News_NewsLocation
286 - name: news_NewsLocation0
288 - name: news_NewsLocation1
289 dbName: news_locations_id
293 -- | Convert a 'NewsTeam' to/from XML.
295 pickle_news_team :: PU NewsTeam
298 xpWrap (from_string, to_string) xpText
300 to_string :: NewsTeam -> String
301 to_string = team_name
303 from_string :: String -> NewsTeam
304 from_string = NewsTeam
307 -- | Convert a 'MsgId' to/from XML.
309 pickle_msg_id :: PU MsgId
312 xpWrap (from_tuple, to_tuple) $
313 xpPair xpInt (xpAttr "EventId" (xpOption xpInt))
315 from_tuple = uncurryN MsgId
316 to_tuple m = (db_msg_id m, db_event_id m)
319 -- | Convert a 'NewsLocation' to/from XML.
321 pickle_location :: PU NewsLocation
324 xpWrap (from_tuple, to_tuple) $
325 xpTriple (xpOption (xpElem "city" xpText))
326 (xpOption (xpElem "state" xpText))
327 (xpElem "country" xpText)
330 uncurryN NewsLocation
331 to_tuple l = (city l, state l, country l)
334 -- | Convert a 'Message' to/from XML.
336 pickle_message :: PU Message
339 xpWrap (from_tuple, to_tuple) $
340 xp13Tuple (xpElem "XML_File_ID" xpInt)
341 (xpElem "heading" xpText)
343 (xpElem "category" xpText)
344 (xpElem "sport" xpText)
345 (xpElem "url" $ xpOption xpText)
346 (xpList pickle_news_team)
347 (xpList pickle_location)
348 (xpElem "SMS" xpText)
349 (xpOption (xpElem "Editor" xpText))
350 (xpOption (xpElem "text" xpText))
352 (xpElem "time_stamp" xpText)
354 from_tuple = uncurryN Message
355 to_tuple m = (xml_xml_file_id m, -- Verbose,
356 xml_heading m, -- but
357 xml_mid m, -- eliminates
358 xml_category m, -- GHC
359 xml_sport m, -- warnings
362 xml_locations m, -- .
369 -- | We combine all of the \<continue\> elements into one 'String'
370 -- while unpickling and do the reverse while pickling.
372 pickle_continue :: PU (Maybe String)
375 xpWrap (to_string, from_string) $
377 xpList (xpElem "P" xpText)
379 from_string :: String -> [String]
380 from_string = split "\n"
382 to_string :: [String] -> String
383 to_string = join "\n"
388 news_tests :: TestTree
392 [ test_news_fields_have_correct_names,
393 test_pickle_of_unpickle_is_identity,
394 test_unpickle_succeeds ]
397 test_news_fields_have_correct_names :: TestTree
398 test_news_fields_have_correct_names =
399 testCase "news fields get correct database names" $
400 mapM_ check (zip actual expected)
402 -- This is cool, it uses the (derived) Data instance of
403 -- News.News to get its constructor names.
404 field_names :: [String]
406 constrFields . head $ dataTypeConstrs $ dataTypeOf (undefined :: News)
410 map (\x -> tsn_db_field_namer "herp" "derp" 8675309 x 90210) field_names
413 actual = ["mid", "sport", "url", "sms", "editor", "text", "continue"]
415 check (x,y) = (x @?= y)
418 -- | Warning: success of this test does not mean that unpickling
420 test_pickle_of_unpickle_is_identity :: TestTree
421 test_pickle_of_unpickle_is_identity = testGroup "pickle-unpickle tests"
422 [ check "pickle composed with unpickle is the identity"
423 "test/xml/newsxml.xml",
425 check "pickle composed with unpickle is the identity (with Editor)"
426 "test/xml/newsxml-with-editor.xml" ]
428 check desc path = testCase desc $ do
429 (expected, actual) <- pickle_unpickle pickle_message path
433 test_unpickle_succeeds :: TestTree
434 test_unpickle_succeeds = testGroup "unpickle tests"
435 [ check "unpickling succeeds"
436 "test/xml/newsxml.xml",
438 check "unpickling succeeds (with Editor)"
439 "test/xml/newsxml-with-editor.xml" ]
441 check desc path = testCase desc $ do
442 actual <- unpickleable path pickle_message