]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/News.hs
Get the news import working more or less how it's supposed to.
[dead/htsn-import.git] / src / TSN / News.hs
1 {-# LANGUAGE BangPatterns #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE GADTs #-}
4 {-# LANGUAGE QuasiQuotes #-}
5 {-# LANGUAGE RecordWildCards #-}
6 {-# LANGUAGE ScopedTypeVariables #-}
7 {-# LANGUAGE StandaloneDeriving #-}
8 {-# LANGUAGE TemplateHaskell #-}
9 {-# LANGUAGE TypeFamilies #-}
10
11 -- | Parse TSN XML for the DTD "newsxml.dtd". Each document contains a
12 -- root element \<message\> that contains an entire news item.
13 --
14 module TSN.News (
15 Message,
16 news_tests )
17 where
18
19 import Control.Monad.IO.Class ( MonadIO, liftIO )
20 import Data.List.Utils ( join, split )
21 import Data.Tuple.Curry ( uncurryN )
22 import Database.Groundhog (
23 defaultMigrationLogger,
24 insert,
25 migrate,
26 runMigration )
27 import Database.Groundhog.Core ( DefaultKey )
28 import Database.Groundhog.TH (
29 groundhog,
30 mkPersist )
31 import Test.Tasty ( TestTree, testGroup )
32 import Test.Tasty.HUnit ( (@?=), testCase )
33 import Text.XML.HXT.Core (
34 PU,
35 XmlPickler(..),
36 unpickleDoc,
37 xp12Tuple,
38 xpAttr,
39 xpElem,
40 xpList,
41 xpOption,
42 xpPair,
43 xpPrim,
44 xpText,
45 xpTriple,
46 xpWrap )
47
48 import Network.Services.TSN.Report ( report_error )
49 import TSN.Codegen ( tsn_codegen_config )
50 import TSN.DbImport ( DbImport(..) )
51 import Xml ( ToFromXml(..), pickle_unpickle )
52
53
54
55 -- | The database type for teams as they show up in the news. We need
56 -- this separate from its XML representation because of the
57 -- DefaultKey pointing to a message. We don't know how to create one
58 -- of those unless we've just inserted a message into the database,
59 -- so it screws up pickling.
60 data NewsTeam =
61 NewsTeam {
62 nt_news_id :: DefaultKey Message, -- ^ foreign key.
63 db_team_name :: String }
64 deriving instance Eq NewsTeam -- Standalone instances necessary for
65 deriving instance Show NewsTeam -- Groundhog types with DefaultKeys
66
67 -- | The XML type for teams as they show up in the news. See
68 -- 'NewsTeam' for why there are two types.
69 data NewsTeamXml =
70 NewsTeamXml {
71 xml_team_name :: String }
72 deriving (Eq, Show)
73
74 -- | Specify how to convert between the two representations NewsTeam
75 -- (database) and NewsTeamXml (XML).
76 instance ToFromXml NewsTeam where
77 type Xml NewsTeam = NewsTeamXml
78 type Container NewsTeam = Message
79 -- Use a record wildcard here so GHC doesn't complain that we never
80 -- used our named fields.
81 to_xml (NewsTeam {..}) = NewsTeamXml db_team_name
82 -- We can't create a DefaultKey Message...
83 from_xml = error "Called from_xml on a NewsTeam"
84 -- unless we're handed one.
85 from_xml_fk key = (NewsTeam key) . xml_team_name
86
87
88 -- | The database type for locations as they show up in the news. We
89 -- need this separate from its XML representation because of the
90 -- DefaultKey pointing to a message. We don't know how to create one
91 -- of those unless we've just inserted a message into the database,
92 -- so it screws up pickling.
93 data NewsLocation =
94 NewsLocation {
95 loc_news_id :: DefaultKey Message, -- ^ foreign key.
96 db_city ::String,
97 db_state :: String,
98 db_country :: String }
99 deriving instance Eq NewsLocation -- Standalone instances necessary for
100 deriving instance Show NewsLocation -- Groundhog types with DefaultKeys
101
102 -- | The XML type for locations as they show up in the news. See
103 -- 'NewsLocation' for why there are two types.
104 data NewsLocationXml =
105 NewsLocationXml {
106 xml_city :: String,
107 xml_state :: String,
108 xml_country :: String }
109 deriving (Eq, Show)
110
111
112 -- | Specify how to convert between the two representations
113 -- NewsLocation (database) and NewsLocationXml (XML).
114 instance ToFromXml NewsLocation where
115 type Xml NewsLocation = NewsLocationXml
116 type Container NewsLocation = Message
117 -- Use a record wildcard here so GHC doesn't complain that we never
118 -- used our named fields.
119 to_xml (NewsLocation {..}) = NewsLocationXml db_city db_state db_country
120 -- We can't create a DefaultKey Message...
121 from_xml = error "Called from_xml on a NewsLocation"
122 -- unless we're given one.
123 from_xml_fk key (NewsLocationXml x y z) = NewsLocation key x y z
124
125
126 -- | The msg_id child of <message> contains an event_id attribute; we
127 -- embed it into the 'Message' type. We (pointlessly) use the "db_"
128 -- prefix here so that the two names collide on "id" when Groundhog
129 -- is creating its fields using our field namer.
130 data MsgId =
131 MsgId {
132 db_msg_id :: Int,
133 db_event_id :: Maybe Int }
134 deriving (Eq, Show)
135
136
137 data MessageXml =
138 MessageXml {
139 xml_xml_file_id :: Int,
140 xml_heading :: String,
141 xml_mid :: MsgId,
142 xml_category :: String,
143 xml_sport :: String,
144 xml_url :: String,
145 xml_teams :: [NewsTeamXml],
146 xml_locations :: [NewsLocationXml],
147 xml_sms :: String,
148 xml_text :: String,
149 xml_continue :: String,
150 xml_time_stamp :: String }
151 deriving (Eq, Show)
152
153 data Message =
154 Message {
155 db_xml_file_id :: Int,
156 db_heading :: String,
157 db_mid :: MsgId,
158 db_category :: String,
159 db_sport :: String,
160 db_url :: String,
161 db_sms :: String,
162 db_text :: String,
163 db_continue :: String,
164 db_time_stamp :: String }
165 deriving (Eq, Show)
166
167 instance ToFromXml Message where
168 type Xml Message = MessageXml
169 type Container Message = ()
170
171 -- Use a record wildcard here so GHC doesn't complain that we never
172 -- used our named fields.
173 to_xml (Message {..}) =
174 MessageXml
175 db_xml_file_id
176 db_heading
177 db_mid
178 db_category
179 db_sport
180 db_url
181 []
182 []
183 db_sms
184 db_text
185 db_continue
186 db_time_stamp
187
188 -- We don't need the key argument (from_xml_fk) since the XML type
189 -- contains more information in this case.
190 from_xml (MessageXml a b c d e f _ _ g h i j) =
191 Message a b c d e f g h i j
192
193
194 mkPersist tsn_codegen_config [groundhog|
195 - entity: NewsTeam
196 dbName: news_teams
197
198 - entity: NewsLocation
199 dbName: news_locations
200
201 - entity: Message
202 dbName: news
203 constructors:
204 - name: Message
205 fields:
206 - name: db_mid
207 embeddedType:
208 - {name: msg_id, dbName: msg_id}
209 - {name: event_id, dbName: event_id}
210 - embedded: MsgId
211 fields:
212 - name: db_msg_id
213 dbName: msg_id
214 - name: db_event_id
215 dbName: event_id
216 |]
217
218 pickle_news_team :: PU NewsTeamXml
219 pickle_news_team =
220 xpElem "team" $
221 xpWrap (from_string, to_string) xpText
222 where
223 to_string :: NewsTeamXml -> String
224 to_string = xml_team_name
225
226 from_string :: String -> NewsTeamXml
227 from_string = NewsTeamXml
228
229 instance XmlPickler NewsTeamXml where
230 xpickle = pickle_news_team
231
232 pickle_msg_id :: PU MsgId
233 pickle_msg_id =
234 xpElem "msg_id" $
235 xpWrap (from_tuple, to_tuple) $
236 xpPair xpPrim (xpAttr "EventId" (xpOption xpPrim))
237 where
238 from_tuple = uncurryN MsgId
239 to_tuple m = (db_msg_id m, db_event_id m)
240
241 instance XmlPickler MsgId where
242 xpickle = pickle_msg_id
243
244 pickle_location :: PU NewsLocationXml
245 pickle_location =
246 xpElem "location" $
247 xpWrap (from_tuple, to_tuple) $
248 xpTriple (xpElem "city" xpText)
249 (xpElem "state" xpText)
250 (xpElem "country" xpText)
251 where
252 from_tuple =
253 uncurryN NewsLocationXml
254 to_tuple l = (xml_city l, xml_state l, xml_country l)
255
256 instance XmlPickler NewsLocationXml where
257 xpickle = pickle_location
258
259
260 pickle_message :: PU MessageXml
261 pickle_message =
262 xpElem "message" $
263 xpWrap (from_tuple, to_tuple) $
264 xp12Tuple (xpElem "XML_File_ID" xpPrim)
265 (xpElem "heading" xpText)
266 pickle_msg_id
267 (xpElem "category" xpText)
268 (xpElem "sport" xpText)
269 (xpElem "url" xpText)
270 (xpList $ pickle_news_team)
271 (xpList $ pickle_location)
272 (xpElem "SMS" xpText)
273 (xpElem "text" xpText)
274 pickle_continue
275 (xpElem "time_stamp" xpText)
276 where
277 from_tuple = uncurryN MessageXml
278 to_tuple m = (xml_xml_file_id m,
279 xml_heading m,
280 xml_mid m,
281 xml_category m,
282 xml_sport m,
283 xml_url m,
284 xml_teams m,
285 xml_locations m,
286 xml_sms m,
287 xml_text m,
288 xml_continue m,
289 xml_time_stamp m)
290
291 pickle_continue :: PU String
292 pickle_continue =
293 xpWrap (to_string, from_string) $
294 xpElem "continue" $
295 (xpList $ xpElem "P" xpText)
296 where
297 from_string :: String -> [String]
298 from_string = split "\n"
299
300 to_string :: [String] -> String
301 to_string = join "\n"
302
303 instance XmlPickler MessageXml where
304 xpickle = pickle_message
305
306
307
308 instance DbImport Message where
309 dbimport _ xml = do
310 runMigration defaultMigrationLogger $ do
311 migrate (undefined :: Message)
312 migrate (undefined :: NewsTeam)
313 migrate (undefined :: NewsLocation)
314 let root_element = unpickleDoc xpickle xml :: Maybe MessageXml
315 case root_element of
316 Nothing -> do
317 let errmsg = "Could not unpickle News message in dbimport."
318 liftIO $ report_error errmsg
319 return Nothing
320 Just message -> do
321 news_id <- insert (from_xml message :: Message)
322 let nts :: [NewsTeam] = map (from_xml_fk news_id)
323 (xml_teams message)
324 let nlocs :: [NewsLocation] = map (from_xml_fk news_id)
325 (xml_locations message)
326 nt_ids <- mapM insert nts
327 loc_ids <- mapM insert nlocs
328
329 return $ Just (1 + (length nt_ids) + (length loc_ids))
330
331
332 -- * Tasty Tests
333 news_tests :: TestTree
334 news_tests =
335 testGroup
336 "News tests"
337 [ test_pickle_of_unpickle_is_identity ]
338
339
340 test_pickle_of_unpickle_is_identity :: TestTree
341 test_pickle_of_unpickle_is_identity =
342 testCase "pickle composed with unpickle is the identity" $ do
343 let path = "test/xml/newsxml.xml"
344 (expected :: [MessageXml], actual) <- pickle_unpickle "message" path
345 actual @?= expected