]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/SportInfo.hs
Add SportInfo support for WNBATurnoversXML.dtd.
[dead/htsn-import.git] / src / TSN / XML / SportInfo.hs
1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE QuasiQuotes #-}
4 {-# LANGUAGE RecordWildCards #-}
5 {-# LANGUAGE TemplateHaskell #-}
6 {-# LANGUAGE TypeFamilies #-}
7
8 -- | SportInfo represents a collection of DTDs that we don't really
9 -- handle but want to make available. The raw XML gets stored in the
10 -- database along with the XML_File_ID, but we don't parse any of it.
11 --
12 -- This is almost completely redundant with "TSN.XML.GameInfo", but
13 -- the redundancy is necessary: we need separate 'Message' types so
14 -- that we can have separate 'DbImport' instances. It would take
15 -- more code/work to abstract (if it's even possible) than to
16 -- duplicate.
17 --
18 module TSN.XML.SportInfo (
19 dtds,
20 parse_xml,
21 sport_info_tests,
22 -- * WARNING: these are private but exported to silence warnings
23 SportInfoConstructor(..) )
24 where
25
26 -- System imports.
27 import Data.Either ( rights )
28 import Data.Time.Clock ( UTCTime )
29 import Database.Groundhog (
30 countAll,
31 migrate,
32 runMigration,
33 silentMigrationLogger )
34 import Database.Groundhog.Generic ( runDbConn )
35 import Database.Groundhog.Sqlite ( withSqliteConn )
36 import Database.Groundhog.TH (
37 groundhog,
38 mkPersist )
39 import Test.Tasty ( TestTree, testGroup )
40 import Test.Tasty.HUnit ( (@?=), testCase )
41 import Text.XML.HXT.Core ( XmlTree )
42 import Text.XML.HXT.DOM.ShowXml ( xshow )
43
44 -- Local imports.
45 import TSN.Codegen ( tsn_codegen_config )
46 import TSN.DbImport (
47 DbImport(..),
48 ImportResult(..),
49 run_dbmigrate )
50 import TSN.Parse (
51 parse_message,
52 parse_xmlfid,
53 parse_xml_time_stamp )
54 import TSN.XmlImport ( XmlImport(..) )
55 import Xml (
56 FromXml(..),
57 ToDb(..),
58 unsafe_read_document )
59
60
61 -- | The DTDs for everything that we consider \"Sport Info.\"
62 --
63 -- TODO: This is the list from the old implementation. We need to
64 -- make sure that we are really receiving XML for these DTDs
65 -- (i.e. the names are correct).
66 --
67 dtds :: [String]
68 dtds =
69 [ "CBASK_3PPctXML.dtd",
70 "Cbask_All_Tourn_Teams_XML.dtd",
71 "CBASK_AssistsXML.dtd",
72 "Cbask_Awards_XML.dtd",
73 "CBASK_BlocksXML.dtd",
74 "Cbask_Conf_Standings_XML.dtd",
75 "Cbask_DivII_III_Indv_Stats_XML.dtd",
76 "Cbask_DivII_Team_Stats_XML.dtd",
77 "Cbask_DivIII_Team_Stats_XML.dtd",
78 "CBASK_FGPctXML.dtd",
79 "CBASK_FoulsXML.dtd",
80 "CBASK_FTPctXML.dtd",
81 "Cbask_Indv_No_Avg_XML.dtd", -- no xml
82 "Cbask_Indv_Scoring_XML.dtd",
83 "Cbask_Indv_Shooting_XML.dtd", -- no xml
84 "CBASK_MinutesXML.dtd",
85 "Cbask_Polls_XML.dtd",
86 "CBASK_ReboundsXML.dtd",
87 "CBASK_ScoringLeadersXML.dtd",
88 "CBASK_StealsXML.dtd", -- no xml
89 "Cbask_Team_Scoring_Rebound_Margin_XML.dtd", -- no xml
90 "Cbask_Team_Scoring_XML.dtd", -- no xml
91 "Cbask_Team_Shooting_Pct_XML.dtd", -- no xml
92 "Cbask_Team_ThreePT_Made_XML.dtd",
93 "Cbask_Team_ThreePT_PCT_XML.dtd",
94 "Cbask_Team_Win_Pct_XML.dtd",
95 "Cbask_Top_Twenty_Five_XML.dtd",
96 "CBASK_TopTwentyFiveResult_XML.dtd",
97 "Cbask_Tourn_Awards_XML.dtd",
98 "Cbask_Tourn_Champs_XML.dtd",
99 "Cbask_Tourn_Indiv_XML.dtd",
100 "Cbask_Tourn_Leaders_XML.dtd",
101 "Cbask_Tourn_MVP_XML.dtd",
102 "Cbask_Tourn_Records_XML.dtd",
103 "LeagueScheduleXML.dtd",
104 "minorscoresxml.dtd",
105 "Minor_Baseball_League_Leaders_XML.dtd",
106 "Minor_Baseball_Standings_XML.dtd",
107 "Minor_Baseball_Transactions_XML.dtd",
108 "mlbbattingavgxml.dtd",
109 "mlbdoublesleadersxml.dtd",
110 "MLBGamesPlayedXML.dtd",
111 "MLBGIDPXML.dtd",
112 "MLBHitByPitchXML.dtd",
113 "mlbhitsleadersxml.dtd",
114 "mlbhomerunsxml.dtd",
115 "MLBHRFreqXML.dtd",
116 "MLBIntWalksXML.dtd",
117 "MLBKORateXML.dtd",
118 "mlbonbasepctxml.dtd",
119 "MLBOPSXML.dtd",
120 "MLBPlateAppsXML.dtd",
121 "mlbrbisxml.dtd",
122 "mlbrunsleadersxml.dtd",
123 "MLBSacFliesXML.dtd",
124 "MLBSacrificesXML.dtd",
125 "MLBSBSuccessXML.dtd",
126 "mlbsluggingpctxml.dtd",
127 "mlbstandxml.dtd",
128 "mlbstandxml_preseason.dtd",
129 "mlbstolenbasexml.dtd",
130 "mlbtotalbasesleadersxml.dtd",
131 "mlbtriplesleadersxml.dtd",
132 "MLBWalkRateXML.dtd",
133 "mlbwalksleadersxml.dtd",
134 "MLBXtraBaseHitsXML.dtd",
135 "MLB_ERA_Leaders.dtd",
136 "MLB_Fielding_XML.dtd",
137 "MLB_Pitching_Appearances_Leaders.dtd",
138 "MLB_Pitching_Balks_Leaders.dtd",
139 "MLB_Pitching_CG_Leaders.dtd",
140 "MLB_Pitching_ER_Allowed_Leaders.dtd",
141 "MLB_Pitching_Hits_Allowed_Leaders.dtd",
142 "MLB_Pitching_Hit_Batters_Leaders.dtd",
143 "MLB_Pitching_HR_Allowed_Leaders.dtd",
144 "MLB_Pitching_IP_Leaders.dtd",
145 "MLB_Pitching_Runs_Allowed_Leaders.dtd",
146 "MLB_Pitching_Saves_Leaders.dtd",
147 "MLB_Pitching_Shut_Outs_Leaders.dtd",
148 "MLB_Pitching_Starts_Leaders.dtd",
149 "MLB_Pitching_Strike_Outs_Leaders.dtd",
150 "MLB_Pitching_Walks_Leaders.dtd",
151 "MLB_Pitching_WHIP_Leaders.dtd",
152 "MLB_Pitching_Wild_Pitches_Leaders.dtd",
153 "MLB_Pitching_Win_Percentage_Leaders.dtd",
154 "MLB_Pitching_WL_Leaders.dtd",
155 "NBA_Team_Stats_XML.dtd",
156 "NBA3PPctXML.dtd",
157 "NBAAssistsXML.dtd",
158 "NBABlocksXML.dtd",
159 "nbaconfrecxml.dtd",
160 "nbadaysxml.dtd",
161 "nbadivisionsxml.dtd",
162 "NBAFGPctXML.dtd",
163 "NBAFoulsXML.dtd",
164 "NBAFTPctXML.dtd",
165 "NBAMinutesXML.dtd",
166 "NBAReboundsXML.dtd",
167 "NBAScorersXML.dtd",
168 "nbastandxml.dtd",
169 "NBAStealsXML.dtd",
170 "nbateamleadersxml.dtd",
171 "nbatripledoublexml.dtd",
172 "NBATurnoversXML.dtd",
173 "NCAA_Conference_Schedule_XML.dtd",
174 "nflfirstdownxml.dtd",
175 "NFLFumbleLeaderXML.dtd",
176 "NFLGiveTakeXML.dtd",
177 "NFLGrassTurfDomeOutsideXML.dtd", -- no xml
178 "NFLInside20XML.dtd",
179 "NFLInterceptionLeadersXML.dtd", -- no xml
180 "NFLKickoffsXML.dtd",
181 "NFLMondayNightXML.dtd",
182 "NFLPassingLeadersXML.dtd", -- no xml
183 "NFLPassLeadXML.dtd",
184 "NFLQBStartsXML.dtd",
185 "NFLReceivingLeadersXML.dtd", -- no xml
186 "NFLRushingLeadersXML.dtd", -- no xml
187 "NFLSackLeadersXML.dtd",
188 "nflstandxml.dtd",
189 "NFLTackleFFLeadersXML.dtd", -- no xml
190 "NFLTeamRankingsXML.dtd",
191 "NFLTopKickoffReturnXML.dtd", -- no xml
192 "NFLTopPerformanceXML.dtd",
193 "NFLTopPuntReturnXML.dtd", -- no xml
194 "NFLTotalYardageXML.dtd",
195 "NFLYardsXML.dtd", -- no xml
196 "NFL_KickingLeaders_XML.dtd",
197 "NFL_NBA_Draft_XML.dtd",
198 "NFL_PuntingLeaders_XML.dtd", -- no xml
199 "NFL_Roster_XML.dtd",
200 "NFL_Team_Stats_XML.dtd",
201 "Transactions_XML.dtd",
202 "Weekly_Sched_XML.dtd",
203 "WNBA_Team_Leaders_XML.dtd",
204 "WNBA3PPctXML.dtd",
205 "WNBAAssistsXML.dtd",
206 "WNBABlocksXML.dtd",
207 "WNBAFGPctXML.dtd",
208 "WNBAFoulsXML.dtd",
209 "WNBAFTPctXML.dtd",
210 "WNBAMinutesXML.dtd",
211 "WNBAReboundsXML.dtd",
212 "WNBAScorersXML.dtd",
213 "wnbastandxml.dtd",
214 "WNBAStealsXML.dtd",
215 "WNBATurnoversXML.dtd" ]
216
217
218 -- | XML representation of a SportInfo \<message\>.
219 --
220 data Message =
221 Message {
222 xml_dtd :: String,
223 xml_xml_file_id :: Int,
224 xml_time_stamp :: UTCTime,
225 xml_xml :: String }
226 deriving (Eq, Show)
227
228
229 -- | Attempt to parse a 'Message' from an 'XmlTree'. If we cannot,
230 -- we fail with an error message.
231 --
232 parse_xml :: String -> XmlTree -> Either String Message
233 parse_xml dtdname xmltree = do
234 xmlfid <- parse_xmlfid xmltree
235 timestamp <- parse_xml_time_stamp xmltree
236 message <- parse_message xmltree
237 return $ Message dtdname (fromInteger xmlfid) timestamp (xshow [message])
238
239
240 -- | Database representation of a 'Message'.
241 --
242 data SportInfo =
243 SportInfo {
244 db_dtd :: String,
245 db_xml_file_id :: Int,
246 db_time_stamp :: UTCTime,
247 db_xml :: String }
248
249
250 instance ToDb Message where
251 -- | The database analogue of a 'Message' is an 'SportInfo'.
252 type Db Message = SportInfo
253
254 instance FromXml Message where
255 -- | The XML to DB conversion is trivial here.
256 --
257 from_xml Message{..} = SportInfo {
258 db_dtd = xml_dtd,
259 db_xml_file_id = xml_xml_file_id,
260 db_time_stamp = xml_time_stamp,
261 db_xml = xml_xml }
262
263
264 -- | This allows us to insert the XML representation 'Message'
265 -- directly.
266 --
267 instance XmlImport Message
268
269
270 --
271 -- Database code
272 --
273
274 instance DbImport Message where
275 dbmigrate _ =
276 run_dbmigrate $ migrate (undefined :: SportInfo)
277
278 -- | We import a 'Message' by inserting the whole thing at
279 -- once. Nothing fancy going on here.
280 dbimport msg = do
281 insert_xml_ msg
282 return ImportSucceeded
283
284
285 -- | The database schema for SportInfo is trivial; all we need is for
286 -- the XML_File_ID to be unique.
287 --
288 mkPersist tsn_codegen_config [groundhog|
289 - entity: SportInfo
290 constructors:
291 - name: SportInfo
292 uniques:
293 - name: unique_sport_info
294 type: constraint
295 # Prevent multiple imports of the same message.
296 fields: [db_xml_file_id]
297 |]
298
299
300 --
301 -- Tasty Tests
302 --
303
304 -- | A list of all tests for this module.
305 --
306 sport_info_tests :: TestTree
307 sport_info_tests =
308 testGroup
309 "SportInfo tests"
310 [ test_parse_xml_succeeds,
311 test_dbimport_succeeds ]
312
313
314 -- | Sample XML documents for SportInfo types.
315 --
316 sport_info_test_files :: [FilePath]
317 sport_info_test_files =
318 map ("test/xml/sportinfo/" ++) [
319 "CBASK_3PPctXML.xml",
320 "Cbask_All_Tourn_Teams_XML.xml",
321 "CBASK_AssistsXML.xml",
322 "Cbask_Awards_XML.xml",
323 "CBASK_BlocksXML.xml",
324 "Cbask_Conf_Standings_XML.xml",
325 "Cbask_DivII_III_Indv_Stats_XML.xml",
326 "Cbask_DivII_Team_Stats_XML.xml",
327 "Cbask_DivIII_Team_Stats_XML.xml",
328 "CBASK_FGPctXML.xml",
329 "CBASK_FoulsXML.xml",
330 "CBASK_FTPctXML.xml",
331 "Cbask_Indv_Scoring_XML.xml",
332 "CBASK_MinutesXML.xml",
333 "Cbask_Polls_XML.xml",
334 "CBASK_ReboundsXML.xml",
335 "CBASK_ScoringLeadersXML.xml",
336 "Cbask_Team_ThreePT_Made_XML.xml",
337 "Cbask_Team_ThreePT_PCT_XML.xml",
338 "Cbask_Team_Win_Pct_XML.xml",
339 "Cbask_Top_Twenty_Five_XML.xml",
340 "CBASK_TopTwentyFiveResult_XML.xml",
341 "Cbask_Tourn_Awards_XML.xml",
342 "Cbask_Tourn_Champs_XML.xml",
343 "Cbask_Tourn_Indiv_XML.xml",
344 "Cbask_Tourn_Leaders_XML.xml",
345 "Cbask_Tourn_MVP_XML.xml",
346 "Cbask_Tourn_Records_XML.xml",
347 "LeagueScheduleXML.xml",
348 "minorscoresxml.xml",
349 "Minor_Baseball_League_Leaders_XML.xml",
350 "Minor_Baseball_Standings_XML.xml",
351 "Minor_Baseball_Transactions_XML.xml",
352 "mlbbattingavgxml.xml",
353 "mlbdoublesleadersxml.xml",
354 "MLBGamesPlayedXML.xml",
355 "MLBGIDPXML.xml",
356 "MLBHitByPitchXML.xml",
357 "mlbhitsleadersxml.xml",
358 "mlbhomerunsxml.xml",
359 "MLBHRFreqXML.xml",
360 "MLBIntWalksXML.xml",
361 "MLBKORateXML.xml",
362 "mlbonbasepctxml.xml",
363 "MLBOPSXML.xml",
364 "MLBPlateAppsXML.xml",
365 "mlbrbisxml.xml",
366 "mlbrunsleadersxml.xml",
367 "MLBSacFliesXML.xml",
368 "MLBSacrificesXML.xml",
369 "MLBSBSuccessXML.xml",
370 "mlbsluggingpctxml.xml",
371 "mlbstandxml.xml",
372 "mlbstandxml_preseason.xml",
373 "mlbstolenbasexml.xml",
374 "mlbtotalbasesleadersxml.xml",
375 "mlbtriplesleadersxml.xml",
376 "MLBWalkRateXML.xml",
377 "mlbwalksleadersxml.xml",
378 "MLBXtraBaseHitsXML.xml",
379 "MLB_ERA_Leaders.xml",
380 "MLB_Pitching_Appearances_Leaders.xml",
381 "MLB_Pitching_Balks_Leaders.xml",
382 "MLB_Pitching_CG_Leaders.xml",
383 "MLB_Pitching_ER_Allowed_Leaders.xml",
384 "MLB_Pitching_Hits_Allowed_Leaders.xml",
385 "MLB_Pitching_Hit_Batters_Leaders.xml",
386 "MLB_Pitching_HR_Allowed_Leaders.xml",
387 "MLB_Pitching_IP_Leaders.xml",
388 "MLB_Pitching_Runs_Allowed_Leaders.xml",
389 "MLB_Pitching_Saves_Leaders.xml",
390 "MLB_Pitching_Shut_Outs_Leaders.xml",
391 "MLB_Pitching_Starts_Leaders.xml",
392 "MLB_Pitching_Strike_Outs_Leaders.xml",
393 "MLB_Pitching_Walks_Leaders.xml",
394 "MLB_Pitching_WHIP_Leaders.xml",
395 "MLB_Pitching_Wild_Pitches_Leaders.xml",
396 "MLB_Pitching_Win_Percentage_Leaders.xml",
397 "MLB_Pitching_WL_Leaders.xml",
398 "NBA_Team_Stats_XML.xml",
399 "NBA3PPctXML.xml",
400 "NBAAssistsXML.xml",
401 "NBABlocksXML.xml",
402 "nbaconfrecxml.xml",
403 "nbadaysxml.xml",
404 "nbadivisionsxml.xml",
405 "NBAFGPctXML.xml",
406 "NBAFoulsXML.xml",
407 "NBAFTPctXML.xml",
408 "NBAMinutesXML.xml",
409 "NBAReboundsXML.xml",
410 "NBAScorersXML.xml",
411 "nbastandxml.xml",
412 "NBAStealsXML.xml",
413 "nbateamleadersxml.xml",
414 "nbatripledoublexml.xml",
415 "NBATurnoversXML.xml",
416 "NCAA_Conference_Schedule_XML.xml",
417 "nflfirstdownxml.xml",
418 "NFLFumbleLeaderXML.xml",
419 "NFLGiveTakeXML.xml",
420 "NFLInside20XML.xml",
421 "NFLKickoffsXML.xml",
422 "NFLMondayNightXML.xml",
423 "NFLPassLeadXML.xml",
424 "NFLQBStartsXML.xml",
425 "NFLSackLeadersXML.xml",
426 "nflstandxml.xml",
427 "NFLTeamRankingsXML.xml",
428 "NFLTopPerformanceXML.xml",
429 "NFLTotalYardageXML.xml",
430 "NFL_KickingLeaders_XML.xml",
431 "NFL_NBA_Draft_XML.xml",
432 "NFL_Roster_XML.xml",
433 "NFL_Team_Stats_XML.xml",
434 "Transactions_XML.xml",
435 "Weekly_Sched_XML.xml",
436 "WNBA_Team_Leaders_XML.xml",
437 "WNBA3PPctXML.xml",
438 "WNBAAssistsXML.xml",
439 "WNBABlocksXML.xml",
440 "WNBAFGPctXML.xml",
441 "WNBAFoulsXML.xml",
442 "WNBAFTPctXML.xml",
443 "WNBAMinutesXML.xml",
444 "WNBAReboundsXML.xml",
445 "WNBAScorersXML.xml",
446 "wnbastandxml.xml",
447 "WNBAStealsXML.xml",
448 "WNBATurnoversXML.xml" ]
449
450
451
452 -- | Make sure we can parse every element of 'sport_info_test_files'.
453 --
454 test_parse_xml_succeeds :: TestTree
455 test_parse_xml_succeeds =
456 testGroup "parse_xml" $ map check sport_info_test_files
457 where
458 check t = testCase t $ do
459 x <- unsafe_read_document t
460 let result = parse_xml "dummy" x
461 let actual = case result of -- isRight appears in base-4.7
462 Left _ -> False
463 Right _ -> True
464 let expected = True
465 actual @?= expected
466
467
468 -- | Ensure that each element of 'sport_info_test_files' can be imported
469 -- by counting the total number of database records (after
470 -- importing) and comparing it against the length of
471 -- 'sport_info_test_files'.
472 --
473 test_dbimport_succeeds :: TestTree
474 test_dbimport_succeeds = testCase "dbimport succeeds" $ do
475 xmltrees <- mapM unsafe_read_document sport_info_test_files
476 let msgs = rights $ map (parse_xml "dummy") xmltrees
477 actual <- withSqliteConn ":memory:" $ runDbConn $ do
478 runMigration silentMigrationLogger $ do
479 migrate (undefined :: SportInfo)
480 mapM_ dbimport msgs
481 countAll (undefined :: SportInfo)
482
483 actual @?= expected
484 where
485 expected = length sport_info_test_files