]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/SportInfo.hs
Add SportInfo support for NFLPassLeadXML.dtd.
[dead/htsn-import.git] / src / TSN / XML / SportInfo.hs
1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE QuasiQuotes #-}
4 {-# LANGUAGE RecordWildCards #-}
5 {-# LANGUAGE TemplateHaskell #-}
6 {-# LANGUAGE TypeFamilies #-}
7
8 -- | SportInfo represents a collection of DTDs that we don't really
9 -- handle but want to make available. The raw XML gets stored in the
10 -- database along with the XML_File_ID, but we don't parse any of it.
11 --
12 -- This is almost completely redundant with "TSN.XML.GameInfo", but
13 -- the redundancy is necessary: we need separate 'Message' types so
14 -- that we can have separate 'DbImport' instances. It would take
15 -- more code/work to abstract (if it's even possible) than to
16 -- duplicate.
17 --
18 module TSN.XML.SportInfo (
19 dtds,
20 parse_xml,
21 sport_info_tests,
22 -- * WARNING: these are private but exported to silence warnings
23 SportInfoConstructor(..) )
24 where
25
26 -- System imports.
27 import Data.Either ( rights )
28 import Data.Time.Clock ( UTCTime )
29 import Database.Groundhog (
30 countAll,
31 migrate,
32 runMigration,
33 silentMigrationLogger )
34 import Database.Groundhog.Generic ( runDbConn )
35 import Database.Groundhog.Sqlite ( withSqliteConn )
36 import Database.Groundhog.TH (
37 groundhog,
38 mkPersist )
39 import Test.Tasty ( TestTree, testGroup )
40 import Test.Tasty.HUnit ( (@?=), testCase )
41 import Text.XML.HXT.Core ( XmlTree )
42 import Text.XML.HXT.DOM.ShowXml ( xshow )
43
44 -- Local imports.
45 import TSN.Codegen ( tsn_codegen_config )
46 import TSN.DbImport (
47 DbImport(..),
48 ImportResult(..),
49 run_dbmigrate )
50 import TSN.Parse (
51 parse_message,
52 parse_xmlfid,
53 parse_xml_time_stamp )
54 import TSN.XmlImport ( XmlImport(..) )
55 import Xml (
56 FromXml(..),
57 ToDb(..),
58 unsafe_read_document )
59
60
61 -- | The DTDs for everything that we consider \"Sport Info.\"
62 --
63 -- TODO: This is the list from the old implementation. We need to
64 -- make sure that we are really receiving XML for these DTDs
65 -- (i.e. the names are correct).
66 --
67 dtds :: [String]
68 dtds =
69 [ "CBASK_3PPctXML.dtd",
70 "Cbask_All_Tourn_Teams_XML.dtd",
71 "CBASK_AssistsXML.dtd",
72 "Cbask_Awards_XML.dtd",
73 "CBASK_BlocksXML.dtd",
74 "Cbask_Conf_Standings_XML.dtd",
75 "Cbask_DivII_III_Indv_Stats_XML.dtd",
76 "Cbask_DivII_Team_Stats_XML.dtd",
77 "Cbask_DivIII_Team_Stats_XML.dtd",
78 "CBASK_FGPctXML.dtd",
79 "CBASK_FoulsXML.dtd",
80 "CBASK_FTPctXML.dtd",
81 "Cbask_Indv_No_Avg_XML.dtd", -- no xml
82 "Cbask_Indv_Scoring_XML.dtd",
83 "Cbask_Indv_Shooting_XML.dtd", -- no xml
84 "CBASK_MinutesXML.dtd",
85 "Cbask_Polls_XML.dtd",
86 "CBASK_ReboundsXML.dtd",
87 "CBASK_ScoringLeadersXML.dtd",
88 "CBASK_StealsXML.dtd", -- no xml
89 "Cbask_Team_Scoring_Rebound_Margin_XML.dtd", -- no xml
90 "Cbask_Team_Scoring_XML.dtd", -- no xml
91 "Cbask_Team_Shooting_Pct_XML.dtd", -- no xml
92 "Cbask_Team_ThreePT_Made_XML.dtd",
93 "Cbask_Team_ThreePT_PCT_XML.dtd",
94 "Cbask_Team_Win_Pct_XML.dtd",
95 "Cbask_Top_Twenty_Five_XML.dtd",
96 "CBASK_TopTwentyFiveResult_XML.dtd",
97 "Cbask_Tourn_Awards_XML.dtd",
98 "Cbask_Tourn_Champs_XML.dtd",
99 "Cbask_Tourn_Indiv_XML.dtd",
100 "Cbask_Tourn_Leaders_XML.dtd",
101 "Cbask_Tourn_MVP_XML.dtd",
102 "Cbask_Tourn_Records_XML.dtd",
103 "LeagueScheduleXML.dtd",
104 "minorscoresxml.dtd",
105 "Minor_Baseball_League_Leaders_XML.dtd",
106 "Minor_Baseball_Standings_XML.dtd",
107 "Minor_Baseball_Transactions_XML.dtd",
108 "mlbbattingavgxml.dtd",
109 "mlbdoublesleadersxml.dtd",
110 "MLBGamesPlayedXML.dtd",
111 "MLBGIDPXML.dtd",
112 "MLBHitByPitchXML.dtd",
113 "mlbhitsleadersxml.dtd",
114 "mlbhomerunsxml.dtd",
115 "MLBHRFreqXML.dtd",
116 "MLBIntWalksXML.dtd",
117 "MLBKORateXML.dtd",
118 "mlbonbasepctxml.dtd",
119 "MLBOPSXML.dtd",
120 "MLBPlateAppsXML.dtd",
121 "mlbrbisxml.dtd",
122 "mlbrunsleadersxml.dtd",
123 "MLBSacFliesXML.dtd",
124 "MLBSacrificesXML.dtd",
125 "MLBSBSuccessXML.dtd",
126 "mlbsluggingpctxml.dtd",
127 "mlbstandxml.dtd",
128 "mlbstandxml_preseason.dtd",
129 "mlbstolenbasexml.dtd",
130 "mlbtotalbasesleadersxml.dtd",
131 "mlbtriplesleadersxml.dtd",
132 "MLBWalkRateXML.dtd",
133 "mlbwalksleadersxml.dtd",
134 "MLBXtraBaseHitsXML.dtd",
135 "MLB_ERA_Leaders.dtd",
136 "MLB_Fielding_XML.dtd",
137 "MLB_Pitching_Appearances_Leaders.dtd",
138 "MLB_Pitching_Balks_Leaders.dtd",
139 "MLB_Pitching_CG_Leaders.dtd",
140 "MLB_Pitching_ER_Allowed_Leaders.dtd",
141 "MLB_Pitching_Hits_Allowed_Leaders.dtd",
142 "MLB_Pitching_Hit_Batters_Leaders.dtd",
143 "MLB_Pitching_HR_Allowed_Leaders.dtd",
144 "MLB_Pitching_IP_Leaders.dtd",
145 "MLB_Pitching_Runs_Allowed_Leaders.dtd",
146 "MLB_Pitching_Saves_Leaders.dtd",
147 "MLB_Pitching_Shut_Outs_Leaders.dtd",
148 "MLB_Pitching_Starts_Leaders.dtd",
149 "MLB_Pitching_Strike_Outs_Leaders.dtd",
150 "MLB_Pitching_Walks_Leaders.dtd",
151 "MLB_Pitching_WHIP_Leaders.dtd",
152 "MLB_Pitching_Wild_Pitches_Leaders.dtd",
153 "MLB_Pitching_Win_Percentage_Leaders.dtd",
154 "MLB_Pitching_WL_Leaders.dtd",
155 "NBA_Team_Stats_XML.dtd",
156 "NBA3PPctXML.dtd",
157 "NBAAssistsXML.dtd",
158 "NBABlocksXML.dtd",
159 "nbaconfrecxml.dtd",
160 "nbadaysxml.dtd",
161 "nbadivisionsxml.dtd",
162 "NBAFGPctXML.dtd",
163 "NBAFoulsXML.dtd",
164 "NBAFTPctXML.dtd",
165 "NBAMinutesXML.dtd",
166 "NBAReboundsXML.dtd",
167 "NBAScorersXML.dtd",
168 "nbastandxml.dtd",
169 "NBAStealsXML.dtd",
170 "nbateamleadersxml.dtd",
171 "nbatripledoublexml.dtd",
172 "NBATurnoversXML.dtd",
173 "NCAA_Conference_Schedule_XML.dtd",
174 "nflfirstdownxml.dtd",
175 "NFLFumbleLeaderXML.dtd",
176 "NFLGiveTakeXML.dtd",
177 "NFLGrassTurfDomeOutsideXML.dtd", -- no xml
178 "NFLInside20XML.dtd",
179 "NFLInterceptionLeadersXML.dtd", -- no xml
180 "NFLKickoffsXML.dtd",
181 "NFLMondayNightXML.dtd",
182 "NFLPassingLeadersXML.dtd", -- no xml
183 "NFLPassLeadXML.dtd",
184 "NFLQBStartsXML.dtd", -- no dtd
185 "NFLReceivingLeadersXML.dtd", -- no dtd
186 "NFLRushingLeadersXML.dtd", -- no dtd
187 "NFLSackLeadersXML.dtd", -- no dtd
188 "nflstandxml.dtd", -- no dtd
189 "NFLTackleFFLeadersXML.dtd", -- no dtd
190 "NFLTeamRankingsXML.dtd", -- no dtd
191 "NFLTopKickoffReturnXML.dtd", -- no dtd
192 "NFLTopPerformanceXML.dtd", -- no dtd
193 "NFLTopPuntReturnXML.dtd", -- no dtd
194 "NFLTotalYardageXML.dtd", -- no dtd
195 "NFLYardsXML.dtd", -- no dtd
196 "NFL_KickingLeaders_XML.dtd", -- no dtd
197 "NFL_NBA_Draft_XML.dtd", -- no dtd
198 "NFL_PuntingLeaders_XML.dtd", -- no dtd
199 "NFL_Roster_XML.dtd", -- no dtd
200 "NFL_Team_Stats_XML.dtd", -- no dtd
201 "Transactions_XML.dtd", -- no dtd
202 "Weekly_Sched_XML.dtd", -- no dtd
203 "WNBA_Team_Leaders_XML.dtd", -- no dtd
204 "WNBA3PPctXML.dtd", -- no dtd
205 "WNBAAssistsXML.dtd", -- no dtd
206 "WNBABlocksXML.dtd", -- no dtd
207 "WNBAFGPctXML.dtd", -- no dtd
208 "WNBAFoulsXML.dtd", -- no dtd
209 "WNBAFTPctXML.dtd", -- no dtd
210 "WNBAMinutesXML.dtd", -- no dtd
211 "WNBAReboundsXML.dtd", -- no dtd
212 "WNBAScorersXML.dtd", -- no dtd
213 "wnbastandxml.dtd", -- no dtd
214 "WNBAStealsXML.dtd", -- no dtd
215 "WNBATurnoversXML.dtd" -- no dtd
216 ]
217
218
219 -- | XML representation of a SportInfo \<message\>.
220 --
221 data Message =
222 Message {
223 xml_dtd :: String,
224 xml_xml_file_id :: Int,
225 xml_time_stamp :: UTCTime,
226 xml_xml :: String }
227 deriving (Eq, Show)
228
229
230 -- | Attempt to parse a 'Message' from an 'XmlTree'. If we cannot,
231 -- we fail with an error message.
232 --
233 parse_xml :: String -> XmlTree -> Either String Message
234 parse_xml dtdname xmltree = do
235 xmlfid <- parse_xmlfid xmltree
236 timestamp <- parse_xml_time_stamp xmltree
237 message <- parse_message xmltree
238 return $ Message dtdname (fromInteger xmlfid) timestamp (xshow [message])
239
240
241 -- | Database representation of a 'Message'.
242 --
243 data SportInfo =
244 SportInfo {
245 db_dtd :: String,
246 db_xml_file_id :: Int,
247 db_time_stamp :: UTCTime,
248 db_xml :: String }
249
250
251 instance ToDb Message where
252 -- | The database analogue of a 'Message' is an 'SportInfo'.
253 type Db Message = SportInfo
254
255 instance FromXml Message where
256 -- | The XML to DB conversion is trivial here.
257 --
258 from_xml Message{..} = SportInfo {
259 db_dtd = xml_dtd,
260 db_xml_file_id = xml_xml_file_id,
261 db_time_stamp = xml_time_stamp,
262 db_xml = xml_xml }
263
264
265 -- | This allows us to insert the XML representation 'Message'
266 -- directly.
267 --
268 instance XmlImport Message
269
270
271 --
272 -- Database code
273 --
274
275 instance DbImport Message where
276 dbmigrate _ =
277 run_dbmigrate $ migrate (undefined :: SportInfo)
278
279 -- | We import a 'Message' by inserting the whole thing at
280 -- once. Nothing fancy going on here.
281 dbimport msg = do
282 insert_xml_ msg
283 return ImportSucceeded
284
285
286 -- | The database schema for SportInfo is trivial; all we need is for
287 -- the XML_File_ID to be unique.
288 --
289 mkPersist tsn_codegen_config [groundhog|
290 - entity: SportInfo
291 constructors:
292 - name: SportInfo
293 uniques:
294 - name: unique_sport_info
295 type: constraint
296 # Prevent multiple imports of the same message.
297 fields: [db_xml_file_id]
298 |]
299
300
301 --
302 -- Tasty Tests
303 --
304
305 -- | A list of all tests for this module.
306 --
307 sport_info_tests :: TestTree
308 sport_info_tests =
309 testGroup
310 "SportInfo tests"
311 [ test_parse_xml_succeeds,
312 test_dbimport_succeeds ]
313
314
315 -- | Sample XML documents for SportInfo types.
316 --
317 sport_info_test_files :: [FilePath]
318 sport_info_test_files =
319 map ("test/xml/sportinfo/" ++) [
320 "CBASK_3PPctXML.xml",
321 "Cbask_All_Tourn_Teams_XML.xml",
322 "CBASK_AssistsXML.xml",
323 "Cbask_Awards_XML.xml",
324 "CBASK_BlocksXML.xml",
325 "Cbask_Conf_Standings_XML.xml",
326 "Cbask_DivII_III_Indv_Stats_XML.xml",
327 "Cbask_DivII_Team_Stats_XML.xml",
328 "Cbask_DivIII_Team_Stats_XML.xml",
329 "CBASK_FGPctXML.xml",
330 "CBASK_FoulsXML.xml",
331 "CBASK_FTPctXML.xml",
332 "Cbask_Indv_Scoring_XML.xml",
333 "CBASK_MinutesXML.xml",
334 "Cbask_Polls_XML.xml",
335 "CBASK_ReboundsXML.xml",
336 "CBASK_ScoringLeadersXML.xml",
337 "Cbask_Team_ThreePT_Made_XML.xml",
338 "Cbask_Team_ThreePT_PCT_XML.xml",
339 "Cbask_Team_Win_Pct_XML.xml",
340 "Cbask_Top_Twenty_Five_XML.xml",
341 "CBASK_TopTwentyFiveResult_XML.xml",
342 "Cbask_Tourn_Awards_XML.xml",
343 "Cbask_Tourn_Champs_XML.xml",
344 "Cbask_Tourn_Indiv_XML.xml",
345 "Cbask_Tourn_Leaders_XML.xml",
346 "Cbask_Tourn_MVP_XML.xml",
347 "Cbask_Tourn_Records_XML.xml",
348 "LeagueScheduleXML.xml",
349 "minorscoresxml.xml",
350 "Minor_Baseball_League_Leaders_XML.xml",
351 "Minor_Baseball_Standings_XML.xml",
352 "Minor_Baseball_Transactions_XML.xml",
353 "mlbbattingavgxml.xml",
354 "mlbdoublesleadersxml.xml",
355 "MLBGamesPlayedXML.xml",
356 "MLBGIDPXML.xml",
357 "MLBHitByPitchXML.xml",
358 "mlbhitsleadersxml.xml",
359 "mlbhomerunsxml.xml",
360 "MLBHRFreqXML.xml",
361 "MLBIntWalksXML.xml",
362 "MLBKORateXML.xml",
363 "mlbonbasepctxml.xml",
364 "MLBOPSXML.xml",
365 "MLBPlateAppsXML.xml",
366 "mlbrbisxml.xml",
367 "mlbrunsleadersxml.xml",
368 "MLBSacFliesXML.xml",
369 "MLBSacrificesXML.xml",
370 "MLBSBSuccessXML.xml",
371 "mlbsluggingpctxml.xml",
372 "mlbstandxml.xml",
373 "mlbstandxml_preseason.xml",
374 "mlbstolenbasexml.xml",
375 "mlbtotalbasesleadersxml.xml",
376 "mlbtriplesleadersxml.xml",
377 "MLBWalkRateXML.xml",
378 "mlbwalksleadersxml.xml",
379 "MLBXtraBaseHitsXML.xml",
380 "MLB_ERA_Leaders.xml",
381 "MLB_Pitching_Appearances_Leaders.xml",
382 "MLB_Pitching_Balks_Leaders.xml",
383 "MLB_Pitching_CG_Leaders.xml",
384 "MLB_Pitching_ER_Allowed_Leaders.xml",
385 "MLB_Pitching_Hits_Allowed_Leaders.xml",
386 "MLB_Pitching_Hit_Batters_Leaders.xml",
387 "MLB_Pitching_HR_Allowed_Leaders.xml",
388 "MLB_Pitching_IP_Leaders.xml",
389 "MLB_Pitching_Runs_Allowed_Leaders.xml",
390 "MLB_Pitching_Saves_Leaders.xml",
391 "MLB_Pitching_Shut_Outs_Leaders.xml",
392 "MLB_Pitching_Starts_Leaders.xml",
393 "MLB_Pitching_Strike_Outs_Leaders.xml",
394 "MLB_Pitching_Walks_Leaders.xml",
395 "MLB_Pitching_WHIP_Leaders.xml",
396 "MLB_Pitching_Wild_Pitches_Leaders.xml",
397 "MLB_Pitching_Win_Percentage_Leaders.xml",
398 "MLB_Pitching_WL_Leaders.xml",
399 "NBA_Team_Stats_XML.xml",
400 "NBA3PPctXML.xml",
401 "NBAAssistsXML.xml",
402 "NBABlocksXML.xml",
403 "nbaconfrecxml.xml",
404 "nbadaysxml.xml",
405 "nbadivisionsxml.xml",
406 "NBAFGPctXML.xml",
407 "NBAFoulsXML.xml",
408 "NBAFTPctXML.xml",
409 "NBAMinutesXML.xml",
410 "NBAReboundsXML.xml",
411 "NBAScorersXML.xml",
412 "nbastandxml.xml",
413 "NBAStealsXML.xml",
414 "nbateamleadersxml.xml",
415 "nbatripledoublexml.xml",
416 "NBATurnoversXML.xml",
417 "NCAA_Conference_Schedule_XML.xml",
418 "nflfirstdownxml.xml",
419 "NFLFumbleLeaderXML.xml",
420 "NFLGiveTakeXML.xml",
421 "NFLInside20XML.xml",
422 "NFLKickoffsXML.xml",
423 "NFLMondayNightXML.xml",
424 "NFLPassLeadXML.xml"
425 ]
426
427
428
429 -- | Make sure we can parse every element of 'sport_info_test_files'.
430 --
431 test_parse_xml_succeeds :: TestTree
432 test_parse_xml_succeeds =
433 testGroup "parse_xml" $ map check sport_info_test_files
434 where
435 check t = testCase t $ do
436 x <- unsafe_read_document t
437 let result = parse_xml "dummy" x
438 let actual = case result of -- isRight appears in base-4.7
439 Left _ -> False
440 Right _ -> True
441 let expected = True
442 actual @?= expected
443
444
445 -- | Ensure that each element of 'sport_info_test_files' can be imported
446 -- by counting the total number of database records (after
447 -- importing) and comparing it against the length of
448 -- 'sport_info_test_files'.
449 --
450 test_dbimport_succeeds :: TestTree
451 test_dbimport_succeeds = testCase "dbimport succeeds" $ do
452 xmltrees <- mapM unsafe_read_document sport_info_test_files
453 let msgs = rights $ map (parse_xml "dummy") xmltrees
454 actual <- withSqliteConn ":memory:" $ runDbConn $ do
455 runMigration silentMigrationLogger $ do
456 migrate (undefined :: SportInfo)
457 mapM_ dbimport msgs
458 countAll (undefined :: SportInfo)
459
460 actual @?= expected
461 where
462 expected = length sport_info_test_files