]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/SportInfo.hs
Add SportInfo support for MLB_Pitching_IP_Leaders.dtd.
[dead/htsn-import.git] / src / TSN / XML / SportInfo.hs
1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE QuasiQuotes #-}
4 {-# LANGUAGE RecordWildCards #-}
5 {-# LANGUAGE TemplateHaskell #-}
6 {-# LANGUAGE TypeFamilies #-}
7
8 -- | SportInfo represents a collection of DTDs that we don't really
9 -- handle but want to make available. The raw XML gets stored in the
10 -- database along with the XML_File_ID, but we don't parse any of it.
11 --
12 -- This is almost completely redundant with "TSN.XML.GameInfo", but
13 -- the redundancy is necessary: we need separate 'Message' types so
14 -- that we can have separate 'DbImport' instances. It would take
15 -- more code/work to abstract (if it's even possible) than to
16 -- duplicate.
17 --
18 module TSN.XML.SportInfo (
19 dtds,
20 parse_xml,
21 sport_info_tests,
22 -- * WARNING: these are private but exported to silence warnings
23 SportInfoConstructor(..) )
24 where
25
26 -- System imports.
27 import Data.Either ( rights )
28 import Data.Time.Clock ( UTCTime )
29 import Database.Groundhog (
30 countAll,
31 migrate,
32 runMigration,
33 silentMigrationLogger )
34 import Database.Groundhog.Generic ( runDbConn )
35 import Database.Groundhog.Sqlite ( withSqliteConn )
36 import Database.Groundhog.TH (
37 groundhog,
38 mkPersist )
39 import Test.Tasty ( TestTree, testGroup )
40 import Test.Tasty.HUnit ( (@?=), testCase )
41 import Text.XML.HXT.Core ( XmlTree )
42 import Text.XML.HXT.DOM.ShowXml ( xshow )
43
44 -- Local imports.
45 import TSN.Codegen ( tsn_codegen_config )
46 import TSN.DbImport (
47 DbImport(..),
48 ImportResult(..),
49 run_dbmigrate )
50 import TSN.Parse (
51 parse_message,
52 parse_xmlfid,
53 parse_xml_time_stamp )
54 import TSN.XmlImport ( XmlImport(..) )
55 import Xml (
56 FromXml(..),
57 ToDb(..),
58 unsafe_read_document )
59
60
61 -- | The DTDs for everything that we consider \"Sport Info.\"
62 --
63 -- TODO: This is the list from the old implementation. We need to
64 -- make sure that we are really receiving XML for these DTDs
65 -- (i.e. the names are correct).
66 --
67 dtds :: [String]
68 dtds =
69 [ "CBASK_3PPctXML.dtd",
70 "Cbask_All_Tourn_Teams_XML.dtd",
71 "CBASK_AssistsXML.dtd",
72 "Cbask_Awards_XML.dtd",
73 "CBASK_BlocksXML.dtd",
74 "Cbask_Conf_Standings_XML.dtd",
75 "Cbask_DivII_III_Indv_Stats_XML.dtd",
76 "Cbask_DivII_Team_Stats_XML.dtd",
77 "Cbask_DivIII_Team_Stats_XML.dtd",
78 "CBASK_FGPctXML.dtd",
79 "CBASK_FoulsXML.dtd",
80 "CBASK_FTPctXML.dtd",
81 "Cbask_Indv_No_Avg_XML.dtd", -- no dtd
82 "Cbask_Indv_Scoring_XML.dtd",
83 "Cbask_Indv_Shooting_XML.dtd", -- no dtd
84 "CBASK_MinutesXML.dtd",
85 "Cbask_Polls_XML.dtd",
86 "CBASK_ReboundsXML.dtd",
87 "CBASK_ScoringLeadersXML.dtd",
88 "CBASK_StealsXML.dtd", -- no dtd
89 "Cbask_Team_Scoring_Rebound_Margin_XML.dtd", -- no dtd
90 "Cbask_Team_Scoring_XML.dtd", -- no dtd
91 "Cbask_Team_Shooting_Pct_XML.dtd", -- no dtd
92 "Cbask_Team_ThreePT_Made_XML.dtd",
93 "Cbask_Team_ThreePT_PCT_XML.dtd",
94 "Cbask_Team_Win_Pct_XML.dtd",
95 "Cbask_Top_Twenty_Five_XML.dtd",
96 "CBASK_TopTwentyFiveResult_XML.dtd",
97 "Cbask_Tourn_Awards_XML.dtd",
98 "Cbask_Tourn_Champs_XML.dtd",
99 "Cbask_Tourn_Indiv_XML.dtd",
100 "Cbask_Tourn_Leaders_XML.dtd",
101 "Cbask_Tourn_MVP_XML.dtd",
102 "Cbask_Tourn_Records_XML.dtd",
103 "LeagueScheduleXML.dtd",
104 "minorscoresxml.dtd",
105 "Minor_Baseball_League_Leaders_XML.dtd",
106 "Minor_Baseball_Standings_XML.dtd",
107 "Minor_Baseball_Transactions_XML.dtd",
108 "mlbbattingavgxml.dtd",
109 "mlbdoublesleadersxml.dtd",
110 "MLBGamesPlayedXML.dtd",
111 "MLBGIDPXML.dtd",
112 "MLBHitByPitchXML.dtd",
113 "mlbhitsleadersxml.dtd",
114 "mlbhomerunsxml.dtd",
115 "MLBHRFreqXML.dtd",
116 "MLBIntWalksXML.dtd",
117 "MLBKORateXML.dtd",
118 "mlbonbasepctxml.dtd",
119 "MLBOPSXML.dtd",
120 "MLBPlateAppsXML.dtd",
121 "mlbrbisxml.dtd",
122 "mlbrunsleadersxml.dtd",
123 "MLBSacFliesXML.dtd",
124 "MLBSacrificesXML.dtd",
125 "MLBSBSuccessXML.dtd",
126 "mlbsluggingpctxml.dtd",
127 "mlbstandxml.dtd",
128 "mlbstandxml_preseason.dtd",
129 "mlbstolenbasexml.dtd",
130 "mlbtotalbasesleadersxml.dtd",
131 "mlbtriplesleadersxml.dtd",
132 "MLBWalkRateXML.dtd",
133 "mlbwalksleadersxml.dtd",
134 "MLBXtraBaseHitsXML.dtd",
135 "MLB_ERA_Leaders.dtd",
136 "MLB_Fielding_XML.dtd",
137 "MLB_Pitching_Appearances_Leaders.dtd",
138 "MLB_Pitching_Balks_Leaders.dtd",
139 "MLB_Pitching_CG_Leaders.dtd",
140 "MLB_Pitching_ER_Allowed_Leaders.dtd",
141 "MLB_Pitching_Hits_Allowed_Leaders.dtd",
142 "MLB_Pitching_Hit_Batters_Leaders.dtd",
143 "MLB_Pitching_HR_Allowed_Leaders.dtd",
144 "MLB_Pitching_IP_Leaders.dtd",
145 "MLB_Pitching_Runs_Allowed_Leaders.dtd", -- no dtd
146 "MLB_Pitching_Saves_Leaders.dtd", -- no dtd
147 "MLB_Pitching_Shut_Outs_Leaders.dtd", -- no dtd
148 "MLB_Pitching_Starts_Leaders.dtd", -- no dtd
149 "MLB_Pitching_Strike_Outs_Leaders.dtd", -- no dtd
150 "MLB_Pitching_Walks_Leaders.dtd", -- no dtd
151 "MLB_Pitching_WHIP_Leaders.dtd", -- no dtd
152 "MLB_Pitching_Wild_Pitches_Leaders.dtd", -- no dtd
153 "MLB_Pitching_Win_Percentage_Leaders.dtd", -- no dtd
154 "MLB_Pitching_WL_Leaders.dtd", -- no dtd
155 "NBA_Team_Stats_XML.dtd", -- no dtd
156 "NBA3PPctXML.dtd", -- no dtd
157 "NBAAssistsXML.dtd", -- no dtd
158 "NBABlocksXML.dtd", -- no dtd
159 "nbaconfrecxml.dtd", -- no dtd
160 "nbadaysxml.dtd", -- no dtd
161 "nbadivisionsxml.dtd", -- no dtd
162 "NBAFGPctXML.dtd", -- no dtd
163 "NBAFoulsXML.dtd", -- no dtd
164 "NBAFTPctXML.dtd", -- no dtd
165 "NBAMinutesXML.dtd", -- no dtd
166 "NBAReboundsXML.dtd", -- no dtd
167 "NBAScorersXML.dtd", -- no dtd
168 "nbastandxml.dtd", -- no dtd
169 "NBAStealsXML.dtd", -- no dtd
170 "nbateamleadersxml.dtd", -- no dtd
171 "nbatripledoublexml.dtd", -- no dtd
172 "NBATurnoversXML.dtd", -- no dtd
173 "NCAA_Conference_Schedule_XML.dtd", -- no dtd
174 "nflfirstdownxml.dtd", -- no dtd
175 "NFLFumbleLeaderXML.dtd", -- no dtd
176 "NFLGiveTakeXML.dtd", -- no dtd
177 "NFLGrassTurfDomeOutsideXML.dtd", -- no dtd
178 "NFLInside20XML.dtd", -- no dtd
179 "NFLInterceptionLeadersXML.dtd", -- no dtd
180 "NFLKickoffsXML.dtd", -- no dtd
181 "NFLMondayNightXML.dtd", -- no dtd
182 "NFLPassingLeadersXML.dtd", -- no dtd
183 "NFLPassLeadXML.dtd", -- no dtd
184 "NFLQBStartsXML.dtd", -- no dtd
185 "NFLReceivingLeadersXML.dtd", -- no dtd
186 "NFLRushingLeadersXML.dtd", -- no dtd
187 "NFLSackLeadersXML.dtd", -- no dtd
188 "nflstandxml.dtd", -- no dtd
189 "NFLTackleFFLeadersXML.dtd", -- no dtd
190 "NFLTeamRankingsXML.dtd", -- no dtd
191 "NFLTopKickoffReturnXML.dtd", -- no dtd
192 "NFLTopPerformanceXML.dtd", -- no dtd
193 "NFLTopPuntReturnXML.dtd", -- no dtd
194 "NFLTotalYardageXML.dtd", -- no dtd
195 "NFLYardsXML.dtd", -- no dtd
196 "NFL_KickingLeaders_XML.dtd", -- no dtd
197 "NFL_NBA_Draft_XML.dtd", -- no dtd
198 "NFL_PuntingLeaders_XML.dtd", -- no dtd
199 "NFL_Roster_XML.dtd", -- no dtd
200 "NFL_Team_Stats_XML.dtd", -- no dtd
201 "Transactions_XML.dtd", -- no dtd
202 "Weekly_Sched_XML.dtd", -- no dtd
203 "WNBA_Team_Leaders_XML.dtd", -- no dtd
204 "WNBA3PPctXML.dtd", -- no dtd
205 "WNBAAssistsXML.dtd", -- no dtd
206 "WNBABlocksXML.dtd", -- no dtd
207 "WNBAFGPctXML.dtd", -- no dtd
208 "WNBAFoulsXML.dtd", -- no dtd
209 "WNBAFTPctXML.dtd", -- no dtd
210 "WNBAMinutesXML.dtd", -- no dtd
211 "WNBAReboundsXML.dtd", -- no dtd
212 "WNBAScorersXML.dtd", -- no dtd
213 "wnbastandxml.dtd", -- no dtd
214 "WNBAStealsXML.dtd", -- no dtd
215 "WNBATurnoversXML.dtd" -- no dtd
216 ]
217
218
219 -- | XML representation of a SportInfo \<message\>.
220 --
221 data Message =
222 Message {
223 xml_dtd :: String,
224 xml_xml_file_id :: Int,
225 xml_time_stamp :: UTCTime,
226 xml_xml :: String }
227 deriving (Eq, Show)
228
229
230 -- | Attempt to parse a 'Message' from an 'XmlTree'. If we cannot,
231 -- we fail with an error message.
232 --
233 parse_xml :: String -> XmlTree -> Either String Message
234 parse_xml dtdname xmltree = do
235 xmlfid <- parse_xmlfid xmltree
236 timestamp <- parse_xml_time_stamp xmltree
237 message <- parse_message xmltree
238 return $ Message dtdname (fromInteger xmlfid) timestamp (xshow [message])
239
240
241 -- | Database representation of a 'Message'.
242 --
243 data SportInfo =
244 SportInfo {
245 db_dtd :: String,
246 db_xml_file_id :: Int,
247 db_time_stamp :: UTCTime,
248 db_xml :: String }
249
250
251 instance ToDb Message where
252 -- | The database analogue of a 'Message' is an 'SportInfo'.
253 type Db Message = SportInfo
254
255 instance FromXml Message where
256 -- | The XML to DB conversion is trivial here.
257 --
258 from_xml Message{..} = SportInfo {
259 db_dtd = xml_dtd,
260 db_xml_file_id = xml_xml_file_id,
261 db_time_stamp = xml_time_stamp,
262 db_xml = xml_xml }
263
264
265 -- | This allows us to insert the XML representation 'Message'
266 -- directly.
267 --
268 instance XmlImport Message
269
270
271 --
272 -- Database code
273 --
274
275 instance DbImport Message where
276 dbmigrate _ =
277 run_dbmigrate $ migrate (undefined :: SportInfo)
278
279 -- | We import a 'Message' by inserting the whole thing at
280 -- once. Nothing fancy going on here.
281 dbimport msg = do
282 insert_xml_ msg
283 return ImportSucceeded
284
285
286 -- | The database schema for SportInfo is trivial; all we need is for
287 -- the XML_File_ID to be unique.
288 --
289 mkPersist tsn_codegen_config [groundhog|
290 - entity: SportInfo
291 constructors:
292 - name: SportInfo
293 uniques:
294 - name: unique_sport_info
295 type: constraint
296 # Prevent multiple imports of the same message.
297 fields: [db_xml_file_id]
298 |]
299
300
301 --
302 -- Tasty Tests
303 --
304
305 -- | A list of all tests for this module.
306 --
307 sport_info_tests :: TestTree
308 sport_info_tests =
309 testGroup
310 "SportInfo tests"
311 [ test_parse_xml_succeeds,
312 test_dbimport_succeeds ]
313
314
315 -- | Sample XML documents for SportInfo types.
316 --
317 sport_info_test_files :: [FilePath]
318 sport_info_test_files =
319 map ("test/xml/sportinfo/" ++) [
320 "CBASK_3PPctXML.xml",
321 "Cbask_All_Tourn_Teams_XML.xml",
322 "CBASK_AssistsXML.xml",
323 "Cbask_Awards_XML.xml",
324 "CBASK_BlocksXML.xml",
325 "Cbask_Conf_Standings_XML.xml",
326 "Cbask_DivII_III_Indv_Stats_XML.xml",
327 "Cbask_DivII_Team_Stats_XML.xml",
328 "Cbask_DivIII_Team_Stats_XML.xml",
329 "CBASK_FGPctXML.xml",
330 "CBASK_FoulsXML.xml",
331 "CBASK_FTPctXML.xml",
332 "Cbask_Indv_Scoring_XML.xml",
333 "CBASK_MinutesXML.xml",
334 "Cbask_Polls_XML.xml",
335 "CBASK_ReboundsXML.xml",
336 "CBASK_ScoringLeadersXML.xml",
337 "Cbask_Team_ThreePT_Made_XML.xml",
338 "Cbask_Team_ThreePT_PCT_XML.xml",
339 "Cbask_Team_Win_Pct_XML.xml",
340 "Cbask_Top_Twenty_Five_XML.xml",
341 "CBASK_TopTwentyFiveResult_XML.xml",
342 "Cbask_Tourn_Awards_XML.xml",
343 "Cbask_Tourn_Champs_XML.xml",
344 "Cbask_Tourn_Indiv_XML.xml",
345 "Cbask_Tourn_Leaders_XML.xml",
346 "Cbask_Tourn_MVP_XML.xml",
347 "Cbask_Tourn_Records_XML.xml",
348 "LeagueScheduleXML.xml",
349 "minorscoresxml.xml",
350 "Minor_Baseball_League_Leaders_XML.xml",
351 "Minor_Baseball_Standings_XML.xml",
352 "Minor_Baseball_Transactions_XML.xml",
353 "mlbbattingavgxml.xml",
354 "mlbdoublesleadersxml.xml",
355 "MLBGamesPlayedXML.xml",
356 "MLBGIDPXML.xml",
357 "MLBHitByPitchXML.xml",
358 "mlbhitsleadersxml.xml",
359 "mlbhomerunsxml.xml",
360 "MLBHRFreqXML.xml",
361 "MLBIntWalksXML.xml",
362 "MLBKORateXML.xml",
363 "mlbonbasepctxml.xml",
364 "MLBOPSXML.xml",
365 "MLBPlateAppsXML.xml",
366 "mlbrbisxml.xml",
367 "mlbrunsleadersxml.xml",
368 "MLBSacFliesXML.xml",
369 "MLBSacrificesXML.xml",
370 "MLBSBSuccessXML.xml",
371 "mlbsluggingpctxml.xml",
372 "mlbstandxml.xml",
373 "mlbstandxml_preseason.xml",
374 "mlbstolenbasexml.xml",
375 "mlbtotalbasesleadersxml.xml",
376 "mlbtriplesleadersxml.xml",
377 "MLBWalkRateXML.xml",
378 "mlbwalksleadersxml.xml",
379 "MLBXtraBaseHitsXML.xml",
380 "MLB_ERA_Leaders.xml",
381 "MLB_Pitching_Appearances_Leaders.xml",
382 "MLB_Pitching_Balks_Leaders.xml",
383 "MLB_Pitching_CG_Leaders.xml",
384 "MLB_Pitching_ER_Allowed_Leaders.xml",
385 "MLB_Pitching_Hits_Allowed_Leaders.xml",
386 "MLB_Pitching_Hit_Batters_Leaders.xml",
387 "MLB_Pitching_HR_Allowed_Leaders.xml",
388 "MLB_Pitching_IP_Leaders.xml"
389 ]
390
391
392
393 -- | Make sure we can parse every element of 'sport_info_test_files'.
394 --
395 test_parse_xml_succeeds :: TestTree
396 test_parse_xml_succeeds =
397 testGroup "parse_xml" $ map check sport_info_test_files
398 where
399 check t = testCase t $ do
400 x <- unsafe_read_document t
401 let result = parse_xml "dummy" x
402 let actual = case result of -- isRight appears in base-4.7
403 Left _ -> False
404 Right _ -> True
405 let expected = True
406 actual @?= expected
407
408
409 -- | Ensure that each element of 'sport_info_test_files' can be imported
410 -- by counting the total number of database records (after
411 -- importing) and comparing it against the length of
412 -- 'sport_info_test_files'.
413 --
414 test_dbimport_succeeds :: TestTree
415 test_dbimport_succeeds = testCase "dbimport succeeds" $ do
416 xmltrees <- mapM unsafe_read_document sport_info_test_files
417 let msgs = rights $ map (parse_xml "dummy") xmltrees
418 actual <- withSqliteConn ":memory:" $ runDbConn $ do
419 runMigration silentMigrationLogger $ do
420 migrate (undefined :: SportInfo)
421 mapM_ dbimport msgs
422 countAll (undefined :: SportInfo)
423
424 actual @?= expected
425 where
426 expected = length sport_info_test_files