]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/SportInfo.hs
Add SportInfo support for MLBOPSXML.dtd.
[dead/htsn-import.git] / src / TSN / XML / SportInfo.hs
1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE QuasiQuotes #-}
4 {-# LANGUAGE RecordWildCards #-}
5 {-# LANGUAGE TemplateHaskell #-}
6 {-# LANGUAGE TypeFamilies #-}
7
8 -- | SportInfo represents a collection of DTDs that we don't really
9 -- handle but want to make available. The raw XML gets stored in the
10 -- database along with the XML_File_ID, but we don't parse any of it.
11 --
12 -- This is almost completely redundant with "TSN.XML.GameInfo", but
13 -- the redundancy is necessary: we need separate 'Message' types so
14 -- that we can have separate 'DbImport' instances. It would take
15 -- more code/work to abstract (if it's even possible) than to
16 -- duplicate.
17 --
18 module TSN.XML.SportInfo (
19 dtds,
20 parse_xml,
21 sport_info_tests,
22 -- * WARNING: these are private but exported to silence warnings
23 SportInfoConstructor(..) )
24 where
25
26 -- System imports.
27 import Data.Either ( rights )
28 import Data.Time.Clock ( UTCTime )
29 import Database.Groundhog (
30 countAll,
31 migrate,
32 runMigration,
33 silentMigrationLogger )
34 import Database.Groundhog.Generic ( runDbConn )
35 import Database.Groundhog.Sqlite ( withSqliteConn )
36 import Database.Groundhog.TH (
37 groundhog,
38 mkPersist )
39 import Test.Tasty ( TestTree, testGroup )
40 import Test.Tasty.HUnit ( (@?=), testCase )
41 import Text.XML.HXT.Core ( XmlTree )
42 import Text.XML.HXT.DOM.ShowXml ( xshow )
43
44 -- Local imports.
45 import TSN.Codegen ( tsn_codegen_config )
46 import TSN.DbImport (
47 DbImport(..),
48 ImportResult(..),
49 run_dbmigrate )
50 import TSN.Parse (
51 parse_message,
52 parse_xmlfid,
53 parse_xml_time_stamp )
54 import TSN.XmlImport ( XmlImport(..) )
55 import Xml (
56 FromXml(..),
57 ToDb(..),
58 unsafe_read_document )
59
60
61 -- | The DTDs for everything that we consider \"Sport Info.\"
62 --
63 -- TODO: This is the list from the old implementation. We need to
64 -- make sure that we are really receiving XML for these DTDs
65 -- (i.e. the names are correct).
66 --
67 dtds :: [String]
68 dtds =
69 [ "CBASK_3PPctXML.dtd",
70 "Cbask_All_Tourn_Teams_XML.dtd",
71 "CBASK_AssistsXML.dtd",
72 "Cbask_Awards_XML.dtd",
73 "CBASK_BlocksXML.dtd",
74 "Cbask_Conf_Standings_XML.dtd",
75 "Cbask_DivII_III_Indv_Stats_XML.dtd",
76 "Cbask_DivII_Team_Stats_XML.dtd",
77 "Cbask_DivIII_Team_Stats_XML.dtd",
78 "CBASK_FGPctXML.dtd",
79 "CBASK_FoulsXML.dtd",
80 "CBASK_FTPctXML.dtd",
81 "Cbask_Indv_No_Avg_XML.dtd", -- no dtd
82 "Cbask_Indv_Scoring_XML.dtd",
83 "Cbask_Indv_Shooting_XML.dtd", -- no dtd
84 "CBASK_MinutesXML.dtd",
85 "Cbask_Polls_XML.dtd",
86 "CBASK_ReboundsXML.dtd",
87 "CBASK_ScoringLeadersXML.dtd",
88 "CBASK_StealsXML.dtd", -- no dtd
89 "Cbask_Team_Scoring_Rebound_Margin_XML.dtd", -- no dtd
90 "Cbask_Team_Scoring_XML.dtd", -- no dtd
91 "Cbask_Team_Shooting_Pct_XML.dtd", -- no dtd
92 "Cbask_Team_ThreePT_Made_XML.dtd",
93 "Cbask_Team_ThreePT_PCT_XML.dtd",
94 "Cbask_Team_Win_Pct_XML.dtd",
95 "Cbask_Top_Twenty_Five_XML.dtd",
96 "CBASK_TopTwentyFiveResult_XML.dtd",
97 "Cbask_Tourn_Awards_XML.dtd",
98 "Cbask_Tourn_Champs_XML.dtd",
99 "Cbask_Tourn_Indiv_XML.dtd",
100 "Cbask_Tourn_Leaders_XML.dtd",
101 "Cbask_Tourn_MVP_XML.dtd",
102 "Cbask_Tourn_Records_XML.dtd",
103 "LeagueScheduleXML.dtd",
104 "minorscoresxml.dtd",
105 "Minor_Baseball_League_Leaders_XML.dtd",
106 "Minor_Baseball_Standings_XML.dtd",
107 "Minor_Baseball_Transactions_XML.dtd",
108 "mlbbattingavgxml.dtd",
109 "mlbdoublesleadersxml.dtd",
110 "MLBGamesPlayedXML.dtd",
111 "MLBGIDPXML.dtd",
112 "MLBHitByPitchXML.dtd",
113 "mlbhitsleadersxml.dtd",
114 "mlbhomerunsxml.dtd",
115 "MLBHRFreqXML.dtd",
116 "MLBIntWalksXML.dtd",
117 "MLBKORateXML.dtd",
118 "mlbonbasepctxml.dtd",
119 "MLBOPSXML.dtd",
120 "MLBPlateAppsXML.dtd", -- no dtd
121 "mlbrbisxml.dtd", -- no dtd
122 "mlbrunsleadersxml.dtd", -- no dtd
123 "MLBSacFliesXML.dtd", -- no dtd
124 "MLBSacrificesXML.dtd", -- no dtd
125 "MLBSBSuccessXML.dtd", -- no dtd
126 "mlbsluggingpctxml.dtd", -- no dtd
127 "mlbstandxml.dtd", -- no dtd
128 "mlbstandxml_preseason.dtd", -- no dtd
129 "mlbstolenbasexml.dtd", -- no dtd
130 "mlbtotalbasesleadersxml.dtd", -- no dtd
131 "mlbtriplesleadersxml.dtd", -- no dtd
132 "MLBWalkRateXML.dtd", -- no dtd
133 "mlbwalksleadersxml.dtd", -- no dtd
134 "MLBXtraBaseHitsXML.dtd", -- no dtd
135 "MLB_ERA_Leaders.dtd", -- no dtd
136 "MLB_Fielding_XML.dtd", -- no dtd
137 "MLB_Pitching_Appearances_Leaders.dtd", -- no dtd
138 "MLB_Pitching_Balks_Leaders.dtd", -- no dtd
139 "MLB_Pitching_CG_Leaders.dtd", -- no dtd
140 "MLB_Pitching_ER_Allowed_Leaders.dtd", -- no dtd
141 "MLB_Pitching_Hits_Allowed_Leaders.dtd", -- no dtd
142 "MLB_Pitching_Hit_Batters_Leaders.dtd", -- no dtd
143 "MLB_Pitching_HR_Allowed_Leaders.dtd", -- no dtd
144 "MLB_Pitching_IP_Leaders.dtd", -- no dtd
145 "MLB_Pitching_Runs_Allowed_Leaders.dtd", -- no dtd
146 "MLB_Pitching_Saves_Leaders.dtd", -- no dtd
147 "MLB_Pitching_Shut_Outs_Leaders.dtd", -- no dtd
148 "MLB_Pitching_Starts_Leaders.dtd", -- no dtd
149 "MLB_Pitching_Strike_Outs_Leaders.dtd", -- no dtd
150 "MLB_Pitching_Walks_Leaders.dtd", -- no dtd
151 "MLB_Pitching_WHIP_Leaders.dtd", -- no dtd
152 "MLB_Pitching_Wild_Pitches_Leaders.dtd", -- no dtd
153 "MLB_Pitching_Win_Percentage_Leaders.dtd", -- no dtd
154 "MLB_Pitching_WL_Leaders.dtd", -- no dtd
155 "NBA_Team_Stats_XML.dtd", -- no dtd
156 "NBA3PPctXML.dtd", -- no dtd
157 "NBAAssistsXML.dtd", -- no dtd
158 "NBABlocksXML.dtd", -- no dtd
159 "nbaconfrecxml.dtd", -- no dtd
160 "nbadaysxml.dtd", -- no dtd
161 "nbadivisionsxml.dtd", -- no dtd
162 "NBAFGPctXML.dtd", -- no dtd
163 "NBAFoulsXML.dtd", -- no dtd
164 "NBAFTPctXML.dtd", -- no dtd
165 "NBAMinutesXML.dtd", -- no dtd
166 "NBAReboundsXML.dtd", -- no dtd
167 "NBAScorersXML.dtd", -- no dtd
168 "nbastandxml.dtd", -- no dtd
169 "NBAStealsXML.dtd", -- no dtd
170 "nbateamleadersxml.dtd", -- no dtd
171 "nbatripledoublexml.dtd", -- no dtd
172 "NBATurnoversXML.dtd", -- no dtd
173 "NCAA_Conference_Schedule_XML.dtd", -- no dtd
174 "nflfirstdownxml.dtd", -- no dtd
175 "NFLFumbleLeaderXML.dtd", -- no dtd
176 "NFLGiveTakeXML.dtd", -- no dtd
177 "NFLGrassTurfDomeOutsideXML.dtd", -- no dtd
178 "NFLInside20XML.dtd", -- no dtd
179 "NFLInterceptionLeadersXML.dtd", -- no dtd
180 "NFLKickoffsXML.dtd", -- no dtd
181 "NFLMondayNightXML.dtd", -- no dtd
182 "NFLPassingLeadersXML.dtd", -- no dtd
183 "NFLPassLeadXML.dtd", -- no dtd
184 "NFLQBStartsXML.dtd", -- no dtd
185 "NFLReceivingLeadersXML.dtd", -- no dtd
186 "NFLRushingLeadersXML.dtd", -- no dtd
187 "NFLSackLeadersXML.dtd", -- no dtd
188 "nflstandxml.dtd", -- no dtd
189 "NFLTackleFFLeadersXML.dtd", -- no dtd
190 "NFLTeamRankingsXML.dtd", -- no dtd
191 "NFLTopKickoffReturnXML.dtd", -- no dtd
192 "NFLTopPerformanceXML.dtd", -- no dtd
193 "NFLTopPuntReturnXML.dtd", -- no dtd
194 "NFLTotalYardageXML.dtd", -- no dtd
195 "NFLYardsXML.dtd", -- no dtd
196 "NFL_KickingLeaders_XML.dtd", -- no dtd
197 "NFL_NBA_Draft_XML.dtd", -- no dtd
198 "NFL_PuntingLeaders_XML.dtd", -- no dtd
199 "NFL_Roster_XML.dtd", -- no dtd
200 "NFL_Team_Stats_XML.dtd", -- no dtd
201 "Transactions_XML.dtd", -- no dtd
202 "Weekly_Sched_XML.dtd", -- no dtd
203 "WNBA_Team_Leaders_XML.dtd", -- no dtd
204 "WNBA3PPctXML.dtd", -- no dtd
205 "WNBAAssistsXML.dtd", -- no dtd
206 "WNBABlocksXML.dtd", -- no dtd
207 "WNBAFGPctXML.dtd", -- no dtd
208 "WNBAFoulsXML.dtd", -- no dtd
209 "WNBAFTPctXML.dtd", -- no dtd
210 "WNBAMinutesXML.dtd", -- no dtd
211 "WNBAReboundsXML.dtd", -- no dtd
212 "WNBAScorersXML.dtd", -- no dtd
213 "wnbastandxml.dtd", -- no dtd
214 "WNBAStealsXML.dtd", -- no dtd
215 "WNBATurnoversXML.dtd" -- no dtd
216 ]
217
218
219 -- | XML representation of a SportInfo \<message\>.
220 --
221 data Message =
222 Message {
223 xml_dtd :: String,
224 xml_xml_file_id :: Int,
225 xml_time_stamp :: UTCTime,
226 xml_xml :: String }
227 deriving (Eq, Show)
228
229
230 -- | Attempt to parse a 'Message' from an 'XmlTree'. If we cannot,
231 -- we fail with an error message.
232 --
233 parse_xml :: String -> XmlTree -> Either String Message
234 parse_xml dtdname xmltree = do
235 xmlfid <- parse_xmlfid xmltree
236 timestamp <- parse_xml_time_stamp xmltree
237 message <- parse_message xmltree
238 return $ Message dtdname (fromInteger xmlfid) timestamp (xshow [message])
239
240
241 -- | Database representation of a 'Message'.
242 --
243 data SportInfo =
244 SportInfo {
245 db_dtd :: String,
246 db_xml_file_id :: Int,
247 db_time_stamp :: UTCTime,
248 db_xml :: String }
249
250
251 instance ToDb Message where
252 -- | The database analogue of a 'Message' is an 'SportInfo'.
253 type Db Message = SportInfo
254
255 instance FromXml Message where
256 -- | The XML to DB conversion is trivial here.
257 --
258 from_xml Message{..} = SportInfo {
259 db_dtd = xml_dtd,
260 db_xml_file_id = xml_xml_file_id,
261 db_time_stamp = xml_time_stamp,
262 db_xml = xml_xml }
263
264
265 -- | This allows us to insert the XML representation 'Message'
266 -- directly.
267 --
268 instance XmlImport Message
269
270
271 --
272 -- Database code
273 --
274
275 instance DbImport Message where
276 dbmigrate _ =
277 run_dbmigrate $ migrate (undefined :: SportInfo)
278
279 -- | We import a 'Message' by inserting the whole thing at
280 -- once. Nothing fancy going on here.
281 dbimport msg = do
282 insert_xml_ msg
283 return ImportSucceeded
284
285
286 -- | The database schema for SportInfo is trivial; all we need is for
287 -- the XML_File_ID to be unique.
288 --
289 mkPersist tsn_codegen_config [groundhog|
290 - entity: SportInfo
291 constructors:
292 - name: SportInfo
293 uniques:
294 - name: unique_sport_info
295 type: constraint
296 # Prevent multiple imports of the same message.
297 fields: [db_xml_file_id]
298 |]
299
300
301 --
302 -- Tasty Tests
303 --
304
305 -- | A list of all tests for this module.
306 --
307 sport_info_tests :: TestTree
308 sport_info_tests =
309 testGroup
310 "SportInfo tests"
311 [ test_parse_xml_succeeds,
312 test_dbimport_succeeds ]
313
314
315 -- | Sample XML documents for SportInfo types.
316 --
317 sport_info_test_files :: [FilePath]
318 sport_info_test_files =
319 map ("test/xml/sportinfo/" ++) [
320 "CBASK_3PPctXML.xml",
321 "Cbask_All_Tourn_Teams_XML.xml",
322 "CBASK_AssistsXML.xml",
323 "Cbask_Awards_XML.xml",
324 "CBASK_BlocksXML.xml",
325 "Cbask_Conf_Standings_XML.xml",
326 "Cbask_DivII_III_Indv_Stats_XML.xml",
327 "Cbask_DivII_Team_Stats_XML.xml",
328 "Cbask_DivIII_Team_Stats_XML.xml",
329 "CBASK_FGPctXML.xml",
330 "CBASK_FoulsXML.xml",
331 "CBASK_FTPctXML.xml",
332 "Cbask_Indv_Scoring_XML.xml",
333 "CBASK_MinutesXML.xml",
334 "Cbask_Polls_XML.xml",
335 "CBASK_ReboundsXML.xml",
336 "CBASK_ScoringLeadersXML.xml",
337 "Cbask_Team_ThreePT_Made_XML.xml",
338 "Cbask_Team_ThreePT_PCT_XML.xml",
339 "Cbask_Team_Win_Pct_XML.xml",
340 "Cbask_Top_Twenty_Five_XML.xml",
341 "CBASK_TopTwentyFiveResult_XML.xml",
342 "Cbask_Tourn_Awards_XML.xml",
343 "Cbask_Tourn_Champs_XML.xml",
344 "Cbask_Tourn_Indiv_XML.xml",
345 "Cbask_Tourn_Leaders_XML.xml",
346 "Cbask_Tourn_MVP_XML.xml",
347 "Cbask_Tourn_Records_XML.xml",
348 "LeagueScheduleXML.xml",
349 "minorscoresxml.xml",
350 "Minor_Baseball_League_Leaders_XML.xml",
351 "Minor_Baseball_Standings_XML.xml",
352 "Minor_Baseball_Transactions_XML.xml",
353 "mlbbattingavgxml.xml",
354 "mlbdoublesleadersxml.xml",
355 "MLBGamesPlayedXML.xml",
356 "MLBGIDPXML.xml",
357 "MLBHitByPitchXML.xml",
358 "mlbhitsleadersxml.xml",
359 "mlbhomerunsxml.xml",
360 "MLBHRFreqXML.xml",
361 "MLBIntWalksXML.xml",
362 "MLBKORateXML.xml",
363 "mlbonbasepctxml.xml",
364 "MLBOPSXML.xml"
365 ]
366
367
368
369 -- | Make sure we can parse every element of 'sport_info_test_files'.
370 --
371 test_parse_xml_succeeds :: TestTree
372 test_parse_xml_succeeds =
373 testGroup "parse_xml" $ map check sport_info_test_files
374 where
375 check t = testCase t $ do
376 x <- unsafe_read_document t
377 let result = parse_xml "dummy" x
378 let actual = case result of -- isRight appears in base-4.7
379 Left _ -> False
380 Right _ -> True
381 let expected = True
382 actual @?= expected
383
384
385 -- | Ensure that each element of 'sport_info_test_files' can be imported
386 -- by counting the total number of database records (after
387 -- importing) and comparing it against the length of
388 -- 'sport_info_test_files'.
389 --
390 test_dbimport_succeeds :: TestTree
391 test_dbimport_succeeds = testCase "dbimport succeeds" $ do
392 xmltrees <- mapM unsafe_read_document sport_info_test_files
393 let msgs = rights $ map (parse_xml "dummy") xmltrees
394 actual <- withSqliteConn ":memory:" $ runDbConn $ do
395 runMigration silentMigrationLogger $ do
396 migrate (undefined :: SportInfo)
397 mapM_ dbimport msgs
398 countAll (undefined :: SportInfo)
399
400 actual @?= expected
401 where
402 expected = length sport_info_test_files