]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/SportInfo.hs
Add SportInfo support for Cbask_Team_Scoring_XML.dtd.
[dead/htsn-import.git] / src / TSN / XML / SportInfo.hs
1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE QuasiQuotes #-}
4 {-# LANGUAGE RecordWildCards #-}
5 {-# LANGUAGE TemplateHaskell #-}
6 {-# LANGUAGE TypeFamilies #-}
7
8 -- | SportInfo represents a collection of DTDs that we don't really
9 -- handle but want to make available. The raw XML gets stored in the
10 -- database along with the XML_File_ID, but we don't parse any of it.
11 --
12 -- This is almost completely redundant with "TSN.XML.GameInfo", but
13 -- the redundancy is necessary: we need separate message types so
14 -- that we can have separate 'DbImport' instances. It would take
15 -- more code/work to abstract (if it's even possible) than to
16 -- duplicate.
17 --
18 module TSN.XML.SportInfo (
19 dtds,
20 parse_xml,
21 sport_info_tests,
22 -- * WARNING: these are private but exported to silence warnings
23 SportInfoConstructor(..) )
24 where
25
26 -- System imports.
27 import Data.Either ( rights )
28 import Data.String.Utils ( replace )
29 import Data.Time.Clock ( UTCTime )
30 import Database.Groundhog (
31 countAll,
32 insert_,
33 migrate,
34 runMigration,
35 silentMigrationLogger )
36 import Database.Groundhog.Generic ( runDbConn )
37 import Database.Groundhog.Sqlite ( withSqliteConn )
38 import Database.Groundhog.TH (
39 defaultCodegenConfig,
40 groundhog,
41 mkPersist )
42 import Test.Tasty ( TestTree, testGroup )
43 import Test.Tasty.HUnit ( (@?=), testCase )
44 import Text.XML.HXT.Core ( XmlTree )
45 import Text.XML.HXT.DOM.ShowXml ( xshow )
46
47 -- Local imports.
48 import TSN.DbImport (
49 DbImport(..),
50 ImportResult(..),
51 run_dbmigrate )
52 import TSN.Parse (
53 ParseError,
54 parse_message,
55 parse_xmlfid,
56 parse_xml_time_stamp )
57 import Xml ( unsafe_read_document )
58
59
60 -- | The DTDs for everything that we consider \"Sport Info.\"
61 --
62 dtds :: [String]
63 dtds =
64 [ "CBASK_3PPctXML.dtd",
65 "Cbask_All_Tourn_Teams_XML.dtd",
66 "CBASK_AssistsXML.dtd",
67 "Cbask_Awards_XML.dtd",
68 "CBASK_BlocksXML.dtd",
69 "Cbask_Conf_Standings_XML.dtd",
70 "Cbask_DivII_III_Indv_Stats_XML.dtd",
71 "Cbask_DivII_Team_Stats_XML.dtd",
72 "Cbask_DivIII_Team_Stats_XML.dtd",
73 "CBASK_FGPctXML.dtd",
74 "CBASK_FoulsXML.dtd",
75 "CBASK_FTPctXML.dtd",
76 "Cbask_Indv_No_Avg_XML.dtd",
77 "Cbask_Indv_Scoring_XML.dtd",
78 "Cbask_Indv_Shooting_XML.dtd",
79 "CBASK_MinutesXML.dtd",
80 "Cbask_Polls_XML.dtd",
81 "CBASK_ReboundsXML.dtd",
82 "CBASK_ScoringLeadersXML.dtd",
83 "Cbask_Team_Scoring_Rebound_Margin_XML.dtd",
84 "Cbask_Team_Scoring_XML.dtd",
85 "Cbask_Team_ThreePT_Made_XML.dtd",
86 "Cbask_Team_ThreePT_PCT_XML.dtd",
87 "Cbask_Team_Win_Pct_XML.dtd",
88 "Cbask_Top_Twenty_Five_XML.dtd",
89 "CBASK_TopTwentyFiveResult_XML.dtd",
90 "Cbask_Tourn_Awards_XML.dtd",
91 "Cbask_Tourn_Champs_XML.dtd",
92 "Cbask_Tourn_Indiv_XML.dtd",
93 "Cbask_Tourn_Leaders_XML.dtd",
94 "Cbask_Tourn_MVP_XML.dtd",
95 "Cbask_Tourn_Records_XML.dtd",
96 "LeagueScheduleXML.dtd",
97 "minorscoresxml.dtd",
98 "Minor_Baseball_League_Leaders_XML.dtd",
99 "Minor_Baseball_Standings_XML.dtd",
100 "Minor_Baseball_Transactions_XML.dtd",
101 "mlbbattingavgxml.dtd",
102 "mlbdoublesleadersxml.dtd",
103 "MLBGamesPlayedXML.dtd",
104 "MLBGIDPXML.dtd",
105 "MLBHitByPitchXML.dtd",
106 "mlbhitsleadersxml.dtd",
107 "mlbhomerunsxml.dtd",
108 "MLBHRFreqXML.dtd",
109 "MLBIntWalksXML.dtd",
110 "MLBKORateXML.dtd",
111 "mlbonbasepctxml.dtd",
112 "MLBOPSXML.dtd",
113 "MLBPlateAppsXML.dtd",
114 "mlbrbisxml.dtd",
115 "mlbrunsleadersxml.dtd",
116 "MLBSacFliesXML.dtd",
117 "MLBSacrificesXML.dtd",
118 "MLBSBSuccessXML.dtd",
119 "mlbsluggingpctxml.dtd",
120 "mlbstandxml.dtd",
121 "mlbstandxml_preseason.dtd",
122 "mlbstolenbasexml.dtd",
123 "mlbtotalbasesleadersxml.dtd",
124 "mlbtriplesleadersxml.dtd",
125 "MLBWalkRateXML.dtd",
126 "mlbwalksleadersxml.dtd",
127 "MLBXtraBaseHitsXML.dtd",
128 "MLB_ERA_Leaders.dtd",
129 "MLB_Fielding_XML.dtd",
130 "MLB_Pitching_Appearances_Leaders.dtd",
131 "MLB_Pitching_Balks_Leaders.dtd",
132 "MLB_Pitching_CG_Leaders.dtd",
133 "MLB_Pitching_ER_Allowed_Leaders.dtd",
134 "MLB_Pitching_Hits_Allowed_Leaders.dtd",
135 "MLB_Pitching_Hit_Batters_Leaders.dtd",
136 "MLB_Pitching_HR_Allowed_Leaders.dtd",
137 "MLB_Pitching_IP_Leaders.dtd",
138 "MLB_Pitching_Runs_Allowed_Leaders.dtd",
139 "MLB_Pitching_Saves_Leaders.dtd",
140 "MLB_Pitching_Shut_Outs_Leaders.dtd",
141 "MLB_Pitching_Starts_Leaders.dtd",
142 "MLB_Pitching_Strike_Outs_Leaders.dtd",
143 "MLB_Pitching_Walks_Leaders.dtd",
144 "MLB_Pitching_WHIP_Leaders.dtd",
145 "MLB_Pitching_Wild_Pitches_Leaders.dtd",
146 "MLB_Pitching_Win_Percentage_Leaders.dtd",
147 "MLB_Pitching_WL_Leaders.dtd",
148 "NBA_Team_Stats_XML.dtd",
149 "NBA3PPctXML.dtd",
150 "NBAAssistsXML.dtd",
151 "NBABlocksXML.dtd",
152 "nbaconfrecxml.dtd",
153 "nbadaysxml.dtd",
154 "nbadivisionsxml.dtd",
155 "NBAFGPctXML.dtd",
156 "NBAFoulsXML.dtd",
157 "NBAFTPctXML.dtd",
158 "NBAMinutesXML.dtd",
159 "NBAReboundsXML.dtd",
160 "NBAScorersXML.dtd",
161 "nbastandxml.dtd",
162 "NBAStealsXML.dtd",
163 "nbateamleadersxml.dtd",
164 "nbatripledoublexml.dtd",
165 "NBATurnoversXML.dtd",
166 "NCAA_Conference_Schedule_XML.dtd",
167 "nflfirstdownxml.dtd",
168 "NFLFumbleLeaderXML.dtd",
169 "NFLGrassTurfDomeOutsideXML.dtd",
170 "NFLGiveTakeXML.dtd",
171 "NFLInside20XML.dtd",
172 "NFLInterceptionLeadersXML.dtd",
173 "NFLKickoffsXML.dtd",
174 "NFLMondayNightXML.dtd",
175 "NFLPassingLeadersXML.dtd",
176 "NFLPassLeadXML.dtd",
177 "NFLQBStartsXML.dtd",
178 "NFLReceivingLeadersXML.dtd",
179 "NFLRushingLeadersXML.dtd",
180 "NFLSackLeadersXML.dtd",
181 "nflstandxml.dtd",
182 "NFLTackleFFLeadersXML.dtd",
183 "NFLTeamRankingsXML.dtd",
184 "NFLTopKickoffReturnXML.dtd",
185 "NFLTopPerformanceXML.dtd",
186 "NFLTopPuntReturnXML.dtd",
187 "NFLTotalYardageXML.dtd",
188 "NFLYardsXML.dtd",
189 "NFL_KickingLeaders_XML.dtd",
190 "NFL_NBA_Draft_XML.dtd",
191 "NFL_PuntingLeaders_XML.dtd",
192 "NFL_Roster_XML.dtd",
193 "NFL_Team_Stats_XML.dtd",
194 "Transactions_XML.dtd",
195 "Weekly_Sched_XML.dtd",
196 "WNBA_Team_Leaders_XML.dtd",
197 "WNBA3PPctXML.dtd",
198 "WNBAAssistsXML.dtd",
199 "WNBABlocksXML.dtd",
200 "WNBAFGPctXML.dtd",
201 "WNBAFoulsXML.dtd",
202 "WNBAFTPctXML.dtd",
203 "WNBAMinutesXML.dtd",
204 "WNBAReboundsXML.dtd",
205 "WNBAScorersXML.dtd",
206 "wnbastandxml.dtd",
207 "WNBAStealsXML.dtd",
208 "WNBATurnoversXML.dtd" ]
209
210
211 -- | This serves as both the database and XML representation of a
212 -- SportInfo \<message\>.
213 --
214 data SportInfo =
215 SportInfo {
216 dtd :: String,
217 xml_file_id :: Int,
218 time_stamp :: UTCTime,
219 xml :: String }
220 deriving (Eq, Show)
221
222
223 -- | Attempt to parse a 'SportInfo' from an 'XmlTree'. If we cannot,
224 -- we fail with an error message.
225 --
226 parse_xml :: String -> XmlTree -> Either ParseError SportInfo
227 parse_xml dtdname xmltree = do
228 xmlfid <- parse_xmlfid xmltree
229 timestamp <- parse_xml_time_stamp xmltree
230 message <- parse_message xmltree
231 return $ SportInfo dtdname xmlfid timestamp (xshow [message])
232
233
234 --
235 -- Database code
236 --
237
238 instance DbImport SportInfo where
239 dbmigrate _ =
240 run_dbmigrate $ migrate (undefined :: SportInfo)
241
242 -- | We import a 'SportInfo' by inserting the whole thing at
243 -- once. Nothing fancy going on here.
244 dbimport msg = do
245 insert_ msg
246 return ImportSucceeded
247
248
249 -- | The database schema for SportInfo is trivial; all we need is for
250 -- the XML_File_ID to be unique.
251 --
252 mkPersist defaultCodegenConfig [groundhog|
253 - entity: SportInfo
254 dbName: sport_info
255 constructors:
256 - name: SportInfo
257 uniques:
258 - name: unique_sport_info
259 type: constraint
260 # Prevent multiple imports of the same message.
261 fields: [xml_file_id]
262 |]
263
264
265 --
266 -- Tasty Tests
267 --
268
269 -- | A list of all tests for this module.
270 --
271 sport_info_tests :: TestTree
272 sport_info_tests =
273 testGroup
274 "SportInfo tests"
275 [ test_accessors,
276 test_parse_xml_succeeds,
277 test_dbimport_succeeds ]
278
279
280 -- | Make sure the accessors work and that we can parse one file. Ok,
281 -- so the real point of this is to make the unused fields (dtd, xml,
282 -- ...) warning go away without having to mangle the groundhog code.
283 --
284 test_accessors :: TestTree
285 test_accessors = testCase "we can access a parsed sport_info" $ do
286 xmltree <- unsafe_read_document "test/xml/sportinfo/wnbastandxml.xml"
287 let Right t = parse_xml "wnbastandxml.dtd" xmltree
288 let a1 = dtd t
289 let ex1 = "wnbastandxml.dtd"
290 let a2 = xml_file_id t
291 let ex2 = 2011
292 let a3 = show $ time_stamp t
293 let ex3 = "2009-09-27 19:50:00 UTC"
294 let a4 = take 9 (xml t)
295 let ex4 = "<message>"
296 let actual = (a1,a2,a3,a4)
297 let expected = (ex1,ex2,ex3,ex4)
298 actual @?= expected
299
300
301 -- | Sample XML documents for SportInfo types.
302 --
303 sport_info_test_files :: [FilePath]
304 sport_info_test_files =
305 map (change_suffix . add_path) dtds
306 where
307 add_path = ("test/xml/sportinfo/" ++ )
308 change_suffix = replace ".dtd" ".xml"
309
310
311 -- | Make sure we can parse every element of 'sport_info_test_files'.
312 --
313 test_parse_xml_succeeds :: TestTree
314 test_parse_xml_succeeds =
315 testGroup "parse_xml" $ map check sport_info_test_files
316 where
317 check t = testCase t $ do
318 x <- unsafe_read_document t
319 let result = parse_xml "dummy" x
320 let actual = case result of -- isRight appears in base-4.7
321 Left _ -> False
322 Right _ -> True
323 let expected = True
324 actual @?= expected
325
326
327 -- | Ensure that each element of 'sport_info_test_files' can be imported
328 -- by counting the total number of database records (after
329 -- importing) and comparing it against the length of
330 -- 'sport_info_test_files'.
331 --
332 test_dbimport_succeeds :: TestTree
333 test_dbimport_succeeds = testCase "dbimport succeeds" $ do
334 xmltrees <- mapM unsafe_read_document sport_info_test_files
335 let msgs = rights $ map (parse_xml "dummy") xmltrees
336 actual <- withSqliteConn ":memory:" $ runDbConn $ do
337 runMigration silentMigrationLogger $
338 migrate (undefined :: SportInfo)
339 mapM_ dbimport msgs
340 countAll (undefined :: SportInfo)
341
342 actual @?= expected
343 where
344 expected = length sport_info_test_files