]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/SportInfo.hs
Add SportInfo support for Cbask_Indv_Shooting_XML.dtd.
[dead/htsn-import.git] / src / TSN / XML / SportInfo.hs
1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE QuasiQuotes #-}
4 {-# LANGUAGE RecordWildCards #-}
5 {-# LANGUAGE TemplateHaskell #-}
6 {-# LANGUAGE TypeFamilies #-}
7
8 -- | SportInfo represents a collection of DTDs that we don't really
9 -- handle but want to make available. The raw XML gets stored in the
10 -- database along with the XML_File_ID, but we don't parse any of it.
11 --
12 -- This is almost completely redundant with "TSN.XML.GameInfo", but
13 -- the redundancy is necessary: we need separate message types so
14 -- that we can have separate 'DbImport' instances. It would take
15 -- more code/work to abstract (if it's even possible) than to
16 -- duplicate.
17 --
18 module TSN.XML.SportInfo (
19 dtds,
20 parse_xml,
21 sport_info_tests,
22 -- * WARNING: these are private but exported to silence warnings
23 SportInfoConstructor(..) )
24 where
25
26 -- System imports.
27 import Data.Either ( rights )
28 import Data.String.Utils ( replace )
29 import Data.Time.Clock ( UTCTime )
30 import Database.Groundhog (
31 countAll,
32 insert_,
33 migrate,
34 runMigration,
35 silentMigrationLogger )
36 import Database.Groundhog.Generic ( runDbConn )
37 import Database.Groundhog.Sqlite ( withSqliteConn )
38 import Database.Groundhog.TH (
39 defaultCodegenConfig,
40 groundhog,
41 mkPersist )
42 import Test.Tasty ( TestTree, testGroup )
43 import Test.Tasty.HUnit ( (@?=), testCase )
44 import Text.XML.HXT.Core ( XmlTree )
45 import Text.XML.HXT.DOM.ShowXml ( xshow )
46
47 -- Local imports.
48 import TSN.DbImport (
49 DbImport(..),
50 ImportResult(..),
51 run_dbmigrate )
52 import TSN.Parse (
53 ParseError,
54 parse_message,
55 parse_xmlfid,
56 parse_xml_time_stamp )
57 import Xml ( unsafe_read_document )
58
59
60 -- | The DTDs for everything that we consider \"Sport Info.\"
61 --
62 dtds :: [String]
63 dtds =
64 [ "CBASK_3PPctXML.dtd",
65 "Cbask_All_Tourn_Teams_XML.dtd",
66 "CBASK_AssistsXML.dtd",
67 "Cbask_Awards_XML.dtd",
68 "CBASK_BlocksXML.dtd",
69 "Cbask_Conf_Standings_XML.dtd",
70 "Cbask_DivII_III_Indv_Stats_XML.dtd",
71 "Cbask_DivII_Team_Stats_XML.dtd",
72 "Cbask_DivIII_Team_Stats_XML.dtd",
73 "CBASK_FGPctXML.dtd",
74 "CBASK_FoulsXML.dtd",
75 "CBASK_FTPctXML.dtd",
76 "Cbask_Indv_No_Avg_XML.dtd",
77 "Cbask_Indv_Scoring_XML.dtd",
78 "Cbask_Indv_Shooting_XML.dtd",
79 "CBASK_MinutesXML.dtd",
80 "Cbask_Polls_XML.dtd",
81 "CBASK_ReboundsXML.dtd",
82 "CBASK_ScoringLeadersXML.dtd",
83 "Cbask_Team_ThreePT_Made_XML.dtd",
84 "Cbask_Team_ThreePT_PCT_XML.dtd",
85 "Cbask_Team_Win_Pct_XML.dtd",
86 "Cbask_Top_Twenty_Five_XML.dtd",
87 "CBASK_TopTwentyFiveResult_XML.dtd",
88 "Cbask_Tourn_Awards_XML.dtd",
89 "Cbask_Tourn_Champs_XML.dtd",
90 "Cbask_Tourn_Indiv_XML.dtd",
91 "Cbask_Tourn_Leaders_XML.dtd",
92 "Cbask_Tourn_MVP_XML.dtd",
93 "Cbask_Tourn_Records_XML.dtd",
94 "LeagueScheduleXML.dtd",
95 "minorscoresxml.dtd",
96 "Minor_Baseball_League_Leaders_XML.dtd",
97 "Minor_Baseball_Standings_XML.dtd",
98 "Minor_Baseball_Transactions_XML.dtd",
99 "mlbbattingavgxml.dtd",
100 "mlbdoublesleadersxml.dtd",
101 "MLBGamesPlayedXML.dtd",
102 "MLBGIDPXML.dtd",
103 "MLBHitByPitchXML.dtd",
104 "mlbhitsleadersxml.dtd",
105 "mlbhomerunsxml.dtd",
106 "MLBHRFreqXML.dtd",
107 "MLBIntWalksXML.dtd",
108 "MLBKORateXML.dtd",
109 "mlbonbasepctxml.dtd",
110 "MLBOPSXML.dtd",
111 "MLBPlateAppsXML.dtd",
112 "mlbrbisxml.dtd",
113 "mlbrunsleadersxml.dtd",
114 "MLBSacFliesXML.dtd",
115 "MLBSacrificesXML.dtd",
116 "MLBSBSuccessXML.dtd",
117 "mlbsluggingpctxml.dtd",
118 "mlbstandxml.dtd",
119 "mlbstandxml_preseason.dtd",
120 "mlbstolenbasexml.dtd",
121 "mlbtotalbasesleadersxml.dtd",
122 "mlbtriplesleadersxml.dtd",
123 "MLBWalkRateXML.dtd",
124 "mlbwalksleadersxml.dtd",
125 "MLBXtraBaseHitsXML.dtd",
126 "MLB_ERA_Leaders.dtd",
127 "MLB_Fielding_XML.dtd",
128 "MLB_Pitching_Appearances_Leaders.dtd",
129 "MLB_Pitching_Balks_Leaders.dtd",
130 "MLB_Pitching_CG_Leaders.dtd",
131 "MLB_Pitching_ER_Allowed_Leaders.dtd",
132 "MLB_Pitching_Hits_Allowed_Leaders.dtd",
133 "MLB_Pitching_Hit_Batters_Leaders.dtd",
134 "MLB_Pitching_HR_Allowed_Leaders.dtd",
135 "MLB_Pitching_IP_Leaders.dtd",
136 "MLB_Pitching_Runs_Allowed_Leaders.dtd",
137 "MLB_Pitching_Saves_Leaders.dtd",
138 "MLB_Pitching_Shut_Outs_Leaders.dtd",
139 "MLB_Pitching_Starts_Leaders.dtd",
140 "MLB_Pitching_Strike_Outs_Leaders.dtd",
141 "MLB_Pitching_Walks_Leaders.dtd",
142 "MLB_Pitching_WHIP_Leaders.dtd",
143 "MLB_Pitching_Wild_Pitches_Leaders.dtd",
144 "MLB_Pitching_Win_Percentage_Leaders.dtd",
145 "MLB_Pitching_WL_Leaders.dtd",
146 "NBA_Team_Stats_XML.dtd",
147 "NBA3PPctXML.dtd",
148 "NBAAssistsXML.dtd",
149 "NBABlocksXML.dtd",
150 "nbaconfrecxml.dtd",
151 "nbadaysxml.dtd",
152 "nbadivisionsxml.dtd",
153 "NBAFGPctXML.dtd",
154 "NBAFoulsXML.dtd",
155 "NBAFTPctXML.dtd",
156 "NBAMinutesXML.dtd",
157 "NBAReboundsXML.dtd",
158 "NBAScorersXML.dtd",
159 "nbastandxml.dtd",
160 "NBAStealsXML.dtd",
161 "nbateamleadersxml.dtd",
162 "nbatripledoublexml.dtd",
163 "NBATurnoversXML.dtd",
164 "NCAA_Conference_Schedule_XML.dtd",
165 "nflfirstdownxml.dtd",
166 "NFLFumbleLeaderXML.dtd",
167 "NFLGrassTurfDomeOutsideXML.dtd",
168 "NFLGiveTakeXML.dtd",
169 "NFLInside20XML.dtd",
170 "NFLInterceptionLeadersXML.dtd",
171 "NFLKickoffsXML.dtd",
172 "NFLMondayNightXML.dtd",
173 "NFLPassingLeadersXML.dtd",
174 "NFLPassLeadXML.dtd",
175 "NFLQBStartsXML.dtd",
176 "NFLReceivingLeadersXML.dtd",
177 "NFLRushingLeadersXML.dtd",
178 "NFLSackLeadersXML.dtd",
179 "nflstandxml.dtd",
180 "NFLTackleFFLeadersXML.dtd",
181 "NFLTeamRankingsXML.dtd",
182 "NFLTopKickoffReturnXML.dtd",
183 "NFLTopPerformanceXML.dtd",
184 "NFLTopPuntReturnXML.dtd",
185 "NFLTotalYardageXML.dtd",
186 "NFLYardsXML.dtd",
187 "NFL_KickingLeaders_XML.dtd",
188 "NFL_NBA_Draft_XML.dtd",
189 "NFL_PuntingLeaders_XML.dtd",
190 "NFL_Roster_XML.dtd",
191 "NFL_Team_Stats_XML.dtd",
192 "Transactions_XML.dtd",
193 "Weekly_Sched_XML.dtd",
194 "WNBA_Team_Leaders_XML.dtd",
195 "WNBA3PPctXML.dtd",
196 "WNBAAssistsXML.dtd",
197 "WNBABlocksXML.dtd",
198 "WNBAFGPctXML.dtd",
199 "WNBAFoulsXML.dtd",
200 "WNBAFTPctXML.dtd",
201 "WNBAMinutesXML.dtd",
202 "WNBAReboundsXML.dtd",
203 "WNBAScorersXML.dtd",
204 "wnbastandxml.dtd",
205 "WNBAStealsXML.dtd",
206 "WNBATurnoversXML.dtd" ]
207
208
209 -- | This serves as both the database and XML representation of a
210 -- SportInfo \<message\>.
211 --
212 data SportInfo =
213 SportInfo {
214 dtd :: String,
215 xml_file_id :: Int,
216 time_stamp :: UTCTime,
217 xml :: String }
218 deriving (Eq, Show)
219
220
221 -- | Attempt to parse a 'SportInfo' from an 'XmlTree'. If we cannot,
222 -- we fail with an error message.
223 --
224 parse_xml :: String -> XmlTree -> Either ParseError SportInfo
225 parse_xml dtdname xmltree = do
226 xmlfid <- parse_xmlfid xmltree
227 timestamp <- parse_xml_time_stamp xmltree
228 message <- parse_message xmltree
229 return $ SportInfo dtdname xmlfid timestamp (xshow [message])
230
231
232 --
233 -- Database code
234 --
235
236 instance DbImport SportInfo where
237 dbmigrate _ =
238 run_dbmigrate $ migrate (undefined :: SportInfo)
239
240 -- | We import a 'SportInfo' by inserting the whole thing at
241 -- once. Nothing fancy going on here.
242 dbimport msg = do
243 insert_ msg
244 return ImportSucceeded
245
246
247 -- | The database schema for SportInfo is trivial; all we need is for
248 -- the XML_File_ID to be unique.
249 --
250 mkPersist defaultCodegenConfig [groundhog|
251 - entity: SportInfo
252 dbName: sport_info
253 constructors:
254 - name: SportInfo
255 uniques:
256 - name: unique_sport_info
257 type: constraint
258 # Prevent multiple imports of the same message.
259 fields: [xml_file_id]
260 |]
261
262
263 --
264 -- Tasty Tests
265 --
266
267 -- | A list of all tests for this module.
268 --
269 sport_info_tests :: TestTree
270 sport_info_tests =
271 testGroup
272 "SportInfo tests"
273 [ test_accessors,
274 test_parse_xml_succeeds,
275 test_dbimport_succeeds ]
276
277
278 -- | Make sure the accessors work and that we can parse one file. Ok,
279 -- so the real point of this is to make the unused fields (dtd, xml,
280 -- ...) warning go away without having to mangle the groundhog code.
281 --
282 test_accessors :: TestTree
283 test_accessors = testCase "we can access a parsed sport_info" $ do
284 xmltree <- unsafe_read_document "test/xml/sportinfo/wnbastandxml.xml"
285 let Right t = parse_xml "wnbastandxml.dtd" xmltree
286 let a1 = dtd t
287 let ex1 = "wnbastandxml.dtd"
288 let a2 = xml_file_id t
289 let ex2 = 2011
290 let a3 = show $ time_stamp t
291 let ex3 = "2009-09-27 19:50:00 UTC"
292 let a4 = take 9 (xml t)
293 let ex4 = "<message>"
294 let actual = (a1,a2,a3,a4)
295 let expected = (ex1,ex2,ex3,ex4)
296 actual @?= expected
297
298
299 -- | Sample XML documents for SportInfo types.
300 --
301 sport_info_test_files :: [FilePath]
302 sport_info_test_files =
303 map (change_suffix . add_path) dtds
304 where
305 add_path = ("test/xml/sportinfo/" ++ )
306 change_suffix = replace ".dtd" ".xml"
307
308
309 -- | Make sure we can parse every element of 'sport_info_test_files'.
310 --
311 test_parse_xml_succeeds :: TestTree
312 test_parse_xml_succeeds =
313 testGroup "parse_xml" $ map check sport_info_test_files
314 where
315 check t = testCase t $ do
316 x <- unsafe_read_document t
317 let result = parse_xml "dummy" x
318 let actual = case result of -- isRight appears in base-4.7
319 Left _ -> False
320 Right _ -> True
321 let expected = True
322 actual @?= expected
323
324
325 -- | Ensure that each element of 'sport_info_test_files' can be imported
326 -- by counting the total number of database records (after
327 -- importing) and comparing it against the length of
328 -- 'sport_info_test_files'.
329 --
330 test_dbimport_succeeds :: TestTree
331 test_dbimport_succeeds = testCase "dbimport succeeds" $ do
332 xmltrees <- mapM unsafe_read_document sport_info_test_files
333 let msgs = rights $ map (parse_xml "dummy") xmltrees
334 actual <- withSqliteConn ":memory:" $ runDbConn $ do
335 runMigration silentMigrationLogger $
336 migrate (undefined :: SportInfo)
337 mapM_ dbimport msgs
338 countAll (undefined :: SportInfo)
339
340 actual @?= expected
341 where
342 expected = length sport_info_test_files