]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/SportInfo.hs
1fcd3abb4ed91063c93786849d60429bc3a56e1b
[dead/htsn-import.git] / src / TSN / XML / SportInfo.hs
1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE QuasiQuotes #-}
4 {-# LANGUAGE RecordWildCards #-}
5 {-# LANGUAGE TemplateHaskell #-}
6 {-# LANGUAGE TypeFamilies #-}
7
8 -- | SportInfo represents a collection of DTDs that we don't really
9 -- handle but want to make available. The raw XML gets stored in the
10 -- database along with the XML_File_ID, but we don't parse any of it.
11 --
12 -- This is almost completely redundant with "TSN.XML.GameInfo", but
13 -- the redundancy is necessary: we need separate message types so
14 -- that we can have separate 'DbImport' instances. It would take
15 -- more code/work to abstract (if it's even possible) than to
16 -- duplicate.
17 --
18 module TSN.XML.SportInfo (
19 dtds,
20 parse_xml,
21 sport_info_tests,
22 -- * WARNING: these are private but exported to silence warnings
23 SportInfoConstructor(..) )
24 where
25
26 -- System imports.
27 import Data.Either ( rights )
28 import Data.String.Utils ( replace )
29 import Data.Time.Clock ( UTCTime )
30 import Database.Groundhog (
31 countAll,
32 insert_,
33 migrate,
34 runMigration,
35 silentMigrationLogger )
36 import Database.Groundhog.Generic ( runDbConn )
37 import Database.Groundhog.Sqlite ( withSqliteConn )
38 import Database.Groundhog.TH (
39 defaultCodegenConfig,
40 groundhog,
41 mkPersist )
42 import Test.Tasty ( TestTree, testGroup )
43 import Test.Tasty.HUnit ( (@?=), testCase )
44 import Text.XML.HXT.Core ( XmlTree )
45 import Text.XML.HXT.DOM.ShowXml ( xshow )
46
47 -- Local imports.
48 import TSN.DbImport (
49 DbImport(..),
50 ImportResult(..),
51 run_dbmigrate )
52 import TSN.Parse (
53 ParseError,
54 parse_message,
55 parse_xmlfid,
56 parse_xml_time_stamp )
57 import Xml ( unsafe_read_document )
58
59
60 -- | The DTDs for everything that we consider \"Sport Info.\"
61 --
62 dtds :: [String]
63 dtds =
64 [ "CBASK_3PPctXML.dtd",
65 "Cbask_All_Tourn_Teams_XML.dtd",
66 "CBASK_AssistsXML.dtd",
67 "Cbask_Awards_XML.dtd",
68 "CBASK_BlocksXML.dtd",
69 "Cbask_Conf_Standings_XML.dtd",
70 "Cbask_DivII_III_Indv_Stats_XML.dtd",
71 "Cbask_DivII_Team_Stats_XML.dtd",
72 "Cbask_DivIII_Team_Stats_XML.dtd",
73 "CBASK_FGPctXML.dtd",
74 "CBASK_FoulsXML.dtd",
75 "CBASK_FTPctXML.dtd",
76 "Cbask_Indv_No_Avg_XML.dtd",
77 "Cbask_Indv_Scoring_XML.dtd",
78 "Cbask_Indv_Shooting_XML.dtd",
79 "CBASK_MinutesXML.dtd",
80 "Cbask_Polls_XML.dtd",
81 "CBASK_ReboundsXML.dtd",
82 "CBASK_ScoringLeadersXML.dtd",
83 "Cbask_Team_Scoring_Rebound_Margin_XML.dtd",
84 "Cbask_Team_Scoring_XML.dtd",
85 "Cbask_Team_Shooting_Pct_XML.dtd",
86 "Cbask_Team_ThreePT_Made_XML.dtd",
87 "Cbask_Team_ThreePT_PCT_XML.dtd",
88 "Cbask_Team_Win_Pct_XML.dtd",
89 "Cbask_Top_Twenty_Five_XML.dtd",
90 "CBASK_TopTwentyFiveResult_XML.dtd",
91 "Cbask_Tourn_Awards_XML.dtd",
92 "Cbask_Tourn_Champs_XML.dtd",
93 "Cbask_Tourn_Indiv_XML.dtd",
94 "Cbask_Tourn_Leaders_XML.dtd",
95 "Cbask_Tourn_MVP_XML.dtd",
96 "Cbask_Tourn_Records_XML.dtd",
97 "LeagueScheduleXML.dtd",
98 "minorscoresxml.dtd",
99 "Minor_Baseball_League_Leaders_XML.dtd",
100 "Minor_Baseball_Standings_XML.dtd",
101 "Minor_Baseball_Transactions_XML.dtd",
102 "mlbbattingavgxml.dtd",
103 "mlbdoublesleadersxml.dtd",
104 "MLBGamesPlayedXML.dtd",
105 "MLBGIDPXML.dtd",
106 "MLBHitByPitchXML.dtd",
107 "mlbhitsleadersxml.dtd",
108 "mlbhomerunsxml.dtd",
109 "MLBHRFreqXML.dtd",
110 "MLBIntWalksXML.dtd",
111 "MLBKORateXML.dtd",
112 "mlbonbasepctxml.dtd",
113 "MLBOPSXML.dtd",
114 "MLBPlateAppsXML.dtd",
115 "mlbrbisxml.dtd",
116 "mlbrunsleadersxml.dtd",
117 "MLBSacFliesXML.dtd",
118 "MLBSacrificesXML.dtd",
119 "MLBSBSuccessXML.dtd",
120 "mlbsluggingpctxml.dtd",
121 "mlbstandxml.dtd",
122 "mlbstandxml_preseason.dtd",
123 "mlbstolenbasexml.dtd",
124 "mlbtotalbasesleadersxml.dtd",
125 "mlbtriplesleadersxml.dtd",
126 "MLBWalkRateXML.dtd",
127 "mlbwalksleadersxml.dtd",
128 "MLBXtraBaseHitsXML.dtd",
129 "MLB_ERA_Leaders.dtd",
130 "MLB_Fielding_XML.dtd",
131 "MLB_Pitching_Appearances_Leaders.dtd",
132 "MLB_Pitching_Balks_Leaders.dtd",
133 "MLB_Pitching_CG_Leaders.dtd",
134 "MLB_Pitching_ER_Allowed_Leaders.dtd",
135 "MLB_Pitching_Hits_Allowed_Leaders.dtd",
136 "MLB_Pitching_Hit_Batters_Leaders.dtd",
137 "MLB_Pitching_HR_Allowed_Leaders.dtd",
138 "MLB_Pitching_IP_Leaders.dtd",
139 "MLB_Pitching_Runs_Allowed_Leaders.dtd",
140 "MLB_Pitching_Saves_Leaders.dtd",
141 "MLB_Pitching_Shut_Outs_Leaders.dtd",
142 "MLB_Pitching_Starts_Leaders.dtd",
143 "MLB_Pitching_Strike_Outs_Leaders.dtd",
144 "MLB_Pitching_Walks_Leaders.dtd",
145 "MLB_Pitching_WHIP_Leaders.dtd",
146 "MLB_Pitching_Wild_Pitches_Leaders.dtd",
147 "MLB_Pitching_Win_Percentage_Leaders.dtd",
148 "MLB_Pitching_WL_Leaders.dtd",
149 "NBA_Team_Stats_XML.dtd",
150 "NBA3PPctXML.dtd",
151 "NBAAssistsXML.dtd",
152 "NBABlocksXML.dtd",
153 "nbaconfrecxml.dtd",
154 "nbadaysxml.dtd",
155 "nbadivisionsxml.dtd",
156 "NBAFGPctXML.dtd",
157 "NBAFoulsXML.dtd",
158 "NBAFTPctXML.dtd",
159 "NBAMinutesXML.dtd",
160 "NBAReboundsXML.dtd",
161 "NBAScorersXML.dtd",
162 "nbastandxml.dtd",
163 "NBAStealsXML.dtd",
164 "nbateamleadersxml.dtd",
165 "nbatripledoublexml.dtd",
166 "NBATurnoversXML.dtd",
167 "NCAA_Conference_Schedule_XML.dtd",
168 "nflfirstdownxml.dtd",
169 "NFLFumbleLeaderXML.dtd",
170 "NFLGrassTurfDomeOutsideXML.dtd",
171 "NFLGiveTakeXML.dtd",
172 "NFLInside20XML.dtd",
173 "NFLInterceptionLeadersXML.dtd",
174 "NFLKickoffsXML.dtd",
175 "NFLMondayNightXML.dtd",
176 "NFLPassingLeadersXML.dtd",
177 "NFLPassLeadXML.dtd",
178 "NFLQBStartsXML.dtd",
179 "NFLReceivingLeadersXML.dtd",
180 "NFLRushingLeadersXML.dtd",
181 "NFLSackLeadersXML.dtd",
182 "nflstandxml.dtd",
183 "NFLTackleFFLeadersXML.dtd",
184 "NFLTeamRankingsXML.dtd",
185 "NFLTopKickoffReturnXML.dtd",
186 "NFLTopPerformanceXML.dtd",
187 "NFLTopPuntReturnXML.dtd",
188 "NFLTotalYardageXML.dtd",
189 "NFLYardsXML.dtd",
190 "NFL_KickingLeaders_XML.dtd",
191 "NFL_NBA_Draft_XML.dtd",
192 "NFL_PuntingLeaders_XML.dtd",
193 "NFL_Roster_XML.dtd",
194 "NFL_Team_Stats_XML.dtd",
195 "Transactions_XML.dtd",
196 "Weekly_Sched_XML.dtd",
197 "WNBA_Team_Leaders_XML.dtd",
198 "WNBA3PPctXML.dtd",
199 "WNBAAssistsXML.dtd",
200 "WNBABlocksXML.dtd",
201 "WNBAFGPctXML.dtd",
202 "WNBAFoulsXML.dtd",
203 "WNBAFTPctXML.dtd",
204 "WNBAMinutesXML.dtd",
205 "WNBAReboundsXML.dtd",
206 "WNBAScorersXML.dtd",
207 "wnbastandxml.dtd",
208 "WNBAStealsXML.dtd",
209 "WNBATurnoversXML.dtd" ]
210
211
212 -- | This serves as both the database and XML representation of a
213 -- SportInfo \<message\>.
214 --
215 data SportInfo =
216 SportInfo {
217 dtd :: String,
218 xml_file_id :: Int,
219 time_stamp :: UTCTime,
220 xml :: String }
221 deriving (Eq, Show)
222
223
224 -- | Attempt to parse a 'SportInfo' from an 'XmlTree'. If we cannot,
225 -- we fail with an error message.
226 --
227 parse_xml :: String -> XmlTree -> Either ParseError SportInfo
228 parse_xml dtdname xmltree = do
229 xmlfid <- parse_xmlfid xmltree
230 timestamp <- parse_xml_time_stamp xmltree
231 message <- parse_message xmltree
232 return $ SportInfo dtdname xmlfid timestamp (xshow [message])
233
234
235 --
236 -- Database code
237 --
238
239 instance DbImport SportInfo where
240 dbmigrate _ =
241 run_dbmigrate $ migrate (undefined :: SportInfo)
242
243 -- | We import a 'SportInfo' by inserting the whole thing at
244 -- once. Nothing fancy going on here.
245 dbimport msg = do
246 insert_ msg
247 return ImportSucceeded
248
249
250 -- | The database schema for SportInfo is trivial; all we need is for
251 -- the XML_File_ID to be unique.
252 --
253 mkPersist defaultCodegenConfig [groundhog|
254 - entity: SportInfo
255 dbName: sport_info
256 constructors:
257 - name: SportInfo
258 uniques:
259 - name: unique_sport_info
260 type: constraint
261 # Prevent multiple imports of the same message.
262 fields: [xml_file_id]
263 |]
264
265
266 --
267 -- Tasty Tests
268 --
269
270 -- | A list of all tests for this module.
271 --
272 sport_info_tests :: TestTree
273 sport_info_tests =
274 testGroup
275 "SportInfo tests"
276 [ test_accessors,
277 test_parse_xml_succeeds,
278 test_dbimport_succeeds ]
279
280
281 -- | Make sure the accessors work and that we can parse one file. Ok,
282 -- so the real point of this is to make the unused fields (dtd, xml,
283 -- ...) warning go away without having to mangle the groundhog code.
284 --
285 test_accessors :: TestTree
286 test_accessors = testCase "we can access a parsed sport_info" $ do
287 xmltree <- unsafe_read_document "test/xml/sportinfo/wnbastandxml.xml"
288 let Right t = parse_xml "wnbastandxml.dtd" xmltree
289 let a1 = dtd t
290 let ex1 = "wnbastandxml.dtd"
291 let a2 = xml_file_id t
292 let ex2 = 2011
293 let a3 = show $ time_stamp t
294 let ex3 = "2009-09-27 19:50:00 UTC"
295 let a4 = take 9 (xml t)
296 let ex4 = "<message>"
297 let actual = (a1,a2,a3,a4)
298 let expected = (ex1,ex2,ex3,ex4)
299 actual @?= expected
300
301
302 -- | Sample XML documents for SportInfo types.
303 --
304 sport_info_test_files :: [FilePath]
305 sport_info_test_files =
306 map (change_suffix . add_path) dtds
307 where
308 add_path = ("test/xml/sportinfo/" ++ )
309 change_suffix = replace ".dtd" ".xml"
310
311
312 -- | Make sure we can parse every element of 'sport_info_test_files'.
313 --
314 test_parse_xml_succeeds :: TestTree
315 test_parse_xml_succeeds =
316 testGroup "parse_xml" $ map check sport_info_test_files
317 where
318 check t = testCase t $ do
319 x <- unsafe_read_document t
320 let result = parse_xml "dummy" x
321 let actual = case result of -- isRight appears in base-4.7
322 Left _ -> False
323 Right _ -> True
324 let expected = True
325 actual @?= expected
326
327
328 -- | Ensure that each element of 'sport_info_test_files' can be imported
329 -- by counting the total number of database records (after
330 -- importing) and comparing it against the length of
331 -- 'sport_info_test_files'.
332 --
333 test_dbimport_succeeds :: TestTree
334 test_dbimport_succeeds = testCase "dbimport succeeds" $ do
335 xmltrees <- mapM unsafe_read_document sport_info_test_files
336 let msgs = rights $ map (parse_xml "dummy") xmltrees
337 actual <- withSqliteConn ":memory:" $ runDbConn $ do
338 runMigration silentMigrationLogger $
339 migrate (undefined :: SportInfo)
340 mapM_ dbimport msgs
341 countAll (undefined :: SportInfo)
342
343 actual @?= expected
344 where
345 expected = length sport_info_test_files