]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/SportInfo.hs
Add SportInfo support for NFLInterceptionLeadersXML.dtd.
[dead/htsn-import.git] / src / TSN / XML / SportInfo.hs
1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE QuasiQuotes #-}
4 {-# LANGUAGE RecordWildCards #-}
5 {-# LANGUAGE TemplateHaskell #-}
6 {-# LANGUAGE TypeFamilies #-}
7
8 -- | SportInfo represents a collection of DTDs that we don't really
9 -- handle but want to make available. The raw XML gets stored in the
10 -- database along with the XML_File_ID, but we don't parse any of it.
11 --
12 -- This is almost completely redundant with "TSN.XML.GameInfo", but
13 -- the redundancy is necessary: we need separate message types so
14 -- that we can have separate 'DbImport' instances. It would take
15 -- more code/work to abstract (if it's even possible) than to
16 -- duplicate.
17 --
18 module TSN.XML.SportInfo (
19 dtds,
20 parse_xml,
21 sport_info_tests,
22 -- * WARNING: these are private but exported to silence warnings
23 SportInfoConstructor(..) )
24 where
25
26 -- System imports.
27 import Data.Either ( rights )
28 import Data.String.Utils ( replace )
29 import Data.Time.Clock ( UTCTime )
30 import Database.Groundhog (
31 countAll,
32 insert_,
33 migrate,
34 runMigration,
35 silentMigrationLogger )
36 import Database.Groundhog.Generic ( runDbConn )
37 import Database.Groundhog.Sqlite ( withSqliteConn )
38 import Database.Groundhog.TH (
39 defaultCodegenConfig,
40 groundhog,
41 mkPersist )
42 import Test.Tasty ( TestTree, testGroup )
43 import Test.Tasty.HUnit ( (@?=), testCase )
44 import Text.XML.HXT.Core ( XmlTree )
45 import Text.XML.HXT.DOM.ShowXml ( xshow )
46
47 -- Local imports.
48 import TSN.DbImport (
49 DbImport(..),
50 ImportResult(..),
51 run_dbmigrate )
52 import TSN.Parse (
53 ParseError,
54 parse_message,
55 parse_xmlfid,
56 parse_xml_time_stamp )
57 import Xml ( unsafe_read_document )
58
59
60 -- | The DTDs for everything that we consider \"Sport Info.\"
61 --
62 dtds :: [String]
63 dtds =
64 [ "CBASK_3PPctXML.dtd",
65 "Cbask_All_Tourn_Teams_XML.dtd",
66 "CBASK_AssistsXML.dtd",
67 "Cbask_Awards_XML.dtd",
68 "CBASK_BlocksXML.dtd",
69 "Cbask_Conf_Standings_XML.dtd",
70 "Cbask_DivII_III_Indv_Stats_XML.dtd",
71 "Cbask_DivII_Team_Stats_XML.dtd",
72 "Cbask_DivIII_Team_Stats_XML.dtd",
73 "CBASK_FGPctXML.dtd",
74 "CBASK_FoulsXML.dtd",
75 "CBASK_FTPctXML.dtd",
76 "Cbask_Indv_Scoring_XML.dtd",
77 "CBASK_MinutesXML.dtd",
78 "Cbask_Polls_XML.dtd",
79 "CBASK_ReboundsXML.dtd",
80 "CBASK_ScoringLeadersXML.dtd",
81 "Cbask_Team_ThreePT_Made_XML.dtd",
82 "Cbask_Team_ThreePT_PCT_XML.dtd",
83 "Cbask_Team_Win_Pct_XML.dtd",
84 "Cbask_Top_Twenty_Five_XML.dtd",
85 "CBASK_TopTwentyFiveResult_XML.dtd",
86 "Cbask_Tourn_Awards_XML.dtd",
87 "Cbask_Tourn_Champs_XML.dtd",
88 "Cbask_Tourn_Indiv_XML.dtd",
89 "Cbask_Tourn_Leaders_XML.dtd",
90 "Cbask_Tourn_MVP_XML.dtd",
91 "Cbask_Tourn_Records_XML.dtd",
92 "LeagueScheduleXML.dtd",
93 "minorscoresxml.dtd",
94 "Minor_Baseball_League_Leaders_XML.dtd",
95 "Minor_Baseball_Standings_XML.dtd",
96 "Minor_Baseball_Transactions_XML.dtd",
97 "mlbbattingavgxml.dtd",
98 "mlbdoublesleadersxml.dtd",
99 "MLBGamesPlayedXML.dtd",
100 "MLBGIDPXML.dtd",
101 "MLBHitByPitchXML.dtd",
102 "mlbhitsleadersxml.dtd",
103 "mlbhomerunsxml.dtd",
104 "MLBHRFreqXML.dtd",
105 "MLBIntWalksXML.dtd",
106 "MLBKORateXML.dtd",
107 "mlbonbasepctxml.dtd",
108 "MLBOPSXML.dtd",
109 "MLBPlateAppsXML.dtd",
110 "mlbrbisxml.dtd",
111 "mlbrunsleadersxml.dtd",
112 "MLBSacFliesXML.dtd",
113 "MLBSacrificesXML.dtd",
114 "MLBSBSuccessXML.dtd",
115 "mlbsluggingpctxml.dtd",
116 "mlbstandxml.dtd",
117 "mlbstandxml_preseason.dtd",
118 "mlbstolenbasexml.dtd",
119 "mlbtotalbasesleadersxml.dtd",
120 "mlbtriplesleadersxml.dtd",
121 "MLBWalkRateXML.dtd",
122 "mlbwalksleadersxml.dtd",
123 "MLBXtraBaseHitsXML.dtd",
124 "MLB_ERA_Leaders.dtd",
125 "MLB_Fielding_XML.dtd",
126 "MLB_Pitching_Appearances_Leaders.dtd",
127 "MLB_Pitching_Balks_Leaders.dtd",
128 "MLB_Pitching_CG_Leaders.dtd",
129 "MLB_Pitching_ER_Allowed_Leaders.dtd",
130 "MLB_Pitching_Hits_Allowed_Leaders.dtd",
131 "MLB_Pitching_Hit_Batters_Leaders.dtd",
132 "MLB_Pitching_HR_Allowed_Leaders.dtd",
133 "MLB_Pitching_IP_Leaders.dtd",
134 "MLB_Pitching_Runs_Allowed_Leaders.dtd",
135 "MLB_Pitching_Saves_Leaders.dtd",
136 "MLB_Pitching_Shut_Outs_Leaders.dtd",
137 "MLB_Pitching_Starts_Leaders.dtd",
138 "MLB_Pitching_Strike_Outs_Leaders.dtd",
139 "MLB_Pitching_Walks_Leaders.dtd",
140 "MLB_Pitching_WHIP_Leaders.dtd",
141 "MLB_Pitching_Wild_Pitches_Leaders.dtd",
142 "MLB_Pitching_Win_Percentage_Leaders.dtd",
143 "MLB_Pitching_WL_Leaders.dtd",
144 "NBA_Team_Stats_XML.dtd",
145 "NBA3PPctXML.dtd",
146 "NBAAssistsXML.dtd",
147 "NBABlocksXML.dtd",
148 "nbaconfrecxml.dtd",
149 "nbadaysxml.dtd",
150 "nbadivisionsxml.dtd",
151 "NBAFGPctXML.dtd",
152 "NBAFoulsXML.dtd",
153 "NBAFTPctXML.dtd",
154 "NBAMinutesXML.dtd",
155 "NBAReboundsXML.dtd",
156 "NBAScorersXML.dtd",
157 "nbastandxml.dtd",
158 "NBAStealsXML.dtd",
159 "nbateamleadersxml.dtd",
160 "nbatripledoublexml.dtd",
161 "NBATurnoversXML.dtd",
162 "NCAA_Conference_Schedule_XML.dtd",
163 "nflfirstdownxml.dtd",
164 "NFLFumbleLeaderXML.dtd",
165 "NFLGrassTurfDomeOutsideXML.dtd",
166 "NFLGiveTakeXML.dtd",
167 "NFLInside20XML.dtd",
168 "NFLInterceptionLeadersXML.dtd",
169 "NFLKickoffsXML.dtd",
170 "NFLMondayNightXML.dtd",
171 "NFLPassLeadXML.dtd",
172 "NFLQBStartsXML.dtd",
173 "NFLSackLeadersXML.dtd",
174 "nflstandxml.dtd",
175 "NFLTeamRankingsXML.dtd",
176 "NFLTopPerformanceXML.dtd",
177 "NFLTotalYardageXML.dtd",
178 "NFL_KickingLeaders_XML.dtd",
179 "NFL_NBA_Draft_XML.dtd",
180 "NFL_Roster_XML.dtd",
181 "NFL_Team_Stats_XML.dtd",
182 "Transactions_XML.dtd",
183 "Weekly_Sched_XML.dtd",
184 "WNBA_Team_Leaders_XML.dtd",
185 "WNBA3PPctXML.dtd",
186 "WNBAAssistsXML.dtd",
187 "WNBABlocksXML.dtd",
188 "WNBAFGPctXML.dtd",
189 "WNBAFoulsXML.dtd",
190 "WNBAFTPctXML.dtd",
191 "WNBAMinutesXML.dtd",
192 "WNBAReboundsXML.dtd",
193 "WNBAScorersXML.dtd",
194 "wnbastandxml.dtd",
195 "WNBAStealsXML.dtd",
196 "WNBATurnoversXML.dtd" ]
197
198
199 -- | This serves as both the database and XML representation of a
200 -- SportInfo \<message\>.
201 --
202 data SportInfo =
203 SportInfo {
204 dtd :: String,
205 xml_file_id :: Int,
206 time_stamp :: UTCTime,
207 xml :: String }
208 deriving (Eq, Show)
209
210
211 -- | Attempt to parse a 'SportInfo' from an 'XmlTree'. If we cannot,
212 -- we fail with an error message.
213 --
214 parse_xml :: String -> XmlTree -> Either ParseError SportInfo
215 parse_xml dtdname xmltree = do
216 xmlfid <- parse_xmlfid xmltree
217 timestamp <- parse_xml_time_stamp xmltree
218 message <- parse_message xmltree
219 return $ SportInfo dtdname xmlfid timestamp (xshow [message])
220
221
222 --
223 -- Database code
224 --
225
226 instance DbImport SportInfo where
227 dbmigrate _ =
228 run_dbmigrate $ migrate (undefined :: SportInfo)
229
230 -- | We import a 'SportInfo' by inserting the whole thing at
231 -- once. Nothing fancy going on here.
232 dbimport msg = do
233 insert_ msg
234 return ImportSucceeded
235
236
237 -- | The database schema for SportInfo is trivial; all we need is for
238 -- the XML_File_ID to be unique.
239 --
240 mkPersist defaultCodegenConfig [groundhog|
241 - entity: SportInfo
242 dbName: sport_info
243 constructors:
244 - name: SportInfo
245 uniques:
246 - name: unique_sport_info
247 type: constraint
248 # Prevent multiple imports of the same message.
249 fields: [xml_file_id]
250 |]
251
252
253 --
254 -- Tasty Tests
255 --
256
257 -- | A list of all tests for this module.
258 --
259 sport_info_tests :: TestTree
260 sport_info_tests =
261 testGroup
262 "SportInfo tests"
263 [ test_accessors,
264 test_parse_xml_succeeds,
265 test_dbimport_succeeds ]
266
267
268 -- | Make sure the accessors work and that we can parse one file. Ok,
269 -- so the real point of this is to make the unused fields (dtd, xml,
270 -- ...) warning go away without having to mangle the groundhog code.
271 --
272 test_accessors :: TestTree
273 test_accessors = testCase "we can access a parsed sport_info" $ do
274 xmltree <- unsafe_read_document "test/xml/sportinfo/wnbastandxml.xml"
275 let Right t = parse_xml "wnbastandxml.dtd" xmltree
276 let a1 = dtd t
277 let ex1 = "wnbastandxml.dtd"
278 let a2 = xml_file_id t
279 let ex2 = 2011
280 let a3 = show $ time_stamp t
281 let ex3 = "2009-09-27 19:50:00 UTC"
282 let a4 = take 9 (xml t)
283 let ex4 = "<message>"
284 let actual = (a1,a2,a3,a4)
285 let expected = (ex1,ex2,ex3,ex4)
286 actual @?= expected
287
288
289 -- | Sample XML documents for SportInfo types.
290 --
291 sport_info_test_files :: [FilePath]
292 sport_info_test_files =
293 map (change_suffix . add_path) dtds
294 where
295 add_path = ("test/xml/sportinfo/" ++ )
296 change_suffix = replace ".dtd" ".xml"
297
298
299 -- | Make sure we can parse every element of 'sport_info_test_files'.
300 --
301 test_parse_xml_succeeds :: TestTree
302 test_parse_xml_succeeds =
303 testGroup "parse_xml" $ map check sport_info_test_files
304 where
305 check t = testCase t $ do
306 x <- unsafe_read_document t
307 let result = parse_xml "dummy" x
308 let actual = case result of -- isRight appears in base-4.7
309 Left _ -> False
310 Right _ -> True
311 let expected = True
312 actual @?= expected
313
314
315 -- | Ensure that each element of 'sport_info_test_files' can be imported
316 -- by counting the total number of database records (after
317 -- importing) and comparing it against the length of
318 -- 'sport_info_test_files'.
319 --
320 test_dbimport_succeeds :: TestTree
321 test_dbimport_succeeds = testCase "dbimport succeeds" $ do
322 xmltrees <- mapM unsafe_read_document sport_info_test_files
323 let msgs = rights $ map (parse_xml "dummy") xmltrees
324 actual <- withSqliteConn ":memory:" $ runDbConn $ do
325 runMigration silentMigrationLogger $
326 migrate (undefined :: SportInfo)
327 mapM_ dbimport msgs
328 countAll (undefined :: SportInfo)
329
330 actual @?= expected
331 where
332 expected = length sport_info_test_files