]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/SportInfo.hs
Add SportInfo support for NFLTopPuntReturnXML.dtd.
[dead/htsn-import.git] / src / TSN / XML / SportInfo.hs
1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE QuasiQuotes #-}
4 {-# LANGUAGE RecordWildCards #-}
5 {-# LANGUAGE TemplateHaskell #-}
6 {-# LANGUAGE TypeFamilies #-}
7
8 -- | SportInfo represents a collection of DTDs that we don't really
9 -- handle but want to make available. The raw XML gets stored in the
10 -- database along with the XML_File_ID, but we don't parse any of it.
11 --
12 -- This is almost completely redundant with "TSN.XML.GameInfo", but
13 -- the redundancy is necessary: we need separate message types so
14 -- that we can have separate 'DbImport' instances. It would take
15 -- more code/work to abstract (if it's even possible) than to
16 -- duplicate.
17 --
18 module TSN.XML.SportInfo (
19 dtds,
20 parse_xml,
21 sport_info_tests,
22 -- * WARNING: these are private but exported to silence warnings
23 SportInfoConstructor(..) )
24 where
25
26 -- System imports.
27 import Data.Either ( rights )
28 import Data.String.Utils ( replace )
29 import Data.Time.Clock ( UTCTime )
30 import Database.Groundhog (
31 countAll,
32 insert_,
33 migrate,
34 runMigration,
35 silentMigrationLogger )
36 import Database.Groundhog.Generic ( runDbConn )
37 import Database.Groundhog.Sqlite ( withSqliteConn )
38 import Database.Groundhog.TH (
39 defaultCodegenConfig,
40 groundhog,
41 mkPersist )
42 import Test.Tasty ( TestTree, testGroup )
43 import Test.Tasty.HUnit ( (@?=), testCase )
44 import Text.XML.HXT.Core ( XmlTree )
45 import Text.XML.HXT.DOM.ShowXml ( xshow )
46
47 -- Local imports.
48 import TSN.DbImport (
49 DbImport(..),
50 ImportResult(..),
51 run_dbmigrate )
52 import TSN.Parse (
53 ParseError,
54 parse_message,
55 parse_xmlfid,
56 parse_xml_time_stamp )
57 import Xml ( unsafe_read_document )
58
59
60 -- | The DTDs for everything that we consider \"Sport Info.\"
61 --
62 dtds :: [String]
63 dtds =
64 [ "CBASK_3PPctXML.dtd",
65 "Cbask_All_Tourn_Teams_XML.dtd",
66 "CBASK_AssistsXML.dtd",
67 "Cbask_Awards_XML.dtd",
68 "CBASK_BlocksXML.dtd",
69 "Cbask_Conf_Standings_XML.dtd",
70 "Cbask_DivII_III_Indv_Stats_XML.dtd",
71 "Cbask_DivII_Team_Stats_XML.dtd",
72 "Cbask_DivIII_Team_Stats_XML.dtd",
73 "CBASK_FGPctXML.dtd",
74 "CBASK_FoulsXML.dtd",
75 "CBASK_FTPctXML.dtd",
76 "Cbask_Indv_Scoring_XML.dtd",
77 "CBASK_MinutesXML.dtd",
78 "Cbask_Polls_XML.dtd",
79 "CBASK_ReboundsXML.dtd",
80 "CBASK_ScoringLeadersXML.dtd",
81 "Cbask_Team_ThreePT_Made_XML.dtd",
82 "Cbask_Team_ThreePT_PCT_XML.dtd",
83 "Cbask_Team_Win_Pct_XML.dtd",
84 "Cbask_Top_Twenty_Five_XML.dtd",
85 "CBASK_TopTwentyFiveResult_XML.dtd",
86 "Cbask_Tourn_Awards_XML.dtd",
87 "Cbask_Tourn_Champs_XML.dtd",
88 "Cbask_Tourn_Indiv_XML.dtd",
89 "Cbask_Tourn_Leaders_XML.dtd",
90 "Cbask_Tourn_MVP_XML.dtd",
91 "Cbask_Tourn_Records_XML.dtd",
92 "LeagueScheduleXML.dtd",
93 "minorscoresxml.dtd",
94 "Minor_Baseball_League_Leaders_XML.dtd",
95 "Minor_Baseball_Standings_XML.dtd",
96 "Minor_Baseball_Transactions_XML.dtd",
97 "mlbbattingavgxml.dtd",
98 "mlbdoublesleadersxml.dtd",
99 "MLBGamesPlayedXML.dtd",
100 "MLBGIDPXML.dtd",
101 "MLBHitByPitchXML.dtd",
102 "mlbhitsleadersxml.dtd",
103 "mlbhomerunsxml.dtd",
104 "MLBHRFreqXML.dtd",
105 "MLBIntWalksXML.dtd",
106 "MLBKORateXML.dtd",
107 "mlbonbasepctxml.dtd",
108 "MLBOPSXML.dtd",
109 "MLBPlateAppsXML.dtd",
110 "mlbrbisxml.dtd",
111 "mlbrunsleadersxml.dtd",
112 "MLBSacFliesXML.dtd",
113 "MLBSacrificesXML.dtd",
114 "MLBSBSuccessXML.dtd",
115 "mlbsluggingpctxml.dtd",
116 "mlbstandxml.dtd",
117 "mlbstandxml_preseason.dtd",
118 "mlbstolenbasexml.dtd",
119 "mlbtotalbasesleadersxml.dtd",
120 "mlbtriplesleadersxml.dtd",
121 "MLBWalkRateXML.dtd",
122 "mlbwalksleadersxml.dtd",
123 "MLBXtraBaseHitsXML.dtd",
124 "MLB_ERA_Leaders.dtd",
125 "MLB_Fielding_XML.dtd",
126 "MLB_Pitching_Appearances_Leaders.dtd",
127 "MLB_Pitching_Balks_Leaders.dtd",
128 "MLB_Pitching_CG_Leaders.dtd",
129 "MLB_Pitching_ER_Allowed_Leaders.dtd",
130 "MLB_Pitching_Hits_Allowed_Leaders.dtd",
131 "MLB_Pitching_Hit_Batters_Leaders.dtd",
132 "MLB_Pitching_HR_Allowed_Leaders.dtd",
133 "MLB_Pitching_IP_Leaders.dtd",
134 "MLB_Pitching_Runs_Allowed_Leaders.dtd",
135 "MLB_Pitching_Saves_Leaders.dtd",
136 "MLB_Pitching_Shut_Outs_Leaders.dtd",
137 "MLB_Pitching_Starts_Leaders.dtd",
138 "MLB_Pitching_Strike_Outs_Leaders.dtd",
139 "MLB_Pitching_Walks_Leaders.dtd",
140 "MLB_Pitching_WHIP_Leaders.dtd",
141 "MLB_Pitching_Wild_Pitches_Leaders.dtd",
142 "MLB_Pitching_Win_Percentage_Leaders.dtd",
143 "MLB_Pitching_WL_Leaders.dtd",
144 "NBA_Team_Stats_XML.dtd",
145 "NBA3PPctXML.dtd",
146 "NBAAssistsXML.dtd",
147 "NBABlocksXML.dtd",
148 "nbaconfrecxml.dtd",
149 "nbadaysxml.dtd",
150 "nbadivisionsxml.dtd",
151 "NBAFGPctXML.dtd",
152 "NBAFoulsXML.dtd",
153 "NBAFTPctXML.dtd",
154 "NBAMinutesXML.dtd",
155 "NBAReboundsXML.dtd",
156 "NBAScorersXML.dtd",
157 "nbastandxml.dtd",
158 "NBAStealsXML.dtd",
159 "nbateamleadersxml.dtd",
160 "nbatripledoublexml.dtd",
161 "NBATurnoversXML.dtd",
162 "NCAA_Conference_Schedule_XML.dtd",
163 "nflfirstdownxml.dtd",
164 "NFLFumbleLeaderXML.dtd",
165 "NFLGrassTurfDomeOutsideXML.dtd",
166 "NFLGiveTakeXML.dtd",
167 "NFLInside20XML.dtd",
168 "NFLInterceptionLeadersXML.dtd",
169 "NFLKickoffsXML.dtd",
170 "NFLMondayNightXML.dtd",
171 "NFLPassingLeadersXML.dtd",
172 "NFLPassLeadXML.dtd",
173 "NFLQBStartsXML.dtd",
174 "NFLReceivingLeadersXML.dtd",
175 "NFLRushingLeadersXML.dtd",
176 "NFLSackLeadersXML.dtd",
177 "nflstandxml.dtd",
178 "NFLTeamRankingsXML.dtd",
179 "NFLTopKickoffReturnXML.dtd",
180 "NFLTopPerformanceXML.dtd",
181 "NFLTopPuntReturnXML.dtd",
182 "NFLTotalYardageXML.dtd",
183 "NFL_KickingLeaders_XML.dtd",
184 "NFL_NBA_Draft_XML.dtd",
185 "NFL_Roster_XML.dtd",
186 "NFLTackleFFLeadersXML.dtd",
187 "NFL_Team_Stats_XML.dtd",
188 "Transactions_XML.dtd",
189 "Weekly_Sched_XML.dtd",
190 "WNBA_Team_Leaders_XML.dtd",
191 "WNBA3PPctXML.dtd",
192 "WNBAAssistsXML.dtd",
193 "WNBABlocksXML.dtd",
194 "WNBAFGPctXML.dtd",
195 "WNBAFoulsXML.dtd",
196 "WNBAFTPctXML.dtd",
197 "WNBAMinutesXML.dtd",
198 "WNBAReboundsXML.dtd",
199 "WNBAScorersXML.dtd",
200 "wnbastandxml.dtd",
201 "WNBAStealsXML.dtd",
202 "WNBATurnoversXML.dtd" ]
203
204
205 -- | This serves as both the database and XML representation of a
206 -- SportInfo \<message\>.
207 --
208 data SportInfo =
209 SportInfo {
210 dtd :: String,
211 xml_file_id :: Int,
212 time_stamp :: UTCTime,
213 xml :: String }
214 deriving (Eq, Show)
215
216
217 -- | Attempt to parse a 'SportInfo' from an 'XmlTree'. If we cannot,
218 -- we fail with an error message.
219 --
220 parse_xml :: String -> XmlTree -> Either ParseError SportInfo
221 parse_xml dtdname xmltree = do
222 xmlfid <- parse_xmlfid xmltree
223 timestamp <- parse_xml_time_stamp xmltree
224 message <- parse_message xmltree
225 return $ SportInfo dtdname xmlfid timestamp (xshow [message])
226
227
228 --
229 -- Database code
230 --
231
232 instance DbImport SportInfo where
233 dbmigrate _ =
234 run_dbmigrate $ migrate (undefined :: SportInfo)
235
236 -- | We import a 'SportInfo' by inserting the whole thing at
237 -- once. Nothing fancy going on here.
238 dbimport msg = do
239 insert_ msg
240 return ImportSucceeded
241
242
243 -- | The database schema for SportInfo is trivial; all we need is for
244 -- the XML_File_ID to be unique.
245 --
246 mkPersist defaultCodegenConfig [groundhog|
247 - entity: SportInfo
248 dbName: sport_info
249 constructors:
250 - name: SportInfo
251 uniques:
252 - name: unique_sport_info
253 type: constraint
254 # Prevent multiple imports of the same message.
255 fields: [xml_file_id]
256 |]
257
258
259 --
260 -- Tasty Tests
261 --
262
263 -- | A list of all tests for this module.
264 --
265 sport_info_tests :: TestTree
266 sport_info_tests =
267 testGroup
268 "SportInfo tests"
269 [ test_accessors,
270 test_parse_xml_succeeds,
271 test_dbimport_succeeds ]
272
273
274 -- | Make sure the accessors work and that we can parse one file. Ok,
275 -- so the real point of this is to make the unused fields (dtd, xml,
276 -- ...) warning go away without having to mangle the groundhog code.
277 --
278 test_accessors :: TestTree
279 test_accessors = testCase "we can access a parsed sport_info" $ do
280 xmltree <- unsafe_read_document "test/xml/sportinfo/wnbastandxml.xml"
281 let Right t = parse_xml "wnbastandxml.dtd" xmltree
282 let a1 = dtd t
283 let ex1 = "wnbastandxml.dtd"
284 let a2 = xml_file_id t
285 let ex2 = 2011
286 let a3 = show $ time_stamp t
287 let ex3 = "2009-09-27 19:50:00 UTC"
288 let a4 = take 9 (xml t)
289 let ex4 = "<message>"
290 let actual = (a1,a2,a3,a4)
291 let expected = (ex1,ex2,ex3,ex4)
292 actual @?= expected
293
294
295 -- | Sample XML documents for SportInfo types.
296 --
297 sport_info_test_files :: [FilePath]
298 sport_info_test_files =
299 map (change_suffix . add_path) dtds
300 where
301 add_path = ("test/xml/sportinfo/" ++ )
302 change_suffix = replace ".dtd" ".xml"
303
304
305 -- | Make sure we can parse every element of 'sport_info_test_files'.
306 --
307 test_parse_xml_succeeds :: TestTree
308 test_parse_xml_succeeds =
309 testGroup "parse_xml" $ map check sport_info_test_files
310 where
311 check t = testCase t $ do
312 x <- unsafe_read_document t
313 let result = parse_xml "dummy" x
314 let actual = case result of -- isRight appears in base-4.7
315 Left _ -> False
316 Right _ -> True
317 let expected = True
318 actual @?= expected
319
320
321 -- | Ensure that each element of 'sport_info_test_files' can be imported
322 -- by counting the total number of database records (after
323 -- importing) and comparing it against the length of
324 -- 'sport_info_test_files'.
325 --
326 test_dbimport_succeeds :: TestTree
327 test_dbimport_succeeds = testCase "dbimport succeeds" $ do
328 xmltrees <- mapM unsafe_read_document sport_info_test_files
329 let msgs = rights $ map (parse_xml "dummy") xmltrees
330 actual <- withSqliteConn ":memory:" $ runDbConn $ do
331 runMigration silentMigrationLogger $
332 migrate (undefined :: SportInfo)
333 mapM_ dbimport msgs
334 countAll (undefined :: SportInfo)
335
336 actual @?= expected
337 where
338 expected = length sport_info_test_files