]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/SportInfo.hs
Add SportInfo support for NFLYardsXML.dtd.
[dead/htsn-import.git] / src / TSN / XML / SportInfo.hs
1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE QuasiQuotes #-}
4 {-# LANGUAGE RecordWildCards #-}
5 {-# LANGUAGE TemplateHaskell #-}
6 {-# LANGUAGE TypeFamilies #-}
7
8 -- | SportInfo represents a collection of DTDs that we don't really
9 -- handle but want to make available. The raw XML gets stored in the
10 -- database along with the XML_File_ID, but we don't parse any of it.
11 --
12 -- This is almost completely redundant with "TSN.XML.GameInfo", but
13 -- the redundancy is necessary: we need separate message types so
14 -- that we can have separate 'DbImport' instances. It would take
15 -- more code/work to abstract (if it's even possible) than to
16 -- duplicate.
17 --
18 module TSN.XML.SportInfo (
19 dtds,
20 parse_xml,
21 sport_info_tests,
22 -- * WARNING: these are private but exported to silence warnings
23 SportInfoConstructor(..) )
24 where
25
26 -- System imports.
27 import Data.Either ( rights )
28 import Data.String.Utils ( replace )
29 import Data.Time.Clock ( UTCTime )
30 import Database.Groundhog (
31 countAll,
32 insert_,
33 migrate,
34 runMigration,
35 silentMigrationLogger )
36 import Database.Groundhog.Generic ( runDbConn )
37 import Database.Groundhog.Sqlite ( withSqliteConn )
38 import Database.Groundhog.TH (
39 defaultCodegenConfig,
40 groundhog,
41 mkPersist )
42 import Test.Tasty ( TestTree, testGroup )
43 import Test.Tasty.HUnit ( (@?=), testCase )
44 import Text.XML.HXT.Core ( XmlTree )
45 import Text.XML.HXT.DOM.ShowXml ( xshow )
46
47 -- Local imports.
48 import TSN.DbImport (
49 DbImport(..),
50 ImportResult(..),
51 run_dbmigrate )
52 import TSN.Parse (
53 ParseError,
54 parse_message,
55 parse_xmlfid,
56 parse_xml_time_stamp )
57 import Xml ( unsafe_read_document )
58
59
60 -- | The DTDs for everything that we consider \"Sport Info.\"
61 --
62 dtds :: [String]
63 dtds =
64 [ "CBASK_3PPctXML.dtd",
65 "Cbask_All_Tourn_Teams_XML.dtd",
66 "CBASK_AssistsXML.dtd",
67 "Cbask_Awards_XML.dtd",
68 "CBASK_BlocksXML.dtd",
69 "Cbask_Conf_Standings_XML.dtd",
70 "Cbask_DivII_III_Indv_Stats_XML.dtd",
71 "Cbask_DivII_Team_Stats_XML.dtd",
72 "Cbask_DivIII_Team_Stats_XML.dtd",
73 "CBASK_FGPctXML.dtd",
74 "CBASK_FoulsXML.dtd",
75 "CBASK_FTPctXML.dtd",
76 "Cbask_Indv_Scoring_XML.dtd",
77 "CBASK_MinutesXML.dtd",
78 "Cbask_Polls_XML.dtd",
79 "CBASK_ReboundsXML.dtd",
80 "CBASK_ScoringLeadersXML.dtd",
81 "Cbask_Team_ThreePT_Made_XML.dtd",
82 "Cbask_Team_ThreePT_PCT_XML.dtd",
83 "Cbask_Team_Win_Pct_XML.dtd",
84 "Cbask_Top_Twenty_Five_XML.dtd",
85 "CBASK_TopTwentyFiveResult_XML.dtd",
86 "Cbask_Tourn_Awards_XML.dtd",
87 "Cbask_Tourn_Champs_XML.dtd",
88 "Cbask_Tourn_Indiv_XML.dtd",
89 "Cbask_Tourn_Leaders_XML.dtd",
90 "Cbask_Tourn_MVP_XML.dtd",
91 "Cbask_Tourn_Records_XML.dtd",
92 "LeagueScheduleXML.dtd",
93 "minorscoresxml.dtd",
94 "Minor_Baseball_League_Leaders_XML.dtd",
95 "Minor_Baseball_Standings_XML.dtd",
96 "Minor_Baseball_Transactions_XML.dtd",
97 "mlbbattingavgxml.dtd",
98 "mlbdoublesleadersxml.dtd",
99 "MLBGamesPlayedXML.dtd",
100 "MLBGIDPXML.dtd",
101 "MLBHitByPitchXML.dtd",
102 "mlbhitsleadersxml.dtd",
103 "mlbhomerunsxml.dtd",
104 "MLBHRFreqXML.dtd",
105 "MLBIntWalksXML.dtd",
106 "MLBKORateXML.dtd",
107 "mlbonbasepctxml.dtd",
108 "MLBOPSXML.dtd",
109 "MLBPlateAppsXML.dtd",
110 "mlbrbisxml.dtd",
111 "mlbrunsleadersxml.dtd",
112 "MLBSacFliesXML.dtd",
113 "MLBSacrificesXML.dtd",
114 "MLBSBSuccessXML.dtd",
115 "mlbsluggingpctxml.dtd",
116 "mlbstandxml.dtd",
117 "mlbstandxml_preseason.dtd",
118 "mlbstolenbasexml.dtd",
119 "mlbtotalbasesleadersxml.dtd",
120 "mlbtriplesleadersxml.dtd",
121 "MLBWalkRateXML.dtd",
122 "mlbwalksleadersxml.dtd",
123 "MLBXtraBaseHitsXML.dtd",
124 "MLB_ERA_Leaders.dtd",
125 "MLB_Fielding_XML.dtd",
126 "MLB_Pitching_Appearances_Leaders.dtd",
127 "MLB_Pitching_Balks_Leaders.dtd",
128 "MLB_Pitching_CG_Leaders.dtd",
129 "MLB_Pitching_ER_Allowed_Leaders.dtd",
130 "MLB_Pitching_Hits_Allowed_Leaders.dtd",
131 "MLB_Pitching_Hit_Batters_Leaders.dtd",
132 "MLB_Pitching_HR_Allowed_Leaders.dtd",
133 "MLB_Pitching_IP_Leaders.dtd",
134 "MLB_Pitching_Runs_Allowed_Leaders.dtd",
135 "MLB_Pitching_Saves_Leaders.dtd",
136 "MLB_Pitching_Shut_Outs_Leaders.dtd",
137 "MLB_Pitching_Starts_Leaders.dtd",
138 "MLB_Pitching_Strike_Outs_Leaders.dtd",
139 "MLB_Pitching_Walks_Leaders.dtd",
140 "MLB_Pitching_WHIP_Leaders.dtd",
141 "MLB_Pitching_Wild_Pitches_Leaders.dtd",
142 "MLB_Pitching_Win_Percentage_Leaders.dtd",
143 "MLB_Pitching_WL_Leaders.dtd",
144 "NBA_Team_Stats_XML.dtd",
145 "NBA3PPctXML.dtd",
146 "NBAAssistsXML.dtd",
147 "NBABlocksXML.dtd",
148 "nbaconfrecxml.dtd",
149 "nbadaysxml.dtd",
150 "nbadivisionsxml.dtd",
151 "NBAFGPctXML.dtd",
152 "NBAFoulsXML.dtd",
153 "NBAFTPctXML.dtd",
154 "NBAMinutesXML.dtd",
155 "NBAReboundsXML.dtd",
156 "NBAScorersXML.dtd",
157 "nbastandxml.dtd",
158 "NBAStealsXML.dtd",
159 "nbateamleadersxml.dtd",
160 "nbatripledoublexml.dtd",
161 "NBATurnoversXML.dtd",
162 "NCAA_Conference_Schedule_XML.dtd",
163 "nflfirstdownxml.dtd",
164 "NFLFumbleLeaderXML.dtd",
165 "NFLGrassTurfDomeOutsideXML.dtd",
166 "NFLGiveTakeXML.dtd",
167 "NFLInside20XML.dtd",
168 "NFLInterceptionLeadersXML.dtd",
169 "NFLKickoffsXML.dtd",
170 "NFLMondayNightXML.dtd",
171 "NFLPassingLeadersXML.dtd",
172 "NFLPassLeadXML.dtd",
173 "NFLQBStartsXML.dtd",
174 "NFLReceivingLeadersXML.dtd",
175 "NFLRushingLeadersXML.dtd",
176 "NFLSackLeadersXML.dtd",
177 "nflstandxml.dtd",
178 "NFLTackleFFLeadersXML.dtd",
179 "NFLTeamRankingsXML.dtd",
180 "NFLTopKickoffReturnXML.dtd",
181 "NFLTopPerformanceXML.dtd",
182 "NFLTopPuntReturnXML.dtd",
183 "NFLTotalYardageXML.dtd",
184 "NFLYardsXML.dtd",
185 "NFL_KickingLeaders_XML.dtd",
186 "NFL_NBA_Draft_XML.dtd",
187 "NFL_Roster_XML.dtd",
188 "NFL_Team_Stats_XML.dtd",
189 "Transactions_XML.dtd",
190 "Weekly_Sched_XML.dtd",
191 "WNBA_Team_Leaders_XML.dtd",
192 "WNBA3PPctXML.dtd",
193 "WNBAAssistsXML.dtd",
194 "WNBABlocksXML.dtd",
195 "WNBAFGPctXML.dtd",
196 "WNBAFoulsXML.dtd",
197 "WNBAFTPctXML.dtd",
198 "WNBAMinutesXML.dtd",
199 "WNBAReboundsXML.dtd",
200 "WNBAScorersXML.dtd",
201 "wnbastandxml.dtd",
202 "WNBAStealsXML.dtd",
203 "WNBATurnoversXML.dtd" ]
204
205
206 -- | This serves as both the database and XML representation of a
207 -- SportInfo \<message\>.
208 --
209 data SportInfo =
210 SportInfo {
211 dtd :: String,
212 xml_file_id :: Int,
213 time_stamp :: UTCTime,
214 xml :: String }
215 deriving (Eq, Show)
216
217
218 -- | Attempt to parse a 'SportInfo' from an 'XmlTree'. If we cannot,
219 -- we fail with an error message.
220 --
221 parse_xml :: String -> XmlTree -> Either ParseError SportInfo
222 parse_xml dtdname xmltree = do
223 xmlfid <- parse_xmlfid xmltree
224 timestamp <- parse_xml_time_stamp xmltree
225 message <- parse_message xmltree
226 return $ SportInfo dtdname xmlfid timestamp (xshow [message])
227
228
229 --
230 -- Database code
231 --
232
233 instance DbImport SportInfo where
234 dbmigrate _ =
235 run_dbmigrate $ migrate (undefined :: SportInfo)
236
237 -- | We import a 'SportInfo' by inserting the whole thing at
238 -- once. Nothing fancy going on here.
239 dbimport msg = do
240 insert_ msg
241 return ImportSucceeded
242
243
244 -- | The database schema for SportInfo is trivial; all we need is for
245 -- the XML_File_ID to be unique.
246 --
247 mkPersist defaultCodegenConfig [groundhog|
248 - entity: SportInfo
249 dbName: sport_info
250 constructors:
251 - name: SportInfo
252 uniques:
253 - name: unique_sport_info
254 type: constraint
255 # Prevent multiple imports of the same message.
256 fields: [xml_file_id]
257 |]
258
259
260 --
261 -- Tasty Tests
262 --
263
264 -- | A list of all tests for this module.
265 --
266 sport_info_tests :: TestTree
267 sport_info_tests =
268 testGroup
269 "SportInfo tests"
270 [ test_accessors,
271 test_parse_xml_succeeds,
272 test_dbimport_succeeds ]
273
274
275 -- | Make sure the accessors work and that we can parse one file. Ok,
276 -- so the real point of this is to make the unused fields (dtd, xml,
277 -- ...) warning go away without having to mangle the groundhog code.
278 --
279 test_accessors :: TestTree
280 test_accessors = testCase "we can access a parsed sport_info" $ do
281 xmltree <- unsafe_read_document "test/xml/sportinfo/wnbastandxml.xml"
282 let Right t = parse_xml "wnbastandxml.dtd" xmltree
283 let a1 = dtd t
284 let ex1 = "wnbastandxml.dtd"
285 let a2 = xml_file_id t
286 let ex2 = 2011
287 let a3 = show $ time_stamp t
288 let ex3 = "2009-09-27 19:50:00 UTC"
289 let a4 = take 9 (xml t)
290 let ex4 = "<message>"
291 let actual = (a1,a2,a3,a4)
292 let expected = (ex1,ex2,ex3,ex4)
293 actual @?= expected
294
295
296 -- | Sample XML documents for SportInfo types.
297 --
298 sport_info_test_files :: [FilePath]
299 sport_info_test_files =
300 map (change_suffix . add_path) dtds
301 where
302 add_path = ("test/xml/sportinfo/" ++ )
303 change_suffix = replace ".dtd" ".xml"
304
305
306 -- | Make sure we can parse every element of 'sport_info_test_files'.
307 --
308 test_parse_xml_succeeds :: TestTree
309 test_parse_xml_succeeds =
310 testGroup "parse_xml" $ map check sport_info_test_files
311 where
312 check t = testCase t $ do
313 x <- unsafe_read_document t
314 let result = parse_xml "dummy" x
315 let actual = case result of -- isRight appears in base-4.7
316 Left _ -> False
317 Right _ -> True
318 let expected = True
319 actual @?= expected
320
321
322 -- | Ensure that each element of 'sport_info_test_files' can be imported
323 -- by counting the total number of database records (after
324 -- importing) and comparing it against the length of
325 -- 'sport_info_test_files'.
326 --
327 test_dbimport_succeeds :: TestTree
328 test_dbimport_succeeds = testCase "dbimport succeeds" $ do
329 xmltrees <- mapM unsafe_read_document sport_info_test_files
330 let msgs = rights $ map (parse_xml "dummy") xmltrees
331 actual <- withSqliteConn ":memory:" $ runDbConn $ do
332 runMigration silentMigrationLogger $
333 migrate (undefined :: SportInfo)
334 mapM_ dbimport msgs
335 countAll (undefined :: SportInfo)
336
337 actual @?= expected
338 where
339 expected = length sport_info_test_files