]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/SportInfo.hs
Add SportInfo support for NFLRushingLeadersXML.dtd.
[dead/htsn-import.git] / src / TSN / XML / SportInfo.hs
1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE QuasiQuotes #-}
4 {-# LANGUAGE RecordWildCards #-}
5 {-# LANGUAGE TemplateHaskell #-}
6 {-# LANGUAGE TypeFamilies #-}
7
8 -- | SportInfo represents a collection of DTDs that we don't really
9 -- handle but want to make available. The raw XML gets stored in the
10 -- database along with the XML_File_ID, but we don't parse any of it.
11 --
12 -- This is almost completely redundant with "TSN.XML.GameInfo", but
13 -- the redundancy is necessary: we need separate message types so
14 -- that we can have separate 'DbImport' instances. It would take
15 -- more code/work to abstract (if it's even possible) than to
16 -- duplicate.
17 --
18 module TSN.XML.SportInfo (
19 dtds,
20 parse_xml,
21 sport_info_tests,
22 -- * WARNING: these are private but exported to silence warnings
23 SportInfoConstructor(..) )
24 where
25
26 -- System imports.
27 import Data.Either ( rights )
28 import Data.String.Utils ( replace )
29 import Data.Time.Clock ( UTCTime )
30 import Database.Groundhog (
31 countAll,
32 insert_,
33 migrate,
34 runMigration,
35 silentMigrationLogger )
36 import Database.Groundhog.Generic ( runDbConn )
37 import Database.Groundhog.Sqlite ( withSqliteConn )
38 import Database.Groundhog.TH (
39 defaultCodegenConfig,
40 groundhog,
41 mkPersist )
42 import Test.Tasty ( TestTree, testGroup )
43 import Test.Tasty.HUnit ( (@?=), testCase )
44 import Text.XML.HXT.Core ( XmlTree )
45 import Text.XML.HXT.DOM.ShowXml ( xshow )
46
47 -- Local imports.
48 import TSN.DbImport (
49 DbImport(..),
50 ImportResult(..),
51 run_dbmigrate )
52 import TSN.Parse (
53 ParseError,
54 parse_message,
55 parse_xmlfid,
56 parse_xml_time_stamp )
57 import Xml ( unsafe_read_document )
58
59
60 -- | The DTDs for everything that we consider \"Sport Info.\"
61 --
62 dtds :: [String]
63 dtds =
64 [ "CBASK_3PPctXML.dtd",
65 "Cbask_All_Tourn_Teams_XML.dtd",
66 "CBASK_AssistsXML.dtd",
67 "Cbask_Awards_XML.dtd",
68 "CBASK_BlocksXML.dtd",
69 "Cbask_Conf_Standings_XML.dtd",
70 "Cbask_DivII_III_Indv_Stats_XML.dtd",
71 "Cbask_DivII_Team_Stats_XML.dtd",
72 "Cbask_DivIII_Team_Stats_XML.dtd",
73 "CBASK_FGPctXML.dtd",
74 "CBASK_FoulsXML.dtd",
75 "CBASK_FTPctXML.dtd",
76 "Cbask_Indv_Scoring_XML.dtd",
77 "CBASK_MinutesXML.dtd",
78 "Cbask_Polls_XML.dtd",
79 "CBASK_ReboundsXML.dtd",
80 "CBASK_ScoringLeadersXML.dtd",
81 "Cbask_Team_ThreePT_Made_XML.dtd",
82 "Cbask_Team_ThreePT_PCT_XML.dtd",
83 "Cbask_Team_Win_Pct_XML.dtd",
84 "Cbask_Top_Twenty_Five_XML.dtd",
85 "CBASK_TopTwentyFiveResult_XML.dtd",
86 "Cbask_Tourn_Awards_XML.dtd",
87 "Cbask_Tourn_Champs_XML.dtd",
88 "Cbask_Tourn_Indiv_XML.dtd",
89 "Cbask_Tourn_Leaders_XML.dtd",
90 "Cbask_Tourn_MVP_XML.dtd",
91 "Cbask_Tourn_Records_XML.dtd",
92 "LeagueScheduleXML.dtd",
93 "minorscoresxml.dtd",
94 "Minor_Baseball_League_Leaders_XML.dtd",
95 "Minor_Baseball_Standings_XML.dtd",
96 "Minor_Baseball_Transactions_XML.dtd",
97 "mlbbattingavgxml.dtd",
98 "mlbdoublesleadersxml.dtd",
99 "MLBGamesPlayedXML.dtd",
100 "MLBGIDPXML.dtd",
101 "MLBHitByPitchXML.dtd",
102 "mlbhitsleadersxml.dtd",
103 "mlbhomerunsxml.dtd",
104 "MLBHRFreqXML.dtd",
105 "MLBIntWalksXML.dtd",
106 "MLBKORateXML.dtd",
107 "mlbonbasepctxml.dtd",
108 "MLBOPSXML.dtd",
109 "MLBPlateAppsXML.dtd",
110 "mlbrbisxml.dtd",
111 "mlbrunsleadersxml.dtd",
112 "MLBSacFliesXML.dtd",
113 "MLBSacrificesXML.dtd",
114 "MLBSBSuccessXML.dtd",
115 "mlbsluggingpctxml.dtd",
116 "mlbstandxml.dtd",
117 "mlbstandxml_preseason.dtd",
118 "mlbstolenbasexml.dtd",
119 "mlbtotalbasesleadersxml.dtd",
120 "mlbtriplesleadersxml.dtd",
121 "MLBWalkRateXML.dtd",
122 "mlbwalksleadersxml.dtd",
123 "MLBXtraBaseHitsXML.dtd",
124 "MLB_ERA_Leaders.dtd",
125 "MLB_Fielding_XML.dtd",
126 "MLB_Pitching_Appearances_Leaders.dtd",
127 "MLB_Pitching_Balks_Leaders.dtd",
128 "MLB_Pitching_CG_Leaders.dtd",
129 "MLB_Pitching_ER_Allowed_Leaders.dtd",
130 "MLB_Pitching_Hits_Allowed_Leaders.dtd",
131 "MLB_Pitching_Hit_Batters_Leaders.dtd",
132 "MLB_Pitching_HR_Allowed_Leaders.dtd",
133 "MLB_Pitching_IP_Leaders.dtd",
134 "MLB_Pitching_Runs_Allowed_Leaders.dtd",
135 "MLB_Pitching_Saves_Leaders.dtd",
136 "MLB_Pitching_Shut_Outs_Leaders.dtd",
137 "MLB_Pitching_Starts_Leaders.dtd",
138 "MLB_Pitching_Strike_Outs_Leaders.dtd",
139 "MLB_Pitching_Walks_Leaders.dtd",
140 "MLB_Pitching_WHIP_Leaders.dtd",
141 "MLB_Pitching_Wild_Pitches_Leaders.dtd",
142 "MLB_Pitching_Win_Percentage_Leaders.dtd",
143 "MLB_Pitching_WL_Leaders.dtd",
144 "NBA_Team_Stats_XML.dtd",
145 "NBA3PPctXML.dtd",
146 "NBAAssistsXML.dtd",
147 "NBABlocksXML.dtd",
148 "nbaconfrecxml.dtd",
149 "nbadaysxml.dtd",
150 "nbadivisionsxml.dtd",
151 "NBAFGPctXML.dtd",
152 "NBAFoulsXML.dtd",
153 "NBAFTPctXML.dtd",
154 "NBAMinutesXML.dtd",
155 "NBAReboundsXML.dtd",
156 "NBAScorersXML.dtd",
157 "nbastandxml.dtd",
158 "NBAStealsXML.dtd",
159 "nbateamleadersxml.dtd",
160 "nbatripledoublexml.dtd",
161 "NBATurnoversXML.dtd",
162 "NCAA_Conference_Schedule_XML.dtd",
163 "nflfirstdownxml.dtd",
164 "NFLFumbleLeaderXML.dtd",
165 "NFLGrassTurfDomeOutsideXML.dtd",
166 "NFLGiveTakeXML.dtd",
167 "NFLInside20XML.dtd",
168 "NFLInterceptionLeadersXML.dtd",
169 "NFLKickoffsXML.dtd",
170 "NFLMondayNightXML.dtd",
171 "NFLPassingLeadersXML.dtd",
172 "NFLPassLeadXML.dtd",
173 "NFLQBStartsXML.dtd",
174 "NFLReceivingLeadersXML.dtd",
175 "NFLRushingLeadersXML.dtd",
176 "NFLSackLeadersXML.dtd",
177 "nflstandxml.dtd",
178 "NFLTeamRankingsXML.dtd",
179 "NFLTopPerformanceXML.dtd",
180 "NFLTotalYardageXML.dtd",
181 "NFL_KickingLeaders_XML.dtd",
182 "NFL_NBA_Draft_XML.dtd",
183 "NFL_Roster_XML.dtd",
184 "NFL_Team_Stats_XML.dtd",
185 "Transactions_XML.dtd",
186 "Weekly_Sched_XML.dtd",
187 "WNBA_Team_Leaders_XML.dtd",
188 "WNBA3PPctXML.dtd",
189 "WNBAAssistsXML.dtd",
190 "WNBABlocksXML.dtd",
191 "WNBAFGPctXML.dtd",
192 "WNBAFoulsXML.dtd",
193 "WNBAFTPctXML.dtd",
194 "WNBAMinutesXML.dtd",
195 "WNBAReboundsXML.dtd",
196 "WNBAScorersXML.dtd",
197 "wnbastandxml.dtd",
198 "WNBAStealsXML.dtd",
199 "WNBATurnoversXML.dtd" ]
200
201
202 -- | This serves as both the database and XML representation of a
203 -- SportInfo \<message\>.
204 --
205 data SportInfo =
206 SportInfo {
207 dtd :: String,
208 xml_file_id :: Int,
209 time_stamp :: UTCTime,
210 xml :: String }
211 deriving (Eq, Show)
212
213
214 -- | Attempt to parse a 'SportInfo' from an 'XmlTree'. If we cannot,
215 -- we fail with an error message.
216 --
217 parse_xml :: String -> XmlTree -> Either ParseError SportInfo
218 parse_xml dtdname xmltree = do
219 xmlfid <- parse_xmlfid xmltree
220 timestamp <- parse_xml_time_stamp xmltree
221 message <- parse_message xmltree
222 return $ SportInfo dtdname xmlfid timestamp (xshow [message])
223
224
225 --
226 -- Database code
227 --
228
229 instance DbImport SportInfo where
230 dbmigrate _ =
231 run_dbmigrate $ migrate (undefined :: SportInfo)
232
233 -- | We import a 'SportInfo' by inserting the whole thing at
234 -- once. Nothing fancy going on here.
235 dbimport msg = do
236 insert_ msg
237 return ImportSucceeded
238
239
240 -- | The database schema for SportInfo is trivial; all we need is for
241 -- the XML_File_ID to be unique.
242 --
243 mkPersist defaultCodegenConfig [groundhog|
244 - entity: SportInfo
245 dbName: sport_info
246 constructors:
247 - name: SportInfo
248 uniques:
249 - name: unique_sport_info
250 type: constraint
251 # Prevent multiple imports of the same message.
252 fields: [xml_file_id]
253 |]
254
255
256 --
257 -- Tasty Tests
258 --
259
260 -- | A list of all tests for this module.
261 --
262 sport_info_tests :: TestTree
263 sport_info_tests =
264 testGroup
265 "SportInfo tests"
266 [ test_accessors,
267 test_parse_xml_succeeds,
268 test_dbimport_succeeds ]
269
270
271 -- | Make sure the accessors work and that we can parse one file. Ok,
272 -- so the real point of this is to make the unused fields (dtd, xml,
273 -- ...) warning go away without having to mangle the groundhog code.
274 --
275 test_accessors :: TestTree
276 test_accessors = testCase "we can access a parsed sport_info" $ do
277 xmltree <- unsafe_read_document "test/xml/sportinfo/wnbastandxml.xml"
278 let Right t = parse_xml "wnbastandxml.dtd" xmltree
279 let a1 = dtd t
280 let ex1 = "wnbastandxml.dtd"
281 let a2 = xml_file_id t
282 let ex2 = 2011
283 let a3 = show $ time_stamp t
284 let ex3 = "2009-09-27 19:50:00 UTC"
285 let a4 = take 9 (xml t)
286 let ex4 = "<message>"
287 let actual = (a1,a2,a3,a4)
288 let expected = (ex1,ex2,ex3,ex4)
289 actual @?= expected
290
291
292 -- | Sample XML documents for SportInfo types.
293 --
294 sport_info_test_files :: [FilePath]
295 sport_info_test_files =
296 map (change_suffix . add_path) dtds
297 where
298 add_path = ("test/xml/sportinfo/" ++ )
299 change_suffix = replace ".dtd" ".xml"
300
301
302 -- | Make sure we can parse every element of 'sport_info_test_files'.
303 --
304 test_parse_xml_succeeds :: TestTree
305 test_parse_xml_succeeds =
306 testGroup "parse_xml" $ map check sport_info_test_files
307 where
308 check t = testCase t $ do
309 x <- unsafe_read_document t
310 let result = parse_xml "dummy" x
311 let actual = case result of -- isRight appears in base-4.7
312 Left _ -> False
313 Right _ -> True
314 let expected = True
315 actual @?= expected
316
317
318 -- | Ensure that each element of 'sport_info_test_files' can be imported
319 -- by counting the total number of database records (after
320 -- importing) and comparing it against the length of
321 -- 'sport_info_test_files'.
322 --
323 test_dbimport_succeeds :: TestTree
324 test_dbimport_succeeds = testCase "dbimport succeeds" $ do
325 xmltrees <- mapM unsafe_read_document sport_info_test_files
326 let msgs = rights $ map (parse_xml "dummy") xmltrees
327 actual <- withSqliteConn ":memory:" $ runDbConn $ do
328 runMigration silentMigrationLogger $
329 migrate (undefined :: SportInfo)
330 mapM_ dbimport msgs
331 countAll (undefined :: SportInfo)
332
333 actual @?= expected
334 where
335 expected = length sport_info_test_files