]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/SportInfo.hs
f37029121e27bc7ed30d4e1c8fb6f4b86b86fc69
[dead/htsn-import.git] / src / TSN / XML / SportInfo.hs
1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE QuasiQuotes #-}
4 {-# LANGUAGE RecordWildCards #-}
5 {-# LANGUAGE TemplateHaskell #-}
6 {-# LANGUAGE TypeFamilies #-}
7
8 -- | SportInfo represents a collection of DTDs that we don't really
9 -- handle but want to make available. The raw XML gets stored in the
10 -- database along with the XML_File_ID, but we don't parse any of it.
11 --
12 -- This is almost completely redundant with "TSN.XML.GameInfo", but
13 -- the redundancy is necessary: we need separate message types so
14 -- that we can have separate 'DbImport' instances. It would take
15 -- more code/work to abstract (if it's even possible) than to
16 -- duplicate.
17 --
18 module TSN.XML.SportInfo (
19 dtds,
20 parse_xml,
21 sport_info_tests,
22 -- * WARNING: these are private but exported to silence warnings
23 SportInfoConstructor(..) )
24 where
25
26 -- System imports.
27 import Data.Either ( rights )
28 import Data.String.Utils ( replace )
29 import Data.Time.Clock ( UTCTime )
30 import Database.Groundhog (
31 countAll,
32 insert_,
33 migrate,
34 runMigration,
35 silentMigrationLogger )
36 import Database.Groundhog.Generic ( runDbConn )
37 import Database.Groundhog.Sqlite ( withSqliteConn )
38 import Database.Groundhog.TH (
39 defaultCodegenConfig,
40 groundhog,
41 mkPersist )
42 import Test.Tasty ( TestTree, testGroup )
43 import Test.Tasty.HUnit ( (@?=), testCase )
44 import Text.XML.HXT.Core ( XmlTree )
45 import Text.XML.HXT.DOM.ShowXml ( xshow )
46
47 -- Local imports.
48 import TSN.DbImport (
49 DbImport(..),
50 ImportResult(..),
51 run_dbmigrate )
52 import TSN.Parse (
53 ParseError,
54 parse_message,
55 parse_xmlfid,
56 parse_xml_time_stamp )
57 import Xml ( unsafe_read_document )
58
59
60 -- | The DTDs for everything that we consider \"Sport Info.\"
61 --
62 dtds :: [String]
63 dtds =
64 [ "CBASK_3PPctXML.dtd",
65 "Cbask_All_Tourn_Teams_XML.dtd",
66 "CBASK_AssistsXML.dtd",
67 "Cbask_Awards_XML.dtd",
68 "CBASK_BlocksXML.dtd",
69 "Cbask_Conf_Standings_XML.dtd",
70 "Cbask_DivII_III_Indv_Stats_XML.dtd",
71 "Cbask_DivII_Team_Stats_XML.dtd",
72 "Cbask_DivIII_Team_Stats_XML.dtd",
73 "CBASK_FGPctXML.dtd",
74 "CBASK_FoulsXML.dtd",
75 "CBASK_FTPctXML.dtd",
76 "Cbask_Indv_No_Avg_XML.dtd",
77 "Cbask_Indv_Scoring_XML.dtd",
78 "CBASK_MinutesXML.dtd",
79 "Cbask_Polls_XML.dtd",
80 "CBASK_ReboundsXML.dtd",
81 "CBASK_ScoringLeadersXML.dtd",
82 "Cbask_Team_ThreePT_Made_XML.dtd",
83 "Cbask_Team_ThreePT_PCT_XML.dtd",
84 "Cbask_Team_Win_Pct_XML.dtd",
85 "Cbask_Top_Twenty_Five_XML.dtd",
86 "CBASK_TopTwentyFiveResult_XML.dtd",
87 "Cbask_Tourn_Awards_XML.dtd",
88 "Cbask_Tourn_Champs_XML.dtd",
89 "Cbask_Tourn_Indiv_XML.dtd",
90 "Cbask_Tourn_Leaders_XML.dtd",
91 "Cbask_Tourn_MVP_XML.dtd",
92 "Cbask_Tourn_Records_XML.dtd",
93 "LeagueScheduleXML.dtd",
94 "minorscoresxml.dtd",
95 "Minor_Baseball_League_Leaders_XML.dtd",
96 "Minor_Baseball_Standings_XML.dtd",
97 "Minor_Baseball_Transactions_XML.dtd",
98 "mlbbattingavgxml.dtd",
99 "mlbdoublesleadersxml.dtd",
100 "MLBGamesPlayedXML.dtd",
101 "MLBGIDPXML.dtd",
102 "MLBHitByPitchXML.dtd",
103 "mlbhitsleadersxml.dtd",
104 "mlbhomerunsxml.dtd",
105 "MLBHRFreqXML.dtd",
106 "MLBIntWalksXML.dtd",
107 "MLBKORateXML.dtd",
108 "mlbonbasepctxml.dtd",
109 "MLBOPSXML.dtd",
110 "MLBPlateAppsXML.dtd",
111 "mlbrbisxml.dtd",
112 "mlbrunsleadersxml.dtd",
113 "MLBSacFliesXML.dtd",
114 "MLBSacrificesXML.dtd",
115 "MLBSBSuccessXML.dtd",
116 "mlbsluggingpctxml.dtd",
117 "mlbstandxml.dtd",
118 "mlbstandxml_preseason.dtd",
119 "mlbstolenbasexml.dtd",
120 "mlbtotalbasesleadersxml.dtd",
121 "mlbtriplesleadersxml.dtd",
122 "MLBWalkRateXML.dtd",
123 "mlbwalksleadersxml.dtd",
124 "MLBXtraBaseHitsXML.dtd",
125 "MLB_ERA_Leaders.dtd",
126 "MLB_Fielding_XML.dtd",
127 "MLB_Pitching_Appearances_Leaders.dtd",
128 "MLB_Pitching_Balks_Leaders.dtd",
129 "MLB_Pitching_CG_Leaders.dtd",
130 "MLB_Pitching_ER_Allowed_Leaders.dtd",
131 "MLB_Pitching_Hits_Allowed_Leaders.dtd",
132 "MLB_Pitching_Hit_Batters_Leaders.dtd",
133 "MLB_Pitching_HR_Allowed_Leaders.dtd",
134 "MLB_Pitching_IP_Leaders.dtd",
135 "MLB_Pitching_Runs_Allowed_Leaders.dtd",
136 "MLB_Pitching_Saves_Leaders.dtd",
137 "MLB_Pitching_Shut_Outs_Leaders.dtd",
138 "MLB_Pitching_Starts_Leaders.dtd",
139 "MLB_Pitching_Strike_Outs_Leaders.dtd",
140 "MLB_Pitching_Walks_Leaders.dtd",
141 "MLB_Pitching_WHIP_Leaders.dtd",
142 "MLB_Pitching_Wild_Pitches_Leaders.dtd",
143 "MLB_Pitching_Win_Percentage_Leaders.dtd",
144 "MLB_Pitching_WL_Leaders.dtd",
145 "NBA_Team_Stats_XML.dtd",
146 "NBA3PPctXML.dtd",
147 "NBAAssistsXML.dtd",
148 "NBABlocksXML.dtd",
149 "nbaconfrecxml.dtd",
150 "nbadaysxml.dtd",
151 "nbadivisionsxml.dtd",
152 "NBAFGPctXML.dtd",
153 "NBAFoulsXML.dtd",
154 "NBAFTPctXML.dtd",
155 "NBAMinutesXML.dtd",
156 "NBAReboundsXML.dtd",
157 "NBAScorersXML.dtd",
158 "nbastandxml.dtd",
159 "NBAStealsXML.dtd",
160 "nbateamleadersxml.dtd",
161 "nbatripledoublexml.dtd",
162 "NBATurnoversXML.dtd",
163 "NCAA_Conference_Schedule_XML.dtd",
164 "nflfirstdownxml.dtd",
165 "NFLFumbleLeaderXML.dtd",
166 "NFLGrassTurfDomeOutsideXML.dtd",
167 "NFLGiveTakeXML.dtd",
168 "NFLInside20XML.dtd",
169 "NFLInterceptionLeadersXML.dtd",
170 "NFLKickoffsXML.dtd",
171 "NFLMondayNightXML.dtd",
172 "NFLPassingLeadersXML.dtd",
173 "NFLPassLeadXML.dtd",
174 "NFLQBStartsXML.dtd",
175 "NFLReceivingLeadersXML.dtd",
176 "NFLRushingLeadersXML.dtd",
177 "NFLSackLeadersXML.dtd",
178 "nflstandxml.dtd",
179 "NFLTackleFFLeadersXML.dtd",
180 "NFLTeamRankingsXML.dtd",
181 "NFLTopKickoffReturnXML.dtd",
182 "NFLTopPerformanceXML.dtd",
183 "NFLTopPuntReturnXML.dtd",
184 "NFLTotalYardageXML.dtd",
185 "NFLYardsXML.dtd",
186 "NFL_KickingLeaders_XML.dtd",
187 "NFL_NBA_Draft_XML.dtd",
188 "NFL_PuntingLeaders_XML.dtd",
189 "NFL_Roster_XML.dtd",
190 "NFL_Team_Stats_XML.dtd",
191 "Transactions_XML.dtd",
192 "Weekly_Sched_XML.dtd",
193 "WNBA_Team_Leaders_XML.dtd",
194 "WNBA3PPctXML.dtd",
195 "WNBAAssistsXML.dtd",
196 "WNBABlocksXML.dtd",
197 "WNBAFGPctXML.dtd",
198 "WNBAFoulsXML.dtd",
199 "WNBAFTPctXML.dtd",
200 "WNBAMinutesXML.dtd",
201 "WNBAReboundsXML.dtd",
202 "WNBAScorersXML.dtd",
203 "wnbastandxml.dtd",
204 "WNBAStealsXML.dtd",
205 "WNBATurnoversXML.dtd" ]
206
207
208 -- | This serves as both the database and XML representation of a
209 -- SportInfo \<message\>.
210 --
211 data SportInfo =
212 SportInfo {
213 dtd :: String,
214 xml_file_id :: Int,
215 time_stamp :: UTCTime,
216 xml :: String }
217 deriving (Eq, Show)
218
219
220 -- | Attempt to parse a 'SportInfo' from an 'XmlTree'. If we cannot,
221 -- we fail with an error message.
222 --
223 parse_xml :: String -> XmlTree -> Either ParseError SportInfo
224 parse_xml dtdname xmltree = do
225 xmlfid <- parse_xmlfid xmltree
226 timestamp <- parse_xml_time_stamp xmltree
227 message <- parse_message xmltree
228 return $ SportInfo dtdname xmlfid timestamp (xshow [message])
229
230
231 --
232 -- Database code
233 --
234
235 instance DbImport SportInfo where
236 dbmigrate _ =
237 run_dbmigrate $ migrate (undefined :: SportInfo)
238
239 -- | We import a 'SportInfo' by inserting the whole thing at
240 -- once. Nothing fancy going on here.
241 dbimport msg = do
242 insert_ msg
243 return ImportSucceeded
244
245
246 -- | The database schema for SportInfo is trivial; all we need is for
247 -- the XML_File_ID to be unique.
248 --
249 mkPersist defaultCodegenConfig [groundhog|
250 - entity: SportInfo
251 dbName: sport_info
252 constructors:
253 - name: SportInfo
254 uniques:
255 - name: unique_sport_info
256 type: constraint
257 # Prevent multiple imports of the same message.
258 fields: [xml_file_id]
259 |]
260
261
262 --
263 -- Tasty Tests
264 --
265
266 -- | A list of all tests for this module.
267 --
268 sport_info_tests :: TestTree
269 sport_info_tests =
270 testGroup
271 "SportInfo tests"
272 [ test_accessors,
273 test_parse_xml_succeeds,
274 test_dbimport_succeeds ]
275
276
277 -- | Make sure the accessors work and that we can parse one file. Ok,
278 -- so the real point of this is to make the unused fields (dtd, xml,
279 -- ...) warning go away without having to mangle the groundhog code.
280 --
281 test_accessors :: TestTree
282 test_accessors = testCase "we can access a parsed sport_info" $ do
283 xmltree <- unsafe_read_document "test/xml/sportinfo/wnbastandxml.xml"
284 let Right t = parse_xml "wnbastandxml.dtd" xmltree
285 let a1 = dtd t
286 let ex1 = "wnbastandxml.dtd"
287 let a2 = xml_file_id t
288 let ex2 = 2011
289 let a3 = show $ time_stamp t
290 let ex3 = "2009-09-27 19:50:00 UTC"
291 let a4 = take 9 (xml t)
292 let ex4 = "<message>"
293 let actual = (a1,a2,a3,a4)
294 let expected = (ex1,ex2,ex3,ex4)
295 actual @?= expected
296
297
298 -- | Sample XML documents for SportInfo types.
299 --
300 sport_info_test_files :: [FilePath]
301 sport_info_test_files =
302 map (change_suffix . add_path) dtds
303 where
304 add_path = ("test/xml/sportinfo/" ++ )
305 change_suffix = replace ".dtd" ".xml"
306
307
308 -- | Make sure we can parse every element of 'sport_info_test_files'.
309 --
310 test_parse_xml_succeeds :: TestTree
311 test_parse_xml_succeeds =
312 testGroup "parse_xml" $ map check sport_info_test_files
313 where
314 check t = testCase t $ do
315 x <- unsafe_read_document t
316 let result = parse_xml "dummy" x
317 let actual = case result of -- isRight appears in base-4.7
318 Left _ -> False
319 Right _ -> True
320 let expected = True
321 actual @?= expected
322
323
324 -- | Ensure that each element of 'sport_info_test_files' can be imported
325 -- by counting the total number of database records (after
326 -- importing) and comparing it against the length of
327 -- 'sport_info_test_files'.
328 --
329 test_dbimport_succeeds :: TestTree
330 test_dbimport_succeeds = testCase "dbimport succeeds" $ do
331 xmltrees <- mapM unsafe_read_document sport_info_test_files
332 let msgs = rights $ map (parse_xml "dummy") xmltrees
333 actual <- withSqliteConn ":memory:" $ runDbConn $ do
334 runMigration silentMigrationLogger $
335 migrate (undefined :: SportInfo)
336 mapM_ dbimport msgs
337 countAll (undefined :: SportInfo)
338
339 actual @?= expected
340 where
341 expected = length sport_info_test_files