]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/SportInfo.hs
478f48ab18d58989bc2cb57c38f8c4e1739bc184
[dead/htsn-import.git] / src / TSN / XML / SportInfo.hs
1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE QuasiQuotes #-}
4 {-# LANGUAGE RecordWildCards #-}
5 {-# LANGUAGE TemplateHaskell #-}
6 {-# LANGUAGE TypeFamilies #-}
7
8 -- | SportInfo represents a collection of DTDs that we don't really
9 -- handle but want to make available. The raw XML gets stored in the
10 -- database along with the XML_File_ID, but we don't parse any of it.
11 --
12 -- This is almost completely redundant with "TSN.XML.GameInfo", but
13 -- the redundancy is necessary: we need separate message types so
14 -- that we can have separate 'DbImport' instances. It would take
15 -- more code/work to abstract (if it's even possible) than to
16 -- duplicate.
17 --
18 module TSN.XML.SportInfo (
19 dtds,
20 parse_xml,
21 sport_info_tests,
22 -- * WARNING: these are private but exported to silence warnings
23 SportInfoConstructor(..) )
24 where
25
26 -- System imports.
27 import Data.Either ( rights )
28 import Data.String.Utils ( replace )
29 import Data.Time.Clock ( UTCTime )
30 import Database.Groundhog (
31 countAll,
32 insert_,
33 migrate,
34 runMigration,
35 silentMigrationLogger )
36 import Database.Groundhog.Generic ( runDbConn )
37 import Database.Groundhog.Sqlite ( withSqliteConn )
38 import Database.Groundhog.TH (
39 defaultCodegenConfig,
40 groundhog,
41 mkPersist )
42 import Test.Tasty ( TestTree, testGroup )
43 import Test.Tasty.HUnit ( (@?=), testCase )
44 import Text.XML.HXT.Core ( XmlTree )
45 import Text.XML.HXT.DOM.ShowXml ( xshow )
46
47 -- Local imports.
48 import TSN.DbImport (
49 DbImport(..),
50 ImportResult(..),
51 run_dbmigrate )
52 import TSN.Parse (
53 ParseError,
54 parse_message,
55 parse_xmlfid,
56 parse_xml_time_stamp )
57 import Xml ( unsafe_read_document )
58
59
60 -- | The DTDs for everything that we consider \"Sport Info.\"
61 --
62 dtds :: [String]
63 dtds =
64 [ "CBASK_3PPctXML.dtd",
65 "Cbask_All_Tourn_Teams_XML.dtd",
66 "CBASK_AssistsXML.dtd",
67 "Cbask_Awards_XML.dtd",
68 "CBASK_BlocksXML.dtd",
69 "Cbask_Conf_Standings_XML.dtd",
70 "Cbask_DivII_III_Indv_Stats_XML.dtd",
71 "Cbask_DivII_Team_Stats_XML.dtd",
72 "Cbask_DivIII_Team_Stats_XML.dtd",
73 "CBASK_FGPctXML.dtd",
74 "CBASK_FoulsXML.dtd",
75 "CBASK_FTPctXML.dtd",
76 "Cbask_Indv_Scoring_XML.dtd",
77 "CBASK_MinutesXML.dtd",
78 "Cbask_Polls_XML.dtd",
79 "CBASK_ReboundsXML.dtd",
80 "CBASK_ScoringLeadersXML.dtd",
81 "Cbask_Team_ThreePT_Made_XML.dtd",
82 "Cbask_Team_ThreePT_PCT_XML.dtd",
83 "Cbask_Team_Win_Pct_XML.dtd",
84 "Cbask_Top_Twenty_Five_XML.dtd",
85 "CBASK_TopTwentyFiveResult_XML.dtd",
86 "Cbask_Tourn_Awards_XML.dtd",
87 "Cbask_Tourn_Champs_XML.dtd",
88 "Cbask_Tourn_Indiv_XML.dtd",
89 "Cbask_Tourn_Leaders_XML.dtd",
90 "Cbask_Tourn_MVP_XML.dtd",
91 "Cbask_Tourn_Records_XML.dtd",
92 "LeagueScheduleXML.dtd",
93 "minorscoresxml.dtd",
94 "Minor_Baseball_League_Leaders_XML.dtd",
95 "Minor_Baseball_Standings_XML.dtd",
96 "Minor_Baseball_Transactions_XML.dtd",
97 "mlbbattingavgxml.dtd",
98 "mlbdoublesleadersxml.dtd",
99 "MLBGamesPlayedXML.dtd",
100 "MLBGIDPXML.dtd",
101 "MLBHitByPitchXML.dtd",
102 "mlbhitsleadersxml.dtd",
103 "mlbhomerunsxml.dtd",
104 "MLBHRFreqXML.dtd",
105 "MLBIntWalksXML.dtd",
106 "MLBKORateXML.dtd",
107 "mlbonbasepctxml.dtd",
108 "MLBOPSXML.dtd",
109 "MLBPlateAppsXML.dtd",
110 "mlbrbisxml.dtd",
111 "mlbrunsleadersxml.dtd",
112 "MLBSacFliesXML.dtd",
113 "MLBSacrificesXML.dtd",
114 "MLBSBSuccessXML.dtd",
115 "mlbsluggingpctxml.dtd",
116 "mlbstandxml.dtd",
117 "mlbstandxml_preseason.dtd",
118 "mlbstolenbasexml.dtd",
119 "mlbtotalbasesleadersxml.dtd",
120 "mlbtriplesleadersxml.dtd",
121 "MLBWalkRateXML.dtd",
122 "mlbwalksleadersxml.dtd",
123 "MLBXtraBaseHitsXML.dtd",
124 "MLB_ERA_Leaders.dtd",
125 "MLB_Fielding_XML.dtd",
126 "MLB_Pitching_Appearances_Leaders.dtd",
127 "MLB_Pitching_Balks_Leaders.dtd",
128 "MLB_Pitching_CG_Leaders.dtd",
129 "MLB_Pitching_ER_Allowed_Leaders.dtd",
130 "MLB_Pitching_Hits_Allowed_Leaders.dtd",
131 "MLB_Pitching_Hit_Batters_Leaders.dtd",
132 "MLB_Pitching_HR_Allowed_Leaders.dtd",
133 "MLB_Pitching_IP_Leaders.dtd",
134 "MLB_Pitching_Runs_Allowed_Leaders.dtd",
135 "MLB_Pitching_Saves_Leaders.dtd",
136 "MLB_Pitching_Shut_Outs_Leaders.dtd",
137 "MLB_Pitching_Starts_Leaders.dtd",
138 "MLB_Pitching_Strike_Outs_Leaders.dtd",
139 "MLB_Pitching_Walks_Leaders.dtd",
140 "MLB_Pitching_WHIP_Leaders.dtd",
141 "MLB_Pitching_Wild_Pitches_Leaders.dtd",
142 "MLB_Pitching_Win_Percentage_Leaders.dtd",
143 "MLB_Pitching_WL_Leaders.dtd",
144 "NBA_Team_Stats_XML.dtd",
145 "NBA3PPctXML.dtd",
146 "NBAAssistsXML.dtd",
147 "NBABlocksXML.dtd",
148 "nbaconfrecxml.dtd",
149 "nbadaysxml.dtd",
150 "nbadivisionsxml.dtd",
151 "NBAFGPctXML.dtd",
152 "NBAFoulsXML.dtd",
153 "NBAFTPctXML.dtd",
154 "NBAMinutesXML.dtd",
155 "NBAReboundsXML.dtd",
156 "NBAScorersXML.dtd",
157 "nbastandxml.dtd",
158 "NBAStealsXML.dtd",
159 "nbateamleadersxml.dtd",
160 "nbatripledoublexml.dtd",
161 "NBATurnoversXML.dtd",
162 "NCAA_Conference_Schedule_XML.dtd",
163 "nflfirstdownxml.dtd",
164 "NFLFumbleLeaderXML.dtd",
165 "NFLGrassTurfDomeOutsideXML.dtd",
166 "NFLGiveTakeXML.dtd",
167 "NFLInside20XML.dtd",
168 "NFLInterceptionLeadersXML.dtd",
169 "NFLKickoffsXML.dtd",
170 "NFLMondayNightXML.dtd",
171 "NFLPassingLeadersXML.dtd",
172 "NFLPassLeadXML.dtd",
173 "NFLQBStartsXML.dtd",
174 "NFLReceivingLeadersXML.dtd",
175 "NFLRushingLeadersXML.dtd",
176 "NFLSackLeadersXML.dtd",
177 "nflstandxml.dtd",
178 "NFLTackleFFLeadersXML.dtd",
179 "NFLTeamRankingsXML.dtd",
180 "NFLTopKickoffReturnXML.dtd",
181 "NFLTopPerformanceXML.dtd",
182 "NFLTopPuntReturnXML.dtd",
183 "NFLTotalYardageXML.dtd",
184 "NFLYardsXML.dtd",
185 "NFL_KickingLeaders_XML.dtd",
186 "NFL_NBA_Draft_XML.dtd",
187 "NFL_PuntingLeaders_XML.dtd",
188 "NFL_Roster_XML.dtd",
189 "NFL_Team_Stats_XML.dtd",
190 "Transactions_XML.dtd",
191 "Weekly_Sched_XML.dtd",
192 "WNBA_Team_Leaders_XML.dtd",
193 "WNBA3PPctXML.dtd",
194 "WNBAAssistsXML.dtd",
195 "WNBABlocksXML.dtd",
196 "WNBAFGPctXML.dtd",
197 "WNBAFoulsXML.dtd",
198 "WNBAFTPctXML.dtd",
199 "WNBAMinutesXML.dtd",
200 "WNBAReboundsXML.dtd",
201 "WNBAScorersXML.dtd",
202 "wnbastandxml.dtd",
203 "WNBAStealsXML.dtd",
204 "WNBATurnoversXML.dtd" ]
205
206
207 -- | This serves as both the database and XML representation of a
208 -- SportInfo \<message\>.
209 --
210 data SportInfo =
211 SportInfo {
212 dtd :: String,
213 xml_file_id :: Int,
214 time_stamp :: UTCTime,
215 xml :: String }
216 deriving (Eq, Show)
217
218
219 -- | Attempt to parse a 'SportInfo' from an 'XmlTree'. If we cannot,
220 -- we fail with an error message.
221 --
222 parse_xml :: String -> XmlTree -> Either ParseError SportInfo
223 parse_xml dtdname xmltree = do
224 xmlfid <- parse_xmlfid xmltree
225 timestamp <- parse_xml_time_stamp xmltree
226 message <- parse_message xmltree
227 return $ SportInfo dtdname xmlfid timestamp (xshow [message])
228
229
230 --
231 -- Database code
232 --
233
234 instance DbImport SportInfo where
235 dbmigrate _ =
236 run_dbmigrate $ migrate (undefined :: SportInfo)
237
238 -- | We import a 'SportInfo' by inserting the whole thing at
239 -- once. Nothing fancy going on here.
240 dbimport msg = do
241 insert_ msg
242 return ImportSucceeded
243
244
245 -- | The database schema for SportInfo is trivial; all we need is for
246 -- the XML_File_ID to be unique.
247 --
248 mkPersist defaultCodegenConfig [groundhog|
249 - entity: SportInfo
250 dbName: sport_info
251 constructors:
252 - name: SportInfo
253 uniques:
254 - name: unique_sport_info
255 type: constraint
256 # Prevent multiple imports of the same message.
257 fields: [xml_file_id]
258 |]
259
260
261 --
262 -- Tasty Tests
263 --
264
265 -- | A list of all tests for this module.
266 --
267 sport_info_tests :: TestTree
268 sport_info_tests =
269 testGroup
270 "SportInfo tests"
271 [ test_accessors,
272 test_parse_xml_succeeds,
273 test_dbimport_succeeds ]
274
275
276 -- | Make sure the accessors work and that we can parse one file. Ok,
277 -- so the real point of this is to make the unused fields (dtd, xml,
278 -- ...) warning go away without having to mangle the groundhog code.
279 --
280 test_accessors :: TestTree
281 test_accessors = testCase "we can access a parsed sport_info" $ do
282 xmltree <- unsafe_read_document "test/xml/sportinfo/wnbastandxml.xml"
283 let Right t = parse_xml "wnbastandxml.dtd" xmltree
284 let a1 = dtd t
285 let ex1 = "wnbastandxml.dtd"
286 let a2 = xml_file_id t
287 let ex2 = 2011
288 let a3 = show $ time_stamp t
289 let ex3 = "2009-09-27 19:50:00 UTC"
290 let a4 = take 9 (xml t)
291 let ex4 = "<message>"
292 let actual = (a1,a2,a3,a4)
293 let expected = (ex1,ex2,ex3,ex4)
294 actual @?= expected
295
296
297 -- | Sample XML documents for SportInfo types.
298 --
299 sport_info_test_files :: [FilePath]
300 sport_info_test_files =
301 map (change_suffix . add_path) dtds
302 where
303 add_path = ("test/xml/sportinfo/" ++ )
304 change_suffix = replace ".dtd" ".xml"
305
306
307 -- | Make sure we can parse every element of 'sport_info_test_files'.
308 --
309 test_parse_xml_succeeds :: TestTree
310 test_parse_xml_succeeds =
311 testGroup "parse_xml" $ map check sport_info_test_files
312 where
313 check t = testCase t $ do
314 x <- unsafe_read_document t
315 let result = parse_xml "dummy" x
316 let actual = case result of -- isRight appears in base-4.7
317 Left _ -> False
318 Right _ -> True
319 let expected = True
320 actual @?= expected
321
322
323 -- | Ensure that each element of 'sport_info_test_files' can be imported
324 -- by counting the total number of database records (after
325 -- importing) and comparing it against the length of
326 -- 'sport_info_test_files'.
327 --
328 test_dbimport_succeeds :: TestTree
329 test_dbimport_succeeds = testCase "dbimport succeeds" $ do
330 xmltrees <- mapM unsafe_read_document sport_info_test_files
331 let msgs = rights $ map (parse_xml "dummy") xmltrees
332 actual <- withSqliteConn ":memory:" $ runDbConn $ do
333 runMigration silentMigrationLogger $
334 migrate (undefined :: SportInfo)
335 mapM_ dbimport msgs
336 countAll (undefined :: SportInfo)
337
338 actual @?= expected
339 where
340 expected = length sport_info_test_files