]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/SportInfo.hs
ffa8f6478ebb6f95195b9191ac46589f2fc53b59
[dead/htsn-import.git] / src / TSN / XML / SportInfo.hs
1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE QuasiQuotes #-}
4 {-# LANGUAGE RecordWildCards #-}
5 {-# LANGUAGE TemplateHaskell #-}
6 {-# LANGUAGE TypeFamilies #-}
7
8 -- | SportInfo represents a collection of DTDs that we don't really
9 -- handle but want to make available. The raw XML gets stored in the
10 -- database along with the XML_File_ID, but we don't parse any of it.
11 --
12 -- This is almost completely redundant with "TSN.XML.GameInfo", but
13 -- the redundancy is necessary: we need separate message types so
14 -- that we can have separate 'DbImport' instances. It would take
15 -- more code/work to abstract (if it's even possible) than to
16 -- duplicate.
17 --
18 module TSN.XML.SportInfo (
19 dtds,
20 parse_xml,
21 sport_info_tests,
22 -- * WARNING: these are private but exported to silence warnings
23 SportInfoConstructor(..) )
24 where
25
26 -- System imports.
27 import Data.Either ( rights )
28 import Data.String.Utils ( replace )
29 import Data.Time.Clock ( UTCTime )
30 import Database.Groundhog (
31 countAll,
32 insert_,
33 migrate,
34 runMigration,
35 silentMigrationLogger )
36 import Database.Groundhog.Generic ( runDbConn )
37 import Database.Groundhog.Sqlite ( withSqliteConn )
38 import Database.Groundhog.TH (
39 defaultCodegenConfig,
40 groundhog,
41 mkPersist )
42 import Test.Tasty ( TestTree, testGroup )
43 import Test.Tasty.HUnit ( (@?=), testCase )
44 import Text.XML.HXT.Core ( XmlTree )
45 import Text.XML.HXT.DOM.ShowXml ( xshow )
46
47 -- Local imports.
48 import TSN.DbImport (
49 DbImport(..),
50 ImportResult(..),
51 run_dbmigrate )
52 import TSN.Parse (
53 ParseError,
54 parse_message,
55 parse_xmlfid,
56 parse_xml_time_stamp )
57 import Xml ( unsafe_read_document )
58
59
60 -- | The DTDs for everything that we consider \"Sport Info.\"
61 --
62 dtds :: [String]
63 dtds =
64 [ "CBASK_3PPctXML.dtd",
65 "Cbask_All_Tourn_Teams_XML.dtd",
66 "CBASK_AssistsXML.dtd",
67 "Cbask_Awards_XML.dtd",
68 "CBASK_BlocksXML.dtd",
69 "Cbask_Conf_Standings_XML.dtd",
70 "Cbask_DivII_III_Indv_Stats_XML.dtd",
71 "Cbask_DivII_Team_Stats_XML.dtd",
72 "Cbask_DivIII_Team_Stats_XML.dtd",
73 "CBASK_FGPctXML.dtd",
74 "CBASK_FoulsXML.dtd",
75 "CBASK_FTPctXML.dtd",
76 "Cbask_Indv_Scoring_XML.dtd",
77 "CBASK_MinutesXML.dtd",
78 "Cbask_Polls_XML.dtd",
79 "CBASK_ReboundsXML.dtd",
80 "CBASK_ScoringLeadersXML.dtd",
81 "Cbask_Team_ThreePT_Made_XML.dtd",
82 "Cbask_Team_ThreePT_PCT_XML.dtd",
83 "Cbask_Team_Win_Pct_XML.dtd",
84 "Cbask_Top_Twenty_Five_XML.dtd",
85 "CBASK_TopTwentyFiveResult_XML.dtd",
86 "Cbask_Tourn_Awards_XML.dtd",
87 "Cbask_Tourn_Champs_XML.dtd",
88 "Cbask_Tourn_Indiv_XML.dtd",
89 "Cbask_Tourn_Leaders_XML.dtd",
90 "Cbask_Tourn_MVP_XML.dtd",
91 "Cbask_Tourn_Records_XML.dtd",
92 "LeagueScheduleXML.dtd",
93 "minorscoresxml.dtd",
94 "Minor_Baseball_League_Leaders_XML.dtd",
95 "Minor_Baseball_Standings_XML.dtd",
96 "Minor_Baseball_Transactions_XML.dtd",
97 "mlbbattingavgxml.dtd",
98 "mlbdoublesleadersxml.dtd",
99 "MLBGamesPlayedXML.dtd",
100 "MLBGIDPXML.dtd",
101 "MLBHitByPitchXML.dtd",
102 "mlbhitsleadersxml.dtd",
103 "mlbhomerunsxml.dtd",
104 "MLBHRFreqXML.dtd",
105 "MLBIntWalksXML.dtd",
106 "MLBKORateXML.dtd",
107 "mlbonbasepctxml.dtd",
108 "MLBOPSXML.dtd",
109 "MLBPlateAppsXML.dtd",
110 "mlbrbisxml.dtd",
111 "mlbrunsleadersxml.dtd",
112 "MLBSacFliesXML.dtd",
113 "MLBSacrificesXML.dtd",
114 "MLBSBSuccessXML.dtd",
115 "mlbsluggingpctxml.dtd",
116 "mlbstandxml.dtd",
117 "mlbstandxml_preseason.dtd",
118 "mlbstolenbasexml.dtd",
119 "mlbtotalbasesleadersxml.dtd",
120 "mlbtriplesleadersxml.dtd",
121 "MLBWalkRateXML.dtd",
122 "mlbwalksleadersxml.dtd",
123 "MLBXtraBaseHitsXML.dtd",
124 "MLB_ERA_Leaders.dtd",
125 "MLB_Fielding_XML.dtd",
126 "MLB_Pitching_Appearances_Leaders.dtd",
127 "MLB_Pitching_Balks_Leaders.dtd",
128 "MLB_Pitching_CG_Leaders.dtd",
129 "MLB_Pitching_ER_Allowed_Leaders.dtd",
130 "MLB_Pitching_Hits_Allowed_Leaders.dtd",
131 "MLB_Pitching_Hit_Batters_Leaders.dtd",
132 "MLB_Pitching_HR_Allowed_Leaders.dtd",
133 "MLB_Pitching_IP_Leaders.dtd",
134 "MLB_Pitching_Runs_Allowed_Leaders.dtd",
135 "MLB_Pitching_Saves_Leaders.dtd",
136 "MLB_Pitching_Shut_Outs_Leaders.dtd",
137 "MLB_Pitching_Starts_Leaders.dtd",
138 "MLB_Pitching_Strike_Outs_Leaders.dtd",
139 "MLB_Pitching_Walks_Leaders.dtd",
140 "MLB_Pitching_WHIP_Leaders.dtd",
141 "MLB_Pitching_Wild_Pitches_Leaders.dtd",
142 "MLB_Pitching_Win_Percentage_Leaders.dtd",
143 "MLB_Pitching_WL_Leaders.dtd",
144 "NBA_Team_Stats_XML.dtd",
145 "NBA3PPctXML.dtd",
146 "NBAAssistsXML.dtd",
147 "NBABlocksXML.dtd",
148 "nbaconfrecxml.dtd",
149 "nbadaysxml.dtd",
150 "nbadivisionsxml.dtd",
151 "NBAFGPctXML.dtd",
152 "NBAFoulsXML.dtd",
153 "NBAFTPctXML.dtd",
154 "NBAMinutesXML.dtd",
155 "NBAReboundsXML.dtd",
156 "NBAScorersXML.dtd",
157 "nbastandxml.dtd",
158 "NBAStealsXML.dtd",
159 "nbateamleadersxml.dtd",
160 "nbatripledoublexml.dtd",
161 "NBATurnoversXML.dtd",
162 "NCAA_Conference_Schedule_XML.dtd",
163 "nflfirstdownxml.dtd",
164 "NFLFumbleLeaderXML.dtd",
165 "NFLGrassTurfDomeOutsideXML.dtd",
166 "NFLGiveTakeXML.dtd",
167 "NFLInside20XML.dtd",
168 "NFLInterceptionLeadersXML.dtd",
169 "NFLKickoffsXML.dtd",
170 "NFLMondayNightXML.dtd",
171 "NFLPassingLeadersXML.dtd",
172 "NFLPassLeadXML.dtd",
173 "NFLQBStartsXML.dtd",
174 "NFLReceivingLeadersXML.dtd",
175 "NFLRushingLeadersXML.dtd",
176 "NFLSackLeadersXML.dtd",
177 "nflstandxml.dtd",
178 "NFLTeamRankingsXML.dtd",
179 "NFLTopKickoffReturnXML.dtd",
180 "NFLTopPerformanceXML.dtd",
181 "NFLTotalYardageXML.dtd",
182 "NFL_KickingLeaders_XML.dtd",
183 "NFL_NBA_Draft_XML.dtd",
184 "NFL_Roster_XML.dtd",
185 "NFLTackleFFLeadersXML.dtd",
186 "NFL_Team_Stats_XML.dtd",
187 "Transactions_XML.dtd",
188 "Weekly_Sched_XML.dtd",
189 "WNBA_Team_Leaders_XML.dtd",
190 "WNBA3PPctXML.dtd",
191 "WNBAAssistsXML.dtd",
192 "WNBABlocksXML.dtd",
193 "WNBAFGPctXML.dtd",
194 "WNBAFoulsXML.dtd",
195 "WNBAFTPctXML.dtd",
196 "WNBAMinutesXML.dtd",
197 "WNBAReboundsXML.dtd",
198 "WNBAScorersXML.dtd",
199 "wnbastandxml.dtd",
200 "WNBAStealsXML.dtd",
201 "WNBATurnoversXML.dtd" ]
202
203
204 -- | This serves as both the database and XML representation of a
205 -- SportInfo \<message\>.
206 --
207 data SportInfo =
208 SportInfo {
209 dtd :: String,
210 xml_file_id :: Int,
211 time_stamp :: UTCTime,
212 xml :: String }
213 deriving (Eq, Show)
214
215
216 -- | Attempt to parse a 'SportInfo' from an 'XmlTree'. If we cannot,
217 -- we fail with an error message.
218 --
219 parse_xml :: String -> XmlTree -> Either ParseError SportInfo
220 parse_xml dtdname xmltree = do
221 xmlfid <- parse_xmlfid xmltree
222 timestamp <- parse_xml_time_stamp xmltree
223 message <- parse_message xmltree
224 return $ SportInfo dtdname xmlfid timestamp (xshow [message])
225
226
227 --
228 -- Database code
229 --
230
231 instance DbImport SportInfo where
232 dbmigrate _ =
233 run_dbmigrate $ migrate (undefined :: SportInfo)
234
235 -- | We import a 'SportInfo' by inserting the whole thing at
236 -- once. Nothing fancy going on here.
237 dbimport msg = do
238 insert_ msg
239 return ImportSucceeded
240
241
242 -- | The database schema for SportInfo is trivial; all we need is for
243 -- the XML_File_ID to be unique.
244 --
245 mkPersist defaultCodegenConfig [groundhog|
246 - entity: SportInfo
247 dbName: sport_info
248 constructors:
249 - name: SportInfo
250 uniques:
251 - name: unique_sport_info
252 type: constraint
253 # Prevent multiple imports of the same message.
254 fields: [xml_file_id]
255 |]
256
257
258 --
259 -- Tasty Tests
260 --
261
262 -- | A list of all tests for this module.
263 --
264 sport_info_tests :: TestTree
265 sport_info_tests =
266 testGroup
267 "SportInfo tests"
268 [ test_accessors,
269 test_parse_xml_succeeds,
270 test_dbimport_succeeds ]
271
272
273 -- | Make sure the accessors work and that we can parse one file. Ok,
274 -- so the real point of this is to make the unused fields (dtd, xml,
275 -- ...) warning go away without having to mangle the groundhog code.
276 --
277 test_accessors :: TestTree
278 test_accessors = testCase "we can access a parsed sport_info" $ do
279 xmltree <- unsafe_read_document "test/xml/sportinfo/wnbastandxml.xml"
280 let Right t = parse_xml "wnbastandxml.dtd" xmltree
281 let a1 = dtd t
282 let ex1 = "wnbastandxml.dtd"
283 let a2 = xml_file_id t
284 let ex2 = 2011
285 let a3 = show $ time_stamp t
286 let ex3 = "2009-09-27 19:50:00 UTC"
287 let a4 = take 9 (xml t)
288 let ex4 = "<message>"
289 let actual = (a1,a2,a3,a4)
290 let expected = (ex1,ex2,ex3,ex4)
291 actual @?= expected
292
293
294 -- | Sample XML documents for SportInfo types.
295 --
296 sport_info_test_files :: [FilePath]
297 sport_info_test_files =
298 map (change_suffix . add_path) dtds
299 where
300 add_path = ("test/xml/sportinfo/" ++ )
301 change_suffix = replace ".dtd" ".xml"
302
303
304 -- | Make sure we can parse every element of 'sport_info_test_files'.
305 --
306 test_parse_xml_succeeds :: TestTree
307 test_parse_xml_succeeds =
308 testGroup "parse_xml" $ map check sport_info_test_files
309 where
310 check t = testCase t $ do
311 x <- unsafe_read_document t
312 let result = parse_xml "dummy" x
313 let actual = case result of -- isRight appears in base-4.7
314 Left _ -> False
315 Right _ -> True
316 let expected = True
317 actual @?= expected
318
319
320 -- | Ensure that each element of 'sport_info_test_files' can be imported
321 -- by counting the total number of database records (after
322 -- importing) and comparing it against the length of
323 -- 'sport_info_test_files'.
324 --
325 test_dbimport_succeeds :: TestTree
326 test_dbimport_succeeds = testCase "dbimport succeeds" $ do
327 xmltrees <- mapM unsafe_read_document sport_info_test_files
328 let msgs = rights $ map (parse_xml "dummy") xmltrees
329 actual <- withSqliteConn ":memory:" $ runDbConn $ do
330 runMigration silentMigrationLogger $
331 migrate (undefined :: SportInfo)
332 mapM_ dbimport msgs
333 countAll (undefined :: SportInfo)
334
335 actual @?= expected
336 where
337 expected = length sport_info_test_files