]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/SportInfo.hs
Add SportInfo support for NFLGrassTurfDomeOutsideXML.dtd.
[dead/htsn-import.git] / src / TSN / XML / SportInfo.hs
1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE QuasiQuotes #-}
4 {-# LANGUAGE RecordWildCards #-}
5 {-# LANGUAGE TemplateHaskell #-}
6 {-# LANGUAGE TypeFamilies #-}
7
8 -- | SportInfo represents a collection of DTDs that we don't really
9 -- handle but want to make available. The raw XML gets stored in the
10 -- database along with the XML_File_ID, but we don't parse any of it.
11 --
12 -- This is almost completely redundant with "TSN.XML.GameInfo", but
13 -- the redundancy is necessary: we need separate message types so
14 -- that we can have separate 'DbImport' instances. It would take
15 -- more code/work to abstract (if it's even possible) than to
16 -- duplicate.
17 --
18 module TSN.XML.SportInfo (
19 dtds,
20 parse_xml,
21 sport_info_tests,
22 -- * WARNING: these are private but exported to silence warnings
23 SportInfoConstructor(..) )
24 where
25
26 -- System imports.
27 import Data.Either ( rights )
28 import Data.String.Utils ( replace )
29 import Data.Time.Clock ( UTCTime )
30 import Database.Groundhog (
31 countAll,
32 insert_,
33 migrate,
34 runMigration,
35 silentMigrationLogger )
36 import Database.Groundhog.Generic ( runDbConn )
37 import Database.Groundhog.Sqlite ( withSqliteConn )
38 import Database.Groundhog.TH (
39 defaultCodegenConfig,
40 groundhog,
41 mkPersist )
42 import Test.Tasty ( TestTree, testGroup )
43 import Test.Tasty.HUnit ( (@?=), testCase )
44 import Text.XML.HXT.Core ( XmlTree )
45 import Text.XML.HXT.DOM.ShowXml ( xshow )
46
47 -- Local imports.
48 import TSN.DbImport (
49 DbImport(..),
50 ImportResult(..),
51 run_dbmigrate )
52 import TSN.Parse (
53 ParseError,
54 parse_message,
55 parse_xmlfid,
56 parse_xml_time_stamp )
57 import Xml ( unsafe_read_document )
58
59
60 -- | The DTDs for everything that we consider \"Sport Info.\"
61 --
62 dtds :: [String]
63 dtds =
64 [ "CBASK_3PPctXML.dtd",
65 "Cbask_All_Tourn_Teams_XML.dtd",
66 "CBASK_AssistsXML.dtd",
67 "Cbask_Awards_XML.dtd",
68 "CBASK_BlocksXML.dtd",
69 "Cbask_Conf_Standings_XML.dtd",
70 "Cbask_DivII_III_Indv_Stats_XML.dtd",
71 "Cbask_DivII_Team_Stats_XML.dtd",
72 "Cbask_DivIII_Team_Stats_XML.dtd",
73 "CBASK_FGPctXML.dtd",
74 "CBASK_FoulsXML.dtd",
75 "CBASK_FTPctXML.dtd",
76 "Cbask_Indv_Scoring_XML.dtd",
77 "CBASK_MinutesXML.dtd",
78 "Cbask_Polls_XML.dtd",
79 "CBASK_ReboundsXML.dtd",
80 "CBASK_ScoringLeadersXML.dtd",
81 "Cbask_Team_ThreePT_Made_XML.dtd",
82 "Cbask_Team_ThreePT_PCT_XML.dtd",
83 "Cbask_Team_Win_Pct_XML.dtd",
84 "Cbask_Top_Twenty_Five_XML.dtd",
85 "CBASK_TopTwentyFiveResult_XML.dtd",
86 "Cbask_Tourn_Awards_XML.dtd",
87 "Cbask_Tourn_Champs_XML.dtd",
88 "Cbask_Tourn_Indiv_XML.dtd",
89 "Cbask_Tourn_Leaders_XML.dtd",
90 "Cbask_Tourn_MVP_XML.dtd",
91 "Cbask_Tourn_Records_XML.dtd",
92 "LeagueScheduleXML.dtd",
93 "minorscoresxml.dtd",
94 "Minor_Baseball_League_Leaders_XML.dtd",
95 "Minor_Baseball_Standings_XML.dtd",
96 "Minor_Baseball_Transactions_XML.dtd",
97 "mlbbattingavgxml.dtd",
98 "mlbdoublesleadersxml.dtd",
99 "MLBGamesPlayedXML.dtd",
100 "MLBGIDPXML.dtd",
101 "MLBHitByPitchXML.dtd",
102 "mlbhitsleadersxml.dtd",
103 "mlbhomerunsxml.dtd",
104 "MLBHRFreqXML.dtd",
105 "MLBIntWalksXML.dtd",
106 "MLBKORateXML.dtd",
107 "mlbonbasepctxml.dtd",
108 "MLBOPSXML.dtd",
109 "MLBPlateAppsXML.dtd",
110 "mlbrbisxml.dtd",
111 "mlbrunsleadersxml.dtd",
112 "MLBSacFliesXML.dtd",
113 "MLBSacrificesXML.dtd",
114 "MLBSBSuccessXML.dtd",
115 "mlbsluggingpctxml.dtd",
116 "mlbstandxml.dtd",
117 "mlbstandxml_preseason.dtd",
118 "mlbstolenbasexml.dtd",
119 "mlbtotalbasesleadersxml.dtd",
120 "mlbtriplesleadersxml.dtd",
121 "MLBWalkRateXML.dtd",
122 "mlbwalksleadersxml.dtd",
123 "MLBXtraBaseHitsXML.dtd",
124 "MLB_ERA_Leaders.dtd",
125 "MLB_Fielding_XML.dtd",
126 "MLB_Pitching_Appearances_Leaders.dtd",
127 "MLB_Pitching_Balks_Leaders.dtd",
128 "MLB_Pitching_CG_Leaders.dtd",
129 "MLB_Pitching_ER_Allowed_Leaders.dtd",
130 "MLB_Pitching_Hits_Allowed_Leaders.dtd",
131 "MLB_Pitching_Hit_Batters_Leaders.dtd",
132 "MLB_Pitching_HR_Allowed_Leaders.dtd",
133 "MLB_Pitching_IP_Leaders.dtd",
134 "MLB_Pitching_Runs_Allowed_Leaders.dtd",
135 "MLB_Pitching_Saves_Leaders.dtd",
136 "MLB_Pitching_Shut_Outs_Leaders.dtd",
137 "MLB_Pitching_Starts_Leaders.dtd",
138 "MLB_Pitching_Strike_Outs_Leaders.dtd",
139 "MLB_Pitching_Walks_Leaders.dtd",
140 "MLB_Pitching_WHIP_Leaders.dtd",
141 "MLB_Pitching_Wild_Pitches_Leaders.dtd",
142 "MLB_Pitching_Win_Percentage_Leaders.dtd",
143 "MLB_Pitching_WL_Leaders.dtd",
144 "NBA_Team_Stats_XML.dtd",
145 "NBA3PPctXML.dtd",
146 "NBAAssistsXML.dtd",
147 "NBABlocksXML.dtd",
148 "nbaconfrecxml.dtd",
149 "nbadaysxml.dtd",
150 "nbadivisionsxml.dtd",
151 "NBAFGPctXML.dtd",
152 "NBAFoulsXML.dtd",
153 "NBAFTPctXML.dtd",
154 "NBAMinutesXML.dtd",
155 "NBAReboundsXML.dtd",
156 "NBAScorersXML.dtd",
157 "nbastandxml.dtd",
158 "NBAStealsXML.dtd",
159 "nbateamleadersxml.dtd",
160 "nbatripledoublexml.dtd",
161 "NBATurnoversXML.dtd",
162 "NCAA_Conference_Schedule_XML.dtd",
163 "nflfirstdownxml.dtd",
164 "NFLFumbleLeaderXML.dtd",
165 "NFLGrassTurfDomeOutsideXML.dtd",
166 "NFLGiveTakeXML.dtd",
167 "NFLInside20XML.dtd",
168 "NFLKickoffsXML.dtd",
169 "NFLMondayNightXML.dtd",
170 "NFLPassLeadXML.dtd",
171 "NFLQBStartsXML.dtd",
172 "NFLSackLeadersXML.dtd",
173 "nflstandxml.dtd",
174 "NFLTeamRankingsXML.dtd",
175 "NFLTopPerformanceXML.dtd",
176 "NFLTotalYardageXML.dtd",
177 "NFL_KickingLeaders_XML.dtd",
178 "NFL_NBA_Draft_XML.dtd",
179 "NFL_Roster_XML.dtd",
180 "NFL_Team_Stats_XML.dtd",
181 "Transactions_XML.dtd",
182 "Weekly_Sched_XML.dtd",
183 "WNBA_Team_Leaders_XML.dtd",
184 "WNBA3PPctXML.dtd",
185 "WNBAAssistsXML.dtd",
186 "WNBABlocksXML.dtd",
187 "WNBAFGPctXML.dtd",
188 "WNBAFoulsXML.dtd",
189 "WNBAFTPctXML.dtd",
190 "WNBAMinutesXML.dtd",
191 "WNBAReboundsXML.dtd",
192 "WNBAScorersXML.dtd",
193 "wnbastandxml.dtd",
194 "WNBAStealsXML.dtd",
195 "WNBATurnoversXML.dtd" ]
196
197
198 -- | This serves as both the database and XML representation of a
199 -- SportInfo \<message\>.
200 --
201 data SportInfo =
202 SportInfo {
203 dtd :: String,
204 xml_file_id :: Int,
205 time_stamp :: UTCTime,
206 xml :: String }
207 deriving (Eq, Show)
208
209
210 -- | Attempt to parse a 'SportInfo' from an 'XmlTree'. If we cannot,
211 -- we fail with an error message.
212 --
213 parse_xml :: String -> XmlTree -> Either ParseError SportInfo
214 parse_xml dtdname xmltree = do
215 xmlfid <- parse_xmlfid xmltree
216 timestamp <- parse_xml_time_stamp xmltree
217 message <- parse_message xmltree
218 return $ SportInfo dtdname xmlfid timestamp (xshow [message])
219
220
221 --
222 -- Database code
223 --
224
225 instance DbImport SportInfo where
226 dbmigrate _ =
227 run_dbmigrate $ migrate (undefined :: SportInfo)
228
229 -- | We import a 'SportInfo' by inserting the whole thing at
230 -- once. Nothing fancy going on here.
231 dbimport msg = do
232 insert_ msg
233 return ImportSucceeded
234
235
236 -- | The database schema for SportInfo is trivial; all we need is for
237 -- the XML_File_ID to be unique.
238 --
239 mkPersist defaultCodegenConfig [groundhog|
240 - entity: SportInfo
241 dbName: sport_info
242 constructors:
243 - name: SportInfo
244 uniques:
245 - name: unique_sport_info
246 type: constraint
247 # Prevent multiple imports of the same message.
248 fields: [xml_file_id]
249 |]
250
251
252 --
253 -- Tasty Tests
254 --
255
256 -- | A list of all tests for this module.
257 --
258 sport_info_tests :: TestTree
259 sport_info_tests =
260 testGroup
261 "SportInfo tests"
262 [ test_accessors,
263 test_parse_xml_succeeds,
264 test_dbimport_succeeds ]
265
266
267 -- | Make sure the accessors work and that we can parse one file. Ok,
268 -- so the real point of this is to make the unused fields (dtd, xml,
269 -- ...) warning go away without having to mangle the groundhog code.
270 --
271 test_accessors :: TestTree
272 test_accessors = testCase "we can access a parsed sport_info" $ do
273 xmltree <- unsafe_read_document "test/xml/sportinfo/wnbastandxml.xml"
274 let Right t = parse_xml "wnbastandxml.dtd" xmltree
275 let a1 = dtd t
276 let ex1 = "wnbastandxml.dtd"
277 let a2 = xml_file_id t
278 let ex2 = 2011
279 let a3 = show $ time_stamp t
280 let ex3 = "2009-09-27 19:50:00 UTC"
281 let a4 = take 9 (xml t)
282 let ex4 = "<message>"
283 let actual = (a1,a2,a3,a4)
284 let expected = (ex1,ex2,ex3,ex4)
285 actual @?= expected
286
287
288 -- | Sample XML documents for SportInfo types.
289 --
290 sport_info_test_files :: [FilePath]
291 sport_info_test_files =
292 map (change_suffix . add_path) dtds
293 where
294 add_path = ("test/xml/sportinfo/" ++ )
295 change_suffix = replace ".dtd" ".xml"
296
297
298 -- | Make sure we can parse every element of 'sport_info_test_files'.
299 --
300 test_parse_xml_succeeds :: TestTree
301 test_parse_xml_succeeds =
302 testGroup "parse_xml" $ map check sport_info_test_files
303 where
304 check t = testCase t $ do
305 x <- unsafe_read_document t
306 let result = parse_xml "dummy" x
307 let actual = case result of -- isRight appears in base-4.7
308 Left _ -> False
309 Right _ -> True
310 let expected = True
311 actual @?= expected
312
313
314 -- | Ensure that each element of 'sport_info_test_files' can be imported
315 -- by counting the total number of database records (after
316 -- importing) and comparing it against the length of
317 -- 'sport_info_test_files'.
318 --
319 test_dbimport_succeeds :: TestTree
320 test_dbimport_succeeds = testCase "dbimport succeeds" $ do
321 xmltrees <- mapM unsafe_read_document sport_info_test_files
322 let msgs = rights $ map (parse_xml "dummy") xmltrees
323 actual <- withSqliteConn ":memory:" $ runDbConn $ do
324 runMigration silentMigrationLogger $
325 migrate (undefined :: SportInfo)
326 mapM_ dbimport msgs
327 countAll (undefined :: SportInfo)
328
329 actual @?= expected
330 where
331 expected = length sport_info_test_files