]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/SportInfo.hs
Add SportInfo support for NFLPassingLeadersXML.dtd.
[dead/htsn-import.git] / src / TSN / XML / SportInfo.hs
1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE QuasiQuotes #-}
4 {-# LANGUAGE RecordWildCards #-}
5 {-# LANGUAGE TemplateHaskell #-}
6 {-# LANGUAGE TypeFamilies #-}
7
8 -- | SportInfo represents a collection of DTDs that we don't really
9 -- handle but want to make available. The raw XML gets stored in the
10 -- database along with the XML_File_ID, but we don't parse any of it.
11 --
12 -- This is almost completely redundant with "TSN.XML.GameInfo", but
13 -- the redundancy is necessary: we need separate message types so
14 -- that we can have separate 'DbImport' instances. It would take
15 -- more code/work to abstract (if it's even possible) than to
16 -- duplicate.
17 --
18 module TSN.XML.SportInfo (
19 dtds,
20 parse_xml,
21 sport_info_tests,
22 -- * WARNING: these are private but exported to silence warnings
23 SportInfoConstructor(..) )
24 where
25
26 -- System imports.
27 import Data.Either ( rights )
28 import Data.String.Utils ( replace )
29 import Data.Time.Clock ( UTCTime )
30 import Database.Groundhog (
31 countAll,
32 insert_,
33 migrate,
34 runMigration,
35 silentMigrationLogger )
36 import Database.Groundhog.Generic ( runDbConn )
37 import Database.Groundhog.Sqlite ( withSqliteConn )
38 import Database.Groundhog.TH (
39 defaultCodegenConfig,
40 groundhog,
41 mkPersist )
42 import Test.Tasty ( TestTree, testGroup )
43 import Test.Tasty.HUnit ( (@?=), testCase )
44 import Text.XML.HXT.Core ( XmlTree )
45 import Text.XML.HXT.DOM.ShowXml ( xshow )
46
47 -- Local imports.
48 import TSN.DbImport (
49 DbImport(..),
50 ImportResult(..),
51 run_dbmigrate )
52 import TSN.Parse (
53 ParseError,
54 parse_message,
55 parse_xmlfid,
56 parse_xml_time_stamp )
57 import Xml ( unsafe_read_document )
58
59
60 -- | The DTDs for everything that we consider \"Sport Info.\"
61 --
62 dtds :: [String]
63 dtds =
64 [ "CBASK_3PPctXML.dtd",
65 "Cbask_All_Tourn_Teams_XML.dtd",
66 "CBASK_AssistsXML.dtd",
67 "Cbask_Awards_XML.dtd",
68 "CBASK_BlocksXML.dtd",
69 "Cbask_Conf_Standings_XML.dtd",
70 "Cbask_DivII_III_Indv_Stats_XML.dtd",
71 "Cbask_DivII_Team_Stats_XML.dtd",
72 "Cbask_DivIII_Team_Stats_XML.dtd",
73 "CBASK_FGPctXML.dtd",
74 "CBASK_FoulsXML.dtd",
75 "CBASK_FTPctXML.dtd",
76 "Cbask_Indv_Scoring_XML.dtd",
77 "CBASK_MinutesXML.dtd",
78 "Cbask_Polls_XML.dtd",
79 "CBASK_ReboundsXML.dtd",
80 "CBASK_ScoringLeadersXML.dtd",
81 "Cbask_Team_ThreePT_Made_XML.dtd",
82 "Cbask_Team_ThreePT_PCT_XML.dtd",
83 "Cbask_Team_Win_Pct_XML.dtd",
84 "Cbask_Top_Twenty_Five_XML.dtd",
85 "CBASK_TopTwentyFiveResult_XML.dtd",
86 "Cbask_Tourn_Awards_XML.dtd",
87 "Cbask_Tourn_Champs_XML.dtd",
88 "Cbask_Tourn_Indiv_XML.dtd",
89 "Cbask_Tourn_Leaders_XML.dtd",
90 "Cbask_Tourn_MVP_XML.dtd",
91 "Cbask_Tourn_Records_XML.dtd",
92 "LeagueScheduleXML.dtd",
93 "minorscoresxml.dtd",
94 "Minor_Baseball_League_Leaders_XML.dtd",
95 "Minor_Baseball_Standings_XML.dtd",
96 "Minor_Baseball_Transactions_XML.dtd",
97 "mlbbattingavgxml.dtd",
98 "mlbdoublesleadersxml.dtd",
99 "MLBGamesPlayedXML.dtd",
100 "MLBGIDPXML.dtd",
101 "MLBHitByPitchXML.dtd",
102 "mlbhitsleadersxml.dtd",
103 "mlbhomerunsxml.dtd",
104 "MLBHRFreqXML.dtd",
105 "MLBIntWalksXML.dtd",
106 "MLBKORateXML.dtd",
107 "mlbonbasepctxml.dtd",
108 "MLBOPSXML.dtd",
109 "MLBPlateAppsXML.dtd",
110 "mlbrbisxml.dtd",
111 "mlbrunsleadersxml.dtd",
112 "MLBSacFliesXML.dtd",
113 "MLBSacrificesXML.dtd",
114 "MLBSBSuccessXML.dtd",
115 "mlbsluggingpctxml.dtd",
116 "mlbstandxml.dtd",
117 "mlbstandxml_preseason.dtd",
118 "mlbstolenbasexml.dtd",
119 "mlbtotalbasesleadersxml.dtd",
120 "mlbtriplesleadersxml.dtd",
121 "MLBWalkRateXML.dtd",
122 "mlbwalksleadersxml.dtd",
123 "MLBXtraBaseHitsXML.dtd",
124 "MLB_ERA_Leaders.dtd",
125 "MLB_Fielding_XML.dtd",
126 "MLB_Pitching_Appearances_Leaders.dtd",
127 "MLB_Pitching_Balks_Leaders.dtd",
128 "MLB_Pitching_CG_Leaders.dtd",
129 "MLB_Pitching_ER_Allowed_Leaders.dtd",
130 "MLB_Pitching_Hits_Allowed_Leaders.dtd",
131 "MLB_Pitching_Hit_Batters_Leaders.dtd",
132 "MLB_Pitching_HR_Allowed_Leaders.dtd",
133 "MLB_Pitching_IP_Leaders.dtd",
134 "MLB_Pitching_Runs_Allowed_Leaders.dtd",
135 "MLB_Pitching_Saves_Leaders.dtd",
136 "MLB_Pitching_Shut_Outs_Leaders.dtd",
137 "MLB_Pitching_Starts_Leaders.dtd",
138 "MLB_Pitching_Strike_Outs_Leaders.dtd",
139 "MLB_Pitching_Walks_Leaders.dtd",
140 "MLB_Pitching_WHIP_Leaders.dtd",
141 "MLB_Pitching_Wild_Pitches_Leaders.dtd",
142 "MLB_Pitching_Win_Percentage_Leaders.dtd",
143 "MLB_Pitching_WL_Leaders.dtd",
144 "NBA_Team_Stats_XML.dtd",
145 "NBA3PPctXML.dtd",
146 "NBAAssistsXML.dtd",
147 "NBABlocksXML.dtd",
148 "nbaconfrecxml.dtd",
149 "nbadaysxml.dtd",
150 "nbadivisionsxml.dtd",
151 "NBAFGPctXML.dtd",
152 "NBAFoulsXML.dtd",
153 "NBAFTPctXML.dtd",
154 "NBAMinutesXML.dtd",
155 "NBAReboundsXML.dtd",
156 "NBAScorersXML.dtd",
157 "nbastandxml.dtd",
158 "NBAStealsXML.dtd",
159 "nbateamleadersxml.dtd",
160 "nbatripledoublexml.dtd",
161 "NBATurnoversXML.dtd",
162 "NCAA_Conference_Schedule_XML.dtd",
163 "nflfirstdownxml.dtd",
164 "NFLFumbleLeaderXML.dtd",
165 "NFLGrassTurfDomeOutsideXML.dtd",
166 "NFLGiveTakeXML.dtd",
167 "NFLInside20XML.dtd",
168 "NFLInterceptionLeadersXML.dtd",
169 "NFLKickoffsXML.dtd",
170 "NFLMondayNightXML.dtd",
171 "NFLPassingLeadersXML.dtd",
172 "NFLPassLeadXML.dtd",
173 "NFLQBStartsXML.dtd",
174 "NFLSackLeadersXML.dtd",
175 "nflstandxml.dtd",
176 "NFLTeamRankingsXML.dtd",
177 "NFLTopPerformanceXML.dtd",
178 "NFLTotalYardageXML.dtd",
179 "NFL_KickingLeaders_XML.dtd",
180 "NFL_NBA_Draft_XML.dtd",
181 "NFL_Roster_XML.dtd",
182 "NFL_Team_Stats_XML.dtd",
183 "Transactions_XML.dtd",
184 "Weekly_Sched_XML.dtd",
185 "WNBA_Team_Leaders_XML.dtd",
186 "WNBA3PPctXML.dtd",
187 "WNBAAssistsXML.dtd",
188 "WNBABlocksXML.dtd",
189 "WNBAFGPctXML.dtd",
190 "WNBAFoulsXML.dtd",
191 "WNBAFTPctXML.dtd",
192 "WNBAMinutesXML.dtd",
193 "WNBAReboundsXML.dtd",
194 "WNBAScorersXML.dtd",
195 "wnbastandxml.dtd",
196 "WNBAStealsXML.dtd",
197 "WNBATurnoversXML.dtd" ]
198
199
200 -- | This serves as both the database and XML representation of a
201 -- SportInfo \<message\>.
202 --
203 data SportInfo =
204 SportInfo {
205 dtd :: String,
206 xml_file_id :: Int,
207 time_stamp :: UTCTime,
208 xml :: String }
209 deriving (Eq, Show)
210
211
212 -- | Attempt to parse a 'SportInfo' from an 'XmlTree'. If we cannot,
213 -- we fail with an error message.
214 --
215 parse_xml :: String -> XmlTree -> Either ParseError SportInfo
216 parse_xml dtdname xmltree = do
217 xmlfid <- parse_xmlfid xmltree
218 timestamp <- parse_xml_time_stamp xmltree
219 message <- parse_message xmltree
220 return $ SportInfo dtdname xmlfid timestamp (xshow [message])
221
222
223 --
224 -- Database code
225 --
226
227 instance DbImport SportInfo where
228 dbmigrate _ =
229 run_dbmigrate $ migrate (undefined :: SportInfo)
230
231 -- | We import a 'SportInfo' by inserting the whole thing at
232 -- once. Nothing fancy going on here.
233 dbimport msg = do
234 insert_ msg
235 return ImportSucceeded
236
237
238 -- | The database schema for SportInfo is trivial; all we need is for
239 -- the XML_File_ID to be unique.
240 --
241 mkPersist defaultCodegenConfig [groundhog|
242 - entity: SportInfo
243 dbName: sport_info
244 constructors:
245 - name: SportInfo
246 uniques:
247 - name: unique_sport_info
248 type: constraint
249 # Prevent multiple imports of the same message.
250 fields: [xml_file_id]
251 |]
252
253
254 --
255 -- Tasty Tests
256 --
257
258 -- | A list of all tests for this module.
259 --
260 sport_info_tests :: TestTree
261 sport_info_tests =
262 testGroup
263 "SportInfo tests"
264 [ test_accessors,
265 test_parse_xml_succeeds,
266 test_dbimport_succeeds ]
267
268
269 -- | Make sure the accessors work and that we can parse one file. Ok,
270 -- so the real point of this is to make the unused fields (dtd, xml,
271 -- ...) warning go away without having to mangle the groundhog code.
272 --
273 test_accessors :: TestTree
274 test_accessors = testCase "we can access a parsed sport_info" $ do
275 xmltree <- unsafe_read_document "test/xml/sportinfo/wnbastandxml.xml"
276 let Right t = parse_xml "wnbastandxml.dtd" xmltree
277 let a1 = dtd t
278 let ex1 = "wnbastandxml.dtd"
279 let a2 = xml_file_id t
280 let ex2 = 2011
281 let a3 = show $ time_stamp t
282 let ex3 = "2009-09-27 19:50:00 UTC"
283 let a4 = take 9 (xml t)
284 let ex4 = "<message>"
285 let actual = (a1,a2,a3,a4)
286 let expected = (ex1,ex2,ex3,ex4)
287 actual @?= expected
288
289
290 -- | Sample XML documents for SportInfo types.
291 --
292 sport_info_test_files :: [FilePath]
293 sport_info_test_files =
294 map (change_suffix . add_path) dtds
295 where
296 add_path = ("test/xml/sportinfo/" ++ )
297 change_suffix = replace ".dtd" ".xml"
298
299
300 -- | Make sure we can parse every element of 'sport_info_test_files'.
301 --
302 test_parse_xml_succeeds :: TestTree
303 test_parse_xml_succeeds =
304 testGroup "parse_xml" $ map check sport_info_test_files
305 where
306 check t = testCase t $ do
307 x <- unsafe_read_document t
308 let result = parse_xml "dummy" x
309 let actual = case result of -- isRight appears in base-4.7
310 Left _ -> False
311 Right _ -> True
312 let expected = True
313 actual @?= expected
314
315
316 -- | Ensure that each element of 'sport_info_test_files' can be imported
317 -- by counting the total number of database records (after
318 -- importing) and comparing it against the length of
319 -- 'sport_info_test_files'.
320 --
321 test_dbimport_succeeds :: TestTree
322 test_dbimport_succeeds = testCase "dbimport succeeds" $ do
323 xmltrees <- mapM unsafe_read_document sport_info_test_files
324 let msgs = rights $ map (parse_xml "dummy") xmltrees
325 actual <- withSqliteConn ":memory:" $ runDbConn $ do
326 runMigration silentMigrationLogger $
327 migrate (undefined :: SportInfo)
328 mapM_ dbimport msgs
329 countAll (undefined :: SportInfo)
330
331 actual @?= expected
332 where
333 expected = length sport_info_test_files