]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/SportInfo.hs
a4fe93e16e590f9491f04754eb0b1a89adb38f84
[dead/htsn-import.git] / src / TSN / XML / SportInfo.hs
1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE QuasiQuotes #-}
4 {-# LANGUAGE RecordWildCards #-}
5 {-# LANGUAGE TemplateHaskell #-}
6 {-# LANGUAGE TypeFamilies #-}
7
8 -- | SportInfo represents a collection of DTDs that we don't really
9 -- handle but want to make available. The raw XML gets stored in the
10 -- database along with the XML_File_ID, but we don't parse any of it.
11 --
12 -- This is almost completely redundant with "TSN.XML.GameInfo", but
13 -- the redundancy is necessary: we need separate message types so
14 -- that we can have separate 'DbImport' instances. It would take
15 -- more code/work to abstract (if it's even possible) than to
16 -- duplicate.
17 --
18 module TSN.XML.SportInfo (
19 dtds,
20 parse_xml,
21 sport_info_tests,
22 -- * WARNING: these are private but exported to silence warnings
23 SportInfoConstructor(..) )
24 where
25
26 -- System imports.
27 import Data.Either ( rights )
28 import Data.String.Utils ( replace )
29 import Data.Time.Clock ( UTCTime )
30 import Database.Groundhog (
31 countAll,
32 insert_,
33 migrate,
34 runMigration,
35 silentMigrationLogger )
36 import Database.Groundhog.Generic ( runDbConn )
37 import Database.Groundhog.Sqlite ( withSqliteConn )
38 import Database.Groundhog.TH (
39 defaultCodegenConfig,
40 groundhog,
41 mkPersist )
42 import Test.Tasty ( TestTree, testGroup )
43 import Test.Tasty.HUnit ( (@?=), testCase )
44 import Text.XML.HXT.Core ( XmlTree )
45 import Text.XML.HXT.DOM.ShowXml ( xshow )
46
47 -- Local imports.
48 import TSN.DbImport (
49 DbImport(..),
50 ImportResult(..),
51 run_dbmigrate )
52 import TSN.Parse (
53 ParseError,
54 parse_message,
55 parse_xmlfid,
56 parse_xml_time_stamp )
57 import Xml ( unsafe_read_document )
58
59
60 -- | The DTDs for everything that we consider \"Sport Info.\"
61 --
62 dtds :: [String]
63 dtds =
64 [ "CBASK_3PPctXML.dtd",
65 "Cbask_All_Tourn_Teams_XML.dtd",
66 "CBASK_AssistsXML.dtd",
67 "Cbask_Awards_XML.dtd",
68 "CBASK_BlocksXML.dtd",
69 "Cbask_Conf_Standings_XML.dtd",
70 "Cbask_DivII_III_Indv_Stats_XML.dtd",
71 "Cbask_DivII_Team_Stats_XML.dtd",
72 "Cbask_DivIII_Team_Stats_XML.dtd",
73 "CBASK_FGPctXML.dtd",
74 "CBASK_FoulsXML.dtd",
75 "CBASK_FTPctXML.dtd",
76 "Cbask_Indv_Scoring_XML.dtd",
77 "CBASK_MinutesXML.dtd",
78 "Cbask_Polls_XML.dtd",
79 "CBASK_ReboundsXML.dtd",
80 "CBASK_ScoringLeadersXML.dtd",
81 "Cbask_Team_ThreePT_Made_XML.dtd",
82 "Cbask_Team_ThreePT_PCT_XML.dtd",
83 "Cbask_Team_Win_Pct_XML.dtd",
84 "Cbask_Top_Twenty_Five_XML.dtd",
85 "CBASK_TopTwentyFiveResult_XML.dtd",
86 "Cbask_Tourn_Awards_XML.dtd",
87 "Cbask_Tourn_Champs_XML.dtd",
88 "Cbask_Tourn_Indiv_XML.dtd",
89 "Cbask_Tourn_Leaders_XML.dtd",
90 "Cbask_Tourn_MVP_XML.dtd",
91 "Cbask_Tourn_Records_XML.dtd",
92 "LeagueScheduleXML.dtd",
93 "minorscoresxml.dtd",
94 "Minor_Baseball_League_Leaders_XML.dtd",
95 "Minor_Baseball_Standings_XML.dtd",
96 "Minor_Baseball_Transactions_XML.dtd",
97 "mlbbattingavgxml.dtd",
98 "mlbdoublesleadersxml.dtd",
99 "MLBGamesPlayedXML.dtd",
100 "MLBGIDPXML.dtd",
101 "MLBHitByPitchXML.dtd",
102 "mlbhitsleadersxml.dtd",
103 "mlbhomerunsxml.dtd",
104 "MLBHRFreqXML.dtd",
105 "MLBIntWalksXML.dtd",
106 "MLBKORateXML.dtd",
107 "mlbonbasepctxml.dtd",
108 "MLBOPSXML.dtd",
109 "MLBPlateAppsXML.dtd",
110 "mlbrbisxml.dtd",
111 "mlbrunsleadersxml.dtd",
112 "MLBSacFliesXML.dtd",
113 "MLBSacrificesXML.dtd",
114 "MLBSBSuccessXML.dtd",
115 "mlbsluggingpctxml.dtd",
116 "mlbstandxml.dtd",
117 "mlbstandxml_preseason.dtd",
118 "mlbstolenbasexml.dtd",
119 "mlbtotalbasesleadersxml.dtd",
120 "mlbtriplesleadersxml.dtd",
121 "MLBWalkRateXML.dtd",
122 "mlbwalksleadersxml.dtd",
123 "MLBXtraBaseHitsXML.dtd",
124 "MLB_ERA_Leaders.dtd",
125 "MLB_Fielding_XML.dtd",
126 "MLB_Pitching_Appearances_Leaders.dtd",
127 "MLB_Pitching_Balks_Leaders.dtd",
128 "MLB_Pitching_CG_Leaders.dtd",
129 "MLB_Pitching_ER_Allowed_Leaders.dtd",
130 "MLB_Pitching_Hits_Allowed_Leaders.dtd",
131 "MLB_Pitching_Hit_Batters_Leaders.dtd",
132 "MLB_Pitching_HR_Allowed_Leaders.dtd",
133 "MLB_Pitching_IP_Leaders.dtd",
134 "MLB_Pitching_Runs_Allowed_Leaders.dtd",
135 "MLB_Pitching_Saves_Leaders.dtd",
136 "MLB_Pitching_Shut_Outs_Leaders.dtd",
137 "MLB_Pitching_Starts_Leaders.dtd",
138 "MLB_Pitching_Strike_Outs_Leaders.dtd",
139 "MLB_Pitching_Walks_Leaders.dtd",
140 "MLB_Pitching_WHIP_Leaders.dtd",
141 "MLB_Pitching_Wild_Pitches_Leaders.dtd",
142 "MLB_Pitching_Win_Percentage_Leaders.dtd",
143 "MLB_Pitching_WL_Leaders.dtd",
144 "NBA_Team_Stats_XML.dtd",
145 "NBA3PPctXML.dtd",
146 "NBAAssistsXML.dtd",
147 "NBABlocksXML.dtd",
148 "nbaconfrecxml.dtd",
149 "nbadaysxml.dtd",
150 "nbadivisionsxml.dtd",
151 "NBAFGPctXML.dtd",
152 "NBAFoulsXML.dtd",
153 "NBAFTPctXML.dtd",
154 "NBAMinutesXML.dtd",
155 "NBAReboundsXML.dtd",
156 "NBAScorersXML.dtd",
157 "nbastandxml.dtd",
158 "NBAStealsXML.dtd",
159 "nbateamleadersxml.dtd",
160 "nbatripledoublexml.dtd",
161 "NBATurnoversXML.dtd",
162 "NCAA_Conference_Schedule_XML.dtd",
163 "nflfirstdownxml.dtd",
164 "NFLFumbleLeaderXML.dtd",
165 "NFLGrassTurfDomeOutsideXML.dtd",
166 "NFLGiveTakeXML.dtd",
167 "NFLInside20XML.dtd",
168 "NFLInterceptionLeadersXML.dtd",
169 "NFLKickoffsXML.dtd",
170 "NFLMondayNightXML.dtd",
171 "NFLPassingLeadersXML.dtd",
172 "NFLPassLeadXML.dtd",
173 "NFLQBStartsXML.dtd",
174 "NFLReceivingLeadersXML.dtd",
175 "NFLRushingLeadersXML.dtd",
176 "NFLSackLeadersXML.dtd",
177 "nflstandxml.dtd",
178 "NFLTeamRankingsXML.dtd",
179 "NFLTopPerformanceXML.dtd",
180 "NFLTotalYardageXML.dtd",
181 "NFL_KickingLeaders_XML.dtd",
182 "NFL_NBA_Draft_XML.dtd",
183 "NFL_Roster_XML.dtd",
184 "NFLTackleFFLeadersXML.dtd",
185 "NFL_Team_Stats_XML.dtd",
186 "Transactions_XML.dtd",
187 "Weekly_Sched_XML.dtd",
188 "WNBA_Team_Leaders_XML.dtd",
189 "WNBA3PPctXML.dtd",
190 "WNBAAssistsXML.dtd",
191 "WNBABlocksXML.dtd",
192 "WNBAFGPctXML.dtd",
193 "WNBAFoulsXML.dtd",
194 "WNBAFTPctXML.dtd",
195 "WNBAMinutesXML.dtd",
196 "WNBAReboundsXML.dtd",
197 "WNBAScorersXML.dtd",
198 "wnbastandxml.dtd",
199 "WNBAStealsXML.dtd",
200 "WNBATurnoversXML.dtd" ]
201
202
203 -- | This serves as both the database and XML representation of a
204 -- SportInfo \<message\>.
205 --
206 data SportInfo =
207 SportInfo {
208 dtd :: String,
209 xml_file_id :: Int,
210 time_stamp :: UTCTime,
211 xml :: String }
212 deriving (Eq, Show)
213
214
215 -- | Attempt to parse a 'SportInfo' from an 'XmlTree'. If we cannot,
216 -- we fail with an error message.
217 --
218 parse_xml :: String -> XmlTree -> Either ParseError SportInfo
219 parse_xml dtdname xmltree = do
220 xmlfid <- parse_xmlfid xmltree
221 timestamp <- parse_xml_time_stamp xmltree
222 message <- parse_message xmltree
223 return $ SportInfo dtdname xmlfid timestamp (xshow [message])
224
225
226 --
227 -- Database code
228 --
229
230 instance DbImport SportInfo where
231 dbmigrate _ =
232 run_dbmigrate $ migrate (undefined :: SportInfo)
233
234 -- | We import a 'SportInfo' by inserting the whole thing at
235 -- once. Nothing fancy going on here.
236 dbimport msg = do
237 insert_ msg
238 return ImportSucceeded
239
240
241 -- | The database schema for SportInfo is trivial; all we need is for
242 -- the XML_File_ID to be unique.
243 --
244 mkPersist defaultCodegenConfig [groundhog|
245 - entity: SportInfo
246 dbName: sport_info
247 constructors:
248 - name: SportInfo
249 uniques:
250 - name: unique_sport_info
251 type: constraint
252 # Prevent multiple imports of the same message.
253 fields: [xml_file_id]
254 |]
255
256
257 --
258 -- Tasty Tests
259 --
260
261 -- | A list of all tests for this module.
262 --
263 sport_info_tests :: TestTree
264 sport_info_tests =
265 testGroup
266 "SportInfo tests"
267 [ test_accessors,
268 test_parse_xml_succeeds,
269 test_dbimport_succeeds ]
270
271
272 -- | Make sure the accessors work and that we can parse one file. Ok,
273 -- so the real point of this is to make the unused fields (dtd, xml,
274 -- ...) warning go away without having to mangle the groundhog code.
275 --
276 test_accessors :: TestTree
277 test_accessors = testCase "we can access a parsed sport_info" $ do
278 xmltree <- unsafe_read_document "test/xml/sportinfo/wnbastandxml.xml"
279 let Right t = parse_xml "wnbastandxml.dtd" xmltree
280 let a1 = dtd t
281 let ex1 = "wnbastandxml.dtd"
282 let a2 = xml_file_id t
283 let ex2 = 2011
284 let a3 = show $ time_stamp t
285 let ex3 = "2009-09-27 19:50:00 UTC"
286 let a4 = take 9 (xml t)
287 let ex4 = "<message>"
288 let actual = (a1,a2,a3,a4)
289 let expected = (ex1,ex2,ex3,ex4)
290 actual @?= expected
291
292
293 -- | Sample XML documents for SportInfo types.
294 --
295 sport_info_test_files :: [FilePath]
296 sport_info_test_files =
297 map (change_suffix . add_path) dtds
298 where
299 add_path = ("test/xml/sportinfo/" ++ )
300 change_suffix = replace ".dtd" ".xml"
301
302
303 -- | Make sure we can parse every element of 'sport_info_test_files'.
304 --
305 test_parse_xml_succeeds :: TestTree
306 test_parse_xml_succeeds =
307 testGroup "parse_xml" $ map check sport_info_test_files
308 where
309 check t = testCase t $ do
310 x <- unsafe_read_document t
311 let result = parse_xml "dummy" x
312 let actual = case result of -- isRight appears in base-4.7
313 Left _ -> False
314 Right _ -> True
315 let expected = True
316 actual @?= expected
317
318
319 -- | Ensure that each element of 'sport_info_test_files' can be imported
320 -- by counting the total number of database records (after
321 -- importing) and comparing it against the length of
322 -- 'sport_info_test_files'.
323 --
324 test_dbimport_succeeds :: TestTree
325 test_dbimport_succeeds = testCase "dbimport succeeds" $ do
326 xmltrees <- mapM unsafe_read_document sport_info_test_files
327 let msgs = rights $ map (parse_xml "dummy") xmltrees
328 actual <- withSqliteConn ":memory:" $ runDbConn $ do
329 runMigration silentMigrationLogger $
330 migrate (undefined :: SportInfo)
331 mapM_ dbimport msgs
332 countAll (undefined :: SportInfo)
333
334 actual @?= expected
335 where
336 expected = length sport_info_test_files