]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/SportInfo.hs
9f454d9c473139147e71ac552ca508138832e03c
[dead/htsn-import.git] / src / TSN / XML / SportInfo.hs
1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE QuasiQuotes #-}
4 {-# LANGUAGE RecordWildCards #-}
5 {-# LANGUAGE TemplateHaskell #-}
6 {-# LANGUAGE TypeFamilies #-}
7
8 -- | SportInfo represents a collection of DTDs that we don't really
9 -- handle but want to make available. The raw XML gets stored in the
10 -- database along with the XML_File_ID, but we don't parse any of it.
11 --
12 -- This is almost completely redundant with "TSN.XML.GameInfo", but
13 -- the redundancy is necessary: we need separate message types so
14 -- that we can have separate 'DbImport' instances. It would take
15 -- more code/work to abstract (if it's even possible) than to
16 -- duplicate.
17 --
18 module TSN.XML.SportInfo (
19 dtds,
20 parse_xml,
21 sport_info_tests,
22 -- * WARNING: these are private but exported to silence warnings
23 SportInfoConstructor(..) )
24 where
25
26 -- System imports.
27 import Data.Either ( rights )
28 import Data.String.Utils ( replace )
29 import Data.Time.Clock ( UTCTime )
30 import Database.Groundhog (
31 countAll,
32 insert_,
33 migrate,
34 runMigration,
35 silentMigrationLogger )
36 import Database.Groundhog.Generic ( runDbConn )
37 import Database.Groundhog.Sqlite ( withSqliteConn )
38 import Database.Groundhog.TH (
39 defaultCodegenConfig,
40 groundhog,
41 mkPersist )
42 import Test.Tasty ( TestTree, testGroup )
43 import Test.Tasty.HUnit ( (@?=), testCase )
44 import Text.XML.HXT.Core ( XmlTree )
45 import Text.XML.HXT.DOM.ShowXml ( xshow )
46
47 -- Local imports.
48 import TSN.DbImport (
49 DbImport(..),
50 ImportResult(..),
51 run_dbmigrate )
52 import TSN.Parse (
53 ParseError,
54 parse_message,
55 parse_xmlfid,
56 parse_xml_time_stamp )
57 import Xml ( unsafe_read_document )
58
59
60 -- | The DTDs for everything that we consider \"Sport Info.\"
61 --
62 dtds :: [String]
63 dtds =
64 [ "CBASK_3PPctXML.dtd",
65 "Cbask_All_Tourn_Teams_XML.dtd",
66 "CBASK_AssistsXML.dtd",
67 "Cbask_Awards_XML.dtd",
68 "CBASK_BlocksXML.dtd",
69 "Cbask_Conf_Standings_XML.dtd",
70 "Cbask_DivII_III_Indv_Stats_XML.dtd",
71 "Cbask_DivII_Team_Stats_XML.dtd",
72 "Cbask_DivIII_Team_Stats_XML.dtd",
73 "CBASK_FGPctXML.dtd",
74 "CBASK_FoulsXML.dtd",
75 "CBASK_FTPctXML.dtd",
76 "Cbask_Indv_Scoring_XML.dtd",
77 "CBASK_MinutesXML.dtd",
78 "Cbask_Polls_XML.dtd",
79 "CBASK_ReboundsXML.dtd",
80 "CBASK_ScoringLeadersXML.dtd",
81 "Cbask_Team_ThreePT_Made_XML.dtd",
82 "Cbask_Team_ThreePT_PCT_XML.dtd",
83 "Cbask_Team_Win_Pct_XML.dtd",
84 "Cbask_Top_Twenty_Five_XML.dtd",
85 "CBASK_TopTwentyFiveResult_XML.dtd",
86 "Cbask_Tourn_Awards_XML.dtd",
87 "Cbask_Tourn_Champs_XML.dtd",
88 "Cbask_Tourn_Indiv_XML.dtd",
89 "Cbask_Tourn_Leaders_XML.dtd",
90 "Cbask_Tourn_MVP_XML.dtd",
91 "Cbask_Tourn_Records_XML.dtd",
92 "LeagueScheduleXML.dtd",
93 "minorscoresxml.dtd",
94 "Minor_Baseball_League_Leaders_XML.dtd",
95 "Minor_Baseball_Standings_XML.dtd",
96 "Minor_Baseball_Transactions_XML.dtd",
97 "mlbbattingavgxml.dtd",
98 "mlbdoublesleadersxml.dtd",
99 "MLBGamesPlayedXML.dtd",
100 "MLBGIDPXML.dtd",
101 "MLBHitByPitchXML.dtd",
102 "mlbhitsleadersxml.dtd",
103 "mlbhomerunsxml.dtd",
104 "MLBHRFreqXML.dtd",
105 "MLBIntWalksXML.dtd",
106 "MLBKORateXML.dtd",
107 "mlbonbasepctxml.dtd",
108 "MLBOPSXML.dtd",
109 "MLBPlateAppsXML.dtd",
110 "mlbrbisxml.dtd",
111 "mlbrunsleadersxml.dtd",
112 "MLBSacFliesXML.dtd",
113 "MLBSacrificesXML.dtd",
114 "MLBSBSuccessXML.dtd",
115 "mlbsluggingpctxml.dtd",
116 "mlbstandxml.dtd",
117 "mlbstandxml_preseason.dtd",
118 "mlbstolenbasexml.dtd",
119 "mlbtotalbasesleadersxml.dtd",
120 "mlbtriplesleadersxml.dtd",
121 "MLBWalkRateXML.dtd",
122 "mlbwalksleadersxml.dtd",
123 "MLBXtraBaseHitsXML.dtd",
124 "MLB_ERA_Leaders.dtd",
125 "MLB_Fielding_XML.dtd",
126 "MLB_Pitching_Appearances_Leaders.dtd",
127 "MLB_Pitching_Balks_Leaders.dtd",
128 "MLB_Pitching_CG_Leaders.dtd",
129 "MLB_Pitching_ER_Allowed_Leaders.dtd",
130 "MLB_Pitching_Hits_Allowed_Leaders.dtd",
131 "MLB_Pitching_Hit_Batters_Leaders.dtd",
132 "MLB_Pitching_HR_Allowed_Leaders.dtd",
133 "MLB_Pitching_IP_Leaders.dtd",
134 "MLB_Pitching_Runs_Allowed_Leaders.dtd",
135 "MLB_Pitching_Saves_Leaders.dtd",
136 "MLB_Pitching_Shut_Outs_Leaders.dtd",
137 "MLB_Pitching_Starts_Leaders.dtd",
138 "MLB_Pitching_Strike_Outs_Leaders.dtd",
139 "MLB_Pitching_Walks_Leaders.dtd",
140 "MLB_Pitching_WHIP_Leaders.dtd",
141 "MLB_Pitching_Wild_Pitches_Leaders.dtd",
142 "MLB_Pitching_Win_Percentage_Leaders.dtd",
143 "MLB_Pitching_WL_Leaders.dtd",
144 "NBA_Team_Stats_XML.dtd",
145 "NBA3PPctXML.dtd",
146 "NBAAssistsXML.dtd",
147 "NBABlocksXML.dtd",
148 "nbaconfrecxml.dtd",
149 "nbadaysxml.dtd",
150 "nbadivisionsxml.dtd",
151 "NBAFGPctXML.dtd",
152 "NBAFoulsXML.dtd",
153 "NBAFTPctXML.dtd",
154 "NBAMinutesXML.dtd",
155 "NBAReboundsXML.dtd",
156 "NBAScorersXML.dtd",
157 "nbastandxml.dtd",
158 "NBAStealsXML.dtd",
159 "nbateamleadersxml.dtd",
160 "nbatripledoublexml.dtd",
161 "NBATurnoversXML.dtd",
162 "NCAA_Conference_Schedule_XML.dtd",
163 "nflfirstdownxml.dtd",
164 "NFLFumbleLeaderXML.dtd",
165 "NFLGrassTurfDomeOutsideXML.dtd",
166 "NFLGiveTakeXML.dtd",
167 "NFLInside20XML.dtd",
168 "NFLInterceptionLeadersXML.dtd",
169 "NFLKickoffsXML.dtd",
170 "NFLMondayNightXML.dtd",
171 "NFLPassingLeadersXML.dtd",
172 "NFLPassLeadXML.dtd",
173 "NFLQBStartsXML.dtd",
174 "NFLReceivingLeadersXML.dtd",
175 "NFLSackLeadersXML.dtd",
176 "nflstandxml.dtd",
177 "NFLTeamRankingsXML.dtd",
178 "NFLTopPerformanceXML.dtd",
179 "NFLTotalYardageXML.dtd",
180 "NFL_KickingLeaders_XML.dtd",
181 "NFL_NBA_Draft_XML.dtd",
182 "NFL_Roster_XML.dtd",
183 "NFL_Team_Stats_XML.dtd",
184 "Transactions_XML.dtd",
185 "Weekly_Sched_XML.dtd",
186 "WNBA_Team_Leaders_XML.dtd",
187 "WNBA3PPctXML.dtd",
188 "WNBAAssistsXML.dtd",
189 "WNBABlocksXML.dtd",
190 "WNBAFGPctXML.dtd",
191 "WNBAFoulsXML.dtd",
192 "WNBAFTPctXML.dtd",
193 "WNBAMinutesXML.dtd",
194 "WNBAReboundsXML.dtd",
195 "WNBAScorersXML.dtd",
196 "wnbastandxml.dtd",
197 "WNBAStealsXML.dtd",
198 "WNBATurnoversXML.dtd" ]
199
200
201 -- | This serves as both the database and XML representation of a
202 -- SportInfo \<message\>.
203 --
204 data SportInfo =
205 SportInfo {
206 dtd :: String,
207 xml_file_id :: Int,
208 time_stamp :: UTCTime,
209 xml :: String }
210 deriving (Eq, Show)
211
212
213 -- | Attempt to parse a 'SportInfo' from an 'XmlTree'. If we cannot,
214 -- we fail with an error message.
215 --
216 parse_xml :: String -> XmlTree -> Either ParseError SportInfo
217 parse_xml dtdname xmltree = do
218 xmlfid <- parse_xmlfid xmltree
219 timestamp <- parse_xml_time_stamp xmltree
220 message <- parse_message xmltree
221 return $ SportInfo dtdname xmlfid timestamp (xshow [message])
222
223
224 --
225 -- Database code
226 --
227
228 instance DbImport SportInfo where
229 dbmigrate _ =
230 run_dbmigrate $ migrate (undefined :: SportInfo)
231
232 -- | We import a 'SportInfo' by inserting the whole thing at
233 -- once. Nothing fancy going on here.
234 dbimport msg = do
235 insert_ msg
236 return ImportSucceeded
237
238
239 -- | The database schema for SportInfo is trivial; all we need is for
240 -- the XML_File_ID to be unique.
241 --
242 mkPersist defaultCodegenConfig [groundhog|
243 - entity: SportInfo
244 dbName: sport_info
245 constructors:
246 - name: SportInfo
247 uniques:
248 - name: unique_sport_info
249 type: constraint
250 # Prevent multiple imports of the same message.
251 fields: [xml_file_id]
252 |]
253
254
255 --
256 -- Tasty Tests
257 --
258
259 -- | A list of all tests for this module.
260 --
261 sport_info_tests :: TestTree
262 sport_info_tests =
263 testGroup
264 "SportInfo tests"
265 [ test_accessors,
266 test_parse_xml_succeeds,
267 test_dbimport_succeeds ]
268
269
270 -- | Make sure the accessors work and that we can parse one file. Ok,
271 -- so the real point of this is to make the unused fields (dtd, xml,
272 -- ...) warning go away without having to mangle the groundhog code.
273 --
274 test_accessors :: TestTree
275 test_accessors = testCase "we can access a parsed sport_info" $ do
276 xmltree <- unsafe_read_document "test/xml/sportinfo/wnbastandxml.xml"
277 let Right t = parse_xml "wnbastandxml.dtd" xmltree
278 let a1 = dtd t
279 let ex1 = "wnbastandxml.dtd"
280 let a2 = xml_file_id t
281 let ex2 = 2011
282 let a3 = show $ time_stamp t
283 let ex3 = "2009-09-27 19:50:00 UTC"
284 let a4 = take 9 (xml t)
285 let ex4 = "<message>"
286 let actual = (a1,a2,a3,a4)
287 let expected = (ex1,ex2,ex3,ex4)
288 actual @?= expected
289
290
291 -- | Sample XML documents for SportInfo types.
292 --
293 sport_info_test_files :: [FilePath]
294 sport_info_test_files =
295 map (change_suffix . add_path) dtds
296 where
297 add_path = ("test/xml/sportinfo/" ++ )
298 change_suffix = replace ".dtd" ".xml"
299
300
301 -- | Make sure we can parse every element of 'sport_info_test_files'.
302 --
303 test_parse_xml_succeeds :: TestTree
304 test_parse_xml_succeeds =
305 testGroup "parse_xml" $ map check sport_info_test_files
306 where
307 check t = testCase t $ do
308 x <- unsafe_read_document t
309 let result = parse_xml "dummy" x
310 let actual = case result of -- isRight appears in base-4.7
311 Left _ -> False
312 Right _ -> True
313 let expected = True
314 actual @?= expected
315
316
317 -- | Ensure that each element of 'sport_info_test_files' can be imported
318 -- by counting the total number of database records (after
319 -- importing) and comparing it against the length of
320 -- 'sport_info_test_files'.
321 --
322 test_dbimport_succeeds :: TestTree
323 test_dbimport_succeeds = testCase "dbimport succeeds" $ do
324 xmltrees <- mapM unsafe_read_document sport_info_test_files
325 let msgs = rights $ map (parse_xml "dummy") xmltrees
326 actual <- withSqliteConn ":memory:" $ runDbConn $ do
327 runMigration silentMigrationLogger $
328 migrate (undefined :: SportInfo)
329 mapM_ dbimport msgs
330 countAll (undefined :: SportInfo)
331
332 actual @?= expected
333 where
334 expected = length sport_info_test_files