]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/SportInfo.hs
4661484ff9a17b44d40b562e4606d335a7656d2a
[dead/htsn-import.git] / src / TSN / XML / SportInfo.hs
1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE QuasiQuotes #-}
4 {-# LANGUAGE RecordWildCards #-}
5 {-# LANGUAGE TemplateHaskell #-}
6 {-# LANGUAGE TypeFamilies #-}
7
8 -- | SportInfo represents a collection of DTDs that we don't really
9 -- handle but want to make available. The raw XML gets stored in the
10 -- database along with the XML_File_ID, but we don't parse any of it.
11 --
12 -- This is almost completely redundant with "TSN.XML.GameInfo", but
13 -- the redundancy is necessary: we need separate message types so
14 -- that we can have separate 'DbImport' instances. It would take
15 -- more code/work to abstract (if it's even possible) than to
16 -- duplicate.
17 --
18 module TSN.XML.SportInfo (
19 dtds,
20 parse_xml,
21 sport_info_tests,
22 -- * WARNING: these are private but exported to silence warnings
23 SportInfoConstructor(..) )
24 where
25
26 -- System imports.
27 import Data.Either ( rights )
28 import Data.String.Utils ( replace )
29 import Data.Time.Clock ( UTCTime )
30 import Database.Groundhog (
31 countAll,
32 insert_,
33 migrate,
34 runMigration,
35 silentMigrationLogger )
36 import Database.Groundhog.Generic ( runDbConn )
37 import Database.Groundhog.Sqlite ( withSqliteConn )
38 import Database.Groundhog.TH (
39 defaultCodegenConfig,
40 groundhog,
41 mkPersist )
42 import Test.Tasty ( TestTree, testGroup )
43 import Test.Tasty.HUnit ( (@?=), testCase )
44 import Text.XML.HXT.Core ( XmlTree )
45 import Text.XML.HXT.DOM.ShowXml ( xshow )
46
47 -- Local imports.
48 import TSN.DbImport (
49 DbImport(..),
50 ImportResult(..),
51 run_dbmigrate )
52 import TSN.Parse (
53 ParseError,
54 parse_message,
55 parse_xmlfid,
56 parse_xml_time_stamp )
57 import Xml ( unsafe_read_document )
58
59
60 -- | The DTDs for everything that we consider \"Sport Info.\"
61 --
62 dtds :: [String]
63 dtds =
64 [ "CBASK_3PPctXML.dtd",
65 "Cbask_All_Tourn_Teams_XML.dtd",
66 "CBASK_AssistsXML.dtd",
67 "Cbask_Awards_XML.dtd",
68 "CBASK_BlocksXML.dtd",
69 "Cbask_Conf_Standings_XML.dtd",
70 "Cbask_DivII_III_Indv_Stats_XML.dtd",
71 "Cbask_DivII_Team_Stats_XML.dtd",
72 "Cbask_DivIII_Team_Stats_XML.dtd",
73 "CBASK_FGPctXML.dtd",
74 "CBASK_FoulsXML.dtd",
75 "CBASK_FTPctXML.dtd",
76 "Cbask_Indv_Scoring_XML.dtd",
77 "CBASK_MinutesXML.dtd",
78 "Cbask_Polls_XML.dtd",
79 "CBASK_ReboundsXML.dtd",
80 "CBASK_ScoringLeadersXML.dtd",
81 "Cbask_Team_ThreePT_Made_XML.dtd",
82 "Cbask_Team_ThreePT_PCT_XML.dtd",
83 "Cbask_Team_Win_Pct_XML.dtd",
84 "Cbask_Top_Twenty_Five_XML.dtd",
85 "CBASK_TopTwentyFiveResult_XML.dtd",
86 "Cbask_Tourn_Awards_XML.dtd",
87 "Cbask_Tourn_Champs_XML.dtd",
88 "Cbask_Tourn_Indiv_XML.dtd",
89 "Cbask_Tourn_Leaders_XML.dtd",
90 "Cbask_Tourn_MVP_XML.dtd",
91 "Cbask_Tourn_Records_XML.dtd",
92 "LeagueScheduleXML.dtd",
93 "minorscoresxml.dtd",
94 "Minor_Baseball_League_Leaders_XML.dtd",
95 "Minor_Baseball_Standings_XML.dtd",
96 "Minor_Baseball_Transactions_XML.dtd",
97 "mlbbattingavgxml.dtd",
98 "mlbdoublesleadersxml.dtd",
99 "MLBGamesPlayedXML.dtd",
100 "MLBGIDPXML.dtd",
101 "MLBHitByPitchXML.dtd",
102 "mlbhitsleadersxml.dtd",
103 "mlbhomerunsxml.dtd",
104 "MLBHRFreqXML.dtd",
105 "MLBIntWalksXML.dtd",
106 "MLBKORateXML.dtd",
107 "mlbonbasepctxml.dtd",
108 "MLBOPSXML.dtd",
109 "MLBPlateAppsXML.dtd",
110 "mlbrbisxml.dtd",
111 "mlbrunsleadersxml.dtd",
112 "MLBSacFliesXML.dtd",
113 "MLBSacrificesXML.dtd",
114 "MLBSBSuccessXML.dtd",
115 "mlbsluggingpctxml.dtd",
116 "mlbstandxml.dtd",
117 "mlbstandxml_preseason.dtd",
118 "mlbstolenbasexml.dtd",
119 "mlbtotalbasesleadersxml.dtd",
120 "mlbtriplesleadersxml.dtd",
121 "MLBWalkRateXML.dtd",
122 "mlbwalksleadersxml.dtd",
123 "MLBXtraBaseHitsXML.dtd",
124 "MLB_ERA_Leaders.dtd",
125 "MLB_Fielding_XML.dtd",
126 "MLB_Pitching_Appearances_Leaders.dtd",
127 "MLB_Pitching_Balks_Leaders.dtd",
128 "MLB_Pitching_CG_Leaders.dtd",
129 "MLB_Pitching_ER_Allowed_Leaders.dtd",
130 "MLB_Pitching_Hits_Allowed_Leaders.dtd",
131 "MLB_Pitching_Hit_Batters_Leaders.dtd",
132 "MLB_Pitching_HR_Allowed_Leaders.dtd",
133 "MLB_Pitching_IP_Leaders.dtd",
134 "MLB_Pitching_Runs_Allowed_Leaders.dtd",
135 "MLB_Pitching_Saves_Leaders.dtd",
136 "MLB_Pitching_Shut_Outs_Leaders.dtd",
137 "MLB_Pitching_Starts_Leaders.dtd",
138 "MLB_Pitching_Strike_Outs_Leaders.dtd",
139 "MLB_Pitching_Walks_Leaders.dtd",
140 "MLB_Pitching_WHIP_Leaders.dtd",
141 "MLB_Pitching_Wild_Pitches_Leaders.dtd",
142 "MLB_Pitching_Win_Percentage_Leaders.dtd",
143 "MLB_Pitching_WL_Leaders.dtd",
144 "NBA_Team_Stats_XML.dtd",
145 "NBA3PPctXML.dtd",
146 "NBAAssistsXML.dtd",
147 "NBABlocksXML.dtd",
148 "nbaconfrecxml.dtd",
149 "nbadaysxml.dtd",
150 "nbadivisionsxml.dtd",
151 "NBAFGPctXML.dtd",
152 "NBAFoulsXML.dtd",
153 "NBAFTPctXML.dtd",
154 "NBAMinutesXML.dtd",
155 "NBAReboundsXML.dtd",
156 "NBAScorersXML.dtd",
157 "nbastandxml.dtd",
158 "NBAStealsXML.dtd",
159 "nbateamleadersxml.dtd",
160 "nbatripledoublexml.dtd",
161 "NBATurnoversXML.dtd",
162 "NCAA_Conference_Schedule_XML.dtd",
163 "nflfirstdownxml.dtd",
164 "NFLFumbleLeaderXML.dtd",
165 "NFLGiveTakeXML.dtd",
166 "NFLInside20XML.dtd",
167 "NFLKickoffsXML.dtd",
168 "NFLMondayNightXML.dtd",
169 "NFLPassLeadXML.dtd",
170 "NFLQBStartsXML.dtd",
171 "NFLSackLeadersXML.dtd",
172 "nflstandxml.dtd",
173 "NFLTeamRankingsXML.dtd",
174 "NFLTopPerformanceXML.dtd",
175 "NFLTotalYardageXML.dtd",
176 "NFL_KickingLeaders_XML.dtd",
177 "NFL_NBA_Draft_XML.dtd",
178 "NFL_Roster_XML.dtd",
179 "NFL_Team_Stats_XML.dtd",
180 "Transactions_XML.dtd",
181 "Weekly_Sched_XML.dtd",
182 "WNBA_Team_Leaders_XML.dtd",
183 "WNBA3PPctXML.dtd",
184 "WNBAAssistsXML.dtd",
185 "WNBABlocksXML.dtd",
186 "WNBAFGPctXML.dtd",
187 "WNBAFoulsXML.dtd",
188 "WNBAFTPctXML.dtd",
189 "WNBAMinutesXML.dtd",
190 "WNBAReboundsXML.dtd",
191 "WNBAScorersXML.dtd",
192 "wnbastandxml.dtd",
193 "WNBAStealsXML.dtd",
194 "WNBATurnoversXML.dtd" ]
195
196
197 -- | This serves as both the database and XML representation of a
198 -- SportInfo \<message\>.
199 --
200 data SportInfo =
201 SportInfo {
202 dtd :: String,
203 xml_file_id :: Int,
204 time_stamp :: UTCTime,
205 xml :: String }
206 deriving (Eq, Show)
207
208
209 -- | Attempt to parse a 'SportInfo' from an 'XmlTree'. If we cannot,
210 -- we fail with an error message.
211 --
212 parse_xml :: String -> XmlTree -> Either ParseError SportInfo
213 parse_xml dtdname xmltree = do
214 xmlfid <- parse_xmlfid xmltree
215 timestamp <- parse_xml_time_stamp xmltree
216 message <- parse_message xmltree
217 return $ SportInfo dtdname xmlfid timestamp (xshow [message])
218
219
220 --
221 -- Database code
222 --
223
224 instance DbImport SportInfo where
225 dbmigrate _ =
226 run_dbmigrate $ migrate (undefined :: SportInfo)
227
228 -- | We import a 'SportInfo' by inserting the whole thing at
229 -- once. Nothing fancy going on here.
230 dbimport msg = do
231 insert_ msg
232 return ImportSucceeded
233
234
235 -- | The database schema for SportInfo is trivial; all we need is for
236 -- the XML_File_ID to be unique.
237 --
238 mkPersist defaultCodegenConfig [groundhog|
239 - entity: SportInfo
240 dbName: sport_info
241 constructors:
242 - name: SportInfo
243 uniques:
244 - name: unique_sport_info
245 type: constraint
246 # Prevent multiple imports of the same message.
247 fields: [xml_file_id]
248 |]
249
250
251 --
252 -- Tasty Tests
253 --
254
255 -- | A list of all tests for this module.
256 --
257 sport_info_tests :: TestTree
258 sport_info_tests =
259 testGroup
260 "SportInfo tests"
261 [ test_accessors,
262 test_parse_xml_succeeds,
263 test_dbimport_succeeds ]
264
265
266 -- | Make sure the accessors work and that we can parse one file. Ok,
267 -- so the real point of this is to make the unused fields (dtd, xml,
268 -- ...) warning go away without having to mangle the groundhog code.
269 --
270 test_accessors :: TestTree
271 test_accessors = testCase "we can access a parsed sport_info" $ do
272 xmltree <- unsafe_read_document "test/xml/sportinfo/wnbastandxml.xml"
273 let Right t = parse_xml "wnbastandxml.dtd" xmltree
274 let a1 = dtd t
275 let ex1 = "wnbastandxml.dtd"
276 let a2 = xml_file_id t
277 let ex2 = 2011
278 let a3 = show $ time_stamp t
279 let ex3 = "2009-09-27 19:50:00 UTC"
280 let a4 = take 9 (xml t)
281 let ex4 = "<message>"
282 let actual = (a1,a2,a3,a4)
283 let expected = (ex1,ex2,ex3,ex4)
284 actual @?= expected
285
286
287 -- | Sample XML documents for SportInfo types.
288 --
289 sport_info_test_files :: [FilePath]
290 sport_info_test_files =
291 map (change_suffix . add_path) dtds
292 where
293 add_path = ("test/xml/sportinfo/" ++ )
294 change_suffix = replace ".dtd" ".xml"
295
296
297 -- | Make sure we can parse every element of 'sport_info_test_files'.
298 --
299 test_parse_xml_succeeds :: TestTree
300 test_parse_xml_succeeds =
301 testGroup "parse_xml" $ map check sport_info_test_files
302 where
303 check t = testCase t $ do
304 x <- unsafe_read_document t
305 let result = parse_xml "dummy" x
306 let actual = case result of -- isRight appears in base-4.7
307 Left _ -> False
308 Right _ -> True
309 let expected = True
310 actual @?= expected
311
312
313 -- | Ensure that each element of 'sport_info_test_files' can be imported
314 -- by counting the total number of database records (after
315 -- importing) and comparing it against the length of
316 -- 'sport_info_test_files'.
317 --
318 test_dbimport_succeeds :: TestTree
319 test_dbimport_succeeds = testCase "dbimport succeeds" $ do
320 xmltrees <- mapM unsafe_read_document sport_info_test_files
321 let msgs = rights $ map (parse_xml "dummy") xmltrees
322 actual <- withSqliteConn ":memory:" $ runDbConn $ do
323 runMigration silentMigrationLogger $
324 migrate (undefined :: SportInfo)
325 mapM_ dbimport msgs
326 countAll (undefined :: SportInfo)
327
328 actual @?= expected
329 where
330 expected = length sport_info_test_files