]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/SportInfo.hs
a9e1f1a248863a84b13417c5d6c9fc604e05d995
[dead/htsn-import.git] / src / TSN / XML / SportInfo.hs
1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE QuasiQuotes #-}
4 {-# LANGUAGE RecordWildCards #-}
5 {-# LANGUAGE TemplateHaskell #-}
6 {-# LANGUAGE TypeFamilies #-}
7
8 -- | SportInfo represents a collection of DTDs that we don't really
9 -- handle but want to make available. The raw XML gets stored in the
10 -- database along with the XML_File_ID, but we don't parse any of it.
11 --
12 -- This is almost completely redundant with "TSN.XML.GameInfo", but
13 -- the redundancy is necessary: we need separate message types so
14 -- that we can have separate 'DbImport' instances. It would take
15 -- more code/work to abstract (if it's even possible) than to
16 -- duplicate.
17 --
18 module TSN.XML.SportInfo (
19 dtds,
20 parse_xml,
21 sport_info_tests,
22 -- * WARNING: these are private but exported to silence warnings
23 SportInfoConstructor(..) )
24 where
25
26 -- System imports.
27 import Data.Either ( rights )
28 import Data.String.Utils ( replace )
29 import Data.Time.Clock ( UTCTime )
30 import Database.Groundhog (
31 countAll,
32 insert_,
33 migrate,
34 runMigration,
35 silentMigrationLogger )
36 import Database.Groundhog.Generic ( runDbConn )
37 import Database.Groundhog.Sqlite ( withSqliteConn )
38 import Database.Groundhog.TH (
39 defaultCodegenConfig,
40 groundhog,
41 mkPersist )
42 import Test.Tasty ( TestTree, testGroup )
43 import Test.Tasty.HUnit ( (@?=), testCase )
44 import Text.XML.HXT.Core ( XmlTree )
45 import Text.XML.HXT.DOM.ShowXml ( xshow )
46
47 -- Local imports.
48 import TSN.DbImport (
49 DbImport(..),
50 ImportResult(..),
51 run_dbmigrate )
52 import TSN.Parse (
53 parse_message,
54 parse_xmlfid,
55 parse_xml_time_stamp )
56 import Xml ( unsafe_read_document )
57
58
59 -- | The DTDs for everything that we consider \"Sport Info.\"
60 --
61 dtds :: [String]
62 dtds =
63 [ "CBASK_3PPctXML.dtd",
64 "Cbask_All_Tourn_Teams_XML.dtd",
65 "CBASK_AssistsXML.dtd",
66 "Cbask_Awards_XML.dtd",
67 "CBASK_BlocksXML.dtd",
68 "Cbask_Conf_Standings_XML.dtd",
69 "Cbask_DivII_III_Indv_Stats_XML.dtd",
70 "Cbask_DivII_Team_Stats_XML.dtd",
71 "Cbask_DivIII_Team_Stats_XML.dtd",
72 "CBASK_FGPctXML.dtd",
73 "CBASK_FoulsXML.dtd",
74 "CBASK_FTPctXML.dtd",
75 "Cbask_Indv_Scoring_XML.dtd",
76 "CBASK_MinutesXML.dtd",
77 "Cbask_Polls_XML.dtd",
78 "CBASK_ReboundsXML.dtd",
79 "CBASK_ScoringLeadersXML.dtd",
80 "Cbask_Team_ThreePT_Made_XML.dtd",
81 "Cbask_Team_ThreePT_PCT_XML.dtd",
82 "Cbask_Team_Win_Pct_XML.dtd",
83 "Cbask_Top_Twenty_Five_XML.dtd",
84 "CBASK_TopTwentyFiveResult_XML.dtd",
85 "Cbask_Tourn_Awards_XML.dtd",
86 "Cbask_Tourn_Champs_XML.dtd",
87 "Cbask_Tourn_Indiv_XML.dtd",
88 "Cbask_Tourn_Leaders_XML.dtd",
89 "Cbask_Tourn_MVP_XML.dtd",
90 "Cbask_Tourn_Records_XML.dtd",
91 "LeagueScheduleXML.dtd",
92 "minorscoresxml.dtd",
93 "Minor_Baseball_League_Leaders_XML.dtd",
94 "Minor_Baseball_Standings_XML.dtd",
95 "Minor_Baseball_Transactions_XML.dtd",
96 "mlbbattingavgxml.dtd",
97 "mlbdoublesleadersxml.dtd",
98 "MLBGamesPlayedXML.dtd",
99 "MLBGIDPXML.dtd",
100 "MLBHitByPitchXML.dtd",
101 "mlbhitsleadersxml.dtd",
102 "mlbhomerunsxml.dtd",
103 "MLBHRFreqXML.dtd",
104 "MLBIntWalksXML.dtd",
105 "MLBKORateXML.dtd",
106 "mlbonbasepctxml.dtd",
107 "MLBOPSXML.dtd",
108 "MLBPlateAppsXML.dtd",
109 "mlbrbisxml.dtd",
110 "mlbrunsleadersxml.dtd",
111 "MLBSacFliesXML.dtd",
112 "MLBSacrificesXML.dtd",
113 "MLBSBSuccessXML.dtd",
114 "mlbsluggingpctxml.dtd",
115 "mlbstandxml.dtd",
116 "mlbstandxml_preseason.dtd",
117 "mlbstolenbasexml.dtd",
118 "mlbtotalbasesleadersxml.dtd",
119 "mlbtriplesleadersxml.dtd",
120 "MLBWalkRateXML.dtd",
121 "mlbwalksleadersxml.dtd",
122 "MLBXtraBaseHitsXML.dtd",
123 "MLB_ERA_Leaders.dtd",
124 "MLB_Fielding_XML.dtd",
125 "MLB_Pitching_Appearances_Leaders.dtd",
126 "MLB_Pitching_Balks_Leaders.dtd",
127 "MLB_Pitching_CG_Leaders.dtd",
128 "MLB_Pitching_ER_Allowed_Leaders.dtd",
129 "MLB_Pitching_Hits_Allowed_Leaders.dtd",
130 "MLB_Pitching_Hit_Batters_Leaders.dtd",
131 "MLB_Pitching_HR_Allowed_Leaders.dtd",
132 "MLB_Pitching_IP_Leaders.dtd",
133 "MLB_Pitching_Runs_Allowed_Leaders.dtd",
134 "MLB_Pitching_Saves_Leaders.dtd",
135 "MLB_Pitching_Shut_Outs_Leaders.dtd",
136 "MLB_Pitching_Starts_Leaders.dtd",
137 "MLB_Pitching_Strike_Outs_Leaders.dtd",
138 "MLB_Pitching_Walks_Leaders.dtd",
139 "MLB_Pitching_WHIP_Leaders.dtd",
140 "MLB_Pitching_Wild_Pitches_Leaders.dtd",
141 "MLB_Pitching_Win_Percentage_Leaders.dtd",
142 "MLB_Pitching_WL_Leaders.dtd",
143 "NBA_Team_Stats_XML.dtd",
144 "NBA3PPctXML.dtd",
145 "NBAAssistsXML.dtd",
146 "NBABlocksXML.dtd",
147 "nbaconfrecxml.dtd",
148 "nbadaysxml.dtd",
149 "nbadivisionsxml.dtd",
150 "NBAFGPctXML.dtd",
151 "NBAFoulsXML.dtd",
152 "NBAFTPctXML.dtd",
153 "NBAMinutesXML.dtd",
154 "NBAReboundsXML.dtd",
155 "NBAScorersXML.dtd",
156 "nbastandxml.dtd",
157 "NBAStealsXML.dtd",
158 "nbateamleadersxml.dtd",
159 "nbatripledoublexml.dtd",
160 "NBATurnoversXML.dtd",
161 "NCAA_Conference_Schedule_XML.dtd",
162 "nflfirstdownxml.dtd",
163 "NFLFumbleLeaderXML.dtd",
164 "NFLGiveTakeXML.dtd",
165 "NFLInside20XML.dtd",
166 "NFLKickoffsXML.dtd",
167 "NFLMondayNightXML.dtd",
168 "NFLPassLeadXML.dtd",
169 "NFLQBStartsXML.dtd",
170 "NFLSackLeadersXML.dtd",
171 "nflstandxml.dtd",
172 "NFLTeamRankingsXML.dtd",
173 "NFLTopPerformanceXML.dtd",
174 "NFLTotalYardageXML.dtd",
175 "NFL_KickingLeaders_XML.dtd",
176 "NFL_NBA_Draft_XML.dtd",
177 "NFL_Roster_XML.dtd",
178 "NFL_Team_Stats_XML.dtd",
179 "Transactions_XML.dtd",
180 "Weekly_Sched_XML.dtd",
181 "WNBA_Team_Leaders_XML.dtd",
182 "WNBA3PPctXML.dtd",
183 "WNBAAssistsXML.dtd",
184 "WNBABlocksXML.dtd",
185 "WNBAFGPctXML.dtd",
186 "WNBAFoulsXML.dtd",
187 "WNBAFTPctXML.dtd",
188 "WNBAMinutesXML.dtd",
189 "WNBAReboundsXML.dtd",
190 "WNBAScorersXML.dtd",
191 "wnbastandxml.dtd",
192 "WNBAStealsXML.dtd",
193 "WNBATurnoversXML.dtd" ]
194
195
196 -- | This serves as both the database and XML representation of a
197 -- SportInfo \<message\>.
198 --
199 data SportInfo =
200 SportInfo {
201 dtd :: String,
202 xml_file_id :: Int,
203 time_stamp :: UTCTime,
204 xml :: String }
205 deriving (Eq, Show)
206
207
208 -- | Attempt to parse a 'SportInfo' from an 'XmlTree'. If we cannot,
209 -- we fail with an error message.
210 --
211 parse_xml :: String -> XmlTree -> Either String SportInfo
212 parse_xml dtdname xmltree = do
213 xmlfid <- parse_xmlfid xmltree
214 timestamp <- parse_xml_time_stamp xmltree
215 message <- parse_message xmltree
216 return $ SportInfo dtdname (fromInteger xmlfid) timestamp (xshow [message])
217
218
219 --
220 -- Database code
221 --
222
223 instance DbImport SportInfo where
224 dbmigrate _ =
225 run_dbmigrate $ migrate (undefined :: SportInfo)
226
227 -- | We import a 'SportInfo' by inserting the whole thing at
228 -- once. Nothing fancy going on here.
229 dbimport msg = do
230 insert_ msg
231 return ImportSucceeded
232
233
234 -- | The database schema for SportInfo is trivial; all we need is for
235 -- the XML_File_ID to be unique.
236 --
237 mkPersist defaultCodegenConfig [groundhog|
238 - entity: SportInfo
239 constructors:
240 - name: SportInfo
241 uniques:
242 - name: unique_sport_info
243 type: constraint
244 # Prevent multiple imports of the same message.
245 fields: [xml_file_id]
246 |]
247
248
249 --
250 -- Tasty Tests
251 --
252
253 -- | A list of all tests for this module.
254 --
255 sport_info_tests :: TestTree
256 sport_info_tests =
257 testGroup
258 "SportInfo tests"
259 [ test_accessors,
260 test_parse_xml_succeeds,
261 test_dbimport_succeeds ]
262
263
264 -- | Make sure the accessors work and that we can parse one file. Ok,
265 -- so the real point of this is to make the unused fields (dtd, xml,
266 -- ...) warning go away without having to mangle the groundhog code.
267 --
268 test_accessors :: TestTree
269 test_accessors = testCase "we can access a parsed sport_info" $ do
270 xmltree <- unsafe_read_document "test/xml/sportinfo/wnbastandxml.xml"
271 let Right t = parse_xml "wnbastandxml.dtd" xmltree
272 let a1 = dtd t
273 let ex1 = "wnbastandxml.dtd"
274 let a2 = xml_file_id t
275 let ex2 = 2011
276 let a3 = show $ time_stamp t
277 let ex3 = "2009-09-28 00:50:00 UTC"
278 let a4 = take 9 (xml t)
279 let ex4 = "<message>"
280 let actual = (a1,a2,a3,a4)
281 let expected = (ex1,ex2,ex3,ex4)
282 actual @?= expected
283
284
285 -- | Sample XML documents for SportInfo types.
286 --
287 sport_info_test_files :: [FilePath]
288 sport_info_test_files =
289 map (change_suffix . add_path) dtds
290 where
291 add_path = ("test/xml/sportinfo/" ++ )
292 change_suffix = replace ".dtd" ".xml"
293
294
295 -- | Make sure we can parse every element of 'sport_info_test_files'.
296 --
297 test_parse_xml_succeeds :: TestTree
298 test_parse_xml_succeeds =
299 testGroup "parse_xml" $ map check sport_info_test_files
300 where
301 check t = testCase t $ do
302 x <- unsafe_read_document t
303 let result = parse_xml "dummy" x
304 let actual = case result of -- isRight appears in base-4.7
305 Left _ -> False
306 Right _ -> True
307 let expected = True
308 actual @?= expected
309
310
311 -- | Ensure that each element of 'sport_info_test_files' can be imported
312 -- by counting the total number of database records (after
313 -- importing) and comparing it against the length of
314 -- 'sport_info_test_files'.
315 --
316 test_dbimport_succeeds :: TestTree
317 test_dbimport_succeeds = testCase "dbimport succeeds" $ do
318 xmltrees <- mapM unsafe_read_document sport_info_test_files
319 let msgs = rights $ map (parse_xml "dummy") xmltrees
320 actual <- withSqliteConn ":memory:" $ runDbConn $ do
321 runMigration silentMigrationLogger $
322 migrate (undefined :: SportInfo)
323 mapM_ dbimport msgs
324 countAll (undefined :: SportInfo)
325
326 actual @?= expected
327 where
328 expected = length sport_info_test_files