]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/Odds.hs
Add a test for unpickling large Odds_XML files.
[dead/htsn-import.git] / src / TSN / XML / Odds.hs
1 {-# LANGUAGE DeriveDataTypeable #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE GADTs #-}
4 {-# LANGUAGE QuasiQuotes #-}
5 {-# LANGUAGE RecordWildCards #-}
6 {-# LANGUAGE StandaloneDeriving #-}
7 {-# LANGUAGE TemplateHaskell #-}
8 {-# LANGUAGE TypeFamilies #-}
9
10 -- | Parse TSN XML for the DTD "Odds_XML.dtd". Each document contains
11 -- a root element \<message\> that contains a bunch of other
12 -- unorganized crap.
13 --
14 module TSN.XML.Odds (
15 odds_tests,
16 pickle_message )
17 where
18
19 import Control.Monad ( forM_, join )
20 import Data.Tuple.Curry ( uncurryN )
21 import Database.Groundhog (
22 (=.),
23 (==.),
24 insert_,
25 insertByAll,
26 migrate,
27 update )
28 import Database.Groundhog.Core ( DefaultKey )
29 import Database.Groundhog.TH (
30 groundhog,
31 mkPersist )
32 import Test.Tasty ( TestTree, testGroup )
33 import Test.Tasty.HUnit ( (@?=), testCase )
34 import Text.Read ( readMaybe )
35 import Text.XML.HXT.Core (
36 PU,
37 xp5Tuple,
38 xp6Tuple,
39 xp8Tuple,
40 xpAttr,
41 xpElem,
42 xpInt,
43 xpList,
44 xpOption,
45 xpPair,
46 xpPrim,
47 xpText,
48 xpTriple,
49 xpWrap )
50
51 import TSN.Codegen (
52 tsn_codegen_config )
53 import TSN.DbImport ( DbImport(..), ImportResult(..), run_dbmigrate )
54 import TSN.Picklers ( xp_team_id )
55 import TSN.XmlImport ( XmlImport(..) )
56 import Xml ( FromXml(..), pickle_unpickle, unpickleable )
57
58
59
60 -- | The home/away lines are 'Double's, but the over/under lines are
61 -- textual. If we want to use one data type for both, we have to go
62 -- with a String and then attempt to 'read' a 'Double' later when we
63 -- go to insert the thing.
64 --
65 data OddsGameCasinoXml =
66 OddsGameCasinoXml {
67 xml_casino_client_id :: Int,
68 xml_casino_name :: String,
69 xml_casino_line :: Maybe String }
70 deriving (Eq, Show)
71
72
73 -- | Try to get a 'Double' out of the 'xml_casino_line' which is a
74 -- priori textual (because it might be an over/under line).
75 --
76 home_away_line :: OddsGameCasinoXml -> Maybe Double
77 home_away_line = join . (fmap readMaybe) . xml_casino_line
78
79 -- | The casinos should have their own table, but the lines don't
80 -- belong in that table. (There should be another table joining the
81 -- casinos and the thing the lines are for together.)
82 --
83 -- We drop the 'Game' prefix because the Casinos really aren't
84 -- children of the games; the XML just makes it seem that way.
85 --
86 data OddsCasino =
87 OddsCasino {
88 casino_client_id :: Int,
89 casino_name :: String }
90 deriving (Eq, Show)
91
92 instance FromXml OddsGameCasinoXml where
93 type Db OddsGameCasinoXml = OddsCasino
94
95 -- We don't need the key argument (from_xml_fk) since the XML type
96 -- contains more information in this case.
97 from_xml OddsGameCasinoXml{..} = OddsCasino
98 xml_casino_client_id
99 xml_casino_name
100
101 instance XmlImport OddsGameCasinoXml
102
103
104 data OddsGameHomeTeamXml =
105 OddsGameHomeTeamXml {
106 xml_home_team_id :: String, -- ^ These are three-character IDs.
107 xml_home_rotation_number :: Int,
108 xml_home_abbr :: String,
109 xml_home_team_name :: String,
110 xml_home_casinos :: [OddsGameCasinoXml] }
111 deriving (Eq, Show)
112
113 instance FromXml OddsGameHomeTeamXml where
114 type Db OddsGameHomeTeamXml = OddsGameTeam
115 from_xml OddsGameHomeTeamXml{..} = OddsGameTeam
116 xml_home_team_id
117 xml_home_abbr
118 xml_home_team_name
119
120 instance XmlImport OddsGameHomeTeamXml where
121
122
123 data OddsGameTeam =
124 OddsGameTeam {
125 db_team_id :: String, -- ^ The home/away team IDs are 3 characters
126 db_abbr :: String,
127 db_team_name :: String }
128 deriving (Eq, Show)
129
130
131 -- | Database mapping between games and their home/away teams.
132 data OddsGame_OddsGameTeam =
133 OddsGame_OddsGameTeam {
134 ogogt_odds_games_id :: DefaultKey OddsGame,
135 ogogt_away_team_id :: DefaultKey OddsGameTeam,
136 ogogt_home_team_id :: DefaultKey OddsGameTeam }
137
138 data OddsGameAwayTeamXml =
139 OddsGameAwayTeamXml {
140 xml_away_team_id :: String, -- ^ These are 3 character IDs.
141 xml_away_rotation_number :: Int,
142 xml_away_abbr :: String,
143 xml_away_team_name :: String,
144 xml_away_casinos :: [OddsGameCasinoXml] }
145 deriving (Eq, Show)
146
147 instance FromXml OddsGameAwayTeamXml where
148 type Db OddsGameAwayTeamXml = OddsGameTeam
149 from_xml OddsGameAwayTeamXml{..} = OddsGameTeam
150 xml_away_team_id
151 xml_away_abbr
152 xml_away_team_name
153
154 instance XmlImport OddsGameAwayTeamXml where
155
156 -- | Can't use a newtype with Groundhog.
157 newtype OddsGameOverUnderXml =
158 OddsGameOverUnderXml { xml_casinos :: [OddsGameCasinoXml] }
159 deriving (Eq, Show)
160
161 -- | This database representation of the casino lines can't be
162 -- constructed from the one in the XML. The casinos within
163 -- Game>HomeTeam, Game>AwayTeam, and Game>Over_Under are all more or
164 -- less the same. We don't need a bajillion different tables to
165 -- store that, just one tying the casino/game pair to the three
166 -- lines.
167 --
168 -- The one small difference between the over/under casinos and the
169 -- home/away ones is that the home/away lines are all 'Double's, but
170 -- the over/under lines appear to be textual.
171 --
172 data OddsGameLine =
173 OddsGameLine {
174 ogl_odds_games_id :: DefaultKey OddsGame,
175 ogl_odds_casinos_id :: DefaultKey OddsCasino,
176 ogl_over_under :: Maybe String,
177 ogl_away_line :: Maybe Double,
178 ogl_home_line :: Maybe Double }
179
180 data OddsGame =
181 OddsGame {
182 db_game_id :: Int,
183 db_game_date :: String, -- TODO
184 db_game_time :: String, -- TODO
185 db_game_away_team_rotation_number :: Int,
186 db_game_home_team_rotation_number :: Int }
187 deriving instance Eq OddsGame
188 deriving instance Show OddsGame
189
190 data OddsGameXml =
191 OddsGameXml {
192 xml_game_id :: Int,
193 xml_game_date :: String, -- TODO
194 xml_game_time :: String, -- TODO
195 xml_game_away_team :: OddsGameAwayTeamXml,
196 xml_game_home_team :: OddsGameHomeTeamXml,
197 xml_game_over_under :: OddsGameOverUnderXml }
198 deriving (Eq, Show)
199
200 -- | Pseudo-field that lets us get the 'OddsCasino's out of
201 -- xml_game_over_under.
202 xml_game_over_under_casinos :: OddsGameXml -> [OddsGameCasinoXml]
203 xml_game_over_under_casinos = xml_casinos . xml_game_over_under
204
205
206 instance FromXml OddsGameXml where
207 type Db OddsGameXml = OddsGame
208 from_xml OddsGameXml{..} = OddsGame
209 xml_game_id
210 xml_game_date
211 xml_game_time
212 (xml_away_rotation_number xml_game_away_team)
213 (xml_home_rotation_number xml_game_home_team)
214
215 instance XmlImport OddsGameXml
216
217
218
219 data Odds =
220 Odds {
221 db_sport :: String,
222 db_title :: String,
223 db_line_time :: String }
224
225
226 -- | Map 'Odds' to their children 'OddsGame's.
227 data Odds_OddsGame =
228 Odds_OddsGame {
229 oog_odds_id :: DefaultKey Odds,
230 oog_odds_games_id :: DefaultKey OddsGame }
231
232 -- | This is our best guess at what occurs in the Odds_XML
233 -- documents. It looks like each consecutive set of games can
234 -- optionally have some notes appear before it. Each "note" comes as
235 -- its own <Notes>...</Notes> element.
236 --
237 -- The notes are ignored completely in the database; we only bother
238 -- with them to ensure that we're (un)pickling correctly.
239 --
240 -- We can't group the notes with a "set" of 'OddsGame's, because that
241 -- leads to ambiguity in parsing. Since we're going to ignore the
242 -- notes anyway, we just stick them with an arbitrary game. C'est la
243 -- vie.
244 --
245 data OddsGameWithNotes =
246 OddsGameWithNotes {
247 notes :: [String],
248 game :: OddsGameXml }
249 deriving (Eq, Show)
250
251 -- | The XML representation of Odds.
252 data Message =
253 Message {
254 xml_xml_file_id :: Int,
255 xml_heading :: String,
256 xml_category :: String,
257 xml_sport :: String,
258 xml_title :: String,
259 xml_line_time :: String,
260 xml_games_with_notes :: [OddsGameWithNotes],
261 xml_time_stamp :: String }
262 deriving (Eq, Show)
263
264 -- | Pseudo-field that lets us get the 'OddsGame's out of
265 -- 'xml_games_with_notes'.
266 xml_games :: Message -> [OddsGameXml]
267 xml_games m = map game (xml_games_with_notes m)
268
269
270 instance FromXml Message where
271 type Db Message = Odds
272
273 -- We don't need the key argument (from_xml_fk) since the XML type
274 -- contains more information in this case.
275 from_xml (Message _ _ _ d e f _ _) =
276 Odds d e f
277
278 instance XmlImport Message
279
280
281
282 -- * Groundhog database schema.
283 -- | This must come before the dbimport code.
284 --
285 mkPersist tsn_codegen_config [groundhog|
286 - entity: Odds
287
288 - entity: OddsCasino
289 dbName: odds_casinos
290 constructors:
291 - name: OddsCasino
292 uniques:
293 - name: unique_odds_casino
294 type: constraint
295 fields: [casino_client_id]
296
297 - entity: OddsGameTeam
298 dbName: odds_games_teams
299 constructors:
300 - name: OddsGameTeam
301 fields:
302 - name: db_team_id
303 type: varchar(3)
304 uniques:
305 - name: unique_odds_games_team
306 type: constraint
307 fields: [db_team_id]
308
309
310 - entity: OddsGame
311 dbName: odds_games
312 constructors:
313 - name: OddsGame
314 uniques:
315 - name: unique_odds_game
316 type: constraint
317 fields: [db_game_id]
318
319 - entity: OddsGameLine
320 dbName: odds_games_lines
321
322 - entity: Odds_OddsGame
323 dbName: odds__odds_games
324
325 - entity: OddsGame_OddsGameTeam
326 dbName: odds_games__odds_games_teams
327 |]
328
329 instance DbImport Message where
330 dbmigrate _=
331 run_dbmigrate $ do
332 migrate (undefined :: Odds)
333 migrate (undefined :: OddsCasino)
334 migrate (undefined :: OddsGameTeam)
335 migrate (undefined :: OddsGame)
336 migrate (undefined :: Odds_OddsGame)
337 migrate (undefined :: OddsGame_OddsGameTeam)
338 migrate (undefined :: OddsGameLine)
339
340 dbimport m = do
341 -- Insert the root "odds" element and acquire its primary key (id).
342 odds_id <- insert_xml m
343
344 -- Next, we insert the home and away teams. We do this before
345 -- inserting the game itself because the game has two foreign keys
346 -- pointing to odds_games_teams.
347 forM_ (xml_games m) $ \g -> do
348 game_id <- insert_xml_or_select g
349 -- Insert a record into odds__odds_game mapping this game
350 -- to its parent in the odds table.
351 insert_ (Odds_OddsGame odds_id game_id)
352
353 -- Next to insert the home and away teams.
354 away_team_id <- insert_xml_or_select (xml_game_away_team g)
355 home_team_id <- insert_xml_or_select (xml_game_home_team g)
356
357 -- Insert a record into odds_games__odds_games_teams
358 -- mapping the home/away teams to this game.
359 insert_ (OddsGame_OddsGameTeam game_id away_team_id home_team_id)
360
361 -- Finaly, we insert the lines. The over/under entries for this
362 -- game and the lines for the casinos all wind up in the same
363 -- table, odds_games_lines. We can insert the over/under entries
364 -- freely with empty away/home lines:
365 forM_ (xml_game_over_under_casinos g) $ \c -> do
366 -- Start by inderting the casino.
367 ou_casino_id <- insert_xml_or_select c
368
369 -- Now add the over/under entry with the casino's id.
370 let ogl = OddsGameLine
371 game_id
372 ou_casino_id
373 (xml_casino_line c)
374 Nothing
375 Nothing
376
377 insertByAll ogl
378
379 -- ...but then when we insert the home/away team lines, we
380 -- prefer to update the existing entry rather than overwrite it
381 -- or add a new record.
382 forM_ (xml_away_casinos $ xml_game_away_team g) $ \c ->do
383 -- insert, or more likely retrieve the existing, casino
384 a_casino_id <- insert_xml_or_select c
385
386 -- Get a Maybe Double instead of the Maybe String that's in there.
387 let away_line = home_away_line c
388
389 -- Unconditionally update that casino's away team line with ours.
390 update [Ogl_Away_Line =. away_line] $ -- WHERE
391 Ogl_Odds_Casinos_Id ==. a_casino_id
392
393 -- Repeat all that for the home team.
394 forM_ (xml_home_casinos $ xml_game_home_team g) $ \c ->do
395 h_casino_id <- insert_xml_or_select c
396 let home_line = home_away_line c
397 update [Ogl_Home_Line =. home_line] $ -- WHERE
398 Ogl_Odds_Casinos_Id ==. h_casino_id
399
400 return game_id
401
402 return ImportSucceeded
403
404 pickle_game_with_notes :: PU OddsGameWithNotes
405 pickle_game_with_notes =
406 xpWrap (from_pair, to_pair) $
407 xpPair
408 (xpList $ xpElem "Notes" xpText)
409 pickle_game
410 where
411 from_pair = uncurry OddsGameWithNotes
412 to_pair OddsGameWithNotes{..} = (notes, game)
413
414
415
416 pickle_casino :: PU OddsGameCasinoXml
417 pickle_casino =
418 xpElem "Casino" $
419 xpWrap (from_tuple, to_tuple) $
420 xpTriple
421 (xpAttr "ClientID" xpInt)
422 (xpAttr "Name" xpText)
423 (xpOption xpText)
424 where
425 from_tuple = uncurryN OddsGameCasinoXml
426 -- Use record wildcards to avoid unused field warnings.
427 to_tuple OddsGameCasinoXml{..} = (xml_casino_client_id,
428 xml_casino_name,
429 xml_casino_line)
430
431
432 pickle_home_team :: PU OddsGameHomeTeamXml
433 pickle_home_team =
434 xpElem "HomeTeam" $
435 xpWrap (from_tuple, to_tuple) $
436 xp5Tuple
437 (xpElem "HomeTeamID" xp_team_id)
438 (xpElem "HomeRotationNumber" xpInt)
439 (xpElem "HomeAbbr" xpText)
440 (xpElem "HomeTeamName" xpText)
441 (xpList pickle_casino)
442 where
443 from_tuple = uncurryN OddsGameHomeTeamXml
444 -- Use record wildcards to avoid unused field warnings.
445 to_tuple OddsGameHomeTeamXml{..} = (xml_home_team_id,
446 xml_home_rotation_number,
447 xml_home_abbr,
448 xml_home_team_name,
449 xml_home_casinos)
450
451
452
453 pickle_away_team :: PU OddsGameAwayTeamXml
454 pickle_away_team =
455 xpElem "AwayTeam" $
456 xpWrap (from_tuple, to_tuple) $
457 xp5Tuple
458 (xpElem "AwayTeamID" xp_team_id)
459 (xpElem "AwayRotationNumber" xpInt)
460 (xpElem "AwayAbbr" xpText)
461 (xpElem "AwayTeamName" xpText)
462 (xpList pickle_casino)
463 where
464 from_tuple = uncurryN OddsGameAwayTeamXml
465 -- Use record wildcards to avoid unused field warnings.
466 to_tuple OddsGameAwayTeamXml{..} = (xml_away_team_id,
467 xml_away_rotation_number,
468 xml_away_abbr,
469 xml_away_team_name,
470 xml_away_casinos)
471
472
473
474 pickle_over_under :: PU OddsGameOverUnderXml
475 pickle_over_under =
476 xpElem "Over_Under" $
477 xpWrap (to_newtype, from_newtype) $
478 xpList pickle_casino
479 where
480 from_newtype (OddsGameOverUnderXml cs) = cs
481 to_newtype = OddsGameOverUnderXml
482
483
484
485 pickle_game :: PU OddsGameXml
486 pickle_game =
487 xpElem "Game" $
488 xpWrap (from_tuple, to_tuple) $
489 xp6Tuple
490 (xpElem "GameID" xpInt)
491 (xpElem "Game_Date" xpText)
492 (xpElem "Game_Time" xpText)
493 pickle_away_team
494 pickle_home_team
495 pickle_over_under
496 where
497 from_tuple = uncurryN OddsGameXml
498 -- Use record wildcards to avoid unused field warnings.
499 to_tuple OddsGameXml{..} = (xml_game_id,
500 xml_game_date,
501 xml_game_time,
502 xml_game_away_team,
503 xml_game_home_team,
504 xml_game_over_under)
505
506
507 pickle_message :: PU Message
508 pickle_message =
509 xpElem "message" $
510 xpWrap (from_tuple, to_tuple) $
511 xp8Tuple (xpElem "XML_File_ID" xpInt)
512 (xpElem "heading" xpText)
513 (xpElem "category" xpText)
514 (xpElem "sport" xpText)
515 (xpElem "Title" xpText)
516 (xpElem "Line_Time" xpText)
517 (xpList pickle_game_with_notes)
518 (xpElem "time_stamp" xpText)
519 where
520 from_tuple = uncurryN Message
521 to_tuple m = (xml_xml_file_id m,
522 xml_heading m,
523 xml_category m,
524 xml_sport m,
525 xml_title m,
526 xml_line_time m,
527 xml_games_with_notes m,
528 xml_time_stamp m)
529
530
531
532 -- * Tasty Tests
533 odds_tests :: TestTree
534 odds_tests =
535 testGroup
536 "Odds tests"
537 [ test_pickle_of_unpickle_is_identity,
538 test_unpickle_succeeds ]
539
540
541 -- | Warning, succeess of this test does not mean that unpickling
542 -- succeeded.
543 test_pickle_of_unpickle_is_identity :: TestTree
544 test_pickle_of_unpickle_is_identity = testGroup "pickle-unpickle tests"
545 [ check "pickle composed with unpickle is the identity"
546 "test/xml/Odds_XML.xml",
547
548 check "pickle composed with unpickle is the identity (non-int team_id)"
549 "test/xml/Odds_XML-noninteger-team-id.xml",
550
551 check "pickle composed with unpickle is the identity (positive(+) line)"
552 "test/xml/Odds_XML-positive-line.xml",
553
554 check "pickle composed with unpickle is the identity (large file)"
555 "test/xml/Odds_XML-largefile.xml" ]
556 where
557 check desc path = testCase desc $ do
558 (expected, actual) <- pickle_unpickle pickle_message path
559 actual @?= expected
560
561
562 test_unpickle_succeeds :: TestTree
563 test_unpickle_succeeds = testGroup "unpickle tests"
564 [ check "unpickling succeeds"
565 "test/xml/Odds_XML.xml",
566
567 check "unpickling succeeds (non-int team_id)"
568 "test/xml/Odds_XML-noninteger-team-id.xml",
569
570 check "unpickling succeeds (positive(+) line)"
571 "test/xml/Odds_XML-positive-line.xml",
572
573 check "unpickling succeeds (large file)"
574 "test/xml/Odds_XML-largefile.xml" ]
575 where
576 check desc path = testCase desc $ do
577 actual <- unpickleable path pickle_message
578 let expected = True
579 actual @?= expected