]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/JFile.hs
Fix present-but-empty vleague parsing in jfilexml.
[dead/htsn-import.git] / src / TSN / XML / JFile.hs
1 {-# LANGUAGE DeriveGeneric #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE GADTs #-}
4 {-# LANGUAGE QuasiQuotes #-}
5 {-# LANGUAGE RecordWildCards #-}
6 {-# LANGUAGE TemplateHaskell #-}
7 {-# LANGUAGE TypeFamilies #-}
8
9 -- | Parse TSN XML for the DTD \"jfilexml.dtd\". There's a top-level
10 -- \<message\>, containing a \<gamelist\>, containing
11 -- \<game\>s. Those games contain a bunch of other stuff. The
12 -- \<gamelist\> is pretty irrelevant; we ignore it and pretend that
13 -- a message contains a bunch of games.
14 --
15 module TSN.XML.JFile (
16 dtd,
17 pickle_message,
18 -- * Tests
19 jfile_tests,
20 -- * WARNING: these are private but exported to silence warnings
21 JFileConstructor(..),
22 JFileGameConstructor(..) )
23 where
24
25 -- System imports
26 import Control.Monad ( forM_, join )
27 import Data.List ( intercalate )
28 import Data.String.Utils ( split )
29 import Data.Time ( UTCTime(..) )
30 import Data.Tuple.Curry ( uncurryN )
31 import qualified Data.Vector.HFixed as H ( HVector, convert )
32 import Database.Groundhog (
33 countAll,
34 deleteAll,
35 migrate )
36 import Database.Groundhog.Core ( DefaultKey )
37 import Database.Groundhog.Generic ( runDbConn, runMigrationSilent )
38 import Database.Groundhog.Sqlite ( withSqliteConn )
39 import Database.Groundhog.TH (
40 groundhog,
41 mkPersist )
42 import qualified GHC.Generics as GHC ( Generic )
43 import Test.Tasty ( TestTree, testGroup )
44 import Test.Tasty.HUnit ( (@?=), testCase )
45 import Text.XML.HXT.Core (
46 PU,
47 xpTriple,
48 xp6Tuple,
49 xp14Tuple,
50 xp19Tuple,
51 xpAttr,
52 xpElem,
53 xpInt,
54 xpList,
55 xpOption,
56 xpPair,
57 xpPrim,
58 xpText,
59 xpText0,
60 xpWrap )
61
62
63 -- Local imports
64 import Misc ( double_just )
65 import TSN.Codegen ( tsn_codegen_config )
66 import TSN.Database ( insert_or_select )
67 import TSN.DbImport ( DbImport(..), ImportResult(..), run_dbmigrate )
68 import TSN.Picklers (
69 xp_date,
70 xp_date_padded,
71 xp_datetime,
72 xp_tba_time,
73 xp_time_dots,
74 xp_time_stamp )
75 import TSN.Team (
76 FromXmlFkTeams(..),
77 HTeam(..),
78 Team(..),
79 VTeam(..) )
80 import TSN.XmlImport (
81 XmlImport(..),
82 XmlImportFkTeams(..) )
83 import Xml (
84 Child(..),
85 FromXml(..),
86 ToDb(..),
87 pickle_unpickle,
88 unpickleable,
89 unsafe_unpickle )
90
91
92
93 -- | The DTD to which this module corresponds. Used to invoke dbimport.
94 --
95 dtd :: String
96 dtd = "jfilexml.dtd"
97
98 --
99 -- DB/XML data types
100 --
101
102 -- * JFile/Message
103
104 -- | Database representation of a 'Message'.
105 --
106 data JFile =
107 JFile {
108 db_xml_file_id :: Int,
109 db_heading :: String,
110 db_category :: String,
111 db_sport :: String,
112 db_time_stamp :: UTCTime }
113
114
115
116 -- | XML Representation of an 'JFile'.
117 --
118 data Message =
119 Message {
120 xml_xml_file_id :: Int,
121 xml_heading :: String,
122 xml_category :: String,
123 xml_sport :: String,
124 xml_gamelist :: JFileGameListXml,
125 xml_time_stamp :: UTCTime }
126 deriving (Eq, GHC.Generic, Show)
127
128
129 -- | For 'H.convert'.
130 --
131 instance H.HVector Message
132
133
134 instance ToDb Message where
135 -- | The database analogue of a 'Message' is a 'JFile'.
136 --
137 type Db Message = JFile
138
139
140 -- | The 'FromXml' instance for 'Message' is required for the
141 -- 'XmlImport' instance.
142 --
143 instance FromXml Message where
144 -- | To convert a 'Message' to an 'JFile', we just drop
145 -- the 'xml_gamelist'.
146 --
147 from_xml Message{..} =
148 JFile {
149 db_xml_file_id = xml_xml_file_id,
150 db_heading = xml_heading,
151 db_category = xml_category,
152 db_sport = xml_sport,
153 db_time_stamp = xml_time_stamp }
154
155
156 -- | This allows us to insert the XML representation 'Message'
157 -- directly.
158 --
159 instance XmlImport Message
160
161
162
163 -- * JFileGame/JFileGameXml
164
165 -- | This is an embedded type within each JFileGame. It has its own
166 -- element, \<Odds_Info\>, but there's only one of them per game. So
167 -- essentially all of these fields belong to a 'JFileGame'. Aaaannnd
168 -- most of them are redundant. We'll (un)pickle them for good
169 -- measure, but in the conversion to the database type, we can drop
170 -- all of the redundant information.
171 --
172 -- All of these are optional because TSN does actually leave the
173 -- whole thing empty from time to time.
174 --
175 -- We stick \"info\" on the home/away team ids to avoid a name clash
176 -- with the game itself.
177 --
178 data JFileGameOddsInfo =
179 JFileGameOddsInfo {
180 db_list_date :: Maybe UTCTime,
181 db_info_home_team_id :: Maybe String, -- redundant (Team)
182 db_info_away_team_id :: Maybe String, -- redundant (Team)
183 db_home_abbr :: Maybe String, -- redundant (Team)
184 db_away_abbr :: Maybe String, -- redundant (Team)
185 db_home_team_name :: Maybe String, -- redundant (Team)
186 db_away_team_name :: Maybe String, -- redundant (Team)
187 db_home_starter :: Maybe String,
188 db_away_starter :: Maybe String,
189 db_game_date :: Maybe UTCTime, -- redundant (JFileGame)
190 db_home_game_key :: Maybe Int,
191 db_away_game_key :: Maybe Int,
192 db_current_timestamp :: Maybe UTCTime,
193 db_live :: Maybe Bool,
194 db_notes :: String }
195 deriving (Eq, Show)
196
197
198 -- | Another embedded type within 'JFileGame'. These look like,
199 -- \<status numeral=\"4\"\>FINAL\</status\> within the XML, but
200 -- they're in one-to-one correspondence with the games.
201 --
202 data JFileGameStatus =
203 JFileGameStatus {
204 db_status_numeral :: Int,
205 db_status :: Maybe String }
206 deriving (Eq, Show)
207
208
209
210 -- | Database representation of a \<game\> contained within a
211 -- \<message\>, and, implicitly, a \<gamelist\>.
212 --
213 -- We've left out the game date, opting instead to combine the
214 -- date/time into the 'db_game_time' field.
215 --
216 data JFileGame =
217 JFileGame {
218 db_jfile_id :: DefaultKey JFile,
219 db_away_team_id :: DefaultKey Team,
220 db_home_team_id :: DefaultKey Team,
221 db_game_id :: Int,
222 db_schedule_id :: Int,
223 db_odds_info :: JFileGameOddsInfo,
224 db_season_type :: Maybe String,
225 db_game_time :: Maybe UTCTime,
226 db_vleague :: Maybe String,
227 db_hleague :: Maybe String,
228 db_vscore :: Int,
229 db_hscore :: Int,
230 db_time_remaining :: Maybe String,
231 db_game_status :: JFileGameStatus }
232
233
234 -- | XML representation of a \<game\> contained within a \<message\>,
235 -- and a \<gamelist\>. The Away/Home teams seem to coincide with
236 -- those of 'OddsGame', so we're reusing the DB type via the common
237 -- 'TSN.Team' structure. But the XML types are different, because
238 -- they have different picklers!
239 --
240 data JFileGameXml =
241 JFileGameXml {
242 xml_game_id :: Int,
243 xml_schedule_id :: Int,
244 xml_odds_info :: JFileGameOddsInfo,
245 xml_season_type :: Maybe String,
246 xml_game_date :: UTCTime,
247 xml_game_time :: Maybe UTCTime,
248 xml_vteam :: VTeam,
249 xml_vleague :: Maybe String,
250 xml_hteam :: HTeam,
251 xml_hleague :: Maybe String,
252 xml_vscore :: Int,
253 xml_hscore :: Int,
254 xml_time_remaining :: Maybe String,
255 xml_game_status :: JFileGameStatus }
256 deriving (Eq, GHC.Generic, Show)
257
258
259 -- | For 'H.convert'.
260 --
261 instance H.HVector JFileGameXml
262
263
264 -- * JFileGameListXml
265
266 -- | The XML representation of \<message\> -> \<gamelist\>. This
267 -- element serves only to contain \<game\>s, so we don't store the
268 -- intermediate table in the database.
269 --
270 newtype JFileGameListXml =
271 JFileGameListXml {
272 xml_games ::
273 [JFileGameXml] }
274 deriving (Eq, Show)
275
276
277 instance ToDb JFileGameXml where
278 -- | The database analogue of an 'JFileGameXml' is
279 -- an 'JFileGame'.
280 --
281 type Db JFileGameXml = JFileGame
282
283
284 instance Child JFileGameXml where
285 -- | Each 'JFileGameXml' is contained in (i.e. has a foreign key to)
286 -- a 'JFile'.
287 --
288 type Parent JFileGameXml = JFile
289
290
291 instance FromXmlFkTeams JFileGameXml where
292 -- | To convert an 'JFileGameXml' to an 'JFileGame', we add the
293 -- foreign keys for JFile and the home/away teams. We also mash
294 -- the date/time together into one field.
295 --
296 from_xml_fk_teams fk fk_away fk_home JFileGameXml{..} =
297 JFileGame {
298 db_jfile_id = fk,
299 db_away_team_id = fk_away,
300 db_home_team_id = fk_home,
301 db_game_id = xml_game_id,
302 db_schedule_id = xml_schedule_id,
303 db_odds_info = xml_odds_info,
304 db_season_type = xml_season_type,
305 db_game_time = make_game_time xml_game_date xml_game_time,
306 db_vleague = xml_vleague,
307 db_hleague = xml_hleague,
308 db_vscore = xml_vscore,
309 db_hscore = xml_hscore,
310 db_time_remaining = xml_time_remaining,
311 db_game_status = xml_game_status }
312 where
313 -- | Construct the database game time from the XML \<Game_Date\>
314 -- and \<Game_Time\> elements. The \<Game_Time\> elements
315 -- sometimes have a value of \"TBA\"; in that case, we don't
316 -- want to pretend that we know the time by setting it to
317 -- e.g. midnight, so instead we make the entire date/time
318 -- Nothing.
319 make_game_time :: UTCTime -> Maybe UTCTime -> Maybe UTCTime
320 make_game_time _ Nothing = Nothing
321 make_game_time d (Just t) = Just $ UTCTime (utctDay d) (utctDayTime t)
322
323
324 -- | This allows us to insert the XML representation
325 -- 'JFileGameXml' directly.
326 --
327 instance XmlImportFkTeams JFileGameXml
328
329
330 ---
331 --- Database stuff.
332 ---
333
334 instance DbImport Message where
335 dbmigrate _ =
336 run_dbmigrate $ do
337 migrate (undefined :: Team)
338 migrate (undefined :: JFile)
339 migrate (undefined :: JFileGame)
340
341 dbimport m = do
342 -- Insert the top-level message
343 msg_id <- insert_xml m
344
345 -- Now loop through the message's games
346 forM_ (xml_games $ xml_gamelist m) $ \game -> do
347 -- First we insert the home and away teams.
348 away_team_id <- insert_or_select (vteam $ xml_vteam game)
349 home_team_id <- insert_or_select (hteam $ xml_hteam game)
350
351 -- Now insert the game keyed to the "jfile" and its teams.
352 insert_xml_fk_teams_ msg_id away_team_id home_team_id game
353
354
355 return ImportSucceeded
356
357
358 mkPersist tsn_codegen_config [groundhog|
359 - entity: JFile
360 dbName: jfile
361 constructors:
362 - name: JFile
363 uniques:
364 - name: unique_jfile
365 type: constraint
366 # Prevent multiple imports of the same message.
367 fields: [db_xml_file_id]
368
369 - embedded: JFileGameStatus
370 fields:
371 - name: db_status_numeral
372 dbName: status_numeral
373 - name: db_status
374 dbName: status
375
376 # Many of the JFileGameOddsInfo fields are redundant and have
377 # been left out.
378 - embedded: JFileGameOddsInfo
379 fields:
380 - name: db_list_date
381 dbName: list_date
382 - name: db_home_starter
383 dbName: home_starter
384 - name: db_home_game_key
385 dbName: home_game_key
386 - name: db_away_game_key
387 dbName: away_game_key
388 - name: db_current_timestamp
389 dbName: current_timestamp
390 - name: db_live
391 dbName: live
392 - name: db_notes
393 dbName: notes
394
395 - entity: JFileGame
396 dbName: jfile_games
397 constructors:
398 - name: JFileGame
399 fields:
400 - name: db_jfile_id
401 reference:
402 onDelete: cascade
403 - name: db_away_team_id
404 reference:
405 onDelete: cascade
406 - name: db_home_team_id
407 reference:
408 onDelete: cascade
409 - name: db_odds_info
410 embeddedType:
411 - {name: list_date, dbName: list_date}
412 - {name: home_starter, dbName: home_starter}
413 - {name: away_starter, dbName: away_starter}
414 - {name: home_game_key, dbName: home_game_key}
415 - {name: away_game_key, dbName: away_game_key}
416 - {name: current_timestamp, dbName: current_timestamp}
417 - {name: live, dbName: live}
418 - {name: notes, dbName: notes}
419 - name: db_game_status
420 embeddedType:
421 - {name: status_numeral, dbName: status_numeral}
422 - {name: status, dbName: status}
423
424 |]
425
426
427
428 ---
429 --- Pickling
430 ---
431
432 -- | Pickler for the top-level 'Message'.
433 --
434 pickle_message :: PU Message
435 pickle_message =
436 xpElem "message" $
437 xpWrap (from_tuple, H.convert) $
438 xp6Tuple (xpElem "XML_File_ID" xpInt)
439 (xpElem "heading" xpText)
440 (xpElem "category" xpText)
441 (xpElem "sport" xpText)
442 pickle_gamelist
443 (xpElem "time_stamp" xp_time_stamp)
444 where
445 from_tuple = uncurryN Message
446
447
448 pickle_gamelist :: PU JFileGameListXml
449 pickle_gamelist =
450 xpElem "gamelist" $
451 xpWrap (to_result, from_result) $ xpList pickle_game
452 where
453 to_result = JFileGameListXml
454 from_result = xml_games
455
456
457
458
459 pickle_game :: PU JFileGameXml
460 pickle_game =
461 xpElem "game" $
462 xpWrap (from_tuple, to_tuple') $
463 xp14Tuple (xpElem "game_id" xpInt)
464 (xpElem "schedule_id" xpInt)
465 pickle_odds_info
466 (xpElem "seasontype" (xpOption xpText))
467 (xpElem "Game_Date" xp_date_padded)
468 (xpElem "Game_Time" xp_tba_time)
469 pickle_away_team
470 (xpOption $ xpElem "vleague" (xpOption xpText))
471 pickle_home_team
472 (xpOption $ xpElem "hleague" xpText)
473 (xpElem "vscore" xpInt)
474 (xpElem "hscore" xpInt)
475 (xpOption $ xpElem "time_r" xpText)
476 pickle_status
477 where
478 from_tuple (a,b,c,d,e,f,g,h,i,j,k,l,m,n) =
479 JFileGameXml a b c d e f g (join h) i j k l m n
480
481 to_tuple' (JFileGameXml a b c d e f g h i j k l m n) =
482 (a, b, c, d, e, f, g, double_just h, i, j, k, l, m, n)
483
484
485 pickle_odds_info :: PU JFileGameOddsInfo
486 pickle_odds_info =
487 xpElem "Odds_Info" $
488 xpWrap (from_tuple, to_tuple') $
489 xp19Tuple (xpElem "ListDate" (xpOption xp_date))
490 (xpElem "HomeTeamID" (xpOption xpText))
491 (xpElem "AwayTeamID" (xpOption xpText))
492 (xpElem "HomeAbbr" (xpOption xpText))
493 (xpElem "AwayAbbr" (xpOption xpText))
494 (xpElem "HomeTeamName" (xpOption xpText))
495 (xpElem "AwayTeamName" (xpOption xpText))
496 (xpElem "HStarter" (xpOption xpText))
497 (xpElem "AStarter" (xpOption xpText))
498 (xpElem "GameDate" (xpOption xp_datetime))
499 (xpElem "HGameKey" (xpOption xpInt))
500 (xpElem "AGameKey" (xpOption xpInt))
501 (xpElem "CurrentTimeStamp" (xpOption xp_time_dots))
502 (xpElem "Live" (xpOption xpPrim))
503 (xpElem "Notes1" xpText0)
504 (xpElem "Notes2" xpText0)
505 (xpElem "Notes3" xpText0)
506 (xpElem "Notes4" xpText0)
507 (xpElem "Notes5" xpText0)
508 where
509 from_tuple (x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,n1,n2,n3,n4,n5) =
510 JFileGameOddsInfo x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 notes
511 where
512 notes = intercalate "\n" [n1,n2,n3,n4,n5]
513
514 to_tuple' o = (db_list_date o,
515 db_info_home_team_id o,
516 db_info_away_team_id o,
517 db_home_abbr o,
518 db_away_abbr o,
519 db_home_team_name o,
520 db_away_team_name o,
521 db_home_starter o,
522 db_away_starter o,
523 db_game_date o,
524 db_home_game_key o,
525 db_away_game_key o,
526 db_current_timestamp o,
527 db_live o,
528 n1,n2,n3,n4,n5)
529 where
530 note_lines = split "\n" (db_notes o)
531 n1 = case note_lines of
532 (notes1:_) -> notes1
533 _ -> ""
534 n2 = case note_lines of
535 (_:notes2:_) -> notes2
536 _ -> ""
537 n3 = case note_lines of
538 (_:_:notes3:_) -> notes3
539 _ -> ""
540 n4 = case note_lines of
541 (_:_:_:notes4:_) -> notes4
542 _ -> ""
543 n5 = case note_lines of
544 (_:_:_:_:notes5:_) -> notes5
545 _ -> ""
546
547 -- | (Un)pickle a home team to/from the dual XML/DB representation
548 -- 'Team'.
549 --
550 pickle_home_team :: PU HTeam
551 pickle_home_team =
552 xpElem "hteam" $
553 xpWrap (from_tuple, to_tuple') $
554 xpTriple (xpAttr "teamid" xpText) -- Yeah, they're text.
555 (xpAttr "abbr" (xpOption xpText)) -- Some are blank
556 (xpOption xpText) -- Yup, some are nameless
557 where
558 from_tuple = HTeam . (uncurryN Team)
559 to_tuple' (HTeam t) = H.convert t
560
561
562 -- | (Un)pickle an away team to/from the dual XML/DB representation
563 -- 'Team'.
564 --
565 pickle_away_team :: PU VTeam
566 pickle_away_team =
567 xpElem "vteam" $
568 xpWrap (from_tuple, to_tuple') $
569 xpTriple (xpAttr "teamid" xpText) -- Yeah, they're text.
570 (xpAttr "abbr" (xpOption xpText)) -- Some are blank
571 (xpOption xpText) -- Yup, some are nameless
572 where
573 from_tuple = VTeam . (uncurryN Team)
574 to_tuple' (VTeam t) = H.convert t
575
576
577 pickle_status :: PU JFileGameStatus
578 pickle_status =
579 xpElem "status" $
580 xpWrap (from_tuple, to_tuple') $
581 xpPair (xpAttr "numeral" xpInt)
582 (xpOption xpText)
583 where
584 from_tuple = uncurry JFileGameStatus
585
586 -- Avoid unused field warnings.
587 to_tuple' JFileGameStatus{..} = (db_status_numeral, db_status)
588
589
590 --
591 -- Tasty Tests
592 --
593
594 -- | A list of all tests for this module.
595 --
596 jfile_tests :: TestTree
597 jfile_tests =
598 testGroup
599 "JFile tests"
600 [ test_on_delete_cascade,
601 test_pickle_of_unpickle_is_identity,
602 test_unpickle_succeeds ]
603
604
605 -- | If we unpickle something and then pickle it, we should wind up
606 -- with the same thing we started with. WARNING: success of this
607 -- test does not mean that unpickling succeeded.
608 --
609 test_pickle_of_unpickle_is_identity :: TestTree
610 test_pickle_of_unpickle_is_identity = testGroup "pickle-unpickle tests"
611 [ check "pickle composed with unpickle is the identity"
612 "test/xml/jfilexml.xml",
613 check "pickle composed with unpickle is the identity (missing fields)"
614 "test/xml/jfilexml-missing-fields.xml",
615
616 check "pickle composed with unpickle is the identity (TBA game time)"
617 "test/xml/jfilexml-tba-game-time.xml"]
618 where
619 check desc path = testCase desc $ do
620 (expected, actual) <- pickle_unpickle pickle_message path
621 actual @?= expected
622
623
624
625 -- | Make sure we can actually unpickle these things.
626 --
627 test_unpickle_succeeds :: TestTree
628 test_unpickle_succeeds = testGroup "unpickle tests"
629 [ check "unpickling succeeds" "test/xml/jfilexml.xml",
630
631 check "unpickling succeeds (missing fields)"
632 "test/xml/jfilexml-missing-fields.xml",
633
634 check "unpickling succeeds (TBA game time)"
635 "test/xml/jfilexml-tba-game-time.xml" ]
636 where
637 check desc path = testCase desc $ do
638 actual <- unpickleable path pickle_message
639
640 let expected = True
641 actual @?= expected
642
643
644
645 -- | Make sure everything gets deleted when we delete the top-level
646 -- record.
647 --
648 test_on_delete_cascade :: TestTree
649 test_on_delete_cascade = testGroup "cascading delete tests"
650 [ check "deleting auto_racing_results deletes its children"
651 "test/xml/jfilexml.xml"
652 20, -- teams
653
654 check "deleting auto_racing_results deletes its children (missing fields)"
655 "test/xml/jfilexml-missing-fields.xml"
656 44,
657
658 check "deleting auto_racing_results deletes its children (TBA game time)"
659 "test/xml/jfilexml-tba-game-time.xml"
660 8 ]
661 where
662 check desc path expected = testCase desc $ do
663 results <- unsafe_unpickle path pickle_message
664 let a = undefined :: Team
665 let b = undefined :: JFile
666 let c = undefined :: JFileGame
667
668 actual <- withSqliteConn ":memory:" $ runDbConn $ do
669 runMigrationSilent $ do
670 migrate a
671 migrate b
672 migrate c
673 _ <- dbimport results
674 deleteAll b
675 count_a <- countAll a
676 count_b <- countAll b
677 count_c <- countAll c
678 return $ sum [count_a, count_b, count_c]
679 actual @?= expected