]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/Scores.hs
Fix present-but-empty vleague parsing in jfilexml.
[dead/htsn-import.git] / src / TSN / XML / Scores.hs
1 {-# LANGUAGE DeriveGeneric #-}
2 {-# LANGUAGE DeriveDataTypeable #-}
3 {-# LANGUAGE FlexibleInstances #-}
4 {-# LANGUAGE GADTs #-}
5 {-# LANGUAGE QuasiQuotes #-}
6 {-# LANGUAGE RecordWildCards #-}
7 {-# LANGUAGE TemplateHaskell #-}
8 {-# LANGUAGE TypeFamilies #-}
9
10 -- | Parse TSN XML for the DTD \"scoresxml.dtd\". Each document
11 -- contains a single \<game\> and some \<location\>s.
12 --
13 module TSN.XML.Scores (
14 dtd,
15 pickle_message,
16 -- * Tests
17 scores_tests,
18 -- * WARNING: these are private but exported to silence warnings
19 Score_LocationConstructor(..),
20 ScoreConstructor(..),
21 ScoreGameConstructor(..) )
22 where
23
24 -- System imports.
25 import Control.Monad ( join )
26 import Data.Data ( Data )
27 import Data.Time ( UTCTime )
28 import Data.Tuple.Curry ( uncurryN )
29 import Data.Typeable ( Typeable )
30 import qualified Data.Vector.HFixed as H ( HVector, convert )
31 import Database.Groundhog (
32 countAll,
33 deleteAll,
34 insert_,
35 migrate )
36 import Database.Groundhog.Core ( DefaultKey )
37 import Database.Groundhog.Generic ( runDbConn, runMigrationSilent )
38 import Database.Groundhog.Sqlite ( withSqliteConn )
39 import Database.Groundhog.TH (
40 groundhog,
41 mkPersist )
42 import qualified GHC.Generics as GHC ( Generic )
43 import Test.Tasty ( TestTree, testGroup )
44 import Test.Tasty.HUnit ( (@?=), testCase )
45 import Text.XML.HXT.Core (
46 PU,
47 xp7Tuple,
48 xp11Tuple,
49 xpAttr,
50 xpElem,
51 xpInt,
52 xpList,
53 xpOption,
54 xpPrim,
55 xpText,
56 xpTriple,
57 xpWrap )
58
59 -- Local imports.
60 import Misc ( double_just )
61 import TSN.Codegen ( tsn_codegen_config )
62 import TSN.Database ( insert_or_select )
63 import TSN.DbImport ( DbImport(..), ImportResult(..), run_dbmigrate )
64 import TSN.Location ( Location(..), pickle_location )
65 import TSN.Picklers ( xp_attr_option, xp_time_stamp )
66 import TSN.Team (
67 FromXmlFkTeams(..),
68 HTeam(..),
69 Team(..),
70 VTeam(..) )
71 import TSN.XmlImport ( XmlImport(..), XmlImportFkTeams(..) )
72 import Xml (
73 Child(..),
74 FromXml(..),
75 ToDb(..),
76 pickle_unpickle,
77 unpickleable,
78 unsafe_unpickle )
79
80
81 -- | The DTD to which this module corresponds. Used to invoke dbimport.
82 --
83 dtd :: String
84 dtd = "scoresxml.dtd"
85
86
87 --
88 -- * DB/XML Data types
89 --
90
91
92 -- * Score / Message
93
94 -- | Database representation of a 'Message'. It lacks the
95 -- 'xml_locations' and 'xml_game' which are related via foreign keys
96 -- instead.
97 --
98 data Score =
99 Score {
100 db_xml_file_id :: Int,
101 db_heading :: String,
102 db_game_id :: Maybe Int, -- ^ We've seen an empty one
103 db_schedule_id :: Maybe Int, -- ^ We've seen an empty one
104 db_tsnupdate :: Maybe Bool,
105 db_category :: String,
106 db_sport :: String,
107 db_season_type :: Maybe String, -- ^ We've seen an empty one
108 db_time_stamp :: UTCTime }
109
110
111 -- | XML representation of the top level \<message\> element (i.e. a
112 -- 'Score').
113 --
114 data Message =
115 Message {
116 xml_xml_file_id :: Int,
117 xml_heading :: String,
118 xml_game_id :: Maybe Int, -- ^ We've seen an empty one
119 xml_schedule_id :: Maybe Int, -- ^ We've seen an empty one
120 xml_tsnupdate :: Maybe Bool,
121 xml_category :: String,
122 xml_sport :: String,
123 xml_locations :: [Location],
124 xml_season_type :: Maybe String, -- ^ We've seen an empty one
125 xml_game :: ScoreGameXml,
126 xml_time_stamp :: UTCTime }
127 deriving (Eq, GHC.Generic, Show)
128
129
130 -- | For 'H.convert'.
131 --
132 instance H.HVector Message
133
134
135 instance ToDb Message where
136 -- | The database representation of a 'Message' is a 'Score'.
137 type Db Message = Score
138
139 instance FromXml Message where
140 -- | When converting from the XML representation to the database
141 -- one, we drop the list of locations which will be foreign-keyed to
142 -- us instead.
143 from_xml Message{..} =
144 Score {
145 db_xml_file_id = xml_xml_file_id,
146 db_heading = xml_heading,
147 db_game_id = xml_game_id,
148 db_schedule_id = xml_schedule_id,
149 db_tsnupdate = xml_tsnupdate,
150 db_category = xml_category,
151 db_sport = xml_sport,
152 db_season_type = xml_season_type,
153 db_time_stamp = xml_time_stamp }
154
155
156 -- | This lets us insert the XML representation 'Message' directly.
157 --
158 instance XmlImport Message
159
160
161 -- * ScoreGame / ScoreGameXml
162
163 -- | This is an embedded field within 'SportsGame'. Each \<status\>
164 -- element has two attributes, a numeral and a type. It also
165 -- contains some text. Rather than put these in their own table, we
166 -- include them in the parent 'SportsGame'.
167 --
168 data ScoreGameStatus =
169 ScoreGameStatus {
170 db_status_numeral :: Maybe Int,
171 db_status_type :: Maybe String, -- ^ These are probably only one-character,
172 -- long, but they all take the same
173 -- amount of space in Postgres.
174 db_status_text :: String }
175 deriving (Data, Eq, Show, Typeable)
176
177
178 -- | Database representation of a game.
179 --
180 data ScoreGame =
181 ScoreGame {
182 db_scores_id :: DefaultKey Score,
183 db_away_team_id :: DefaultKey Team,
184 db_home_team_id :: DefaultKey Team,
185 db_away_team_score :: Int,
186 db_home_team_score :: Int,
187 db_away_team_pitcher :: Maybe String, -- ^ Found in the child \<vteam\>
188 db_home_team_pitcher :: Maybe String, -- ^ Found in the child \<hteam\>
189 db_time_r :: Maybe String, -- ^ Time remaining, the format is uncertain.
190 db_status :: ScoreGameStatus,
191 db_notes :: Maybe String }
192
193
194 -- | XML representation of a \<game\> element (i.e. a 'ScoreGame').
195 --
196 data ScoreGameXml =
197 ScoreGameXml {
198 xml_vteam :: VTeamXml,
199 xml_hteam :: HTeamXml,
200 xml_away_team_score :: Int,
201 xml_home_team_score :: Int,
202 xml_time_r :: Maybe String, -- ^ Time remaining, the format is uncertain.
203 xml_status :: ScoreGameStatus,
204 xml_notes :: Maybe String }
205 deriving (Eq, GHC.Generic, Show)
206
207
208 -- | For 'H.convert'.
209 --
210 instance H.HVector ScoreGameXml
211
212
213 instance ToDb ScoreGameXml where
214 -- | The database representation of a 'ScoreGameXml' is a
215 -- 'ScoreGame'.
216 --
217 type Db ScoreGameXml = ScoreGame
218
219
220 instance Child ScoreGameXml where
221 -- | Each 'ScoreGameXml' is contained in (i.e. has a foreign key to)
222 -- a 'Score'.
223 --
224 type Parent ScoreGameXml = Score
225
226
227 instance FromXmlFkTeams ScoreGameXml where
228 -- | To create a 'ScoreGame' from a 'ScoreGameXml', we need three
229 -- foreign keys: the parent message, and the away/home teams.
230 --
231 -- During conversion, we also get the pitchers out of the teams;
232 -- unfortunately this prevents us from making the conversion
233 -- generically.
234 --
235 from_xml_fk_teams fk fk_away fk_home ScoreGameXml{..} =
236 ScoreGame {
237 db_scores_id = fk,
238 db_away_team_id = fk_away,
239 db_home_team_id = fk_home,
240 db_away_team_score = xml_away_team_score,
241 db_home_team_score = xml_home_team_score,
242 db_away_team_pitcher = xml_vpitcher xml_vteam,
243 db_home_team_pitcher = xml_hpitcher xml_hteam,
244 db_time_r = xml_time_r,
245 db_status = xml_status,
246 db_notes = xml_notes }
247
248 -- | This lets us import the database representation 'ScoreGameXml'
249 -- directly.
250 --
251 instance XmlImportFkTeams ScoreGameXml
252
253
254
255 -- * Score_Location
256
257 -- | Join each 'Score' with its 'Location's. Database-only. We use a
258 -- join table because the locations are kept unique but there are
259 -- multiple locations per 'Score'.
260 --
261 data Score_Location =
262 Score_Location
263 (DefaultKey Score)
264 (DefaultKey Location)
265
266
267 -- * HTeamXml / VTeamXml
268
269 -- | XML Representation of a home team. This document type is unusual
270 -- in that the \<hteam\> elements can have a pitcher attribute
271 -- attached to them. We still want to maintain the underlying 'Team'
272 -- representation, so we say that a home team is a 'Team' and
273 -- (maybe) a pitcher.
274 --
275 data HTeamXml =
276 HTeamXml {
277 xml_ht :: HTeam,
278 xml_hpitcher :: Maybe String }
279 deriving (Eq, Show)
280
281 instance ToDb HTeamXml where
282 -- | The database analogue of a 'HTeamXml' is its 'Team'.
283 type Db HTeamXml = Team
284
285 instance FromXml HTeamXml where
286 -- | The conversion from XML to database is simply the 'Team' accessor.
287 --
288 from_xml = hteam . xml_ht
289
290 -- | Allow import of the XML representation directly, without
291 -- requiring a manual conversion to the database type first.
292 --
293 instance XmlImport HTeamXml
294
295
296
297 -- | XML Representation of an away team. This document type is unusual
298 -- in that the \<hteam\> elements can have a pitcher attribute
299 -- attached to them. We still want to maintain the underlying 'Team'
300 -- representation, so we say that an away team is a 'Team' and
301 -- (maybe) a pitcher.
302 --
303 data VTeamXml =
304 VTeamXml {
305 xml_vt :: VTeam,
306 xml_vpitcher :: Maybe String }
307 deriving (Eq, Show)
308
309 instance ToDb VTeamXml where
310 -- | The database analogue of a 'VTeamXml' is its 'Team'.
311 type Db VTeamXml = Team
312
313 instance FromXml VTeamXml where
314 -- | The conversion from XML to database is simply the 'Team' accessor.
315 --
316 from_xml = vteam . xml_vt
317
318 -- | Allow import of the XML representation directly, without
319 -- requiring a manual conversion to the database type first.
320 --
321 instance XmlImport VTeamXml
322
323
324
325 instance DbImport Message where
326 dbmigrate _ =
327 run_dbmigrate $ do
328 migrate (undefined :: Location)
329 migrate (undefined :: Team)
330 migrate (undefined :: Score)
331 migrate (undefined :: ScoreGame)
332 migrate (undefined :: Score_Location)
333
334 dbimport m = do
335 -- Insert the message and get its ID.
336 msg_id <- insert_xml m
337
338 -- Insert all of the locations contained within this message and
339 -- collect their IDs in a list. We use insert_or_select because
340 -- most of the locations will already exist, and we just want to
341 -- get the ID of the existing location when there's a collision.
342 location_ids <- mapM insert_or_select (xml_locations m)
343
344 -- Now use that list to construct 'Score_ScoreLocation' objects,
345 -- and insert them.
346 mapM_ (insert_ . Score_Location msg_id) location_ids
347
348 -- Insert the hteam/vteams, noting the IDs.
349 vteam_id <- insert_xml_or_select (xml_vteam $ xml_game m)
350 hteam_id <- insert_xml_or_select (xml_hteam $ xml_game m)
351
352 -- Now use those along with the msg_id to construct the game.
353 insert_xml_fk_teams_ msg_id vteam_id hteam_id (xml_game m)
354
355 return ImportSucceeded
356
357
358
359 -- These types have fields with e.g. db_ and xml_ prefixes, so we
360 -- use our own codegen to peel those off before naming the columns.
361 mkPersist tsn_codegen_config [groundhog|
362 - entity: Score
363 dbName: scores
364 constructors:
365 - name: Score
366 uniques:
367 - name: unique_scores
368 type: constraint
369 # Prevent multiple imports of the same message.
370 fields: [db_xml_file_id]
371
372 - embedded: ScoreGameStatus
373 fields:
374 - name: db_status_numeral
375 dbName: status_numeral
376 - name: db_status_type
377 dbName: status_type
378 - name: db_status_text
379 dbName: status_text
380
381
382 - entity: ScoreGame
383 dbName: scores_games
384 constructors:
385 - name: ScoreGame
386 fields:
387 - name: db_scores_id
388 reference:
389 onDelete: cascade
390 - name: db_status
391 embeddedType:
392 - { name: status_numeral, dbName: status_numeral }
393 - { name: status_type, dbName: status_type }
394 - { name: status_text, dbName: status_text }
395
396
397 - entity: Score_Location
398 dbName: scores__locations
399 constructors:
400 - name: Score_Location
401 fields:
402 - name: score_Location0 # Default created by mkNormalFieldName
403 dbName: scores_id
404 reference:
405 onDelete: cascade
406 - name: score_Location1 # Default created by mkNormalFieldName
407 dbName: locations_id
408 reference:
409 onDelete: cascade
410 |]
411
412
413 --
414 -- Pickling
415 --
416
417 -- | Convert a 'Message' to/from \<message\>.
418 --
419 pickle_message :: PU Message
420 pickle_message =
421 xpElem "message" $
422 xpWrap (from_tuple, H.convert) $
423 xp11Tuple (xpElem "XML_File_ID" xpInt)
424 (xpElem "heading" xpText)
425 (xpElem "game_id" (xpOption xpInt))
426 (xpElem "schedule_id" (xpOption xpInt))
427 (xpOption $ xpElem "tsnupdate" xpPrim)
428 (xpElem "category" xpText)
429 (xpElem "sport" xpText)
430 (xpList pickle_location)
431 (xpElem "seasontype" (xpOption xpText))
432 pickle_game
433 (xpElem "time_stamp" xp_time_stamp)
434 where
435 from_tuple = uncurryN Message
436
437
438
439 -- | Convert a 'ScoreGameStatus' to/from \<status\>. The \"type\"
440 -- attribute can be either missing or empty, so we're really parsing
441 -- a double-Maybe here. We use the monad join to collapse it into
442 -- one. See also: the hteam/vteam picklers.
443 --
444 pickle_status :: PU ScoreGameStatus
445 pickle_status =
446 xpElem "status" $
447 xpWrap (from_tuple, to_tuple') $
448 xpTriple (xpAttr "numeral" xp_attr_option)
449 (xpOption $ xpAttr "type" $ xpOption xpText)
450 xpText
451 where
452 from_tuple (x,y,z) = ScoreGameStatus x (join y) z
453 to_tuple' ScoreGameStatus{..} =
454 (db_status_numeral, double_just db_status_type, db_status_text)
455
456
457 -- | Convert a 'ScoreGameXml' to/from \<game\>.
458 --
459 pickle_game :: PU ScoreGameXml
460 pickle_game =
461 xpElem "game" $
462 xpWrap (from_tuple, H.convert) $
463 xp7Tuple pickle_vteam
464 pickle_hteam
465 (xpElem "vscore" xpInt)
466 (xpElem "hscore" xpInt)
467 (xpOption $ xpElem "time_r" xpText)
468 pickle_status
469 (xpOption $ xpElem "notes" xpText)
470 where
471 from_tuple = uncurryN ScoreGameXml
472
473
474 -- | Convert a 'VTeamXml' to/from \<vteam\>. The team names
475 -- always seem to be present here, but in the shared representation,
476 -- they're optional (because they show up blank elsewhere). So, we
477 -- pretend they're optional.
478 --
479 -- The \"pitcher\" attribute is a little bit funny. Usually, when
480 -- there's no pitcher, the attribute itself is missing. But once in
481 -- a blue moon, it will be present with no text. We want to treat
482 -- both cases the same, so what we really parse is a Maybe (Maybe
483 -- String), and then use the monad 'join' to collapse it into a single
484 -- Maybe.
485 --
486 pickle_vteam :: PU VTeamXml
487 pickle_vteam =
488 xpElem "vteam" $
489 xpWrap (from_tuple, to_tuple') $
490 xpTriple (xpAttr "id" xpText)
491 (xpOption $ xpAttr "pitcher" (xpOption xpText))
492 (xpOption xpText) -- Team name
493 where
494 from_tuple (x,y,z) = VTeamXml (VTeam (Team x Nothing z)) (join y)
495
496 to_tuple' (VTeamXml (VTeam t) Nothing) = (team_id t, Nothing, name t)
497 to_tuple' (VTeamXml (VTeam t) jvp) = (team_id t, Just jvp, name t)
498
499
500 -- | Convert a 'HTeamXml' to/from \<hteam\>. Identical to 'pickle_vteam'
501 -- modulo the \"h\" and \"v\". The team names always seem to be
502 -- present here, but in the shared representation, they're optional
503 -- (because they show up blank elsewhere). So, we pretend they're
504 -- optional.
505 --
506 -- The \"pitcher\" attribute is a little bit funny. Usually, when
507 -- there's no pitcher, the attribute itself is missing. But once in
508 -- a blue moon, it will be present with no text. We want to treat
509 -- both cases the same, so what we really parse is a Maybe (Maybe
510 -- String), and then use the monad 'join' to collapse it into a single
511 -- Maybe.
512 --
513 pickle_hteam :: PU HTeamXml
514 pickle_hteam =
515 xpElem "hteam" $
516 xpWrap (from_tuple, to_tuple') $
517 xpTriple (xpAttr "id" xpText)
518 (xpOption $ xpAttr "pitcher" (xpOption xpText))
519 (xpOption xpText) -- Team name
520 where
521 from_tuple (x,y,z)= HTeamXml (HTeam (Team x Nothing z)) (join y)
522 to_tuple' (HTeamXml (HTeam t) jhp) = (team_id t, double_just jhp, name t)
523
524
525
526 --
527 -- * Tasty tests
528 --
529
530 -- | A list of all tests for this module.
531 --
532 scores_tests :: TestTree
533 scores_tests =
534 testGroup
535 "Scores tests"
536 [ test_on_delete_cascade,
537 test_pickle_of_unpickle_is_identity,
538 test_unpickle_succeeds ]
539
540
541 -- | If we unpickle something and then pickle it, we should wind up
542 -- with the same thing we started with. WARNING: success of this
543 -- test does not mean that unpickling succeeded.
544 --
545 test_pickle_of_unpickle_is_identity :: TestTree
546 test_pickle_of_unpickle_is_identity = testGroup "pickle-unpickle tests"
547 [ check "pickle composed with unpickle is the identity"
548 "test/xml/scoresxml.xml",
549
550 check "pickle composed with unpickle is the identity (no locations)"
551 "test/xml/scoresxml-no-locations.xml",
552
553 check "pickle composed with unpickle is the identity (pitcher, no type)"
554 "test/xml/scoresxml-pitcher-no-type.xml",
555
556 check "pickle composed with unpickle is the identity (empty numeral)"
557 "test/xml/scoresxml-empty-numeral.xml",
558
559 check "pickle composed with unpickle is the identity (empty type)"
560 "test/xml/scoresxml-empty-type.xml" ]
561 where
562 check desc path = testCase desc $ do
563 (expected, actual) <- pickle_unpickle pickle_message path
564 actual @?= expected
565
566
567 -- | Make sure we can actually unpickle these things.
568 --
569 test_unpickle_succeeds :: TestTree
570 test_unpickle_succeeds = testGroup "unpickle tests"
571 [ check "unpickling succeeds"
572 "test/xml/scoresxml.xml",
573
574 check "unpickling succeeds (no locations)"
575 "test/xml/scoresxml-no-locations.xml",
576
577 check "unpickling succeeds (pitcher, no type)"
578 "test/xml/scoresxml-pitcher-no-type.xml",
579
580 check "unpickling succeeds (empty numeral)"
581 "test/xml/scoresxml-empty-numeral.xml",
582
583 check "unpickling succeeds (empty type)"
584 "test/xml/scoresxml-empty-type.xml" ]
585 where
586 check desc path = testCase desc $ do
587 actual <- unpickleable path pickle_message
588 let expected = True
589 actual @?= expected
590
591
592 -- | Make sure everything gets deleted when we delete the top-level
593 -- record.
594 --
595 test_on_delete_cascade :: TestTree
596 test_on_delete_cascade = testGroup "cascading delete tests"
597 [ check "unpickling succeeds"
598 "test/xml/scoresxml.xml"
599 4, -- 2 teams, 2 locations
600
601 check "unpickling succeeds (no locations)"
602 "test/xml/scoresxml-no-locations.xml"
603 2, -- 2 teams, 0 locations
604
605 check "unpickling succeeds (pitcher, no type)"
606 "test/xml/scoresxml-pitcher-no-type.xml"
607 3, -- 2 teams, 1 location
608
609 check "unpickling succeeds (empty numeral)"
610 "test/xml/scoresxml-empty-numeral.xml"
611 3, -- 2 teams, 1 location
612
613 check "unpickling succeeds (empty type)"
614 "test/xml/scoresxml-empty-type.xml"
615 4 -- 2 teams, 2 locations
616 ]
617 where
618 check desc path expected = testCase desc $ do
619 score <- unsafe_unpickle path pickle_message
620 let a = undefined :: Location
621 let b = undefined :: Team
622 let c = undefined :: Score
623 let d = undefined :: ScoreGame
624 let e = undefined :: Score_Location
625 actual <- withSqliteConn ":memory:" $ runDbConn $ do
626 runMigrationSilent $ do
627 migrate a
628 migrate b
629 migrate c
630 migrate d
631 migrate e
632 _ <- dbimport score
633 -- No idea how 'delete' works, so do this instead.
634 deleteAll c
635 count_a <- countAll a
636 count_b <- countAll b
637 count_c <- countAll c
638 count_d <- countAll d
639 count_e <- countAll e
640 return $ sum [count_a, count_b, count_c,
641 count_d, count_e ]
642 actual @?= expected