]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/EarlyLine.hs
7f5a89048d93afb202237aa03f746e0d41cfaa82
[dead/htsn-import.git] / src / TSN / XML / EarlyLine.hs
1 {-# LANGUAGE DeriveGeneric #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE GADTs #-}
4 {-# LANGUAGE QuasiQuotes #-}
5 {-# LANGUAGE RecordWildCards #-}
6 {-# LANGUAGE TemplateHaskell #-}
7 {-# LANGUAGE TypeFamilies #-}
8
9 -- | Parse TSN XML for the DTD \"earlylineXML.dtd\". For that DTD,
10 -- each \<message\> element contains a bunch of \<date\>s, and those
11 -- \<date\>s contain a single \<game\>. In the database, we merge
12 -- the date info into the games, and key the games to the messages.
13 --
14 -- Real life is not so simple, however. There is another module,
15 -- "TSN.XML.MLBEarlyLine" that is something of a subclass of this
16 -- one. It contains early lines, but only for MLB games. The data
17 -- types and XML schema are /almost/ the same, but TSN like to make
18 -- things difficult.
19 --
20 -- A full list of the differences is given in that module. In this
21 -- one, we mention where data types have been twerked a little to
22 -- support the second document type.
23 --
24 module TSN.XML.EarlyLine (
25 EarlyLine, -- Used in TSN.XML.MLBEarlyLine
26 EarlyLineGame, -- Used in TSN.XML.MLBEarlyLine
27 dtd,
28 pickle_message,
29 -- * Tests
30 early_line_tests,
31 -- * WARNING: these are private but exported to silence warnings
32 EarlyLineConstructor(..),
33 EarlyLineGameConstructor(..) )
34 where
35
36 -- System imports.
37 import Control.Monad ( join )
38 import Data.Time ( UTCTime(..) )
39 import Data.Tuple.Curry ( uncurryN )
40 import qualified Data.Vector.HFixed as H ( HVector, convert )
41 import Database.Groundhog (
42 countAll,
43 deleteAll,
44 insert_,
45 migrate )
46 import Database.Groundhog.Core ( DefaultKey )
47 import Database.Groundhog.Generic ( runDbConn, runMigrationSilent )
48 import Database.Groundhog.Sqlite ( withSqliteConn )
49 import Database.Groundhog.TH (
50 groundhog,
51 mkPersist )
52 import qualified GHC.Generics as GHC ( Generic )
53 import Test.Tasty ( TestTree, testGroup )
54 import Test.Tasty.HUnit ( (@?=), testCase )
55 import Text.XML.HXT.Core (
56 PU,
57 xp4Tuple,
58 xp6Tuple,
59 xp7Tuple,
60 xpAttr,
61 xpElem,
62 xpInt,
63 xpList,
64 xpOption,
65 xpPair,
66 xpText,
67 xpWrap )
68
69 -- Local imports.
70 import TSN.Codegen ( tsn_codegen_config )
71 import TSN.DbImport ( DbImport(..), ImportResult(..), run_dbmigrate )
72 import TSN.Picklers (
73 xp_ambiguous_time,
74 xp_attr_option,
75 xp_early_line_date,
76 xp_time_stamp )
77 import TSN.XmlImport ( XmlImport(..) )
78 import Xml (
79 FromXml(..),
80 ToDb(..),
81 pickle_unpickle,
82 unpickleable,
83 unsafe_unpickle )
84
85
86 -- | The DTD to which this module corresponds. Used to invoke dbimport.
87 --
88 dtd :: String
89 dtd = "earlylineXML.dtd"
90
91 --
92 -- * DB/XML data types
93 --
94
95 -- * EarlyLine/Message
96
97 -- | Database representation of a 'Message'. It lacks the \<date\>
98 -- elements since they're really properties of the games that they
99 -- contain.
100 --
101 data EarlyLine =
102 EarlyLine {
103 db_xml_file_id :: Int,
104 db_heading :: String,
105 db_category :: String,
106 db_sport :: String,
107 db_title :: String,
108 db_time_stamp :: UTCTime }
109 deriving (Eq, Show)
110
111
112
113 -- | XML Representation of an 'EarlyLine'. It has the same
114 -- fields, but in addition contains the 'xml_dates'.
115 --
116 data Message =
117 Message {
118 xml_xml_file_id :: Int,
119 xml_heading :: String,
120 xml_category :: String,
121 xml_sport :: String,
122 xml_title :: String,
123 xml_dates :: [EarlyLineDate],
124 xml_time_stamp :: UTCTime }
125 deriving (Eq, GHC.Generic, Show)
126
127 -- | For 'H.convert'.
128 --
129 instance H.HVector Message
130
131
132 instance ToDb Message where
133 -- | The database analogue of a 'Message' is an 'EarlyLine'.
134 --
135 type Db Message = EarlyLine
136
137
138 -- | The 'FromXml' instance for 'Message' is required for the
139 -- 'XmlImport' instance.
140 --
141 instance FromXml Message where
142 -- | To convert a 'Message' to an 'EarlyLine', we just drop
143 -- the 'xml_dates'.
144 --
145 from_xml Message{..} =
146 EarlyLine {
147 db_xml_file_id = xml_xml_file_id,
148 db_heading = xml_heading,
149 db_category = xml_category,
150 db_sport = xml_sport,
151 db_title = xml_title,
152 db_time_stamp = xml_time_stamp }
153
154
155 -- | This allows us to insert the XML representation 'Message'
156 -- directly.
157 --
158 instance XmlImport Message
159
160
161
162 -- * EarlyLineDate / EarlyLineGameWithNote
163
164 -- | This is a very sad data type. It exists so that we can
165 -- successfully unpickle/pickle the MLB_earlylineXML.dtd documents
166 -- and get back what we started with. In that document type, the
167 -- dates all have multiple \<game\>s associated with them (as
168 -- children). But the dates also have multiple \<note\>s as
169 -- children, and we're supposed to figure out which notes go with
170 -- which games based on the order that they appear in the XML
171 -- file. Yeah, right.
172 --
173 -- In any case, instead of expecting the games and notes in some
174 -- nice order, we use this data type to expect \"a game and maybe a
175 -- note\" multiple times. This will pair the notes with only one
176 -- game, rather than all of the games that TSN think it should go
177 -- with. But it allows us to pickle and unpickle correctly at least.
178 --
179 data EarlyLineGameWithNote =
180 EarlyLineGameWithNote
181 (Maybe String) -- date_note, unused
182 EarlyLineGameXml -- date_game
183 deriving (Eq, GHC.Generic, Show)
184
185 -- | Accessor for the game within a 'EarlyLineGameWithNote'. We define
186 -- this ourselves to avoid an unused field warning for date_note.
187 --
188 date_game :: EarlyLineGameWithNote -> EarlyLineGameXml
189 date_game (EarlyLineGameWithNote _ g) = g
190
191 -- | For 'H.convert'.
192 --
193 instance H.HVector EarlyLineGameWithNote
194
195
196
197 -- | XML representation of a \<date\>. It has a \"value\" attribute
198 -- containing the actual date string. As children it contains a
199 -- (non-optional) note, and a game. The note and date value are
200 -- properties of the game as far as I can tell.
201 --
202 data EarlyLineDate =
203 EarlyLineDate {
204 date_value :: UTCTime,
205 date_games_with_notes :: [EarlyLineGameWithNote] }
206 deriving (Eq, GHC.Generic, Show)
207
208 -- | For 'H.convert'.
209 --
210 instance H.HVector EarlyLineDate
211
212
213
214 -- * EarlyLineGame / EarlyLineGameXml
215
216 -- | Database representation of a \<game\> in earlylineXML.dtd and
217 -- MLB_earlylineXML.dtd. We've had to make a sacrifice here to
218 -- support both document types. Since it's not possible to pair the
219 -- \<note\>s with \<game\>s reliably in MLB_earlylineXML.dtd, we
220 -- have omitted the notes entirely. This is sad, but totally not our
221 -- fault.
222 --
223 -- In earlylineXML.dtd, each \<date\> and thus each \<note\> is
224 -- paired with exactly one \<game\>, so if we only cared about that
225 -- document type, we could have retained the notes.
226 --
227 -- In earlylinexml.DTD, the over/under is required, but in
228 -- MLB_earlylinexml.DTD it is not. So another compromise is to have
229 -- it optional here.
230 --
231 -- The 'db_game_time' should be the combined date/time using the
232 -- date value from the \<game\> element's containing
233 -- \<date\>. That's why EarlyLineGame isn't an instance of
234 -- 'FromXmlFk': the foreign key isn't enough to construct one, we
235 -- also need the date.
236 --
237 data EarlyLineGame =
238 EarlyLineGame {
239 db_early_lines_id :: DefaultKey EarlyLine,
240 db_game_time :: UTCTime, -- ^ Combined date/time
241 db_away_team :: EarlyLineGameTeam,
242 db_home_team :: EarlyLineGameTeam,
243 db_over_under :: Maybe String }
244
245
246 -- | XML representation of a 'EarlyLineGame'. Comparatively, it lacks
247 -- only the foreign key to the parent message.
248 --
249 data EarlyLineGameXml =
250 EarlyLineGameXml {
251 xml_game_time :: Maybe UTCTime, -- ^ Only an ambiguous time string,
252 -- e.g. \"8:30\". Can be empty.
253 xml_away_team :: EarlyLineGameTeamXml,
254 xml_home_team :: EarlyLineGameTeamXml,
255 xml_over_under :: Maybe String }
256 deriving (Eq, GHC.Generic, Show)
257
258
259 -- | For 'H.convert'.
260 --
261 instance H.HVector EarlyLineGameXml
262
263
264 -- * EarlyLineGameTeam / EarlyLineGameTeamXml
265
266 -- | Database representation of an EarlyLine team, used in both
267 -- earlylineXML.dtd and MLB_earlylineXML.dtd. It doubles as an
268 -- embedded type within the DB representation 'EarlyLineGame'.
269 --
270 -- The team name is /not/ optional. However, since we're overloading
271 -- the XML representation, we're constructing 'db_team_name' name
272 -- from two Maybes, 'xml_team_name_attr' and
273 -- 'xml_team_name_text'. To ensure type safety (and avoid a runtime
274 -- crash), we allow the database field to be optional as well.
275 --
276 data EarlyLineGameTeam =
277 EarlyLineGameTeam {
278 db_rotation_number :: Maybe Int, -- ^ Usually there but sometimes empty.
279 db_line :: Maybe String, -- ^ Can be blank, a Double, or \"off\".
280 db_team_name :: Maybe String, -- ^ NOT optional, see the data type docs.
281 db_pitcher :: Maybe String -- ^ Optional in MLB_earlylineXML.dtd,
282 -- always absent in earlylineXML.dtd.
283 }
284
285
286 -- | This here is an abomination. What we've got is an XML
287 -- representation, not for either earlylineXML.dtd or
288 -- MLB_earlylineXML.dtd, but one that will work for /both/. Even
289 -- though they represent the teams totally differently! Argh!
290 --
291 -- The earlylineXML.dtd teams look like,
292 --
293 -- \<teamA rotation=\"709\" line=\"\">Miami\</teamA\>
294 --
295 -- While the MLB_earlylineXML.dtd teams look like,
296 --
297 -- <teamA rotation="901" name="LOS">
298 -- <pitcher>D.Haren</pitcher>
299 -- <line>-130</line>
300 -- </teamA>
301 --
302 -- So that's cool. This data type has placeholders that should allow
303 -- the name/line to appear either as an attribute or as a text
304 -- node. We'll sort it all out in the conversion to
305 -- EarlyLineGameTeam.
306 --
307 data EarlyLineGameTeamXml =
308 EarlyLineGameTeamXml {
309 xml_rotation_number :: Maybe Int,
310 xml_line_attr :: Maybe String,
311 xml_team_name_attr :: Maybe String,
312 xml_team_name_text :: Maybe String,
313 xml_pitcher :: Maybe String,
314 xml_line_elem :: Maybe String }
315 deriving (Eq, Show)
316
317
318
319 instance ToDb EarlyLineGameTeamXml where
320 -- | The database analogue of a 'EarlyLineGameTeamXml' is an
321 -- 'EarlyLineGameTeam', although the DB type is merely embedded
322 -- in another type.
323 --
324 type Db EarlyLineGameTeamXml = EarlyLineGameTeam
325
326
327 -- | The 'FromXml' instance for 'EarlyLineGameTeamXml' lets us convert
328 -- it to a 'EarlyLineGameTeam' easily.
329 --
330 instance FromXml EarlyLineGameTeamXml where
331 -- | To convert a 'EarlyLineGameTeamXml' to an 'EarlyLineGameTeam',
332 -- we figure how its fields were represented and choose the ones
333 -- that are populated. For example if the \"line\" attribute was
334 -- there, we'll use it, but if now, we'll use the \<line\>
335 -- element.
336 --
337 from_xml EarlyLineGameTeamXml{..} =
338 EarlyLineGameTeam {
339 db_rotation_number = xml_rotation_number,
340 db_line = merge xml_line_attr xml_line_elem,
341 db_team_name = merge xml_team_name_attr xml_team_name_text,
342 db_pitcher = xml_pitcher }
343 where
344 merge :: Maybe String -> Maybe String -> Maybe String
345 merge Nothing y = y
346 merge x Nothing = x
347 merge _ _ = Nothing
348
349
350
351
352 -- | Convert an 'EarlyLineDate' into a list of 'EarlyLineGame's. Each
353 -- date has one or more games, and the fields that belong to the date
354 -- should really be in the game anyway. So the database
355 -- representation of a game has the combined fields of the XML
356 -- date/game.
357 --
358 -- This function gets the games out of a date, and then sticks the
359 -- date value inside the games. It also adds the foreign key
360 -- reference to the games' parent message, and returns the result.
361 --
362 -- This would convert a single date to a single game if we only
363 -- needed to support earlylineXML.dtd and not MLB_earlylineXML.dtd.
364 --
365 date_to_games :: (DefaultKey EarlyLine) -> EarlyLineDate -> [EarlyLineGame]
366 date_to_games fk date =
367 map convert_game games_only
368 where
369 -- | Get the list of games out of a date (i.e. drop the notes).
370 --
371 games_only :: [EarlyLineGameXml]
372 games_only = (map date_game (date_games_with_notes date))
373
374 -- | Stick the date value into the given game. If our
375 -- 'EarlyLineGameXml' has an 'xml_game_time', then we combine it
376 -- with the day portion of the supplied @date@. If not, then we
377 -- just use @date as-is.
378 --
379 combine_date_time :: Maybe UTCTime -> UTCTime
380 combine_date_time (Just t) =
381 UTCTime (utctDay $ date_value date) (utctDayTime t)
382 combine_date_time Nothing = date_value date
383
384 -- | Convert an XML game to a database one.
385 --
386 convert_game :: EarlyLineGameXml -> EarlyLineGame
387 convert_game EarlyLineGameXml{..} =
388 EarlyLineGame {
389 db_early_lines_id = fk,
390 db_game_time = combine_date_time xml_game_time,
391 db_away_team = from_xml xml_away_team,
392 db_home_team = from_xml xml_home_team,
393 db_over_under = xml_over_under }
394
395
396 --
397 -- * Database stuff
398 --
399
400 instance DbImport Message where
401 dbmigrate _ =
402 run_dbmigrate $ do
403 migrate (undefined :: EarlyLine)
404 migrate (undefined :: EarlyLineGame)
405
406 dbimport m = do
407 -- Insert the message and obtain its ID.
408 msg_id <- insert_xml m
409
410 -- Create a function that will turn a list of dates into a list of
411 -- games by converting each date to its own list of games, and
412 -- then concatenating all of the game lists together.
413 let convert_dates_to_games = concatMap (date_to_games msg_id)
414
415 -- Now use it to make dem games.
416 let games = convert_dates_to_games (xml_dates m)
417
418 -- And insert all of them
419 mapM_ insert_ games
420
421 return ImportSucceeded
422
423
424 mkPersist tsn_codegen_config [groundhog|
425
426 - entity: EarlyLine
427 dbName: early_lines
428 constructors:
429 - name: EarlyLine
430 uniques:
431 - name: unique_early_lines
432 type: constraint
433 # Prevent multiple imports of the same message.
434 fields: [db_xml_file_id]
435
436
437 - entity: EarlyLineGame
438 dbName: early_lines_games
439 constructors:
440 - name: EarlyLineGame
441 fields:
442 - name: db_early_lines_id
443 reference:
444 onDelete: cascade
445 - name: db_away_team
446 embeddedType:
447 - {name: rotation_number, dbName: away_team_rotation_number}
448 - {name: line, dbName: away_team_line}
449 - {name: team_name, dbName: away_team_name}
450 - {name: pitcher, dbName: away_team_pitcher}
451 - name: db_home_team
452 embeddedType:
453 - {name: rotation_number, dbName: home_team_rotation_number}
454 - {name: line, dbName: home_team_line}
455 - {name: team_name, dbName: home_team_name}
456 - {name: pitcher, dbName: home_team_pitcher}
457
458 - embedded: EarlyLineGameTeam
459 fields:
460 - name: db_rotation_number
461 dbName: rotation_number
462 - name: db_line
463 dbName: line
464 - name: db_team_name
465 dbName: team_name
466 - name: db_pitcher
467 dbName: pitcher
468 |]
469
470
471
472 --
473 -- * Pickling
474 --
475
476
477 -- | Pickler for the top-level 'Message'.
478 --
479 pickle_message :: PU Message
480 pickle_message =
481 xpElem "message" $
482 xpWrap (from_tuple, H.convert) $
483 xp7Tuple (xpElem "XML_File_ID" xpInt)
484 (xpElem "heading" xpText)
485 (xpElem "category" xpText)
486 (xpElem "sport" xpText)
487 (xpElem "title" xpText)
488 (xpList pickle_date)
489 (xpElem "time_stamp" xp_time_stamp)
490 where
491 from_tuple = uncurryN Message
492
493
494
495 -- | Pickler for a '\<note\> followed by a \<game\>. We turn them into
496 -- a 'EarlyLineGameWithNote'.
497 --
498 pickle_game_with_note :: PU EarlyLineGameWithNote
499 pickle_game_with_note =
500 xpWrap (from_tuple, H.convert) $
501 xpPair (xpOption $ xpElem "note" xpText)
502 pickle_game
503 where
504 from_tuple = uncurry EarlyLineGameWithNote
505
506
507 -- | Pickler for the \<date\> elements within each \<message\>.
508 --
509 pickle_date :: PU EarlyLineDate
510 pickle_date =
511 xpElem "date" $
512 xpWrap (from_tuple, H.convert) $
513 xpPair (xpAttr "value" xp_early_line_date)
514 (xpList pickle_game_with_note)
515 where
516 from_tuple = uncurry EarlyLineDate
517
518
519
520 -- | Pickler for the \<game\> elements within each \<date\>.
521 --
522 pickle_game :: PU EarlyLineGameXml
523 pickle_game =
524 xpElem "game" $
525 xpWrap (from_tuple, H.convert) $
526 xp4Tuple (xpElem "time" (xpOption xp_ambiguous_time))
527 pickle_away_team
528 pickle_home_team
529 (xpElem "over_under" (xpOption xpText))
530 where
531 from_tuple = uncurryN EarlyLineGameXml
532
533
534
535 -- | Pickle an away team (\<teamA\>) element within a \<game\>. Most
536 -- of the work (common with the home team pickler) is done by
537 -- 'pickle_team'.
538 --
539 pickle_away_team :: PU EarlyLineGameTeamXml
540 pickle_away_team = xpElem "teamA" pickle_team
541
542
543 -- | Pickle a home team (\<teamH\>) element within a \<game\>. Most
544 -- of the work (common with theaway team pickler) is done by
545 -- 'pickle_team'.
546 --
547 pickle_home_team :: PU EarlyLineGameTeamXml
548 pickle_home_team = xpElem "teamH" pickle_team
549
550
551 -- | Team pickling common to both 'pickle_away_team' and
552 -- 'pickle_home_team'. Handles everything inside the \<teamA\> and
553 -- \<teamH\> elements. We try to parse the line/name as both an
554 -- attribute and an element in order to accomodate
555 -- MLB_earlylineXML.dtd.
556 --
557 -- The \"line\" and \"pitcher\" fields wind up being double-Maybes,
558 -- since they can be empty even if they exist.
559 --
560 pickle_team :: PU EarlyLineGameTeamXml
561 pickle_team =
562 xpWrap (from_tuple, to_tuple') $
563 xp6Tuple (xpAttr "rotation" xp_attr_option)
564 (xpOption $ xpAttr "line" (xpOption xpText))
565 (xpOption $ xpAttr "name" xpText)
566 (xpOption xpText)
567 (xpOption $ xpElem "pitcher" (xpOption xpText))
568 (xpOption $ xpElem "line" (xpOption xpText))
569 where
570 from_tuple (u,v,w,x,y,z) =
571 EarlyLineGameTeamXml u (join v) w x (join y) (join z)
572
573 to_tuple' (EarlyLineGameTeamXml u v w x y z) =
574 (u, double_just v, w, x, double_just y, double_just z)
575 where
576 double_just val = case val of
577 Nothing -> Nothing
578 just_something -> Just just_something
579
580
581
582
583 --
584 -- * Tasty Tests
585 --
586
587 -- | A list of all tests for this module.
588 --
589 early_line_tests :: TestTree
590 early_line_tests =
591 testGroup
592 "EarlyLine tests"
593 [ test_on_delete_cascade,
594 test_pickle_of_unpickle_is_identity,
595 test_unpickle_succeeds ]
596
597 -- | If we unpickle something and then pickle it, we should wind up
598 -- with the same thing we started with. WARNING: success of this
599 -- test does not mean that unpickling succeeded.
600 --
601 test_pickle_of_unpickle_is_identity :: TestTree
602 test_pickle_of_unpickle_is_identity = testGroup "pickle-unpickle tests"
603 [ check "pickle composed with unpickle is the identity"
604 "test/xml/earlylineXML.xml",
605
606 check "pickle composed with unpickle is the identity (empty game time)"
607 "test/xml/earlylineXML-empty-game-time.xml" ]
608 where
609 check desc path = testCase desc $ do
610 (expected, actual) <- pickle_unpickle pickle_message path
611 actual @?= expected
612
613
614
615 -- | Make sure we can actually unpickle these things.
616 --
617 test_unpickle_succeeds :: TestTree
618 test_unpickle_succeeds = testGroup "unpickle tests"
619 [ check "unpickling succeeds"
620 "test/xml/earlylineXML.xml",
621
622 check "unpickling succeeds (empty game time)"
623 "test/xml/earlylineXML-empty-game-time.xml" ]
624 where
625 check desc path = testCase desc $ do
626 actual <- unpickleable path pickle_message
627 let expected = True
628 actual @?= expected
629
630
631
632 -- | Make sure everything gets deleted when we delete the top-level
633 -- record.
634 --
635 test_on_delete_cascade :: TestTree
636 test_on_delete_cascade = testGroup "cascading delete tests"
637 [ check "deleting early_lines deletes its children"
638 "test/xml/earlylineXML.xml",
639
640 check "deleting early_lines deletes its children (empty game time)"
641 "test/xml/earlylineXML-empty-game-time.xml" ]
642 where
643 check desc path = testCase desc $ do
644 results <- unsafe_unpickle path pickle_message
645 let a = undefined :: EarlyLine
646 let b = undefined :: EarlyLineGame
647
648 actual <- withSqliteConn ":memory:" $ runDbConn $ do
649 runMigrationSilent $ do
650 migrate a
651 migrate b
652 _ <- dbimport results
653 deleteAll a
654 count_a <- countAll a
655 count_b <- countAll b
656 return $ sum [count_a, count_b]
657 let expected = 0
658 actual @?= expected