1 {-# LANGUAGE DeriveGeneric #-}
2 {-# LANGUAGE FlexibleInstances #-}
4 {-# LANGUAGE QuasiQuotes #-}
5 {-# LANGUAGE RecordWildCards #-}
6 {-# LANGUAGE TemplateHaskell #-}
7 {-# LANGUAGE TypeFamilies #-}
9 -- | Parse TSN XML for the DTD \"earlylineXML.dtd\". For that DTD,
10 -- each \<message\> element contains a bunch of \<date\>s, and those
11 -- \<date\>s contain a single \<game\>. In the database, we merge
12 -- the date info into the games, and key the games to the messages.
14 -- Real life is not so simple, however. There is another module,
15 -- "TSN.XML.MLBEarlyLine" that is something of a subclass of this
16 -- one. It contains early lines, but only for MLB games. The data
17 -- types and XML schema are /almost/ the same, but TSN like to make
20 -- A full list of the differences is given in that module. In this
21 -- one, we mention where data types have been twerked a little to
22 -- support the second document type.
24 module TSN.XML.EarlyLine (
25 EarlyLine, -- Used in TSN.XML.MLBEarlyLine
26 EarlyLineGame, -- Used in TSN.XML.MLBEarlyLine
31 -- * WARNING: these are private but exported to silence warnings
32 EarlyLineConstructor(..),
33 EarlyLineGameConstructor(..) )
37 import Control.Monad ( join )
38 import Data.Time ( UTCTime(..) )
39 import Data.Tuple.Curry ( uncurryN )
40 import Database.Groundhog (
46 silentMigrationLogger )
47 import Database.Groundhog.Core ( DefaultKey )
48 import Database.Groundhog.Generic ( runDbConn )
49 import Database.Groundhog.Sqlite ( withSqliteConn )
50 import Database.Groundhog.TH (
53 import qualified GHC.Generics as GHC ( Generic )
54 import Test.Tasty ( TestTree, testGroup )
55 import Test.Tasty.HUnit ( (@?=), testCase )
56 import Text.XML.HXT.Core (
71 import Generics ( Generic(..), to_tuple )
72 import TSN.Codegen ( tsn_codegen_config )
73 import TSN.DbImport ( DbImport(..), ImportResult(..), run_dbmigrate )
78 import TSN.XmlImport ( XmlImport(..) )
87 -- | The DTD to which this module corresponds. Used to invoke dbimport.
90 dtd = "earlylineXML.dtd"
93 -- * DB/XML data types
96 -- * EarlyLine/Message
98 -- | Database representation of a 'Message'. It lacks the \<date\>
99 -- elements since they're really properties of the games that they
104 db_xml_file_id :: Int,
105 db_heading :: String,
106 db_category :: String,
109 db_time_stamp :: UTCTime }
114 -- | XML Representation of an 'EarlyLine'. It has the same
115 -- fields, but in addition contains the 'xml_dates'.
119 xml_xml_file_id :: Int,
120 xml_heading :: String,
121 xml_category :: String,
124 xml_dates :: [EarlyLineDate],
125 xml_time_stamp :: UTCTime }
126 deriving (Eq, GHC.Generic, Show)
128 -- | For 'Generics.to_tuple'.
130 instance Generic Message
133 instance ToDb Message where
134 -- | The database analogue of a 'Message' is an 'EarlyLine'.
136 type Db Message = EarlyLine
139 -- | The 'FromXml' instance for 'Message' is required for the
140 -- 'XmlImport' instance.
142 instance FromXml Message where
143 -- | To convert a 'Message' to an 'EarlyLine', we just drop
146 from_xml Message{..} =
148 db_xml_file_id = xml_xml_file_id,
149 db_heading = xml_heading,
150 db_category = xml_category,
151 db_sport = xml_sport,
152 db_title = xml_title,
153 db_time_stamp = xml_time_stamp }
156 -- | This allows us to insert the XML representation 'Message'
159 instance XmlImport Message
163 -- * EarlyLineDate / EarlyLineGameWithNote
165 -- | This is a very sad data type. It exists so that we can
166 -- successfully unpickle/pickle the MLB_earlylineXML.dtd documents
167 -- and get back what we started with. In that document type, the
168 -- dates all have multiple \<game\>s associated with them (as
169 -- children). But the dates also have multiple \<note\>s as
170 -- children, and we're supposed to figure out which notes go with
171 -- which games based on the order that they appear in the XML
172 -- file. Yeah, right.
174 -- In any case, instead of expecting the games and notes in some
175 -- nice order, we use this data type to expect \"a game and maybe a
176 -- note\" multiple times. This will pair the notes with only one
177 -- game, rather than all of the games that TSN think it should go
178 -- with. But it allows us to pickle and unpickle correctly at least.
180 data EarlyLineGameWithNote =
181 EarlyLineGameWithNote
182 (Maybe String) -- date_note, unused
183 EarlyLineGameXml -- date_game
184 deriving (Eq, GHC.Generic, Show)
186 -- | Accessor for the game within a 'EarlyLineGameWithNote'. We define
187 -- this ourselves to avoid an unused field warning for date_note.
189 date_game :: EarlyLineGameWithNote -> EarlyLineGameXml
190 date_game (EarlyLineGameWithNote _ g) = g
192 -- | For 'Generics.to_tuple'.
194 instance Generic EarlyLineGameWithNote
198 -- | XML representation of a \<date\>. It has a \"value\" attribute
199 -- containing the actual date string. As children it contains a
200 -- (non-optional) note, and a game. The note and date value are
201 -- properties of the game as far as I can tell.
205 date_value :: UTCTime,
206 date_games_with_notes :: [EarlyLineGameWithNote] }
207 deriving (Eq, GHC.Generic, Show)
209 -- | For 'Generics.to_tuple'.
211 instance Generic EarlyLineDate
215 -- * EarlyLineGame / EarlyLineGameXml
217 -- | Database representation of a \<game\> in earlylineXML.dtd and
218 -- MLB_earlylineXML.dtd. We've had to make a sacrifice here to
219 -- support both document types. Since it's not possible to pair the
220 -- \<note\>s with \<game\>s reliably in MLB_earlylineXML.dtd, we
221 -- have omitted the notes entirely. This is sad, but totally not our
224 -- In earlylineXML.dtd, each \<date\> and thus each \<note\> is
225 -- paired with exactly one \<game\>, so if we only cared about that
226 -- document type, we could have retained the notes.
228 -- In earlylinexml.DTD, the over/under is required, but in
229 -- MLB_earlylinexml.DTD it is not. So another compromise is to have
232 -- The 'db_game_time' should be the combined date/time using the
233 -- date value from the \<game\> element's containing
234 -- \<date\>. That's why EarlyLineGame isn't an instance of
235 -- 'FromXmlFk': the foreign key isn't enough to construct one, we
236 -- also need the date.
240 db_early_lines_id :: DefaultKey EarlyLine,
241 db_game_time :: UTCTime, -- ^ Combined date/time
242 db_away_team :: EarlyLineGameTeam,
243 db_home_team :: EarlyLineGameTeam,
244 db_over_under :: Maybe String }
247 -- | XML representation of a 'EarlyLineGame'. Comparatively, it lacks
248 -- only the foreign key to the parent message.
250 data EarlyLineGameXml =
252 xml_game_time :: Maybe UTCTime, -- ^ Only an ambiguous time string,
253 -- e.g. \"8:30\". Can be empty.
254 xml_away_team :: EarlyLineGameTeamXml,
255 xml_home_team :: EarlyLineGameTeamXml,
256 xml_over_under :: Maybe String }
257 deriving (Eq, GHC.Generic, Show)
260 -- | For 'Generics.to_tuple'.
262 instance Generic EarlyLineGameXml
265 -- * EarlyLineGameTeam / EarlyLineGameTeamXml
267 -- | Database representation of an EarlyLine team, used in both
268 -- earlylineXML.dtd and MLB_earlylineXML.dtd. It doubles as an
269 -- embedded type within the DB representation 'EarlyLineGame'.
271 -- The team name is /not/ optional. However, since we're overloading
272 -- the XML representation, we're constructing 'db_team_name' name
273 -- from two Maybes, 'xml_team_name_attr' and
274 -- 'xml_team_name_text'. To ensure type safety (and avoid a runtime
275 -- crash), we allow the database field to be optional as well.
277 data EarlyLineGameTeam =
279 db_rotation_number :: Maybe Int, -- ^ Usually there but sometimes empty.
280 db_line :: Maybe String, -- ^ Can be blank, a Double, or \"off\".
281 db_team_name :: Maybe String, -- ^ NOT optional, see the data type docs.
282 db_pitcher :: Maybe String -- ^ Optional in MLB_earlylineXML.dtd,
283 -- always absent in earlylineXML.dtd.
287 -- | This here is an abomination. What we've got is an XML
288 -- representation, not for either earlylineXML.dtd or
289 -- MLB_earlylineXML.dtd, but one that will work for /both/. Even
290 -- though they represent the teams totally differently! Argh!
292 -- The earlylineXML.dtd teams look like,
294 -- \<teamA rotation=\"709\" line=\"\">Miami\</teamA\>
296 -- While the MLB_earlylineXML.dtd teams look like,
298 -- <teamA rotation="901" name="LOS">
299 -- <pitcher>D.Haren</pitcher>
303 -- So that's cool. This data type has placeholders that should allow
304 -- the name/line to appear either as an attribute or as a text
305 -- node. We'll sort it all out in the conversion to
306 -- EarlyLineGameTeam.
308 data EarlyLineGameTeamXml =
309 EarlyLineGameTeamXml {
310 xml_rotation_number :: Maybe Int,
311 xml_line_attr :: Maybe String,
312 xml_team_name_attr :: Maybe String,
313 xml_team_name_text :: Maybe String,
314 xml_pitcher :: Maybe String,
315 xml_line_elem :: Maybe String }
320 instance ToDb EarlyLineGameTeamXml where
321 -- | The database analogue of a 'EarlyLineGameTeamXml' is an
322 -- 'EarlyLineGameTeam', although the DB type is merely embedded
325 type Db EarlyLineGameTeamXml = EarlyLineGameTeam
328 -- | The 'FromXml' instance for 'EarlyLineGameTeamXml' lets us convert
329 -- it to a 'EarlyLineGameTeam' easily.
331 instance FromXml EarlyLineGameTeamXml where
332 -- | To convert a 'EarlyLineGameTeamXml' to an 'EarlyLineGameTeam',
333 -- we figure how its fields were represented and choose the ones
334 -- that are populated. For example if the \"line\" attribute was
335 -- there, we'll use it, but if now, we'll use the \<line\>
338 from_xml EarlyLineGameTeamXml{..} =
340 db_rotation_number = xml_rotation_number,
341 db_line = merge xml_line_attr xml_line_elem,
342 db_team_name = merge xml_team_name_attr xml_team_name_text,
343 db_pitcher = xml_pitcher }
345 merge :: Maybe String -> Maybe String -> Maybe String
353 -- | Convert an 'EarlyLineDate' into a list of 'EarlyLineGame's. Each
354 -- date has one or more games, and the fields that belong to the date
355 -- should really be in the game anyway. So the database
356 -- representation of a game has the combined fields of the XML
359 -- This function gets the games out of a date, and then sticks the
360 -- date value inside the games. It also adds the foreign key
361 -- reference to the games' parent message, and returns the result.
363 -- This would convert a single date to a single game if we only
364 -- needed to support earlylineXML.dtd and not MLB_earlylineXML.dtd.
366 date_to_games :: (DefaultKey EarlyLine) -> EarlyLineDate -> [EarlyLineGame]
367 date_to_games fk date =
368 map convert_game games_only
370 -- | Get the list of games out of a date (i.e. drop the notes).
372 games_only :: [EarlyLineGameXml]
373 games_only = (map date_game (date_games_with_notes date))
375 -- | Stick the date value into the given game. If our
376 -- 'EarlyLineGameXml' has an 'xml_game_time', then we combine it
377 -- with the day portion of the supplied @date@. If not, then we
378 -- just use @date as-is.
380 combine_date_time :: Maybe UTCTime -> UTCTime
381 combine_date_time (Just t) =
382 UTCTime (utctDay $ date_value date) (utctDayTime t)
383 combine_date_time Nothing = date_value date
385 -- | Convert an XML game to a database one.
387 convert_game :: EarlyLineGameXml -> EarlyLineGame
388 convert_game EarlyLineGameXml{..} =
390 db_early_lines_id = fk,
391 db_game_time = combine_date_time xml_game_time,
392 db_away_team = from_xml xml_away_team,
393 db_home_team = from_xml xml_home_team,
394 db_over_under = xml_over_under }
401 instance DbImport Message where
404 migrate (undefined :: EarlyLine)
405 migrate (undefined :: EarlyLineGame)
408 -- Insert the message and obtain its ID.
409 msg_id <- insert_xml m
411 -- Create a function that will turn a list of dates into a list of
412 -- games by converting each date to its own list of games, and
413 -- then concatenating all of the game lists together.
414 let convert_dates_to_games = concatMap (date_to_games msg_id)
416 -- Now use it to make dem games.
417 let games = convert_dates_to_games (xml_dates m)
419 -- And insert all of them
422 return ImportSucceeded
425 mkPersist tsn_codegen_config [groundhog|
432 - name: unique_early_lines
434 # Prevent multiple imports of the same message.
435 fields: [db_xml_file_id]
438 - entity: EarlyLineGame
439 dbName: early_lines_games
441 - name: EarlyLineGame
443 - name: db_early_lines_id
448 - {name: rotation_number, dbName: away_team_rotation_number}
449 - {name: line, dbName: away_team_line}
450 - {name: team_name, dbName: away_team_name}
451 - {name: pitcher, dbName: away_team_pitcher}
454 - {name: rotation_number, dbName: home_team_rotation_number}
455 - {name: line, dbName: home_team_line}
456 - {name: team_name, dbName: home_team_name}
457 - {name: pitcher, dbName: home_team_pitcher}
459 - embedded: EarlyLineGameTeam
461 - name: db_rotation_number
462 dbName: rotation_number
478 -- | Pickler for the top-level 'Message'.
480 pickle_message :: PU Message
483 xpWrap (from_tuple, to_tuple) $
484 xp7Tuple (xpElem "XML_File_ID" xpInt)
485 (xpElem "heading" xpText)
486 (xpElem "category" xpText)
487 (xpElem "sport" xpText)
488 (xpElem "title" xpText)
490 (xpElem "time_stamp" xp_time_stamp)
492 from_tuple = uncurryN Message
496 -- | Pickler for a '\<note\> followed by a \<game\>. We turn them into
497 -- a 'EarlyLineGameWithNote'.
499 pickle_game_with_note :: PU EarlyLineGameWithNote
500 pickle_game_with_note =
501 xpWrap (from_tuple, to_tuple) $
502 xpPair (xpOption $ xpElem "note" xpText)
505 from_tuple = uncurry EarlyLineGameWithNote
508 -- | Pickler for the \<date\> elements within each \<message\>.
510 pickle_date :: PU EarlyLineDate
513 xpWrap (from_tuple, to_tuple) $
514 xpPair (xpAttr "value" xp_early_line_date)
515 (xpList pickle_game_with_note)
517 from_tuple = uncurry EarlyLineDate
521 -- | Pickler for the \<game\> elements within each \<date\>.
523 pickle_game :: PU EarlyLineGameXml
526 xpWrap (from_tuple, to_tuple) $
527 xp4Tuple (xpElem "time" (xpOption xp_ambiguous_time))
530 (xpElem "over_under" (xpOption xpText))
532 from_tuple = uncurryN EarlyLineGameXml
536 -- | Pickle an away team (\<teamA\>) element within a \<game\>. Most
537 -- of the work (common with the home team pickler) is done by
540 pickle_away_team :: PU EarlyLineGameTeamXml
541 pickle_away_team = xpElem "teamA" pickle_team
544 -- | Pickle a home team (\<teamH\>) element within a \<game\>. Most
545 -- of the work (common with theaway team pickler) is done by
548 pickle_home_team :: PU EarlyLineGameTeamXml
549 pickle_home_team = xpElem "teamH" pickle_team
552 -- | Team pickling common to both 'pickle_away_team' and
553 -- 'pickle_home_team'. Handles everything inside the \<teamA\> and
554 -- \<teamH\> elements. We try to parse the line/name as both an
555 -- attribute and an element in order to accomodate
556 -- MLB_earlylineXML.dtd.
558 -- The \"line\" and \"pitcher\" fields wind up being double-Maybes,
559 -- since they can be empty even if they exist.
561 pickle_team :: PU EarlyLineGameTeamXml
563 xpWrap (from_tuple, to_tuple') $
564 xp6Tuple (xpAttr "rotation" (xpOption xpInt))
565 (xpOption $ xpAttr "line" (xpOption xpText))
566 (xpOption $ xpAttr "name" xpText)
568 (xpOption $ xpElem "pitcher" (xpOption xpText))
569 (xpOption $ xpElem "line" (xpOption xpText))
571 from_tuple (u,v,w,x,y,z) =
572 EarlyLineGameTeamXml u (join v) w x (join y) (join z)
574 to_tuple' (EarlyLineGameTeamXml u v w x y z) =
575 (u, double_just v, w, x, double_just y, double_just z)
577 double_just val = case val of
579 just_something -> Just just_something
588 -- | A list of all tests for this module.
590 early_line_tests :: TestTree
594 [ test_on_delete_cascade,
595 test_pickle_of_unpickle_is_identity,
596 test_unpickle_succeeds ]
598 -- | If we unpickle something and then pickle it, we should wind up
599 -- with the same thing we started with. WARNING: success of this
600 -- test does not mean that unpickling succeeded.
602 test_pickle_of_unpickle_is_identity :: TestTree
603 test_pickle_of_unpickle_is_identity = testGroup "pickle-unpickle tests" $
604 [ check "pickle composed with unpickle is the identity"
605 "test/xml/earlylineXML.xml",
607 check "pickle composed with unpickle is the identity (empty game time)"
608 "test/xml/earlylineXML-empty-game-time.xml" ]
610 check desc path = testCase desc $ do
611 (expected, actual) <- pickle_unpickle pickle_message path
616 -- | Make sure we can actually unpickle these things.
618 test_unpickle_succeeds :: TestTree
619 test_unpickle_succeeds = testGroup "unpickle tests" $
620 [ check "unpickling succeeds"
621 "test/xml/earlylineXML.xml",
623 check "unpickling succeeds (empty game time)"
624 "test/xml/earlylineXML-empty-game-time.xml" ]
626 check desc path = testCase desc $ do
627 actual <- unpickleable path pickle_message
633 -- | Make sure everything gets deleted when we delete the top-level
636 test_on_delete_cascade :: TestTree
637 test_on_delete_cascade = testGroup "cascading delete tests" $
638 [ check "deleting early_lines deletes its children"
639 "test/xml/earlylineXML.xml",
641 check "deleting early_lines deletes its children (empty game time)"
642 "test/xml/earlylineXML-empty-game-time.xml" ]
644 check desc path = testCase desc $ do
645 results <- unsafe_unpickle path pickle_message
646 let a = undefined :: EarlyLine
647 let b = undefined :: EarlyLineGame
649 actual <- withSqliteConn ":memory:" $ runDbConn $ do
650 runMigration silentMigrationLogger $ do
653 _ <- dbimport results
655 count_a <- countAll a
656 count_b <- countAll b
657 return $ sum [count_a, count_b]