]> gitweb.michael.orlitzky.com - dead/htsn-import.git/blob - src/TSN/XML/EarlyLine.hs
Allow empty game <time> elements (with a test case) in TSN.XML.EarlyLine.
[dead/htsn-import.git] / src / TSN / XML / EarlyLine.hs
1 {-# LANGUAGE FlexibleInstances #-}
2 {-# LANGUAGE GADTs #-}
3 {-# LANGUAGE QuasiQuotes #-}
4 {-# LANGUAGE RecordWildCards #-}
5 {-# LANGUAGE TemplateHaskell #-}
6 {-# LANGUAGE TypeFamilies #-}
7
8 -- | Parse TSN XML for the DTD \"earlylineXML.dtd\". For that DTD,
9 -- each \<message\> element contains a bunch of \<date\>s, and those
10 -- \<date\>s contain a single \<game\>. In the database, we merge
11 -- the date info into the games, and key the games to the messages.
12 --
13 -- Real life is not so simple, however. There is another module,
14 -- "TSN.XML.MLBEarlyLine" that is something of a subclass of this
15 -- one. It contains early lines, but only for MLB games. The data
16 -- types and XML schema are /almost/ the same, but TSN like to make
17 -- things difficult.
18 --
19 -- A full list of the differences is given in that module. In this
20 -- one, we mention where data types have been twerked a little to
21 -- support the second document type.
22 --
23 module TSN.XML.EarlyLine (
24 EarlyLine, -- Used in TSN.XML.MLBEarlyLine
25 EarlyLineGame, -- Used in TSN.XML.MLBEarlyLine
26 dtd,
27 pickle_message,
28 -- * Tests
29 early_line_tests,
30 -- * WARNING: these are private but exported to silence warnings
31 EarlyLineConstructor(..),
32 EarlyLineGameConstructor(..) )
33 where
34
35 -- System imports.
36 import Control.Monad ( join )
37 import Data.Time ( UTCTime(..) )
38 import Data.Tuple.Curry ( uncurryN )
39 import Database.Groundhog (
40 countAll,
41 deleteAll,
42 insert_,
43 migrate,
44 runMigration,
45 silentMigrationLogger )
46 import Database.Groundhog.Core ( DefaultKey )
47 import Database.Groundhog.Generic ( runDbConn )
48 import Database.Groundhog.Sqlite ( withSqliteConn )
49 import Database.Groundhog.TH (
50 groundhog,
51 mkPersist )
52 import Test.Tasty ( TestTree, testGroup )
53 import Test.Tasty.HUnit ( (@?=), testCase )
54 import Text.XML.HXT.Core (
55 PU,
56 xp4Tuple,
57 xp6Tuple,
58 xp7Tuple,
59 xpAttr,
60 xpElem,
61 xpInt,
62 xpList,
63 xpOption,
64 xpPair,
65 xpText,
66 xpWrap )
67
68 -- Local imports.
69 import TSN.Codegen ( tsn_codegen_config )
70 import TSN.DbImport ( DbImport(..), ImportResult(..), run_dbmigrate )
71 import TSN.Picklers (
72 xp_ambiguous_time,
73 xp_early_line_date,
74 xp_time_stamp )
75 import TSN.XmlImport ( XmlImport(..) )
76 import Xml (
77 FromXml(..),
78 ToDb(..),
79 pickle_unpickle,
80 unpickleable,
81 unsafe_unpickle )
82
83
84 -- | The DTD to which this module corresponds. Used to invoke dbimport.
85 --
86 dtd :: String
87 dtd = "earlylineXML.dtd"
88
89 --
90 -- * DB/XML data types
91 --
92
93 -- * EarlyLine/Message
94
95 -- | Database representation of a 'Message'. It lacks the \<date\>
96 -- elements since they're really properties of the games that they
97 -- contain.
98 --
99 data EarlyLine =
100 EarlyLine {
101 db_xml_file_id :: Int,
102 db_heading :: String,
103 db_category :: String,
104 db_sport :: String,
105 db_title :: String,
106 db_time_stamp :: UTCTime }
107 deriving (Eq, Show)
108
109
110
111 -- | XML Representation of an 'EarlyLine'. It has the same
112 -- fields, but in addition contains the 'xml_dates'.
113 --
114 data Message =
115 Message {
116 xml_xml_file_id :: Int,
117 xml_heading :: String,
118 xml_category :: String,
119 xml_sport :: String,
120 xml_title :: String,
121 xml_dates :: [EarlyLineDate],
122 xml_time_stamp :: UTCTime }
123 deriving (Eq, Show)
124
125
126 instance ToDb Message where
127 -- | The database analogue of a 'Message' is an 'EarlyLine'.
128 --
129 type Db Message = EarlyLine
130
131
132 -- | The 'FromXml' instance for 'Message' is required for the
133 -- 'XmlImport' instance.
134 --
135 instance FromXml Message where
136 -- | To convert a 'Message' to an 'EarlyLine', we just drop
137 -- the 'xml_dates'.
138 --
139 from_xml Message{..} =
140 EarlyLine {
141 db_xml_file_id = xml_xml_file_id,
142 db_heading = xml_heading,
143 db_category = xml_category,
144 db_sport = xml_sport,
145 db_title = xml_title,
146 db_time_stamp = xml_time_stamp }
147
148
149 -- | This allows us to insert the XML representation 'Message'
150 -- directly.
151 --
152 instance XmlImport Message
153
154
155
156 -- * EarlyLineDate / EarlyLineGameWithNote
157
158 -- | This is a very sad data type. It exists so that we can
159 -- successfully unpickle/pickle the MLB_earlylineXML.dtd documents
160 -- and get back what we started with. In that document type, the
161 -- dates all have multiple \<game\>s associated with them (as
162 -- children). But the dates also have multiple \<note\>s as
163 -- children, and we're supposed to figure out which notes go with
164 -- which games based on the order that they appear in the XML
165 -- file. Yeah, right.
166 --
167 -- In any case, instead of expecting the games and notes in some
168 -- nice order, we use this data type to expect \"a game and maybe a
169 -- note\" multiple times. This will pair the notes with only one
170 -- game, rather than all of the games that TSN think it should go
171 -- with. But it allows us to pickle and unpickle correctly at least.
172 --
173 data EarlyLineGameWithNote =
174 EarlyLineGameWithNote {
175 date_note :: Maybe String,
176 date_game :: EarlyLineGameXml }
177 deriving (Eq, Show)
178
179
180 -- | XML representation of a \<date\>. It has a \"value\" attribute
181 -- containing the actual date string. As children it contains a
182 -- (non-optional) note, and a game. The note and date value are
183 -- properties of the game as far as I can tell.
184 --
185 data EarlyLineDate =
186 EarlyLineDate {
187 date_value :: UTCTime,
188 date_games_with_notes :: [EarlyLineGameWithNote] }
189 deriving (Eq, Show)
190
191
192
193 -- * EarlyLineGame / EarlyLineGameXml
194
195 -- | Database representation of a \<game\> in earlylineXML.dtd and
196 -- MLB_earlylineXML.dtd. We've had to make a sacrifice here to
197 -- support both document types. Since it's not possible to pair the
198 -- \<note\>s with \<game\>s reliably in MLB_earlylineXML.dtd, we
199 -- have omitted the notes entirely. This is sad, but totally not our
200 -- fault.
201 --
202 -- In earlylineXML.dtd, each \<date\> and thus each \<note\> is
203 -- paired with exactly one \<game\>, so if we only cared about that
204 -- document type, we could have retained the notes.
205 --
206 -- In earlylinexml.DTD, the over/under is required, but in
207 -- MLB_earlylinexml.DTD it is not. So another compromise is to have
208 -- it optional here.
209 --
210 -- The 'db_game_time' should be the combined date/time using the
211 -- date value from the \<game\> element's containing
212 -- \<date\>. That's why EarlyLineGame isn't an instance of
213 -- 'FromXmlFk': the foreign key isn't enough to construct one, we
214 -- also need the date.
215 --
216 data EarlyLineGame =
217 EarlyLineGame {
218 db_early_lines_id :: DefaultKey EarlyLine,
219 db_game_time :: UTCTime, -- ^ Combined date/time
220 db_away_team :: EarlyLineGameTeam,
221 db_home_team :: EarlyLineGameTeam,
222 db_over_under :: Maybe String }
223
224
225 -- | XML representation of a 'EarlyLineGame'. Comparatively, it lacks
226 -- only the foreign key to the parent message.
227 --
228 data EarlyLineGameXml =
229 EarlyLineGameXml {
230 xml_game_time :: Maybe UTCTime, -- ^ Only an ambiguous time string,
231 -- e.g. \"8:30\". Can be empty.
232 xml_away_team :: EarlyLineGameTeamXml,
233 xml_home_team :: EarlyLineGameTeamXml,
234 xml_over_under :: Maybe String }
235 deriving (Eq, Show)
236
237
238
239 -- * EarlyLineGameTeam / EarlyLineGameTeamXml
240
241 -- | Database representation of an EarlyLine team, used in both
242 -- earlylineXML.dtd and MLB_earlylineXML.dtd. It doubles as an
243 -- embedded type within the DB representation 'EarlyLineGame'.
244 --
245 -- The team name is /not/ optional. However, since we're overloading
246 -- the XML representation, we're constructing 'db_team_name' name
247 -- from two Maybes, 'xml_team_name_attr' and
248 -- 'xml_team_name_text'. To ensure type safety (and avoid a runtime
249 -- crash), we allow the database field to be optional as well.
250 --
251 data EarlyLineGameTeam =
252 EarlyLineGameTeam {
253 db_rotation_number :: Int,
254 db_line :: Maybe String, -- ^ Can be blank, a Double, or \"off\".
255 db_team_name :: Maybe String, -- ^ NOT optional, see the data type docs.
256 db_pitcher :: Maybe String -- ^ Optional in MLB_earlylineXML.dtd,
257 -- always absent in earlylineXML.dtd.
258 }
259
260
261 -- | This here is an abomination. What we've got is an XML
262 -- representation, not for either earlylineXML.dtd or
263 -- MLB_earlylineXML.dtd, but one that will work for /both/. Even
264 -- though they represent the teams totally differently! Argh!
265 --
266 -- The earlylineXML.dtd teams look like,
267 --
268 -- \<teamA rotation=\"709\" line=\"\">Miami\</teamA\>
269 --
270 -- While the MLB_earlylineXML.dtd teams look like,
271 --
272 -- <teamA rotation="901" name="LOS">
273 -- <pitcher>D.Haren</pitcher>
274 -- <line>-130</line>
275 -- </teamA>
276 --
277 -- So that's cool. This data type has placeholders that should allow
278 -- the name/line to appear either as an attribute or as a text
279 -- node. We'll sort it all out in the conversion to
280 -- EarlyLineGameTeam.
281 --
282 data EarlyLineGameTeamXml =
283 EarlyLineGameTeamXml {
284 xml_rotation_number :: Int,
285 xml_line_attr :: Maybe String,
286 xml_team_name_attr :: Maybe String,
287 xml_team_name_text :: Maybe String,
288 xml_pitcher :: Maybe String,
289 xml_line_elem :: Maybe String }
290 deriving (Eq, Show)
291
292
293
294 instance ToDb EarlyLineGameTeamXml where
295 -- | The database analogue of a 'EarlyLineGameTeamXml' is an
296 -- 'EarlyLineGameTeam', although the DB type is merely embedded
297 -- in another type.
298 --
299 type Db EarlyLineGameTeamXml = EarlyLineGameTeam
300
301
302 -- | The 'FromXml' instance for 'EarlyLineGameTeamXml' lets us convert
303 -- it to a 'EarlyLineGameTeam' easily.
304 --
305 instance FromXml EarlyLineGameTeamXml where
306 -- | To convert a 'EarlyLineGameTeamXml' to an 'EarlyLineGameTeam',
307 -- we figure how its fields were represented and choose the ones
308 -- that are populated. For example if the \"line\" attribute was
309 -- there, we'll use it, but if now, we'll use the \<line\>
310 -- element.
311 --
312 from_xml EarlyLineGameTeamXml{..} =
313 EarlyLineGameTeam {
314 db_rotation_number = xml_rotation_number,
315 db_line = merge xml_line_attr xml_line_elem,
316 db_team_name = merge xml_team_name_attr xml_team_name_text,
317 db_pitcher = xml_pitcher }
318 where
319 merge :: Maybe String -> Maybe String -> Maybe String
320 merge Nothing y = y
321 merge x Nothing = x
322 merge _ _ = Nothing
323
324
325
326
327 -- | Convert an 'EarlyLineDate' into a list of 'EarlyLineGame's. Each
328 -- date has one or more games, and the fields that belong to the date
329 -- should really be in the game anyway. So the database
330 -- representation of a game has the combined fields of the XML
331 -- date/game.
332 --
333 -- This function gets the games out of a date, and then sticks the
334 -- date value inside the games. It also adds the foreign key
335 -- reference to the games' parent message, and returns the result.
336 --
337 -- This would convert a single date to a single game if we only
338 -- needed to support earlylineXML.dtd and not MLB_earlylineXML.dtd.
339 --
340 date_to_games :: (DefaultKey EarlyLine) -> EarlyLineDate -> [EarlyLineGame]
341 date_to_games fk date =
342 map convert_game games_only
343 where
344 -- | Get the list of games out of a date (i.e. drop the notes).
345 --
346 games_only :: [EarlyLineGameXml]
347 games_only = (map date_game (date_games_with_notes date))
348
349 -- | Stick the date value into the given game. If our
350 -- 'EarlyLineGameXml' has an 'xml_game_time', then we combine it
351 -- with the day portion of the supplied @date@. If not, then we
352 -- just use @date as-is.
353 --
354 combine_date_time :: EarlyLineGameXml -> UTCTime
355 combine_date_time (EarlyLineGameXml (Just t) _ _ _) =
356 UTCTime (utctDay $ date_value date) (utctDayTime t)
357 combine_date_time (EarlyLineGameXml Nothing _ _ _ ) = date_value date
358
359 -- | Convert an XML game to a database one.
360 --
361 convert_game :: EarlyLineGameXml -> EarlyLineGame
362 convert_game gx =
363 EarlyLineGame {
364 db_early_lines_id = fk,
365 db_game_time = combine_date_time gx,
366 db_away_team = from_xml (xml_away_team gx),
367 db_home_team = from_xml (xml_home_team gx),
368 db_over_under = xml_over_under gx }
369
370
371 --
372 -- * Database stuff
373 --
374
375 instance DbImport Message where
376 dbmigrate _ =
377 run_dbmigrate $ do
378 migrate (undefined :: EarlyLine)
379 migrate (undefined :: EarlyLineGame)
380
381 dbimport m = do
382 -- Insert the message and obtain its ID.
383 msg_id <- insert_xml m
384
385 -- Create a function that will turn a list of dates into a list of
386 -- games by converting each date to its own list of games, and
387 -- then concatenating all of the game lists together.
388 let convert_dates_to_games = concatMap (date_to_games msg_id)
389
390 -- Now use it to make dem games.
391 let games = convert_dates_to_games (xml_dates m)
392
393 -- And insert all of them
394 mapM_ insert_ games
395
396 return ImportSucceeded
397
398
399 mkPersist tsn_codegen_config [groundhog|
400
401 - entity: EarlyLine
402 dbName: early_lines
403 constructors:
404 - name: EarlyLine
405 uniques:
406 - name: unique_early_lines
407 type: constraint
408 # Prevent multiple imports of the same message.
409 fields: [db_xml_file_id]
410
411
412 - entity: EarlyLineGame
413 dbName: early_lines_games
414 constructors:
415 - name: EarlyLineGame
416 fields:
417 - name: db_early_lines_id
418 reference:
419 onDelete: cascade
420 - name: db_away_team
421 embeddedType:
422 - {name: rotation_number, dbName: away_team_rotation_number}
423 - {name: line, dbName: away_team_line}
424 - {name: team_name, dbName: away_team_name}
425 - {name: pitcher, dbName: away_team_pitcher}
426 - name: db_home_team
427 embeddedType:
428 - {name: rotation_number, dbName: home_team_rotation_number}
429 - {name: line, dbName: home_team_line}
430 - {name: team_name, dbName: home_team_name}
431 - {name: pitcher, dbName: home_team_pitcher}
432
433 - embedded: EarlyLineGameTeam
434 fields:
435 - name: db_rotation_number
436 dbName: rotation_number
437 - name: db_line
438 dbName: line
439 - name: db_team_name
440 dbName: team_name
441 - name: db_pitcher
442 dbName: pitcher
443 |]
444
445
446
447 --
448 -- * Pickling
449 --
450
451
452 -- | Pickler for the top-level 'Message'.
453 --
454 pickle_message :: PU Message
455 pickle_message =
456 xpElem "message" $
457 xpWrap (from_tuple, to_tuple) $
458 xp7Tuple (xpElem "XML_File_ID" xpInt)
459 (xpElem "heading" xpText)
460 (xpElem "category" xpText)
461 (xpElem "sport" xpText)
462 (xpElem "title" xpText)
463 (xpList pickle_date)
464 (xpElem "time_stamp" xp_time_stamp)
465 where
466 from_tuple = uncurryN Message
467 to_tuple m = (xml_xml_file_id m,
468 xml_heading m,
469 xml_category m,
470 xml_sport m,
471 xml_title m,
472 xml_dates m,
473 xml_time_stamp m)
474
475
476
477 -- | Pickler for a '\<note\> followed by a \<game\>. We turn them into
478 -- a 'EarlyLineGameWithNote'.
479 --
480 pickle_game_with_note :: PU EarlyLineGameWithNote
481 pickle_game_with_note =
482 xpWrap (from_tuple, to_tuple) $
483 xpPair (xpOption $ xpElem "note" xpText)
484 pickle_game
485 where
486 from_tuple = uncurry EarlyLineGameWithNote
487 to_tuple m = (date_note m, date_game m)
488
489
490 -- | Pickler for the \<date\> elements within each \<message\>.
491 --
492 pickle_date :: PU EarlyLineDate
493 pickle_date =
494 xpElem "date" $
495 xpWrap (from_tuple, to_tuple) $
496 xpPair (xpAttr "value" xp_early_line_date)
497 (xpList pickle_game_with_note)
498 where
499 from_tuple = uncurry EarlyLineDate
500 to_tuple m = (date_value m, date_games_with_notes m)
501
502
503
504 -- | Pickler for the \<game\> elements within each \<date\>.
505 --
506 pickle_game :: PU EarlyLineGameXml
507 pickle_game =
508 xpElem "game" $
509 xpWrap (from_tuple, to_tuple) $
510 xp4Tuple (xpElem "time" (xpOption xp_ambiguous_time))
511 pickle_away_team
512 pickle_home_team
513 (xpElem "over_under" (xpOption xpText))
514 where
515 from_tuple = uncurryN EarlyLineGameXml
516 to_tuple m = (xml_game_time m,
517 xml_away_team m,
518 xml_home_team m,
519 xml_over_under m)
520
521
522
523 -- | Pickle an away team (\<teamA\>) element within a \<game\>. Most
524 -- of the work (common with the home team pickler) is done by
525 -- 'pickle_team'.
526 --
527 pickle_away_team :: PU EarlyLineGameTeamXml
528 pickle_away_team = xpElem "teamA" pickle_team
529
530
531 -- | Pickle a home team (\<teamH\>) element within a \<game\>. Most
532 -- of the work (common with theaway team pickler) is done by
533 -- 'pickle_team'.
534 --
535 pickle_home_team :: PU EarlyLineGameTeamXml
536 pickle_home_team = xpElem "teamH" pickle_team
537
538
539 -- | Team pickling common to both 'pickle_away_team' and
540 -- 'pickle_home_team'. Handles everything inside the \<teamA\> and
541 -- \<teamH\> elements. We try to parse the line/name as both an
542 -- attribute and an element in order to accomodate
543 -- MLB_earlylineXML.dtd.
544 --
545 -- The \"line\" and \"pitcher\" fields wind up being double-Maybes,
546 -- since they can be empty even if they exist.
547 --
548 pickle_team :: PU EarlyLineGameTeamXml
549 pickle_team =
550 xpWrap (from_tuple, to_tuple) $
551 xp6Tuple (xpAttr "rotation" xpInt)
552 (xpOption $ xpAttr "line" (xpOption xpText))
553 (xpOption $ xpAttr "name" xpText)
554 (xpOption xpText)
555 (xpOption $ xpElem "pitcher" (xpOption xpText))
556 (xpOption $ xpElem "line" (xpOption xpText))
557 where
558 from_tuple (u,v,w,x,y,z) =
559 EarlyLineGameTeamXml u (join v) w x (join y) (join z)
560
561 to_tuple (EarlyLineGameTeamXml u v w x y z) =
562 (u, double_just v, w, x, double_just y, double_just z)
563 where
564 double_just val = case val of
565 Nothing -> Nothing
566 just_something -> Just just_something
567
568
569
570
571 --
572 -- * Tasty Tests
573 --
574
575 -- | A list of all tests for this module.
576 --
577 early_line_tests :: TestTree
578 early_line_tests =
579 testGroup
580 "EarlyLine tests"
581 [ test_on_delete_cascade,
582 test_pickle_of_unpickle_is_identity,
583 test_unpickle_succeeds ]
584
585 -- | If we unpickle something and then pickle it, we should wind up
586 -- with the same thing we started with. WARNING: success of this
587 -- test does not mean that unpickling succeeded.
588 --
589 test_pickle_of_unpickle_is_identity :: TestTree
590 test_pickle_of_unpickle_is_identity = testGroup "pickle-unpickle tests" $
591 [ check "pickle composed with unpickle is the identity"
592 "test/xml/earlylineXML.xml",
593
594 check "pickle composed with unpickle is the identity (empty game time)"
595 "test/xml/earlylineXML-empty-game-time.xml" ]
596 where
597 check desc path = testCase desc $ do
598 (expected, actual) <- pickle_unpickle pickle_message path
599 actual @?= expected
600
601
602
603 -- | Make sure we can actually unpickle these things.
604 --
605 test_unpickle_succeeds :: TestTree
606 test_unpickle_succeeds = testGroup "unpickle tests" $
607 [ check "unpickling succeeds"
608 "test/xml/earlylineXML.xml",
609
610 check "unpickling succeeds (empty game time)"
611 "test/xml/earlylineXML-empty-game-time.xml" ]
612 where
613 check desc path = testCase desc $ do
614 actual <- unpickleable path pickle_message
615 let expected = True
616 actual @?= expected
617
618
619
620 -- | Make sure everything gets deleted when we delete the top-level
621 -- record.
622 --
623 test_on_delete_cascade :: TestTree
624 test_on_delete_cascade = testGroup "cascading delete tests" $
625 [ check "deleting early_lines deletes its children"
626 "test/xml/earlylineXML.xml",
627
628 check "deleting early_lines deletes its children (empty game time)"
629 "test/xml/earlylineXML-empty-game-time.xml" ]
630 where
631 check desc path = testCase desc $ do
632 results <- unsafe_unpickle path pickle_message
633 let a = undefined :: EarlyLine
634 let b = undefined :: EarlyLineGame
635
636 actual <- withSqliteConn ":memory:" $ runDbConn $ do
637 runMigration silentMigrationLogger $ do
638 migrate a
639 migrate b
640 _ <- dbimport results
641 deleteAll a
642 count_a <- countAll a
643 count_b <- countAll b
644 return $ sum [count_a, count_b]
645 let expected = 0
646 actual @?= expected