{-# LANGUAGE BangPatterns #-} {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeFamilies #-} -- | Parse TSN XML for the DTD "newsxml.dtd". Each document contains a -- root element \ that contains an entire news item. -- module TSN.XML.News ( Message, news_tests ) where import Data.Data ( Data, constrFields, dataTypeConstrs, dataTypeOf ) import Data.List.Utils ( join, split ) import Data.Tuple.Curry ( uncurryN ) import Data.Typeable ( Typeable ) import Database.Groundhog ( defaultMigrationLogger, insert, migrate, runMigration ) import Database.Groundhog.Core ( DefaultKey ) import Database.Groundhog.TH ( groundhog, mkPersist ) import Test.Tasty ( TestTree, testGroup ) import Test.Tasty.HUnit ( (@?=), testCase ) import Text.XML.HXT.Core ( PU, XmlPickler(..), unpickleDoc, xp12Tuple, xpAttr, xpElem, xpInt, xpList, xpOption, xpPair, xpText, xpTriple, xpWrap ) import TSN.Codegen ( tsn_codegen_config, tsn_db_field_namer ) -- Used in a test import TSN.DbImport ( DbImport(..), ImportResult(..) ) import Xml ( ToFromXml(..), pickle_unpickle ) -- | The database type for teams as they show up in the news. We need -- this separate from its XML representation because of the -- DefaultKey pointing to a message. We don't know how to create one -- of those unless we've just inserted a message into the database, -- so it screws up pickling. data NewsTeam = NewsTeam { nt_news_id :: DefaultKey Message, -- ^ foreign key. db_team_name :: String } deriving instance Eq NewsTeam -- Standalone instances necessary for deriving instance Show NewsTeam -- Groundhog types with DefaultKeys -- | The XML type for teams as they show up in the news. See -- 'NewsTeam' for why there are two types. data NewsTeamXml = NewsTeamXml { xml_team_name :: String } deriving (Eq, Show) -- | Specify how to convert between the two representations NewsTeam -- (database) and NewsTeamXml (XML). instance ToFromXml NewsTeam where type Xml NewsTeam = NewsTeamXml type Container NewsTeam = Message -- Use a record wildcard here so GHC doesn't complain that we never -- used our named fields. to_xml (NewsTeam {..}) = NewsTeamXml db_team_name -- We can't create a DefaultKey Message... from_xml = error "Called from_xml on a NewsTeam." -- unless we're handed one. from_xml_fk key = (NewsTeam key) . xml_team_name -- | The database type for locations as they show up in the news. We -- need this separate from its XML representation because of the -- DefaultKey pointing to a message. We don't know how to create one -- of those unless we've just inserted a message into the database, -- so it screws up pickling. data NewsLocation = NewsLocation { loc_news_id :: DefaultKey Message, -- ^ foreign key. db_city ::String, db_state :: String, db_country :: String } deriving instance Eq NewsLocation -- Standalone instances necessary for deriving instance Show NewsLocation -- Groundhog types with DefaultKeys -- | The XML type for locations as they show up in the news. See -- 'NewsLocation' for why there are two types. data NewsLocationXml = NewsLocationXml { xml_city :: String, xml_state :: String, xml_country :: String } deriving (Eq, Show) -- | Specify how to convert between the two representations -- NewsLocation (database) and NewsLocationXml (XML). instance ToFromXml NewsLocation where type Xml NewsLocation = NewsLocationXml type Container NewsLocation = Message -- Use a record wildcard here so GHC doesn't complain that we never -- used our named fields. to_xml (NewsLocation {..}) = NewsLocationXml db_city db_state db_country -- We can't create a DefaultKey Message... from_xml = error "Called from_xml on a NewsLocation." -- unless we're given one. from_xml_fk key (NewsLocationXml x y z) = NewsLocation key x y z -- | The msg_id child of contains an event_id attribute; we -- embed it into the 'Message' type. We (pointlessly) use the "db_" -- prefix here so that the two names collide on "id" when Groundhog -- is creating its fields using our field namer. data MsgId = MsgId { db_msg_id :: Int, db_event_id :: Maybe Int } deriving (Data, Eq, Show, Typeable) data MessageXml = MessageXml { xml_xml_file_id :: Int, xml_heading :: String, xml_mid :: MsgId, xml_category :: String, xml_sport :: String, xml_url :: String, xml_teams :: [NewsTeamXml], xml_locations :: [NewsLocationXml], xml_sms :: String, xml_text :: String, xml_continue :: String, xml_time_stamp :: String } deriving (Eq, Show) data Message = Message { db_mid :: MsgId, db_sport :: String, db_url :: String, db_sms :: String, db_text :: String, db_continue :: String } deriving (Data, Eq, Show, Typeable) instance ToFromXml Message where type Xml Message = MessageXml type Container Message = () -- Use a record wildcard here so GHC doesn't complain that we never -- used our named fields. to_xml (Message {..}) = MessageXml 0 "" db_mid "" db_sport db_url [] [] db_sms db_text db_continue "" -- We don't need the key argument (from_xml_fk) since the XML type -- contains more information in this case. from_xml (MessageXml _ _ c _ e f _ _ g h i _) = Message c e f g h i mkPersist tsn_codegen_config [groundhog| - entity: NewsTeam dbName: news_teams - entity: NewsLocation dbName: news_locations - entity: Message dbName: news constructors: - name: Message fields: - name: db_mid embeddedType: - {name: msg_id, dbName: msg_id} - {name: event_id, dbName: event_id} - embedded: MsgId fields: - name: db_msg_id dbName: msg_id - name: db_event_id dbName: event_id |] pickle_news_team :: PU NewsTeamXml pickle_news_team = xpElem "team" $ xpWrap (from_string, to_string) xpText where to_string :: NewsTeamXml -> String to_string = xml_team_name from_string :: String -> NewsTeamXml from_string = NewsTeamXml instance XmlPickler NewsTeamXml where xpickle = pickle_news_team pickle_msg_id :: PU MsgId pickle_msg_id = xpElem "msg_id" $ xpWrap (from_tuple, to_tuple) $ xpPair xpInt (xpAttr "EventId" (xpOption xpInt)) where from_tuple = uncurryN MsgId to_tuple m = (db_msg_id m, db_event_id m) instance XmlPickler MsgId where xpickle = pickle_msg_id pickle_location :: PU NewsLocationXml pickle_location = xpElem "location" $ xpWrap (from_tuple, to_tuple) $ xpTriple (xpElem "city" xpText) (xpElem "state" xpText) (xpElem "country" xpText) where from_tuple = uncurryN NewsLocationXml to_tuple l = (xml_city l, xml_state l, xml_country l) instance XmlPickler NewsLocationXml where xpickle = pickle_location pickle_message :: PU MessageXml pickle_message = xpElem "message" $ xpWrap (from_tuple, to_tuple) $ xp12Tuple (xpElem "XML_File_ID" xpInt) (xpElem "heading" xpText) pickle_msg_id (xpElem "category" xpText) (xpElem "sport" xpText) (xpElem "url" xpText) (xpList $ pickle_news_team) (xpList $ pickle_location) (xpElem "SMS" xpText) (xpElem "text" xpText) pickle_continue (xpElem "time_stamp" xpText) where from_tuple = uncurryN MessageXml to_tuple m = (xml_xml_file_id m, xml_heading m, xml_mid m, xml_category m, xml_sport m, xml_url m, xml_teams m, xml_locations m, xml_sms m, xml_text m, xml_continue m, xml_time_stamp m) pickle_continue :: PU String pickle_continue = xpWrap (to_string, from_string) $ xpElem "continue" $ (xpList $ xpElem "P" xpText) where from_string :: String -> [String] from_string = split "\n" to_string :: [String] -> String to_string = join "\n" instance XmlPickler MessageXml where xpickle = pickle_message instance DbImport Message where dbimport _ xml = do runMigration defaultMigrationLogger $ do migrate (undefined :: Message) migrate (undefined :: NewsTeam) migrate (undefined :: NewsLocation) let root_element = unpickleDoc xpickle xml :: Maybe MessageXml case root_element of Nothing -> do let errmsg = "Could not unpickle News message in dbimport." return $ ImportFailed errmsg Just message -> do news_id <- insert (from_xml message :: Message) let nts :: [NewsTeam] = map (from_xml_fk news_id) (xml_teams message) let nlocs :: [NewsLocation] = map (from_xml_fk news_id) (xml_locations message) nt_ids <- mapM insert nts loc_ids <- mapM insert nlocs return $ ImportSucceeded (1 + (length nt_ids) + (length loc_ids)) -- * Tasty Tests news_tests :: TestTree news_tests = testGroup "News tests" [ test_news_fields_have_correct_names, test_pickle_of_unpickle_is_identity ] test_pickle_of_unpickle_is_identity :: TestTree test_pickle_of_unpickle_is_identity = testCase "pickle composed with unpickle is the identity" $ do let path = "test/xml/newsxml.xml" (expected :: [MessageXml], actual) <- pickle_unpickle "message" path actual @?= expected test_news_fields_have_correct_names :: TestTree test_news_fields_have_correct_names = testCase "news fields get correct database names" $ do mapM_ check (zip actual expected) where -- This is cool, it uses the (derived) Data instance of -- News.Message to get its constructor names. field_names :: [String] field_names = constrFields . head $ dataTypeConstrs $ dataTypeOf (undefined :: Message) expected :: [String] expected = map (\x -> tsn_db_field_namer "herp" "derp" 8675309 x 90210) field_names actual :: [String] actual = ["mid", "sport", "url", "sms", "text", "continue"] check (x,y) = (x @?= y)