{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}

-- | Parse TSN XML for the DTD "newsxml.dtd". Each document contains a
--   root element \<message\> that contains an entire news item.
--
module TSN.XML.News (
  News,
  news_tests )
where

import Data.Data ( Data, constrFields, dataTypeConstrs, dataTypeOf )
import Data.List.Utils ( join, split )
import Data.Tuple.Curry ( uncurryN )
import Data.Typeable ( Typeable )
import Database.Groundhog (
  defaultMigrationLogger,
  insert,
  insertByAll,
  migrate,
  runMigration )
import Database.Groundhog.Core ( DefaultKey )
import Database.Groundhog.TH (
  defaultCodegenConfig,
  groundhog,
  mkPersist )
import System.Console.CmdArgs.Default ( Default(..) )
import Test.Tasty ( TestTree, testGroup )
import Test.Tasty.HUnit ( (@?=), testCase )
import Text.XML.HXT.Core (
  PU,
  XmlPickler(..),
  unpickleDoc,
  xp13Tuple,
  xpAttr,
  xpElem,
  xpInt,
  xpList,
  xpOption,
  xpPair,
  xpText,
  xpTriple,
  xpWrap )

import TSN.Codegen (
  tsn_codegen_config,
  tsn_db_field_namer ) -- Used in a test
import TSN.DbImport ( DbImport(..), ImportResult(..) )
import Xml ( ToFromXml(..), pickle_unpickle, unpickleable )


-- | The database type for teams as they show up in the news.
--
data NewsTeam =
  NewsTeam { team_name :: String }
  deriving (Eq, Show)


-- | Mapping between News records and NewsTeam records in the
--   database. We name the fields (even though they're never used) for
--   Groundhog's benefit.
--
data News_NewsTeam =
  News_NewsTeam {
    nnt_news_id :: DefaultKey News,
    nnt_news_team_id :: DefaultKey NewsTeam }


-- | The database type for locations as they show up in the news.
--
data NewsLocation =
  NewsLocation {
    city :: Maybe String,
    state :: Maybe String,
    country :: String }
  deriving (Eq, Show)


-- | Mapping between News records and NewsLocation records in the
--   database. We name the fields (even though they're never used) for
--   Groundhog's benefit.
--
data News_NewsLocation =
  News_NewsLocation {
    nnl_news_id :: DefaultKey News,
    nnl_news_location_id :: DefaultKey NewsLocation }


-- | The msg_id child of \<message\> contains an event_id attribute;
--   we embed it into the 'News' type. We (pointlessly) use the "db_"
--   prefix here so that the two names collide on "id" when Groundhog
--   is creating its fields using our field namer.
--
data MsgId =
  MsgId {
    db_msg_id :: Int,
    db_event_id :: Maybe Int }
  deriving (Data, Eq, Show, Typeable)


-- | The XML representation of a news item, i.e. the contents of one
--   \<message\> element.
--
data Message =
  Message {
    xml_xml_file_id :: Int,
    xml_heading :: String,
    xml_mid :: MsgId,
    xml_category :: String,
    xml_sport :: String,
    xml_url :: String,
    xml_teams :: [NewsTeam],
    xml_locations :: [NewsLocation],
    xml_sms :: String,
    xml_editor :: Maybe String,
    xml_text :: Maybe String,     -- Text and continue seem to show up in pairs,
    xml_continue :: Maybe String, -- either both present or both missing.
    xml_time_stamp :: String }
  deriving (Eq, Show)


-- | The database representation of a news item.
--
data News =
  News {
    db_mid :: MsgId,
    db_sport :: String,
    db_url :: String,
    db_sms :: String,
    db_editor :: Maybe String,
    db_text :: Maybe String,
    db_continue :: Maybe String }
  deriving (Data, Eq, Show, Typeable)


instance ToFromXml News where
  type Xml News = Message
  type Container News = ()

  -- Use a record wildcard here so GHC doesn't complain that we never
  -- used our named fields.
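  -- (Added note: the 'def' placeholders below stand in for the
  -- XML-only fields -- XML_File_ID, heading, category, teams,
  -- locations and time_stamp -- which have no counterpart in the
  -- 'News' database type.)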
  to_xml (News {..}) =
    Message
      def
      def
      db_mid
      def
      db_sport
      db_url
      def
      def
      db_sms
      db_editor
      db_text
      db_continue
      def

  -- We don't need the key argument (from_xml_fk) since the XML type
  -- contains more information in this case.
  from_xml (Message _ _ c _ e f _ _ i j k l _) =
    News c e f i j k l


-- These types don't have special XML representations or field name
-- collisions so we use the defaultCodegenConfig and give their fields
-- nice simple names.
mkPersist defaultCodegenConfig [groundhog|
- entity: NewsTeam
  dbName: news_teams
  constructors:
    - name: NewsTeam
      uniques:
        - name: unique_news_team
          type: constraint
          fields: [team_name]

- entity: NewsLocation
  dbName: news_locations
  constructors:
    - name: NewsLocation
      uniques:
        - name: unique_news_location
          type: constraint
          fields: [city, state, country]
|]

mkPersist tsn_codegen_config [groundhog|
- entity: News
  dbName: news
  constructors:
    - name: News
      fields:
        - name: db_mid
          embeddedType:
            - {name: msg_id, dbName: msg_id}
            - {name: event_id, dbName: event_id}

- embedded: MsgId
  fields:
    - name: db_msg_id
      dbName: msg_id
    - name: db_event_id
      dbName: event_id

- entity: News_NewsTeam
  dbName: news__news_teams

- entity: News_NewsLocation
  dbName: news__news_locations
|]


pickle_news_team :: PU NewsTeam
pickle_news_team =
  xpElem "team" $
    xpWrap (from_string, to_string) xpText
  where
    to_string :: NewsTeam -> String
    to_string = team_name

    from_string :: String -> NewsTeam
    from_string = NewsTeam

instance XmlPickler NewsTeam where
  xpickle = pickle_news_team


pickle_msg_id :: PU MsgId
pickle_msg_id =
  xpElem "msg_id" $
    xpWrap (from_tuple, to_tuple) $
    xpPair xpInt (xpAttr "EventId" (xpOption xpInt))
  where
    from_tuple = uncurryN MsgId
    to_tuple m = (db_msg_id m, db_event_id m)

instance XmlPickler MsgId where
  xpickle = pickle_msg_id


pickle_location :: PU NewsLocation
pickle_location =
  xpElem "location" $
    xpWrap (from_tuple, to_tuple) $
    xpTriple (xpOption (xpElem "city" xpText))
             (xpOption (xpElem "state" xpText))
             (xpElem "country" xpText)
  where
    from_tuple = uncurryN NewsLocation
    to_tuple l = (city l, state l, country l)

instance XmlPickler NewsLocation where
  xpickle = pickle_location


pickle_message :: PU Message
pickle_message =
  xpElem "message" $
    xpWrap (from_tuple, to_tuple) $
    xp13Tuple (xpElem "XML_File_ID" xpInt)
              (xpElem "heading" xpText)
              pickle_msg_id
              (xpElem "category" xpText)
              (xpElem "sport" xpText)
              (xpElem "url" xpText)
              (xpList pickle_news_team)
              (xpList pickle_location)
              (xpElem "SMS" xpText)
              (xpOption (xpElem "Editor" xpText))
              (xpOption (xpElem "text" xpText))
              pickle_continue
              (xpElem "time_stamp" xpText)
  where
    from_tuple = uncurryN Message
    to_tuple m = (xml_xml_file_id m,
                  xml_heading m,
                  xml_mid m,
                  xml_category m,
                  xml_sport m,
                  xml_url m,
                  xml_teams m,
                  xml_locations m,
                  xml_sms m,
                  xml_editor m,
                  xml_text m,
                  xml_continue m,
                  xml_time_stamp m)


pickle_continue :: PU (Maybe String)
pickle_continue =
  xpOption $
    xpWrap (to_string, from_string) $
    xpElem "continue" $
    xpList (xpElem "P" xpText)
  where
    from_string :: String -> [String]
    from_string = split "\n"

    to_string :: [String] -> String
    to_string = join "\n"

instance XmlPickler Message where
  xpickle = pickle_message


instance DbImport News where
  dbimport _ xml = do
    runMigration defaultMigrationLogger $ do
      migrate (undefined :: News)
      migrate (undefined :: NewsTeam)
      migrate (undefined :: NewsLocation)
      migrate (undefined :: News_NewsTeam)
      migrate (undefined :: News_NewsLocation)

    let root_element = unpickleDoc xpickle xml :: Maybe Message

    case root_element of
      Nothing -> do
        let errmsg = "Could not unpickle News message in dbimport."
        return $ ImportFailed errmsg

      Just message -> do
        -- Insert the message and acquire its primary key (unique ID).
        news_id <- insert (from_xml message :: News)

        -- And insert each one into its own table. We use insertByAll
        -- because we know that most teams will already exist, and we
        -- want to get back a Left (id) for the existing team when
        -- there's a collision. In fact, if the insert succeeds, we'll
        -- get a Right (id) back, so we can disregard the Either
        -- constructor entirely. That's what the (either id id) does.
        either_nt_ids <- mapM insertByAll (xml_teams message)
        let nt_ids = map (either id id) either_nt_ids

        -- Now that the teams have been inserted, create
        -- news__news_teams records mapping between the two.
        let news_news_teams = map (News_NewsTeam news_id) nt_ids
        nnt_ids <- mapM insert news_news_teams

        -- Do all of that over again for the NewsLocations.
        either_loc_ids <- mapM insertByAll (xml_locations message)
        let loc_ids = map (either id id) either_loc_ids
        let news_news_locations = map (News_NewsLocation news_id) loc_ids
        nnl_ids <- mapM insertByAll news_news_locations

        return $ ImportSucceeded (1 + -- for the News
                                  (length nt_ids) +
                                  (length loc_ids) +
                                  (length nnt_ids) +
                                  (length nnl_ids))


-- * Tasty Tests

news_tests :: TestTree
news_tests =
  testGroup
    "News tests"
    [ test_news_fields_have_correct_names,
      test_pickle_of_unpickle_is_identity,
      test_unpickle_succeeds ]


test_news_fields_have_correct_names :: TestTree
test_news_fields_have_correct_names =
  testCase "news fields get correct database names" $
    mapM_ check (zip actual expected)
  where
    -- This is cool, it uses the (derived) Data instance of
    -- News.News to get its constructor names.
    field_names :: [String]
    field_names =
      constrFields . head $ dataTypeConstrs $ dataTypeOf (undefined :: News)

    expected :: [String]
    expected =
      map (\x -> tsn_db_field_namer "herp" "derp" 8675309 x 90210)
          field_names

    actual :: [String]
    actual = ["mid", "sport", "url", "sms", "editor", "text", "continue"]

    check (x,y) = (x @?= y)


-- | Warning: success of this test does not mean that unpickling
--   succeeded.
--
test_pickle_of_unpickle_is_identity :: TestTree
test_pickle_of_unpickle_is_identity =
  testGroup "pickle-unpickle tests"
    [ check "pickle composed with unpickle is the identity"
            "test/xml/newsxml.xml",

      check "pickle composed with unpickle is the identity (with Editor)"
            "test/xml/newsxml-with-editor.xml" ]
  where
    check desc path = testCase desc $ do
      (expected :: [Message], actual) <- pickle_unpickle "message" path
      actual @?= expected


test_unpickle_succeeds :: TestTree
test_unpickle_succeeds =
  testGroup "unpickle tests"
    [ check "unpickling succeeds"
            "test/xml/newsxml.xml",

      check "unpickling succeeds (with Editor)"
            "test/xml/newsxml-with-editor.xml" ]
  where
    check desc path = testCase desc $ do
      actual <- unpickleable path pickle_message
      let expected = True
      actual @?= expected
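
-- A rough usage sketch, not part of this module's API: one way to
-- unpickle a news document directly with HXT's pickler arrows. The
-- file path and parse options here are illustrative assumptions, not
-- something the importer itself relies on; runX, xunpickleDocument,
-- withValidate, withRemoveWS, no and yes all come from
-- Text.XML.HXT.Core.
--
--   show_news :: FilePath -> IO ()
--   show_news path = do
--     msgs <- runX $ xunpickleDocument pickle_message
--                      [ withValidate no, withRemoveWS yes ]
--                      path
--     -- Print the msg_id of each successfully unpickled message.
--     mapM_ (print . xml_mid) msgs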