{-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeFamilies #-} -- | Parse TSN XML for the DTD "newsxml.dtd". Each document contains a -- root element \ that contains an entire news item. -- module TSN.XML.News ( pickle_message, -- * Tests news_tests, -- * WARNING: these are private but exported to silence warnings News_NewsLocationConstructor(..), News_NewsTeamConstructor(..), NewsConstructor(..), NewsLocationConstructor(..), NewsTeamConstructor(..) ) where -- System imports. import Data.Data ( Data, constrFields, dataTypeConstrs, dataTypeOf ) import Data.List.Utils ( join, split ) import Data.Tuple.Curry ( uncurryN ) import Data.Typeable ( Typeable ) import Database.Groundhog ( insert_, migrate ) import Database.Groundhog.Core ( DefaultKey ) import Database.Groundhog.TH ( defaultCodegenConfig, groundhog, mkPersist ) import Test.Tasty ( TestTree, testGroup ) import Test.Tasty.HUnit ( (@?=), testCase ) import Text.XML.HXT.Core ( PU, xp13Tuple, xpAttr, xpElem, xpInt, xpList, xpOption, xpPair, xpText, xpTriple, xpWrap ) -- Local imports. import TSN.Codegen ( tsn_codegen_config, tsn_db_field_namer ) -- Used in a test import TSN.DbImport ( DbImport(..), ImportResult(..), run_dbmigrate ) import TSN.XmlImport ( XmlImport(..) ) import Xml ( FromXml(..), pickle_unpickle, unpickleable ) -- | The database type for teams as they show up in the news. -- data NewsTeam = NewsTeam { team_name :: String } deriving (Eq, Show) -- | This is needed to define the XmlImport instance for NewsTeam; it -- basically says that the DB representation is the same as the XML -- representation. -- instance FromXml NewsTeam where type Db NewsTeam = NewsTeam from_xml = id -- | Allow us to call 'insert_xml' on the XML representation of -- NewsTeams. -- instance XmlImport NewsTeam -- | Mapping between News records and NewsTeam records in the -- database. We don't name the fields because we don't use the names -- explicitly; that means we have to give them nice database names -- via groundhog. -- data News_NewsTeam = News_NewsTeam (DefaultKey News) (DefaultKey NewsTeam) -- | The database type for locations as they show up in the news. -- data NewsLocation = NewsLocation { city :: Maybe String, state :: Maybe String, country :: String } deriving (Eq, Show) -- | This is needed to define the XmlImport instance for NewsLocation; it -- basically says that the DB representation is the same as the XML -- representation. -- instance FromXml NewsLocation where type Db NewsLocation = NewsLocation from_xml = id -- | Allow us to call 'insert_xml' on the XML representation of -- NewsLocations. -- instance XmlImport NewsLocation -- | Mapping between News records and NewsLocation records in the -- database. We don't name the fields because we don't use the names -- explicitly; that means we have to give them nice database names -- via groundhog. -- data News_NewsLocation = News_NewsLocation (DefaultKey News) (DefaultKey NewsLocation) -- | The msg_id child of contains an event_id attribute; we -- embed it into the 'News' type. We (pointlessly) use the "db_" -- prefix here so that the two names don't collide on "id" when -- Groundhog is creating its fields using our field namer. data MsgId = MsgId { db_msg_id :: Int, db_event_id :: Maybe Int } deriving (Data, Eq, Show, Typeable) -- | The XML representation of a news item (message). -- data Message = Message { xml_xml_file_id :: Int, xml_heading :: String, xml_mid :: MsgId, xml_category :: String, xml_sport :: String, xml_url :: Maybe String, xml_teams :: [NewsTeam], xml_locations :: [NewsLocation], xml_sms :: String, xml_editor :: Maybe String, xml_text :: Maybe String, -- Text and continue seem to show up in pairs, xml_continue :: Maybe String, -- either both present or both missing. xml_time_stamp :: String } deriving (Eq, Show) -- | The database representation of a news item. We drop several -- uninteresting fields from 'Message', and omit the list fields which -- will be represented as join tables. -- data News = News { db_mid :: MsgId, db_sport :: String, db_url :: Maybe String, db_sms :: String, db_editor :: Maybe String, db_text :: Maybe String, db_continue :: Maybe String } deriving (Data, Eq, Show, Typeable) -- | Convert the XML representation 'Message' to the database -- representation 'News'. -- instance FromXml Message where type Db Message = News -- | We use a record wildcard so GHC doesn't complain that we never -- used the field names. -- from_xml Message{..} = News { db_mid = xml_mid, db_sport = xml_sport, db_url = xml_url, db_sms = xml_sms, db_editor = xml_editor, db_text = xml_text, db_continue = xml_continue } -- | This lets us call 'insert_xml' on a 'Message'. -- instance XmlImport Message -- | Define 'dbmigrate' and 'dbimport' for 'Message's. The import is -- slightly non-generic because of our 'News_NewsTeam' and -- 'News_NewsLocation' join tables. -- instance DbImport Message where dbmigrate _ = run_dbmigrate $ do migrate (undefined :: NewsTeam) migrate (undefined :: NewsLocation) migrate (undefined :: News) migrate (undefined :: News_NewsTeam) migrate (undefined :: News_NewsLocation) dbimport message = do -- Insert the message and acquire its primary key (unique ID) news_id <- insert_xml message -- And insert each one into its own table. We use insert_xml_or_select -- because we know that most teams will already exist, and we -- want to get back the id for the existing team when -- there's a collision. nt_ids <- mapM insert_xml_or_select (xml_teams message) -- Now that the teams have been inserted, create -- news__news_team records mapping beween the two. let news_news_teams = map (News_NewsTeam news_id) nt_ids mapM_ insert_ news_news_teams -- Do all of that over again for the NewsLocations. loc_ids <- mapM insert_xml_or_select (xml_locations message) let news_news_locations = map (News_NewsLocation news_id) loc_ids mapM_ insert_ news_news_locations return ImportSucceeded -- These types don't have special XML representations or field name -- collisions so we use the defaultCodegenConfig and give their -- fields nice simple names. mkPersist defaultCodegenConfig [groundhog| - entity: NewsTeam dbName: news_teams constructors: - name: NewsTeam uniques: - name: unique_news_team type: constraint fields: [team_name] - entity: NewsLocation dbName: news_locations constructors: - name: NewsLocation uniques: - name: unique_news_location type: constraint fields: [city, state, country] |] -- These types have fields with e.g. db_ and xml_ prefixes, so we -- use our own codegen to peel those off before naming the columns. mkPersist tsn_codegen_config [groundhog| - entity: News dbName: news constructors: - name: News fields: - name: db_mid embeddedType: - {name: msg_id, dbName: msg_id} - {name: event_id, dbName: event_id} - embedded: MsgId fields: - name: db_msg_id dbName: msg_id - name: db_event_id dbName: event_id - entity: News_NewsTeam dbName: news__news_teams constructors: - name: News_NewsTeam fields: - name: news_NewsTeam0 # Default created by mkNormalFieldName dbName: news_id - name: news_NewsTeam1 # Default created by mkNormalFieldName dbName: news_teams_id - entity: News_NewsLocation dbName: news__news_locations constructors: - name: News_NewsLocation fields: - name: news_NewsLocation0 # Default created by mkNormalFieldName dbName: news_id - name: news_NewsLocation1 # Default created by mkNormalFieldName dbName: news_locations_id |] -- | Convert a 'NewsTeam' to/from XML. -- pickle_news_team :: PU NewsTeam pickle_news_team = xpElem "team" $ xpWrap (from_string, to_string) xpText where to_string :: NewsTeam -> String to_string = team_name from_string :: String -> NewsTeam from_string = NewsTeam -- | Convert a 'MsgId' to/from XML. -- pickle_msg_id :: PU MsgId pickle_msg_id = xpElem "msg_id" $ xpWrap (from_tuple, to_tuple) $ xpPair xpInt (xpAttr "EventId" (xpOption xpInt)) where from_tuple = uncurryN MsgId to_tuple m = (db_msg_id m, db_event_id m) -- | Convert a 'NewsLocation' to/from XML. -- pickle_location :: PU NewsLocation pickle_location = xpElem "location" $ xpWrap (from_tuple, to_tuple) $ xpTriple (xpOption (xpElem "city" xpText)) (xpOption (xpElem "state" xpText)) (xpElem "country" xpText) where from_tuple = uncurryN NewsLocation to_tuple l = (city l, state l, country l) -- | Convert a 'Message' to/from XML. -- pickle_message :: PU Message pickle_message = xpElem "message" $ xpWrap (from_tuple, to_tuple) $ xp13Tuple (xpElem "XML_File_ID" xpInt) (xpElem "heading" xpText) pickle_msg_id (xpElem "category" xpText) (xpElem "sport" xpText) (xpElem "url" $ xpOption xpText) (xpList pickle_news_team) (xpList pickle_location) (xpElem "SMS" xpText) (xpOption (xpElem "Editor" xpText)) (xpOption (xpElem "text" xpText)) pickle_continue (xpElem "time_stamp" xpText) where from_tuple = uncurryN Message to_tuple m = (xml_xml_file_id m, -- Verbose, xml_heading m, -- but xml_mid m, -- eliminates xml_category m, -- GHC xml_sport m, -- warnings xml_url m, -- . xml_teams m, -- . xml_locations m, -- . xml_sms m, xml_editor m, xml_text m, xml_continue m, xml_time_stamp m) -- | We combine all of the \ elements into one 'String' -- while unpickling and do the reverse while pickling. -- pickle_continue :: PU (Maybe String) pickle_continue = xpOption $ xpWrap (to_string, from_string) $ xpElem "continue" $ xpList (xpElem "P" xpText) where from_string :: String -> [String] from_string = split "\n" to_string :: [String] -> String to_string = join "\n" -- -- Tasty Tests -- -- | A list of all tests for this module. -- news_tests :: TestTree news_tests = testGroup "News tests" [ test_news_fields_have_correct_names, test_pickle_of_unpickle_is_identity, test_unpickle_succeeds ] -- | Make sure our codegen is producing the correct database names. -- test_news_fields_have_correct_names :: TestTree test_news_fields_have_correct_names = testCase "news fields get correct database names" $ mapM_ check (zip actual expected) where -- This is cool, it uses the (derived) Data instance of -- News.News to get its constructor names. field_names :: [String] field_names = constrFields . head $ dataTypeConstrs $ dataTypeOf (undefined :: News) expected :: [String] expected = map (\x -> tsn_db_field_namer "herp" "derp" 8675309 x 90210) field_names actual :: [String] actual = ["mid", "sport", "url", "sms", "editor", "text", "continue"] check (x,y) = (x @?= y) -- | If we unpickle something and then pickle it, we should wind up -- with the same thing we started with. WARNING: success of this -- test does not mean that unpickling succeeded. -- test_pickle_of_unpickle_is_identity :: TestTree test_pickle_of_unpickle_is_identity = testGroup "pickle-unpickle tests" [ check "pickle composed with unpickle is the identity" "test/xml/newsxml.xml", check "pickle composed with unpickle is the identity (with Editor)" "test/xml/newsxml-with-editor.xml" ] where check desc path = testCase desc $ do (expected, actual) <- pickle_unpickle pickle_message path actual @?= expected -- | Make sure we can actually unpickle these things. -- test_unpickle_succeeds :: TestTree test_unpickle_succeeds = testGroup "unpickle tests" [ check "unpickling succeeds" "test/xml/newsxml.xml", check "unpickling succeeds (with Editor)" "test/xml/newsxml-with-editor.xml" ] where check desc path = testCase desc $ do actual <- unpickleable path pickle_message let expected = True actual @?= expected