X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FTSN%2FXML%2FNews.hs;h=130323cc94544b23081e887b6c551273e71c973d;hb=7815ba497d075c63c76418fc2c2b914ebe56b712;hp=565c7a52fb038b10e377b651bee1b58c73a49777;hpb=6883632cfac0e3ee7ad6781300555dbf40d98b40;p=dead%2Fhtsn-import.git diff --git a/src/TSN/XML/News.hs b/src/TSN/XML/News.hs index 565c7a5..130323c 100644 --- a/src/TSN/XML/News.hs +++ b/src/TSN/XML/News.hs @@ -1,10 +1,8 @@ -{-# LANGUAGE BangPatterns #-} {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeFamilies #-} @@ -13,30 +11,35 @@ -- root element \ that contains an entire news item. -- module TSN.XML.News ( - Message, - news_tests ) + pickle_message, + -- * Tests + news_tests, + -- * WARNING: these are private but exported to silence warnings + News_NewsLocationConstructor(..), + News_NewsTeamConstructor(..), + NewsConstructor(..), + NewsLocationConstructor(..), + NewsTeamConstructor(..) ) where +-- System imports. import Data.Data ( Data, constrFields, dataTypeConstrs, dataTypeOf ) +import Data.Time.Clock ( UTCTime ) import Data.List.Utils ( join, split ) import Data.Tuple.Curry ( uncurryN ) import Data.Typeable ( Typeable ) import Database.Groundhog ( - defaultMigrationLogger, - insert, - migrate, - runMigration ) + insert_, + migrate ) import Database.Groundhog.Core ( DefaultKey ) import Database.Groundhog.TH ( + defaultCodegenConfig, groundhog, mkPersist ) -import System.Console.CmdArgs.Default ( Default(..) ) import Test.Tasty ( TestTree, testGroup ) import Test.Tasty.HUnit ( (@?=), testCase ) import Text.XML.HXT.Core ( PU, - XmlPickler(..), - unpickleDoc, xp13Tuple, xpAttr, xpElem, @@ -48,89 +51,89 @@ import Text.XML.HXT.Core ( xpTriple, xpWrap ) +-- Local imports. import TSN.Codegen ( tsn_codegen_config, tsn_db_field_namer ) -- Used in a test -import TSN.DbImport ( DbImport(..), ImportResult(..) ) -import Xml ( ToFromXml(..), pickle_unpickle, unpickleable ) +import TSN.DbImport ( DbImport(..), ImportResult(..), run_dbmigrate ) +import TSN.Picklers ( xp_time_stamp ) +import TSN.XmlImport ( XmlImport(..) ) +import Xml ( FromXml(..), ToDb(..), pickle_unpickle, unpickleable ) --- | The database type for teams as they show up in the news. We need --- this separate from its XML representation because of the --- DefaultKey pointing to a message. We don't know how to create one --- of those unless we've just inserted a message into the database, --- so it screws up pickling. +-- | The database type for teams as they show up in the news. +-- data NewsTeam = - NewsTeam { - nt_news_id :: DefaultKey Message, -- ^ foreign key. - db_team_name :: String } -deriving instance Eq NewsTeam -- Standalone instances necessary for -deriving instance Show NewsTeam -- Groundhog types with DefaultKeys - --- | The XML type for teams as they show up in the news. See --- 'NewsTeam' for why there are two types. -data NewsTeamXml = - NewsTeamXml { - xml_team_name :: String } + NewsTeam { team_name :: String } deriving (Eq, Show) --- | Specify how to convert between the two representations NewsTeam --- (database) and NewsTeamXml (XML). -instance ToFromXml NewsTeam where - type Xml NewsTeam = NewsTeamXml - type Container NewsTeam = Message - -- Use a record wildcard here so GHC doesn't complain that we never - -- used our named fields. - to_xml (NewsTeam {..}) = NewsTeamXml db_team_name - -- We can't create a DefaultKey Message... - from_xml = error "Called from_xml on a NewsTeam." - -- unless we're handed one. - from_xml_fk key = (NewsTeam key) . xml_team_name - - --- | The database type for locations as they show up in the news. We --- need this separate from its XML representation because of the --- DefaultKey pointing to a message. We don't know how to create one --- of those unless we've just inserted a message into the database, --- so it screws up pickling. + +instance ToDb NewsTeam where + -- | The database representaion of a 'NewsTeam' is itself. + type Db NewsTeam = NewsTeam + +-- | This is needed to define the XmlImport instance for NewsTeam. +-- +instance FromXml NewsTeam where + -- | How to we get a 'NewsTeam' from itself? + from_xml = id + +-- | Allow us to call 'insert_xml' on the XML representation of +-- NewsTeams. +-- +instance XmlImport NewsTeam + + +-- | Mapping between News records and NewsTeam records in the +-- database. We don't name the fields because we don't use the names +-- explicitly; that means we have to give them nice database names +-- via groundhog. +-- +data News_NewsTeam = News_NewsTeam + (DefaultKey News) + (DefaultKey NewsTeam) + + +-- | The database type for locations as they show up in the news. +-- data NewsLocation = NewsLocation { - loc_news_id :: DefaultKey Message, -- ^ foreign key. - db_city ::String, - db_state :: String, - db_country :: String } -deriving instance Eq NewsLocation -- Standalone instances necessary for -deriving instance Show NewsLocation -- Groundhog types with DefaultKeys - --- | The XML type for locations as they show up in the news. See --- 'NewsLocation' for why there are two types. -data NewsLocationXml = - NewsLocationXml { - xml_city :: String, - xml_state :: String, - xml_country :: String } + city :: Maybe String, + state :: Maybe String, + country :: String } deriving (Eq, Show) +instance ToDb NewsLocation where + -- | The database representation of a 'NewsLocation' is itself. + type Db NewsLocation = NewsLocation + +-- | This is needed to define the XmlImport instance for NewsLocation. +-- +instance FromXml NewsLocation where + -- | How to we get a 'NewsLocation' from itself? + from_xml = id --- | Specify how to convert between the two representations --- NewsLocation (database) and NewsLocationXml (XML). -instance ToFromXml NewsLocation where - type Xml NewsLocation = NewsLocationXml - type Container NewsLocation = Message - -- Use a record wildcard here so GHC doesn't complain that we never - -- used our named fields. - to_xml (NewsLocation {..}) = NewsLocationXml db_city db_state db_country - -- We can't create a DefaultKey Message... - from_xml = error "Called from_xml on a NewsLocation." - -- unless we're given one. - from_xml_fk key (NewsLocationXml x y z) = NewsLocation key x y z +-- | Allow us to call 'insert_xml' on the XML representation of +-- NewsLocations. +-- +instance XmlImport NewsLocation + + +-- | Mapping between News records and NewsLocation records in the +-- database. We don't name the fields because we don't use the names +-- explicitly; that means we have to give them nice database names +-- via groundhog. +-- +data News_NewsLocation = News_NewsLocation + (DefaultKey News) + (DefaultKey NewsLocation) -- | The msg_id child of contains an event_id attribute; we --- embed it into the 'Message' type. We (pointlessly) use the "db_" --- prefix here so that the two names collide on "id" when Groundhog --- is creating its fields using our field namer. +-- embed it into the 'News' type. We (pointlessly) use the "db_" +-- prefix here so that the two names don't collide on "id" when +-- Groundhog is creating its fields using our field namer. data MsgId = MsgId { db_msg_id :: Int, @@ -138,73 +141,135 @@ data MsgId = deriving (Data, Eq, Show, Typeable) -data MessageXml = - MessageXml { +-- | The XML representation of a news item (message). +-- +data Message = + Message { xml_xml_file_id :: Int, xml_heading :: String, xml_mid :: MsgId, xml_category :: String, xml_sport :: String, - xml_url :: String, - xml_teams :: [NewsTeamXml], - xml_locations :: [NewsLocationXml], + xml_url :: Maybe String, + xml_teams :: [NewsTeam], + xml_locations :: [NewsLocation], xml_sms :: String, xml_editor :: Maybe String, - xml_text :: String, - xml_continue :: String, - xml_time_stamp :: String } + xml_text :: Maybe String, -- Text and continue seem to show up in pairs, + xml_continue :: Maybe String, -- either both present or both missing. + xml_time_stamp :: UTCTime } deriving (Eq, Show) -data Message = - Message { + +-- | The database representation of a news item. We drop several +-- uninteresting fields from 'Message', and omit the list fields which +-- will be represented as join tables. +-- +data News = + News { db_mid :: MsgId, db_sport :: String, - db_url :: String, + db_url :: Maybe String, db_sms :: String, db_editor :: Maybe String, - db_text :: String, - db_continue :: String } + db_text :: Maybe String, + db_continue :: Maybe String, + db_time_stamp :: UTCTime } deriving (Data, Eq, Show, Typeable) -instance ToFromXml Message where - type Xml Message = MessageXml - type Container Message = () - - -- Use a record wildcard here so GHC doesn't complain that we never - -- used our named fields. - to_xml (Message {..}) = - MessageXml - def - def - db_mid - def - db_sport - db_url - def - def - db_sms - db_editor - db_text - db_continue - def - - -- We don't need the key argument (from_xml_fk) since the XML type - -- contains more information in this case. - from_xml (MessageXml _ _ c _ e f _ _ i j k l _) = - Message c e f i j k l -mkPersist tsn_codegen_config [groundhog| +instance ToDb Message where + type Db Message = News + +-- | Convert the XML representation 'Message' to the database +-- representation 'News'. +-- +instance FromXml Message where + -- | We use a record wildcard so GHC doesn't complain that we never + -- used the field names. + -- + from_xml Message{..} = News { db_mid = xml_mid, + db_sport = xml_sport, + db_url = xml_url, + db_sms = xml_sms, + db_editor = xml_editor, + db_text = xml_text, + db_continue = xml_continue, + db_time_stamp = xml_time_stamp } + +-- | This lets us call 'insert_xml' on a 'Message'. +-- +instance XmlImport Message + +-- | Define 'dbmigrate' and 'dbimport' for 'Message's. The import is +-- slightly non-generic because of our 'News_NewsTeam' and +-- 'News_NewsLocation' join tables. +-- +instance DbImport Message where + dbmigrate _ = + run_dbmigrate $ do + migrate (undefined :: NewsTeam) + migrate (undefined :: NewsLocation) + migrate (undefined :: News) + migrate (undefined :: News_NewsTeam) + migrate (undefined :: News_NewsLocation) + + dbimport message = do + -- Insert the message and acquire its primary key (unique ID) + news_id <- insert_xml message + + -- And insert each one into its own table. We use insert_xml_or_select + -- because we know that most teams will already exist, and we + -- want to get back the id for the existing team when + -- there's a collision. + nt_ids <- mapM insert_xml_or_select (xml_teams message) + + -- Now that the teams have been inserted, create + -- news__news_team records mapping beween the two. + let news_news_teams = map (News_NewsTeam news_id) nt_ids + mapM_ insert_ news_news_teams + + -- Do all of that over again for the NewsLocations. + loc_ids <- mapM insert_xml_or_select (xml_locations message) + let news_news_locations = map (News_NewsLocation news_id) loc_ids + mapM_ insert_ news_news_locations + + return ImportSucceeded + + +-- These types don't have special XML representations or field name +-- collisions so we use the defaultCodegenConfig and give their +-- fields nice simple names. +mkPersist defaultCodegenConfig [groundhog| - entity: NewsTeam dbName: news_teams + constructors: + - name: NewsTeam + uniques: + - name: unique_news_team + type: constraint + fields: [team_name] - entity: NewsLocation dbName: news_locations + constructors: + - name: NewsLocation + uniques: + - name: unique_news_location + type: constraint + fields: [city, state, country] -- entity: Message +|] + + +-- These types have fields with e.g. db_ and xml_ prefixes, so we +-- use our own codegen to peel those off before naming the columns. +mkPersist tsn_codegen_config [groundhog| +- entity: News dbName: news constructors: - - name: Message + - name: News fields: - name: db_mid embeddedType: @@ -216,22 +281,54 @@ mkPersist tsn_codegen_config [groundhog| dbName: msg_id - name: db_event_id dbName: event_id + + +- entity: News_NewsTeam + dbName: news__news_teams + constructors: + - name: News_NewsTeam + fields: + - name: news_NewsTeam0 # Default created by mkNormalFieldName + dbName: news_id + reference: + onDelete: cascade + - name: news_NewsTeam1 # Default created by mkNormalFieldName + dbName: news_teams_id + reference: + onDelete: cascade + +- entity: News_NewsLocation + dbName: news__news_locations + constructors: + - name: News_NewsLocation + fields: + - name: news_NewsLocation0 # Default created by mkNormalFieldName + dbName: news_id + reference: + onDelete: cascade + - name: news_NewsLocation1 # Default created by mkNormalFieldName + dbName: news_locations_id + reference: + onDelete: cascade |] -pickle_news_team :: PU NewsTeamXml + +-- | Convert a 'NewsTeam' to/from XML. +-- +pickle_news_team :: PU NewsTeam pickle_news_team = xpElem "team" $ xpWrap (from_string, to_string) xpText where - to_string :: NewsTeamXml -> String - to_string = xml_team_name + to_string :: NewsTeam -> String + to_string = team_name - from_string :: String -> NewsTeamXml - from_string = NewsTeamXml + from_string :: String -> NewsTeam + from_string = NewsTeam -instance XmlPickler NewsTeamXml where - xpickle = pickle_news_team +-- | Convert a 'MsgId' to/from XML. +-- pickle_msg_id :: PU MsgId pickle_msg_id = xpElem "msg_id" $ @@ -241,26 +338,25 @@ pickle_msg_id = from_tuple = uncurryN MsgId to_tuple m = (db_msg_id m, db_event_id m) -instance XmlPickler MsgId where - xpickle = pickle_msg_id -pickle_location :: PU NewsLocationXml +-- | Convert a 'NewsLocation' to/from XML. +-- +pickle_location :: PU NewsLocation pickle_location = xpElem "location" $ xpWrap (from_tuple, to_tuple) $ - xpTriple (xpElem "city" xpText) - (xpElem "state" xpText) + xpTriple (xpOption (xpElem "city" xpText)) + (xpOption (xpElem "state" xpText)) (xpElem "country" xpText) where from_tuple = - uncurryN NewsLocationXml - to_tuple l = (xml_city l, xml_state l, xml_country l) + uncurryN NewsLocation + to_tuple l = (city l, state l, country l) -instance XmlPickler NewsLocationXml where - xpickle = pickle_location - -pickle_message :: PU MessageXml +-- | Convert a 'Message' to/from XML. +-- +pickle_message :: PU Message pickle_message = xpElem "message" $ xpWrap (from_tuple, to_tuple) $ @@ -269,35 +365,39 @@ pickle_message = pickle_msg_id (xpElem "category" xpText) (xpElem "sport" xpText) - (xpElem "url" xpText) + (xpElem "url" $ xpOption xpText) (xpList pickle_news_team) (xpList pickle_location) (xpElem "SMS" xpText) (xpOption (xpElem "Editor" xpText)) - (xpElem "text" xpText) + (xpOption (xpElem "text" xpText)) pickle_continue - (xpElem "time_stamp" xpText) + (xpElem "time_stamp" xp_time_stamp) where - from_tuple = uncurryN MessageXml - to_tuple m = (xml_xml_file_id m, - xml_heading m, - xml_mid m, - xml_category m, - xml_sport m, - xml_url m, - xml_teams m, - xml_locations m, + from_tuple = uncurryN Message + to_tuple m = (xml_xml_file_id m, -- Verbose, + xml_heading m, -- but + xml_mid m, -- eliminates + xml_category m, -- GHC + xml_sport m, -- warnings + xml_url m, -- . + xml_teams m, -- . + xml_locations m, -- . xml_sms m, xml_editor m, xml_text m, xml_continue m, xml_time_stamp m) - pickle_continue :: PU String + -- | We combine all of the \ elements into one 'String' + -- while unpickling and do the reverse while pickling. + -- + pickle_continue :: PU (Maybe String) pickle_continue = - xpWrap (to_string, from_string) $ - xpElem "continue" $ - xpList (xpElem "P" xpText) + xpOption $ + xpWrap (to_string, from_string) $ + xpElem "continue" $ + xpList (xpElem "P" xpText) where from_string :: String -> [String] from_string = split "\n" @@ -305,35 +405,13 @@ pickle_message = to_string :: [String] -> String to_string = join "\n" -instance XmlPickler MessageXml where - xpickle = pickle_message - +-- +-- Tasty Tests +-- -instance DbImport Message where - dbimport _ xml = do - runMigration defaultMigrationLogger $ do - migrate (undefined :: Message) - migrate (undefined :: NewsTeam) - migrate (undefined :: NewsLocation) - let root_element = unpickleDoc xpickle xml :: Maybe MessageXml - case root_element of - Nothing -> do - let errmsg = "Could not unpickle News message in dbimport." - return $ ImportFailed errmsg - Just message -> do - news_id <- insert (from_xml message :: Message) - let nts :: [NewsTeam] = map (from_xml_fk news_id) - (xml_teams message) - let nlocs :: [NewsLocation] = map (from_xml_fk news_id) - (xml_locations message) - nt_ids <- mapM insert nts - loc_ids <- mapM insert nlocs - - return $ ImportSucceeded (1 + (length nt_ids) + (length loc_ids)) - - --- * Tasty Tests +-- | A list of all tests for this module. +-- news_tests :: TestTree news_tests = testGroup @@ -343,16 +421,18 @@ news_tests = test_unpickle_succeeds ] +-- | Make sure our codegen is producing the correct database names. +-- test_news_fields_have_correct_names :: TestTree test_news_fields_have_correct_names = testCase "news fields get correct database names" $ mapM_ check (zip actual expected) where -- This is cool, it uses the (derived) Data instance of - -- News.Message to get its constructor names. + -- News.News to get its constructor names. field_names :: [String] field_names = - constrFields . head $ dataTypeConstrs $ dataTypeOf (undefined :: Message) + constrFields . head $ dataTypeConstrs $ dataTypeOf (undefined :: News) expected :: [String] expected = @@ -364,8 +444,10 @@ test_news_fields_have_correct_names = check (x,y) = (x @?= y) --- | Warning, succeess of this test does not mean that unpickling --- succeeded. +-- | If we unpickle something and then pickle it, we should wind up +-- with the same thing we started with. WARNING: success of this +-- test does not mean that unpickling succeeded. +-- test_pickle_of_unpickle_is_identity :: TestTree test_pickle_of_unpickle_is_identity = testGroup "pickle-unpickle tests" [ check "pickle composed with unpickle is the identity" @@ -375,10 +457,12 @@ test_pickle_of_unpickle_is_identity = testGroup "pickle-unpickle tests" "test/xml/newsxml-with-editor.xml" ] where check desc path = testCase desc $ do - (expected :: [MessageXml], actual) <- pickle_unpickle "message" path + (expected, actual) <- pickle_unpickle pickle_message path actual @?= expected +-- | Make sure we can actually unpickle these things. +-- test_unpickle_succeeds :: TestTree test_unpickle_succeeds = testGroup "unpickle tests" [ check "unpickling succeeds"