X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FTSN%2FXML%2FNews.hs;h=f401d41cb2dbb67d139395a6089c36f09ac9a23d;hb=2f6ea95c2b3e19545c09ed5ff81d90bb3b120c16;hp=a36ff7edec1abcbf4a2b862853071c5f2d6a205f;hpb=d88388235ec3df702f80e70d50898850befef6ed;p=dead%2Fhtsn-import.git diff --git a/src/TSN/XML/News.hs b/src/TSN/XML/News.hs index a36ff7e..f401d41 100644 --- a/src/TSN/XML/News.hs +++ b/src/TSN/XML/News.hs @@ -11,6 +11,7 @@ -- module TSN.XML.News ( dtd, + has_only_single_sms, pickle_message, -- * Tests news_tests, @@ -45,6 +46,16 @@ import Test.Tasty ( TestTree, testGroup ) import Test.Tasty.HUnit ( (@?=), testCase ) import Text.XML.HXT.Core ( PU, + XmlTree, + (/>), + (>>>), + addNav, + descendantAxis, + filterAxis, + followingSiblingAxis, + hasName, + remNav, + runLA, xp13Tuple, xpAttr, xpElem, @@ -53,7 +64,6 @@ import Text.XML.HXT.Core ( xpOption, xpPair, xpText, - xpTriple, xpWrap ) -- Local imports. @@ -63,13 +73,14 @@ import TSN.Codegen ( import TSN.Database ( insert_or_select ) import TSN.DbImport ( DbImport(..), ImportResult(..), run_dbmigrate ) import TSN.Picklers ( xp_time_stamp ) -import TSN.Location ( Location(..) ) +import TSN.Location ( Location(..), pickle_location ) import TSN.XmlImport ( XmlImport(..) ) import Xml ( FromXml(..), ToDb(..), pickle_unpickle, unpickleable, + unsafe_read_document, unsafe_unpickle ) @@ -109,7 +120,7 @@ data Message = xml_url :: Maybe String, xml_teams :: [NewsTeam], xml_locations :: [Location], - xml_sms :: String, + xml_sms :: Maybe String, xml_editor :: Maybe String, xml_text :: Maybe String, -- Text and continue seem to show up in pairs, xml_continue :: Maybe String, -- either both present or both missing. @@ -127,7 +138,7 @@ data News = db_mid :: MsgId, db_sport :: String, db_url :: Maybe String, - db_sms :: String, + db_sms :: Maybe String, db_editor :: Maybe String, db_text :: Maybe String, db_continue :: Maybe String, @@ -203,8 +214,37 @@ data News_Location = News_Location + +-- | Some newsxml documents contain two \ elements in a row, +-- violating the DTD. The second one has always been empty, but it's +-- irrelevant: we can't parse these, and would like to detect them +-- in order to report the fact that the busted document is +-- unsupported. -- --- Database code +-- This function detects whether two \ elements appear in a +-- row, as siblings. +-- +has_only_single_sms :: XmlTree -> Bool +has_only_single_sms xmltree = + case elements of + [] -> True + _ -> False + where + parse :: XmlTree -> [XmlTree] + parse = runLA $ hasName "/" + /> hasName "message" + >>> addNav + >>> descendantAxis + >>> filterAxis (hasName "SMS") + >>> followingSiblingAxis + >>> remNav + >>> hasName "SMS" + + elements = parse xmltree + + +-- +-- * Database code -- -- | Define 'dbmigrate' and 'dbimport' for 'Message's. The import is @@ -251,7 +291,7 @@ mkPersist defaultCodegenConfig [groundhog| constructors: - name: NewsTeam uniques: - - name: unique_news_team + - name: unique_news_teams type: constraint fields: [team_name] @@ -342,20 +382,6 @@ pickle_msg_id = to_tuple m = (db_msg_id m, db_event_id m) --- | Convert a 'Location' to/from XML. --- -pickle_location :: PU Location -pickle_location = - xpElem "location" $ - xpWrap (from_tuple, to_tuple) $ - xpTriple (xpOption (xpElem "city" xpText)) - (xpOption (xpElem "state" xpText)) - (xpElem "country" xpText) - where - from_tuple = - uncurryN Location - to_tuple l = (city l, state l, country l) - -- | Convert a 'Message' to/from XML. -- @@ -371,7 +397,7 @@ pickle_message = (xpElem "url" $ xpOption xpText) (xpList pickle_news_team) (xpList pickle_location) - (xpElem "SMS" xpText) + (xpElem "SMS" $ xpOption xpText) (xpOption (xpElem "Editor" xpText)) (xpOption (xpElem "text" xpText)) pickle_continue @@ -422,7 +448,8 @@ news_tests = [ test_news_fields_have_correct_names, test_on_delete_cascade, test_pickle_of_unpickle_is_identity, - test_unpickle_succeeds ] + test_unpickle_succeeds, + test_sms_detected_correctly ] -- | Make sure our codegen is producing the correct database names. @@ -465,7 +492,10 @@ test_pickle_of_unpickle_is_identity = testGroup "pickle-unpickle tests" "test/xml/newsxml.xml", check "pickle composed with unpickle is the identity (with Editor)" - "test/xml/newsxml-with-editor.xml" ] + "test/xml/newsxml-with-editor.xml", + + check "pickle composed with unpickle is the identity (empty SMS)" + "test/xml/newsxml-empty-sms.xml" ] where check desc path = testCase desc $ do (expected, actual) <- pickle_unpickle pickle_message path @@ -480,7 +510,10 @@ test_unpickle_succeeds = testGroup "unpickle tests" "test/xml/newsxml.xml", check "unpickling succeeds (with Editor)" - "test/xml/newsxml-with-editor.xml" ] + "test/xml/newsxml-with-editor.xml", + + check "unpickling succeeds (empty SMS)" + "test/xml/newsxml-empty-sms.xml" ] where check desc path = testCase desc $ do actual <- unpickleable path pickle_message @@ -496,6 +529,10 @@ test_on_delete_cascade = testGroup "cascading delete tests" [ check "deleting news deletes its children" "test/xml/newsxml.xml" 4 -- 2 news_teams and 2 news_locations that should remain. + , + check "deleting news deletes its children (empty SMS)" + "test/xml/newsxml-empty-sms.xml" + 4 -- 2 news_teams and 2 news_locations ] where check desc path expected = testCase desc $ do @@ -521,3 +558,22 @@ test_on_delete_cascade = testGroup "cascading delete tests" count_e <- countAll e return $ count_a + count_b + count_c + count_d + count_e actual @?= expected + + +-- | We want to make sure the single-SMS documents and the multi-SMS +-- documents are identified correctly. +-- +test_sms_detected_correctly :: TestTree +test_sms_detected_correctly = + testGroup "newsxml SMS count determined correctly" + [ check "test/xml/newsxml.xml" + "single SMS detected correctly" + True, + check "test/xml/newsxml-multiple-sms.xml" + "multiple SMS detected correctly" + False ] + where + check path desc expected = testCase desc $ do + xmltree <- unsafe_read_document path + let actual = has_only_single_sms xmltree + actual @?= expected