X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FTSN%2FXML%2FNews.hs;h=10c51956fa20c23a16ff45b3469bc477aac078a7;hb=9b28d5ed0e1570f0890eae1ee01721688c20e266;hp=a36ff7edec1abcbf4a2b862853071c5f2d6a205f;hpb=d88388235ec3df702f80e70d50898850befef6ed;p=dead%2Fhtsn-import.git diff --git a/src/TSN/XML/News.hs b/src/TSN/XML/News.hs index a36ff7e..10c5195 100644 --- a/src/TSN/XML/News.hs +++ b/src/TSN/XML/News.hs @@ -11,6 +11,7 @@ -- module TSN.XML.News ( dtd, + has_only_single_sms, pickle_message, -- * Tests news_tests, @@ -45,6 +46,16 @@ import Test.Tasty ( TestTree, testGroup ) import Test.Tasty.HUnit ( (@?=), testCase ) import Text.XML.HXT.Core ( PU, + XmlTree, + (/>), + (>>>), + addNav, + descendantAxis, + filterAxis, + followingSiblingAxis, + hasName, + remNav, + runLA, xp13Tuple, xpAttr, xpElem, @@ -53,7 +64,6 @@ import Text.XML.HXT.Core ( xpOption, xpPair, xpText, - xpTriple, xpWrap ) -- Local imports. @@ -63,13 +73,14 @@ import TSN.Codegen ( import TSN.Database ( insert_or_select ) import TSN.DbImport ( DbImport(..), ImportResult(..), run_dbmigrate ) import TSN.Picklers ( xp_time_stamp ) -import TSN.Location ( Location(..) ) +import TSN.Location ( Location(..), pickle_location ) import TSN.XmlImport ( XmlImport(..) ) import Xml ( FromXml(..), ToDb(..), pickle_unpickle, unpickleable, + unsafe_read_document, unsafe_unpickle ) @@ -203,8 +214,37 @@ data News_Location = News_Location + +-- | Some newsxml documents contain two \ elements in a row, +-- violating the DTD. The second one has always been empty, but it's +-- irrelevant: we can't parse these, and would like to detect them +-- in order to report the fact that the busted document is +-- unsupported. +-- +-- This function detects whether two \ elements appear in a +-- row, as siblings. +-- +has_only_single_sms :: XmlTree -> Bool +has_only_single_sms xmltree = + case elements of + [] -> True + _ -> False + where + parse :: XmlTree -> [XmlTree] + parse = runLA $ hasName "/" + /> hasName "message" + >>> addNav + >>> descendantAxis + >>> filterAxis (hasName "SMS") + >>> followingSiblingAxis + >>> remNav + >>> hasName "SMS" + + elements = parse xmltree + + -- --- Database code +-- * Database code -- -- | Define 'dbmigrate' and 'dbimport' for 'Message's. The import is @@ -342,20 +382,6 @@ pickle_msg_id = to_tuple m = (db_msg_id m, db_event_id m) --- | Convert a 'Location' to/from XML. --- -pickle_location :: PU Location -pickle_location = - xpElem "location" $ - xpWrap (from_tuple, to_tuple) $ - xpTriple (xpOption (xpElem "city" xpText)) - (xpOption (xpElem "state" xpText)) - (xpElem "country" xpText) - where - from_tuple = - uncurryN Location - to_tuple l = (city l, state l, country l) - -- | Convert a 'Message' to/from XML. -- @@ -422,7 +448,8 @@ news_tests = [ test_news_fields_have_correct_names, test_on_delete_cascade, test_pickle_of_unpickle_is_identity, - test_unpickle_succeeds ] + test_unpickle_succeeds, + test_sms_detected_correctly ] -- | Make sure our codegen is producing the correct database names. @@ -521,3 +548,22 @@ test_on_delete_cascade = testGroup "cascading delete tests" count_e <- countAll e return $ count_a + count_b + count_c + count_d + count_e actual @?= expected + + +-- | We want to make sure the single-SMS documents and the multi-SMS +-- documents are identified correctly. +-- +test_sms_detected_correctly :: TestTree +test_sms_detected_correctly = + testGroup "newsxml SMS count determined correctly" + [ check "test/xml/newsxml.xml" + "single SMS detected correctly" + True, + check "test/xml/newsxml-multiple-sms.xml" + "multiple SMS detected correctly" + False ] + where + check path desc expected = testCase desc $ do + xmltree <- unsafe_read_document path + let actual = has_only_single_sms xmltree + actual @?= expected