X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FTSN%2FXML%2FNews.hs;h=10c51956fa20c23a16ff45b3469bc477aac078a7;hb=9b28d5ed0e1570f0890eae1ee01721688c20e266;hp=3f9fef5375b58ec440eed4c041985871ed0d6300;hpb=9a8a222bae7eb08001a99bb693dd20ed8a2693a1;p=dead%2Fhtsn-import.git diff --git a/src/TSN/XML/News.hs b/src/TSN/XML/News.hs index 3f9fef5..10c5195 100644 --- a/src/TSN/XML/News.hs +++ b/src/TSN/XML/News.hs @@ -11,6 +11,7 @@ -- module TSN.XML.News ( dtd, + has_only_single_sms, pickle_message, -- * Tests news_tests, @@ -45,6 +46,16 @@ import Test.Tasty ( TestTree, testGroup ) import Test.Tasty.HUnit ( (@?=), testCase ) import Text.XML.HXT.Core ( PU, + XmlTree, + (/>), + (>>>), + addNav, + descendantAxis, + filterAxis, + followingSiblingAxis, + hasName, + remNav, + runLA, xp13Tuple, xpAttr, xpElem, @@ -69,6 +80,7 @@ import Xml ( ToDb(..), pickle_unpickle, unpickleable, + unsafe_read_document, unsafe_unpickle ) @@ -202,8 +214,37 @@ data News_Location = News_Location + +-- | Some newsxml documents contain two \ elements in a row, +-- violating the DTD. The second one has always been empty, but it's +-- irrelevant: we can't parse these, and would like to detect them +-- in order to report the fact that the busted document is +-- unsupported. +-- +-- This function detects whether two \ elements appear in a +-- row, as siblings. +-- +has_only_single_sms :: XmlTree -> Bool +has_only_single_sms xmltree = + case elements of + [] -> True + _ -> False + where + parse :: XmlTree -> [XmlTree] + parse = runLA $ hasName "/" + /> hasName "message" + >>> addNav + >>> descendantAxis + >>> filterAxis (hasName "SMS") + >>> followingSiblingAxis + >>> remNav + >>> hasName "SMS" + + elements = parse xmltree + + -- --- Database code +-- * Database code -- -- | Define 'dbmigrate' and 'dbimport' for 'Message's. The import is @@ -407,7 +448,8 @@ news_tests = [ test_news_fields_have_correct_names, test_on_delete_cascade, test_pickle_of_unpickle_is_identity, - test_unpickle_succeeds ] + test_unpickle_succeeds, + test_sms_detected_correctly ] -- | Make sure our codegen is producing the correct database names. @@ -506,3 +548,22 @@ test_on_delete_cascade = testGroup "cascading delete tests" count_e <- countAll e return $ count_a + count_b + count_c + count_d + count_e actual @?= expected + + +-- | We want to make sure the single-SMS documents and the multi-SMS +-- documents are identified correctly. +-- +test_sms_detected_correctly :: TestTree +test_sms_detected_correctly = + testGroup "newsxml SMS count determined correctly" + [ check "test/xml/newsxml.xml" + "single SMS detected correctly" + True, + check "test/xml/newsxml-multiple-sms.xml" + "multiple SMS detected correctly" + False ] + where + check path desc expected = testCase desc $ do + xmltree <- unsafe_read_document path + let actual = has_only_single_sms xmltree + actual @?= expected