--
module TSN.XML.News (
dtd,
+ has_only_single_sms,
pickle_message,
-- * Tests
news_tests,
import Test.Tasty.HUnit ( (@?=), testCase )
import Text.XML.HXT.Core (
PU,
+ XmlTree,
+ (/>),
+ (>>>),
+ addNav,
+ descendantAxis,
+ filterAxis,
+ followingSiblingAxis,
+ hasName,
+ remNav,
+ runLA,
xp13Tuple,
xpAttr,
xpElem,
ToDb(..),
pickle_unpickle,
unpickleable,
+ unsafe_read_document,
unsafe_unpickle )
+
+-- | Some newsxml documents contain two \<SMS\> elements in a row,
+-- violating the DTD. The second one has always been empty, but it's
+-- irrelevant: we can't parse these, and would like to detect them
+-- in order to report the fact that the busted document is
+-- unsupported.
+--
+-- This function detects whether two \<SMS\> elements appear in a
+-- row, as siblings.
+--
+has_only_single_sms :: XmlTree -> Bool
+has_only_single_sms xmltree =
+ case elements of
+ [] -> True
+ _ -> False
+ where
+ parse :: XmlTree -> [XmlTree]
+ parse = runLA $ hasName "/"
+ /> hasName "message"
+ >>> addNav
+ >>> descendantAxis
+ >>> filterAxis (hasName "SMS")
+ >>> followingSiblingAxis
+ >>> remNav
+ >>> hasName "SMS"
+
+ elements = parse xmltree
+
+
--
--- Database code
+-- * Database code
--
-- | Define 'dbmigrate' and 'dbimport' for 'Message's. The import is
[ test_news_fields_have_correct_names,
test_on_delete_cascade,
test_pickle_of_unpickle_is_identity,
- test_unpickle_succeeds ]
+ test_unpickle_succeeds,
+ test_sms_detected_correctly ]
-- | Make sure our codegen is producing the correct database names.
count_e <- countAll e
return $ count_a + count_b + count_c + count_d + count_e
actual @?= expected
+
+
+-- | We want to make sure the single-SMS documents and the multi-SMS
+-- documents are identified correctly.
+--
+test_sms_detected_correctly :: TestTree
+test_sms_detected_correctly =
+ testGroup "newsxml SMS count determined correctly"
+ [ check "test/xml/newsxml.xml"
+ "single SMS detected correctly"
+ True,
+ check "test/xml/newsxml-multiple-sms.xml"
+ "multiple SMS detected correctly"
+ False ]
+ where
+ check path desc expected = testCase desc $ do
+ xmltree <- unsafe_read_document path
+ let actual = has_only_single_sms xmltree
+ actual @?= expected