--
module TSN.XML.News (
dtd,
+ has_only_single_sms,
pickle_message,
-- * Tests
news_tests,
import Test.Tasty.HUnit ( (@?=), testCase )
import Text.XML.HXT.Core (
PU,
+ XmlTree,
+ (/>),
+ (>>>),
+ addNav,
+ descendantAxis,
+ filterAxis,
+ followingSiblingAxis,
+ hasName,
+ remNav,
+ runLA,
xp13Tuple,
xpAttr,
xpElem,
xpOption,
xpPair,
xpText,
- xpTriple,
xpWrap )
-- Local imports.
import TSN.Database ( insert_or_select )
import TSN.DbImport ( DbImport(..), ImportResult(..), run_dbmigrate )
import TSN.Picklers ( xp_time_stamp )
-import TSN.Location ( Location(..) )
+import TSN.Location ( Location(..), pickle_location )
import TSN.XmlImport ( XmlImport(..) )
import Xml (
FromXml(..),
ToDb(..),
pickle_unpickle,
unpickleable,
+ unsafe_read_document,
unsafe_unpickle )
+
+-- | Some newsxml documents contain two \<SMS\> elements in a row,
+-- violating the DTD. The second one has always been empty, but it's
+-- irrelevant: we can't parse these, and would like to detect them
+-- in order to report the fact that the busted document is
+-- unsupported.
+--
+-- This function detects whether two \<SMS\> elements appear in a
+-- row, as siblings.
+--
+has_only_single_sms :: XmlTree -> Bool
+has_only_single_sms xmltree =
+ case elements of
+ [] -> True
+ _ -> False
+ where
+ parse :: XmlTree -> [XmlTree]
+ parse = runLA $ hasName "/"
+ /> hasName "message"
+ >>> addNav
+ >>> descendantAxis
+ >>> filterAxis (hasName "SMS")
+ >>> followingSiblingAxis
+ >>> remNav
+ >>> hasName "SMS"
+
+ elements = parse xmltree
+
+
--
--- Database code
+-- * Database code
--
-- | Define 'dbmigrate' and 'dbimport' for 'Message's. The import is
constructors:
- name: NewsTeam
uniques:
- - name: unique_news_team
+ - name: unique_news_teams
type: constraint
fields: [team_name]
to_tuple m = (db_msg_id m, db_event_id m)
--- | Convert a 'Location' to/from XML.
---
-pickle_location :: PU Location
-pickle_location =
- xpElem "location" $
- xpWrap (from_tuple, to_tuple) $
- xpTriple (xpOption (xpElem "city" xpText))
- (xpOption (xpElem "state" xpText))
- (xpElem "country" xpText)
- where
- from_tuple =
- uncurryN Location
- to_tuple l = (city l, state l, country l)
-
-- | Convert a 'Message' to/from XML.
--
[ test_news_fields_have_correct_names,
test_on_delete_cascade,
test_pickle_of_unpickle_is_identity,
- test_unpickle_succeeds ]
+ test_unpickle_succeeds,
+ test_sms_detected_correctly ]
-- | Make sure our codegen is producing the correct database names.
count_e <- countAll e
return $ count_a + count_b + count_c + count_d + count_e
actual @?= expected
+
+
+-- | We want to make sure the single-SMS documents and the multi-SMS
+-- documents are identified correctly.
+--
+test_sms_detected_correctly :: TestTree
+test_sms_detected_correctly =
+ testGroup "newsxml SMS count determined correctly"
+ [ check "test/xml/newsxml.xml"
+ "single SMS detected correctly"
+ True,
+ check "test/xml/newsxml-multiple-sms.xml"
+ "multiple SMS detected correctly"
+ False ]
+ where
+ check path desc expected = testCase desc $ do
+ xmltree <- unsafe_read_document path
+ let actual = has_only_single_sms xmltree
+ actual @?= expected