--
module TSN.XML.News (
dtd,
+ has_only_single_sms,
pickle_message,
-- * Tests
news_tests,
import Test.Tasty.HUnit ( (@?=), testCase )
import Text.XML.HXT.Core (
PU,
+ XmlTree,
+ (/>),
+ (>>>),
+ addNav,
+ descendantAxis,
+ filterAxis,
+ followingSiblingAxis,
+ hasName,
+ remNav,
+ runLA,
xp13Tuple,
xpAttr,
xpElem,
ToDb(..),
pickle_unpickle,
unpickleable,
+ unsafe_read_document,
unsafe_unpickle )
xml_url :: Maybe String,
xml_teams :: [NewsTeam],
xml_locations :: [Location],
- xml_sms :: String,
+ xml_sms :: Maybe String,
xml_editor :: Maybe String,
xml_text :: Maybe String, -- Text and continue seem to show up in pairs,
xml_continue :: Maybe String, -- either both present or both missing.
db_mid :: MsgId,
db_sport :: String,
db_url :: Maybe String,
- db_sms :: String,
+ db_sms :: Maybe String,
db_editor :: Maybe String,
db_text :: Maybe String,
db_continue :: Maybe String,
+
+-- | Some newsxml documents contain two \<SMS\> elements in a row,
+-- violating the DTD. The second one has always been empty, but it's
+-- irrelevant: we can't parse these, and would like to detect them
+-- in order to report the fact that the busted document is
+-- unsupported.
+--
+-- This function detects whether two \<SMS\> elements appear in a
+-- row, as siblings.
+--
+has_only_single_sms :: XmlTree -> Bool
+has_only_single_sms xmltree =
+ case elements of
+ [] -> True
+ _ -> False
+ where
+ parse :: XmlTree -> [XmlTree]
+ parse = runLA $ hasName "/"
+ /> hasName "message"
+ >>> addNav
+ >>> descendantAxis
+ >>> filterAxis (hasName "SMS")
+ >>> followingSiblingAxis
+ >>> remNav
+ >>> hasName "SMS"
+
+ elements = parse xmltree
+
+
--
--- Database code
+-- * Database code
--
-- | Define 'dbmigrate' and 'dbimport' for 'Message's. The import is
constructors:
- name: NewsTeam
uniques:
- - name: unique_news_team
+ - name: unique_news_teams
type: constraint
fields: [team_name]
(xpElem "url" $ xpOption xpText)
(xpList pickle_news_team)
(xpList pickle_location)
- (xpElem "SMS" xpText)
+ (xpElem "SMS" $ xpOption xpText)
(xpOption (xpElem "Editor" xpText))
(xpOption (xpElem "text" xpText))
pickle_continue
[ test_news_fields_have_correct_names,
test_on_delete_cascade,
test_pickle_of_unpickle_is_identity,
- test_unpickle_succeeds ]
+ test_unpickle_succeeds,
+ test_sms_detected_correctly ]
-- | Make sure our codegen is producing the correct database names.
"test/xml/newsxml.xml",
check "pickle composed with unpickle is the identity (with Editor)"
- "test/xml/newsxml-with-editor.xml" ]
+ "test/xml/newsxml-with-editor.xml",
+
+ check "pickle composed with unpickle is the identity (empty SMS)"
+ "test/xml/newsxml-empty-sms.xml" ]
where
check desc path = testCase desc $ do
(expected, actual) <- pickle_unpickle pickle_message path
"test/xml/newsxml.xml",
check "unpickling succeeds (with Editor)"
- "test/xml/newsxml-with-editor.xml" ]
+ "test/xml/newsxml-with-editor.xml",
+
+ check "unpickling succeeds (empty SMS)"
+ "test/xml/newsxml-empty-sms.xml" ]
where
check desc path = testCase desc $ do
actual <- unpickleable path pickle_message
[ check "deleting news deletes its children"
"test/xml/newsxml.xml"
4 -- 2 news_teams and 2 news_locations that should remain.
+ ,
+ check "deleting news deletes its children (empty SMS)"
+ "test/xml/newsxml-empty-sms.xml"
+ 4 -- 2 news_teams and 2 news_locations
]
where
check desc path expected = testCase desc $ do
count_e <- countAll e
return $ count_a + count_b + count_c + count_d + count_e
actual @?= expected
+
+
+-- | We want to make sure the single-SMS documents and the multi-SMS
+-- documents are identified correctly.
+--
+test_sms_detected_correctly :: TestTree
+test_sms_detected_correctly =
+ testGroup "newsxml SMS count determined correctly"
+ [ check "test/xml/newsxml.xml"
+ "single SMS detected correctly"
+ True,
+ check "test/xml/newsxml-multiple-sms.xml"
+ "multiple SMS detected correctly"
+ False ]
+ where
+ check path desc expected = testCase desc $ do
+ xmltree <- unsafe_read_document path
+ let actual = has_only_single_sms xmltree
+ actual @?= expected