X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FTSN%2FXML%2FNews.hs;h=f401d41cb2dbb67d139395a6089c36f09ac9a23d;hb=2f6ea95c2b3e19545c09ed5ff81d90bb3b120c16;hp=3f9fef5375b58ec440eed4c041985871ed0d6300;hpb=83bad08d7f28143cdaae42156d951b421fa15a8a;p=dead%2Fhtsn-import.git diff --git a/src/TSN/XML/News.hs b/src/TSN/XML/News.hs index 3f9fef5..f401d41 100644 --- a/src/TSN/XML/News.hs +++ b/src/TSN/XML/News.hs @@ -11,6 +11,7 @@ -- module TSN.XML.News ( dtd, + has_only_single_sms, pickle_message, -- * Tests news_tests, @@ -45,6 +46,16 @@ import Test.Tasty ( TestTree, testGroup ) import Test.Tasty.HUnit ( (@?=), testCase ) import Text.XML.HXT.Core ( PU, + XmlTree, + (/>), + (>>>), + addNav, + descendantAxis, + filterAxis, + followingSiblingAxis, + hasName, + remNav, + runLA, xp13Tuple, xpAttr, xpElem, @@ -69,6 +80,7 @@ import Xml ( ToDb(..), pickle_unpickle, unpickleable, + unsafe_read_document, unsafe_unpickle ) @@ -108,7 +120,7 @@ data Message = xml_url :: Maybe String, xml_teams :: [NewsTeam], xml_locations :: [Location], - xml_sms :: String, + xml_sms :: Maybe String, xml_editor :: Maybe String, xml_text :: Maybe String, -- Text and continue seem to show up in pairs, xml_continue :: Maybe String, -- either both present or both missing. @@ -126,7 +138,7 @@ data News = db_mid :: MsgId, db_sport :: String, db_url :: Maybe String, - db_sms :: String, + db_sms :: Maybe String, db_editor :: Maybe String, db_text :: Maybe String, db_continue :: Maybe String, @@ -202,8 +214,37 @@ data News_Location = News_Location + +-- | Some newsxml documents contain two \ elements in a row, +-- violating the DTD. The second one has always been empty, but it's +-- irrelevant: we can't parse these, and would like to detect them +-- in order to report the fact that the busted document is +-- unsupported. +-- +-- This function detects whether two \ elements appear in a +-- row, as siblings. +-- +has_only_single_sms :: XmlTree -> Bool +has_only_single_sms xmltree = + case elements of + [] -> True + _ -> False + where + parse :: XmlTree -> [XmlTree] + parse = runLA $ hasName "/" + /> hasName "message" + >>> addNav + >>> descendantAxis + >>> filterAxis (hasName "SMS") + >>> followingSiblingAxis + >>> remNav + >>> hasName "SMS" + + elements = parse xmltree + + -- --- Database code +-- * Database code -- -- | Define 'dbmigrate' and 'dbimport' for 'Message's. The import is @@ -250,7 +291,7 @@ mkPersist defaultCodegenConfig [groundhog| constructors: - name: NewsTeam uniques: - - name: unique_news_team + - name: unique_news_teams type: constraint fields: [team_name] @@ -356,7 +397,7 @@ pickle_message = (xpElem "url" $ xpOption xpText) (xpList pickle_news_team) (xpList pickle_location) - (xpElem "SMS" xpText) + (xpElem "SMS" $ xpOption xpText) (xpOption (xpElem "Editor" xpText)) (xpOption (xpElem "text" xpText)) pickle_continue @@ -407,7 +448,8 @@ news_tests = [ test_news_fields_have_correct_names, test_on_delete_cascade, test_pickle_of_unpickle_is_identity, - test_unpickle_succeeds ] + test_unpickle_succeeds, + test_sms_detected_correctly ] -- | Make sure our codegen is producing the correct database names. @@ -450,7 +492,10 @@ test_pickle_of_unpickle_is_identity = testGroup "pickle-unpickle tests" "test/xml/newsxml.xml", check "pickle composed with unpickle is the identity (with Editor)" - "test/xml/newsxml-with-editor.xml" ] + "test/xml/newsxml-with-editor.xml", + + check "pickle composed with unpickle is the identity (empty SMS)" + "test/xml/newsxml-empty-sms.xml" ] where check desc path = testCase desc $ do (expected, actual) <- pickle_unpickle pickle_message path @@ -465,7 +510,10 @@ test_unpickle_succeeds = testGroup "unpickle tests" "test/xml/newsxml.xml", check "unpickling succeeds (with Editor)" - "test/xml/newsxml-with-editor.xml" ] + "test/xml/newsxml-with-editor.xml", + + check "unpickling succeeds (empty SMS)" + "test/xml/newsxml-empty-sms.xml" ] where check desc path = testCase desc $ do actual <- unpickleable path pickle_message @@ -481,6 +529,10 @@ test_on_delete_cascade = testGroup "cascading delete tests" [ check "deleting news deletes its children" "test/xml/newsxml.xml" 4 -- 2 news_teams and 2 news_locations that should remain. + , + check "deleting news deletes its children (empty SMS)" + "test/xml/newsxml-empty-sms.xml" + 4 -- 2 news_teams and 2 news_locations ] where check desc path expected = testCase desc $ do @@ -506,3 +558,22 @@ test_on_delete_cascade = testGroup "cascading delete tests" count_e <- countAll e return $ count_a + count_b + count_c + count_d + count_e actual @?= expected + + +-- | We want to make sure the single-SMS documents and the multi-SMS +-- documents are identified correctly. +-- +test_sms_detected_correctly :: TestTree +test_sms_detected_correctly = + testGroup "newsxml SMS count determined correctly" + [ check "test/xml/newsxml.xml" + "single SMS detected correctly" + True, + check "test/xml/newsxml-multiple-sms.xml" + "multiple SMS detected correctly" + False ] + where + check path desc expected = testCase desc $ do + xmltree <- unsafe_read_document path + let actual = has_only_single_sms xmltree + actual @?= expected