--
module TSN.XML.News (
dtd,
+ has_only_single_sms,
pickle_message,
-- * Tests
news_tests,
import Test.Tasty.HUnit ( (@?=), testCase )
import Text.XML.HXT.Core (
PU,
+ XmlTree,
+ (/>),
+ (>>>),
+ addNav,
+ descendantAxis,
+ filterAxis,
+ followingSiblingAxis,
+ hasName,
+ remNav,
+ runLA,
xp13Tuple,
xpAttr,
xpElem,
xpOption,
xpPair,
xpText,
- xpTriple,
xpWrap )
-- Local imports.
import TSN.Database ( insert_or_select )
import TSN.DbImport ( DbImport(..), ImportResult(..), run_dbmigrate )
import TSN.Picklers ( xp_time_stamp )
-import TSN.Location ( Location(..) )
+import TSN.Location ( Location(..), pickle_location )
import TSN.XmlImport ( XmlImport(..) )
import Xml (
FromXml(..),
ToDb(..),
pickle_unpickle,
unpickleable,
+ unsafe_read_document,
unsafe_unpickle )
xml_sport :: String,
xml_url :: Maybe String,
xml_teams :: [NewsTeam],
- xml_locations :: [NewsLocationXml],
+ xml_locations :: [Location],
xml_sms :: String,
xml_editor :: Maybe String,
xml_text :: Maybe String, -- Text and continue seem to show up in pairs,
(DefaultKey NewsTeam)
--- * NewsLocationXml
-
--- | The XML type for locations as they show up in the news. The
--- associated database type comes from "TSN.Location".
---
-data NewsLocationXml =
- NewsLocationXml {
- xml_city :: Maybe String,
- xml_state :: Maybe String,
- xml_country :: String }
- deriving (Eq, Show)
-
-
-instance ToDb NewsLocationXml where
- -- | The database analogue of a NewsLocationXml is a Location.
- type Db NewsLocationXml = Location
-
-
-instance FromXml NewsLocationXml where
- -- | To convert from the XML representation to the database one, we
- -- don't have to do anything. Just copy the fields.
- --
- from_xml NewsLocationXml{..} =
- Location xml_city xml_state xml_country
-
-
--- | Allow us to import the XML representation directly into the
--- database, without having to perform the conversion manually first.
---
-instance XmlImport NewsLocationXml
-
-
-- * News_Location
-- | Mapping between 'News' records and 'Location' records in the
+
+-- | Some newsxml documents contain two \<SMS\> elements in a row,
+-- violating the DTD. The second one has always been empty, but it's
+-- irrelevant: we can't parse these, and would like to detect them
+-- in order to report the fact that the busted document is
+-- unsupported.
--
--- Database code
+-- This function detects whether two \<SMS\> elements appear in a
+-- row, as siblings.
+--
+has_only_single_sms :: XmlTree -> Bool
+has_only_single_sms xmltree =
+ case elements of
+ [] -> True
+ _ -> False
+ where
+ parse :: XmlTree -> [XmlTree]
+ parse = runLA $ hasName "/"
+ /> hasName "message"
+ >>> addNav
+ >>> descendantAxis
+ >>> filterAxis (hasName "SMS")
+ >>> followingSiblingAxis
+ >>> remNav
+ >>> hasName "SMS"
+
+ elements = parse xmltree
+
+
+--
+-- * Database code
--
-- | Define 'dbmigrate' and 'dbimport' for 'Message's. The import is
mapM_ insert_ news_news_teams
-- Do all of that over again for the Locations.
- loc_ids <- mapM insert_xml_or_select (xml_locations message)
+ loc_ids <- mapM insert_or_select (xml_locations message)
let news_news_locations = map (News_Location news_id) loc_ids
mapM_ insert_ news_news_locations
to_tuple m = (db_msg_id m, db_event_id m)
--- | Convert a 'NewsLocationXml' to/from XML.
---
-pickle_location :: PU NewsLocationXml
-pickle_location =
- xpElem "location" $
- xpWrap (from_tuple, to_tuple) $
- xpTriple (xpOption (xpElem "city" xpText))
- (xpOption (xpElem "state" xpText))
- (xpElem "country" xpText)
- where
- from_tuple =
- uncurryN NewsLocationXml
- to_tuple l = (xml_city l, xml_state l, xml_country l)
-
-- | Convert a 'Message' to/from XML.
--
[ test_news_fields_have_correct_names,
test_on_delete_cascade,
test_pickle_of_unpickle_is_identity,
- test_unpickle_succeeds ]
+ test_unpickle_succeeds,
+ test_sms_detected_correctly ]
-- | Make sure our codegen is producing the correct database names.
count_e <- countAll e
return $ count_a + count_b + count_c + count_d + count_e
actual @?= expected
+
+
+-- | We want to make sure the single-SMS documents and the multi-SMS
+-- documents are identified correctly.
+--
+test_sms_detected_correctly :: TestTree
+test_sms_detected_correctly =
+ testGroup "newsxml SMS count determined correctly"
+ [ check "test/xml/newsxml.xml"
+ "single SMS detected correctly"
+ True,
+ check "test/xml/newsxml-multiple-sms.xml"
+ "multiple SMS detected correctly"
+ False ]
+ where
+ check path desc expected = testCase desc $ do
+ xmltree <- unsafe_read_document path
+ let actual = has_only_single_sms xmltree
+ actual @?= expected