From 928a90109375caaa888ae4413151b560838948b7 Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Sat, 24 Jan 2015 16:35:25 -0500 Subject: [PATCH] Re-enable DTD validation. This enables DTD validation in the default parse_opts. To work around some broken document types, we've added another parse_opts_novalidate and used that where applicable. The test suite is still failing so this is a work-in-progress. --- src/TSN/XML/News.hs | 4 ++-- src/TSN/XML/Weather.hs | 6 +++--- src/Xml.hs | 19 +++++++++++++++++-- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/TSN/XML/News.hs b/src/TSN/XML/News.hs index 645d1da..77dc74a 100644 --- a/src/TSN/XML/News.hs +++ b/src/TSN/XML/News.hs @@ -80,7 +80,7 @@ import Xml ( ToDb(..), pickle_unpickle, unpickleable, - unsafe_read_document, + unsafe_read_invalid_document, unsafe_unpickle ) @@ -574,6 +574,6 @@ test_sms_detected_correctly = False ] where check path desc expected = testCase desc $ do - xmltree <- unsafe_read_document path + xmltree <- unsafe_read_invalid_document path let actual = has_only_single_sms xmltree actual @?= expected diff --git a/src/TSN/XML/Weather.hs b/src/TSN/XML/Weather.hs index 2f3a3ca..2bcb736 100644 --- a/src/TSN/XML/Weather.hs +++ b/src/TSN/XML/Weather.hs @@ -80,7 +80,7 @@ import Xml ( ToDb(..), pickle_unpickle, unpickleable, - unsafe_read_document, + unsafe_read_invalid_document, unsafe_unpickle ) @@ -743,7 +743,7 @@ test_types_detected_correctly = False ] where check path desc expected = testCase desc $ do - xmltree <- unsafe_read_document path + xmltree <- unsafe_read_invalid_document path let actual = is_type1 xmltree actual @?= expected @@ -764,6 +764,6 @@ test_normal_teams_detected_correctly = False ] where check desc path expected = testCase desc $ do - xmltree <- unsafe_read_document path + xmltree <- unsafe_read_invalid_document path let actual = teams_are_normal xmltree actual @?= expected diff --git a/src/Xml.hs b/src/Xml.hs index ec409ac..d4ea467 100644 --- a/src/Xml.hs +++ b/src/Xml.hs @@ -10,9 +10,11 @@ module Xml ( FromXmlFk(..), ToDb(..), parse_opts, + parse_opts_novalidate, pickle_unpickle, unpickleable, unsafe_read_document, + unsafe_read_invalid_document, unsafe_unpickle ) where @@ -97,8 +99,14 @@ newtype DtdName = DtdName String -- parse_opts :: SysConfigList parse_opts = [ withRemoveWS yes, - withSubstDTDEntities no, - withValidate no ] + withSubstDTDEntities no ] + +-- | Like 'parse_opts' except we don't validate the document against +-- its DTD. This is useful when we need to parse a document that we +-- /know/ is invalid so that we can deliver a better error message. +-- +parse_opts_novalidate :: SysConfigList +parse_opts_novalidate = (withValidate no) : parse_opts -- | Given an @unpickler@ and a @filepath@, attempt to unpickle the @@ -174,3 +182,10 @@ unsafe_unpickle filepath unpickler = unsafe_read_document :: FilePath -> IO XmlTree unsafe_read_document filepath = fmap head $ runX $ readDocument parse_opts filepath + +-- | The same as 'unsafe_read_document', except it allows you to read +-- documents which don't validate against their DTDs. +-- +unsafe_read_invalid_document :: FilePath -> IO XmlTree +unsafe_read_invalid_document filepath = + fmap head $ runX $ readDocument parse_opts_novalidate filepath -- 2.49.0