From 569740692ca60f6c41fe2d9000661dd1059c7a0a Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Thu, 30 Sep 2010 21:35:43 -0400 Subject: [PATCH] Add &quot; unescaping. Add HUnit tests for the Twitter.Xml module (replace_entities). --- src/Twitter/Xml.hs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/Twitter/Xml.hs b/src/Twitter/Xml.hs index 20015d3..a278081 100644 --- a/src/Twitter/Xml.hs +++ b/src/Twitter/Xml.hs @@ -3,6 +3,7 @@ module Twitter.Xml where import Data.Maybe +import Test.HUnit import Text.Regex (mkRegex, subRegex) import Text.XML.HaXml @@ -50,6 +51,7 @@ user_screen_name = keep /> (tag "screen_name") /> txt -- character represented by that entity. xml_entities :: [(String, String)] xml_entities = [("[lr]dquo", "\""), + ("quot", "\""), ("[mn]dash", "-"), ("nbsp", " "), ("#8217", "'"), @@ -71,3 +73,16 @@ unescape_recursive replacements target = replacement = (replacements !! 0) from = "&" ++ (fst replacement) ++ ";" to = (snd replacement) + + + +xml_tests :: [Test] +xml_tests = [ test_replace_entities ] + + +test_replace_entities :: Test +test_replace_entities = + TestCase $ assertEqual "All entities are replaced correctly." expected_text actual_text + where + actual_text = (replace_entities "&quot;The moon is gay,&quot; said &lt;insert the current president of the United States of America&gt;. &ldquo;It&#8217;s OK&mdash;&ndash;he&#8217;s not a real doctor.&rdquo;") + expected_text = "\"The moon is gay,\" said <insert the current president of the United States of America>. \"It's OK--he's not a real doctor.\"" -- 2.43.2