From: Michael Orlitzky Date: Fri, 1 Oct 2010 01:35:43 +0000 (-0400) Subject: Add &quot; unescaping. X-Git-Url: https://gitweb.michael.orlitzky.com/?a=commitdiff_plain;h=569740692ca60f6c41fe2d9000661dd1059c7a0a;p=dead%2Fhalcyon.git Add &quot; unescaping. Add HUnit tests for the Twitter.Xml module (replace_entities). --- diff --git a/src/Twitter/Xml.hs b/src/Twitter/Xml.hs index 20015d3..a278081 100644 --- a/src/Twitter/Xml.hs +++ b/src/Twitter/Xml.hs @@ -3,6 +3,7 @@ module Twitter.Xml where import Data.Maybe +import Test.HUnit import Text.Regex (mkRegex, subRegex) import Text.XML.HaXml @@ -50,6 +51,7 @@ user_screen_name = keep /> (tag "screen_name") /> txt -- character represented by that entity. xml_entities :: [(String, String)] xml_entities = [("[lr]dquo", "\""), + ("quot", "\""), ("[mn]dash", "-"), ("nbsp", " "), ("#8217", "'"), @@ -71,3 +73,16 @@ unescape_recursive replacements target = replacement = (replacements !! 0) from = "&" ++ (fst replacement) ++ ";" to = (snd replacement) + + + +xml_tests :: [Test] +xml_tests = [ test_replace_entities ] + + +test_replace_entities :: Test +test_replace_entities = + TestCase $ assertEqual "All entities are replaced correctly." expected_text actual_text + where + actual_text = (replace_entities "&quot;The moon is gay,&quot; said &lt;insert the current president of the United States of America&gt;. &ldquo;It&#8217;s OK&mdash;&ndash;he&#8217;s not a real doctor.&rdquo;") + expected_text = "\"The moon is gay,\" said <insert the current president of the United States of America>. \"It's OK--he's not a real doctor.\""