X-Git-Url: http://gitweb.michael.orlitzky.com/?p=dead%2Fhalcyon.git;a=blobdiff_plain;f=src%2FTwitter%2FXml.hs;h=1b1ea48e9015ce1732e6898d4dcbfdf7b98b3176;hp=a278081688b2542c59385f5dfff8361ea6a492af;hb=27c6a7e62a428ceb1d2a60d456b075feee196da9;hpb=569740692ca60f6c41fe2d9000661dd1059c7a0a

diff --git a/src/Twitter/Xml.hs b/src/Twitter/Xml.hs
index a278081..1b1ea48 100644
--- a/src/Twitter/Xml.hs
+++ b/src/Twitter/Xml.hs
@@ -2,7 +2,6 @@
 module Twitter.Xml
 where
 
-import Data.Maybe
 import Test.HUnit
 import Text.Regex (mkRegex, subRegex)
 import Text.XML.HaXml
@@ -16,6 +15,13 @@ get_char_data (CRef ref) = Just (verbatim ref) -- Entities.
 get_char_data _ = Nothing
 
 
+-- |A 'CFilter' returning all top-level <status> elements.
+-- The name is due to the fact that if we retrieve more than
+-- one status, they will be wrapped in a <statuses> tag, and
+-- thus not be top-level.
+single_status :: CFilter
+single_status = (tag "status")
+
 -- |A 'CFilter' returning all <status> tags within <statuses>.
 all_statuses :: CFilter
 all_statuses = (tag "statuses" /> tag "status")
@@ -51,13 +57,15 @@ user_screen_name = keep /> (tag "screen_name") /> txt
 -- character represented by that entity.
 xml_entities :: [(String, String)]
 xml_entities = [("[lr]dquo", "\""),
-                ("quot", "\""),
+                ("quot", "\""),
                 ("[mn]dash", "-"),
                 ("nbsp", " "),
                 ("#8217", "'"),
                 ("amp", "&"),
                 ("lt", "<"),
-                ("gt", ">")]
+                ("gt", ">"),
+                ("#8230", "..."),
+                ("hellip", "...")]
 
 -- |Replace all of the XML entities in target.
 replace_entities :: String -> String
@@ -84,5 +92,5 @@ test_replace_entities :: Test
 test_replace_entities =
     TestCase $ assertEqual "All entities are replaced correctly." expected_text actual_text
     where
-      actual_text = (replace_entities "&quot;The moon is gay,&quot; said &lt;insert the current president of the United States of America&gt;. &ldquo;It&#8217;s OK&mdash;&ndash;he&#8217;s not a real doctor.&rdquo;")
-      expected_text = "\"The moon is gay,\" said <insert the current president of the United States of America>. \"It's OK--he's not a real doctor.\""
+      actual_text = (replace_entities "&quot;The moon is gay&#8230;&hellip;&quot; said &lt;insert the current president of the United States of America&gt;. &ldquo;It&#8217;s OK&mdash;&ndash;he&#8217;s not a real doctor.&rdquo;")
+      expected_text = "\"The moon is gay......\" said <insert the current president of the United States of America>. \"It's OK--he's not a real doctor.\""