X-Git-Url: http://gitweb.michael.orlitzky.com/?p=dead%2Fhalcyon.git;a=blobdiff_plain;f=src%2FTwitter%2FXml.hs;fp=src%2FTwitter%2FXml.hs;h=0000000000000000000000000000000000000000;hp=871d2164dee05737e9861d45c2e400dfde76e741;hb=9b6d95a82745ced2a58d9bc4ded555ee36b36673;hpb=81f6cb2ec955695d8d1a4619dab69e8fa4b3fb27

diff --git a/src/Twitter/Xml.hs b/src/Twitter/Xml.hs
deleted file mode 100644
index 871d216..0000000
--- a/src/Twitter/Xml.hs
+++ /dev/null
@@ -1,154 +0,0 @@
--- |Application-specific XML functions.
-module Twitter.Xml
-where
-
-import Data.Char (chr)
-import Test.HUnit
-import Text.Regex (matchRegex, mkRegex, subRegex)
-import Text.XML.HaXml
-
--- |Returns the 'CharData' contained within the given 'Content', or
--- 'Nothing' if no acceptable CharData was found. It will parse either
--- a 'CString' ('String') or 'CRef' (XML entity reference).
-get_char_data :: Content i -> (Maybe CharData)
-get_char_data (CString _ cd _) = Just cd
-get_char_data (CRef ref _) = Just (verbatim ref) -- Entities.
-get_char_data _ = Nothing
-
-
--- |A 'CFilter' returning all top-level <status> elements.
--- The name is due to the fact that if we retrieve more than
--- one status, they will be wrapped in a <statuses> tag, and
--- thus not be top-level.
-single_status :: CFilter i
-single_status = (tag "status")
-
--- |A 'CFilter' returning all <status> tags within <statuses>.
-all_statuses :: CFilter i
-all_statuses = (tag "statuses" /> tag "status")
-
--- |Finds the text of the <id> element contained within some other
--- content. Called unique_id here because status_id is used elsewhere.
-unique_id :: CFilter i
-unique_id = keep /> (tag "id") /> txt
-
--- |Finds the text of the <created_at> element contained within some
--- other element.
-status_created_at :: CFilter i
-status_created_at = keep /> (tag "created_at") /> txt
-
--- |Finds the text of the <text> element contained within some
--- other element.
-status_text :: CFilter i
-status_text = keep /> (tag "text") /> txt
-
--- |Finds the XML of the <user> element contained within some other
--- element.
-status_user :: CFilter i
-status_user = keep /> (tag "user")
-
--- | Finds the text of the <retweeted> element contained within some
---   other element.
-status_retweeted :: CFilter i
-status_retweeted = keep /> (tag "retweeted") /> txt
-
--- | Finds the text of the <in_reply_to_status_id> element contained
---   within some other element.
-status_reply_to_status_id :: CFilter i
-status_reply_to_status_id = keep /> (tag "in_reply_to_status_id") /> txt
-
--- |Finds the text of the <screen_name> element contained within some
--- other element.
-user_screen_name :: CFilter i
-user_screen_name = keep /> (tag "screen_name") /> txt
-
--- |A wrapper around the 'read' function which returns either Nothing
--- or (Just <the thing that could be read>).
-maybe_read :: (Read a) => String -> Maybe a
-maybe_read str =
-    case (reads str) of
-      [] -> Nothing
-      ((y,_):_) -> Just y
-
--- |Takes a unicode codepoint in decimal and returns it as a
--- one-character string.
-entity_from_codepoint :: String -> String
-entity_from_codepoint codepoint =
-    case (maybe_read codepoint) of
-      Nothing  -> ""
-      Just num -> [(chr num)]
-
-
--- | A list of tuples whose first entry is a regular expression
---   matching XML entities, and whose second entry is the ASCII
---   character represented by that entity.
---
---   For some reason, ampersands are escaped twice in the status
---   text. Rather than unescape everything twice, we just stick "amp"
---   in the list again.
-xml_entities :: [(String, String)]
-xml_entities = [("[lr]dquo", "\""),
-                ("quot",     "\""),
-                ("[mn]dash", "-"),
-                ("nbsp",     " "),
-                ("amp",      "&"),
-                ("amp",      "&"),
-                ("lt",       "<"),
-                ("gt",       ">"),
-                ("hellip",   "…")]
-
--- |Replace all of the XML entities in target.
-replace_entities :: String -> String
-replace_entities target =
-    unescape_numeric (unescape_recursive xml_entities target)
-
--- |Recursively unescape all numeric entities in the given String.
-unescape_numeric :: String -> String
-unescape_numeric target =
-    case match of
-      Nothing -> target
-      Just subexprs ->
-          case subexprs of
-            []   -> target
-            s1:_ ->
-                let this_entity_regex = mkRegex ("&#" ++ s1 ++ ";") in
-                let replacement = entity_from_codepoint s1 in
-                let new_target = subRegex this_entity_regex target replacement in
-                unescape_numeric new_target
-    where
-      from = "&#([0-9]+);"
-      match = matchRegex (mkRegex from) target
-
-
-
--- |The recursive function which does the real work for
--- 'replace_entities'.
-unescape_recursive :: [(String, String)] -> String -> String
-unescape_recursive [] target = target
-unescape_recursive replacements target =
-    unescape_recursive (tail replacements) (subRegex (mkRegex from) target to)
-    where
-      replacement = (replacements !! 0)
-      from = "&" ++ (fst replacement) ++ ";"
-      to = (snd replacement)
-
-
-
-xml_tests :: [Test]
-xml_tests = [ test_replace_entities, test_double_unescape ]
-
-
-test_replace_entities :: Test
-test_replace_entities =
-    TestCase $ assertEqual "All entities are replaced correctly." expected_text actual_text
-    where
-      actual_text = (replace_entities "&quot;The moon is gay&#8230;&hellip;&quot; said &lt;insert the current president of the United States of America&gt;. &ldquo;It&#8217;s OK&mdash;&ndash;he&#8217;s not a real doctor.&rdquo;")
-      expected_text = "\"The moon is gay……\" said <insert the current president of the United States of America>. \"It’s OK--he’s not a real doctor.\""
-
-
-test_double_unescape :: Test
-test_double_unescape =
-    TestCase $ assertEqual "The status text is unescaped twice." expected_text actual_text
-    where
-      actual_text = (replace_entities "As a kid, I'd pull a girl's hair to let her know I liked her, but now that I'm older &amp;amp; wiser I simply hit her with my car.")
-      expected_text = "As a kid, I'd pull a girl's hair to let her know I liked her, but now that I'm older & wiser I simply hit her with my car."