X-Git-Url: http://gitweb.michael.orlitzky.com/?p=dead%2Fhalcyon.git;a=blobdiff_plain;f=src%2FTwitter%2FXml.hs;h=20015d37abcd58a43167b413a3552e45fb062d85;hp=e4cf5aceffa1bff8a1f9e8b5c824702cb5d067fb;hb=69b8af30f49aaad0f5c051998d2556b9ec291df7;hpb=53c26134670775854677470e402b8b41f19246d1 diff --git a/src/Twitter/Xml.hs b/src/Twitter/Xml.hs index e4cf5ac..20015d3 100644 --- a/src/Twitter/Xml.hs +++ b/src/Twitter/Xml.hs @@ -1,3 +1,4 @@ +-- |Application-specific XML functions. module Twitter.Xml where @@ -5,32 +6,48 @@ import Data.Maybe import Text.Regex (mkRegex, subRegex) import Text.XML.HaXml +-- |Returns the 'CharData' contained within the given 'Content', or +-- 'Nothing' if no acceptable CharData was found. It will parse either +-- a 'CString' ('String') or 'CRef' (XML entity reference). get_char_data :: Content -> (Maybe CharData) get_char_data (CString _ cd) = Just cd get_char_data (CRef ref) = Just (verbatim ref) -- Entities. get_char_data _ = Nothing +-- |A 'CFilter' returning all <status> tags within <statuses>. all_statuses :: CFilter all_statuses = (tag "statuses" /> tag "status") --- Called unique_id here because status_id is used elsewhere. +-- |Finds the text of the <id> element contained within some other +-- content. Called unique_id here because status_id is used elsewhere. unique_id :: CFilter unique_id = keep /> (tag "id") /> txt +-- |Finds the text of the <created_at> element contained within some +-- other element. status_created_at :: CFilter status_created_at = keep /> (tag "created_at") /> txt +-- |Finds the text of the <text> element contained within some +-- other element. status_text :: CFilter status_text = keep /> (tag "text") /> txt +-- |Finds the XML of the <user> element contained within some other +-- element. status_user :: CFilter status_user = keep /> (tag "user") +-- |Finds the text of the <screen_name> element contained within some +-- other element. 
user_screen_name :: CFilter user_screen_name = keep /> (tag "screen_name") /> txt +-- |A list of tuples whose first entry is a regular expression +-- matching XML entities, and whose second entry is the ASCII +-- character represented by that entity. xml_entities :: [(String, String)] xml_entities = [("[lr]dquo", "\""), ("[mn]dash", "-"), @@ -40,9 +57,12 @@ xml_entities = [("[lr]dquo", "\""), ("lt", "<"), ("gt", ">")] +-- |Replace the XML entities in target, using the (pattern, replacement) pairs from 'xml_entities'. replace_entities :: String -> String replace_entities target = unescape_recursive xml_entities target +-- |The recursive function which does the real work for +-- 'replace_entities'. unescape_recursive :: [(String, String)] -> String -> String unescape_recursive [] target = target unescape_recursive replacements target =