]> gitweb.michael.orlitzky.com - dead/halcyon.git/blob - src/Twitter/Xml.hs
20015d37abcd58a43167b413a3552e45fb062d85
[dead/halcyon.git] / src / Twitter / Xml.hs
1 -- |Application-specific XML functions.
2 module Twitter.Xml
3 where
4
5 import Data.Maybe
6 import Text.Regex (mkRegex, subRegex)
7 import Text.XML.HaXml
8
-- |Extracts the 'CharData' carried by a piece of 'Content'.
--
--  * A 'CString' yields its character data unchanged.
--
--  * A 'CRef' (an XML entity reference) yields the reference rendered
--    with 'verbatim', so the entity is kept in its textual form
--    rather than being decoded.
--
--  Any other kind of content yields 'Nothing'.
get_char_data :: Content -> (Maybe CharData)
get_char_data content =
  case content of
    CString _ text -> Just text
    CRef entity    -> Just (verbatim entity) -- Entities.
    _              -> Nothing
16
17
-- |A 'CFilter' which, applied to a <statuses> element, returns each
--  of the <status> elements nested within it.
all_statuses :: CFilter
all_statuses = statuses_element /> status_element
  where
    statuses_element = tag "statuses"
    status_element   = tag "status"
21
-- |A 'CFilter' producing the text found inside the <id> element
--  nested within the content it is applied to. It is named
--  unique_id because the name status_id is already used elsewhere.
unique_id :: CFilter
unique_id = keep /> id_element /> txt
  where
    id_element = tag "id"
26
-- |A 'CFilter' producing the text found inside the <created_at>
--  element nested within the element it is applied to.
status_created_at :: CFilter
status_created_at =
  let created_at_element = tag "created_at"
  in keep /> created_at_element /> txt
31
-- |A 'CFilter' producing the text found inside the <text> element
--  nested within the element it is applied to.
status_text :: CFilter
status_text = keep /> text_element /> txt
  where
    text_element = tag "text"
36
-- |A 'CFilter' producing the <user> element (the XML itself, not
--  its text) nested within the element it is applied to.
status_user :: CFilter
status_user = keep /> user_element
  where
    user_element = tag "user"
41
-- |A 'CFilter' producing the text found inside the <screen_name>
--  element nested within the element it is applied to.
user_screen_name :: CFilter
user_screen_name =
  let screen_name_element = tag "screen_name"
  in keep /> screen_name_element /> txt
46
47
-- |The entity substitution table. Each pair holds a regular
--  expression matching the name of an XML entity (without the
--  surrounding \"&\" and \";\") and the ASCII text that replaces
--  that entity.
--
--  The substitutions are applied in the order listed here.
--
--  NOTE(review): \"amp\" is listed before \"lt\" and \"gt\", so an
--  input such as @&amp;lt;@ is unescaped twice and ends up as @<@.
--  Confirm this double-unescaping is intended before reordering.
xml_entities :: [(String, String)]
xml_entities =
  [ ("[lr]dquo", "\"")
  , ("[mn]dash", "-")
  , ("nbsp", " ")
  , ("#8217", "'")
  , ("amp", "&")
  , ("lt", "<")
  , ("gt", ">")
  ]
59
-- |Replaces, within the given string, every XML entity listed in
--  'xml_entities' with its ASCII substitute. The substitutions are
--  carried out by 'unescape_recursive'.
replace_entities :: String -> String
replace_entities = unescape_recursive xml_entities
63
-- |The recursive function which does the real work for
--  'replace_entities'.
--
--  The first argument is a list of (entity regex, replacement)
--  pairs and the second is the string to operate on. For each pair,
--  in list order, every match of \"&\" ++ entity regex ++ \";\" in
--  the string is replaced. Each substitution runs on the output of
--  the previous one, so the order of the pairs matters.
--
--  The entity half of each pair is spliced into a regular
--  expression, so any regex metacharacters in it are interpreted
--  rather than matched literally.
--
--  An empty list of pairs returns the target unchanged.
unescape_recursive :: [(String, String)] -> String -> String
unescape_recursive [] target = target
unescape_recursive ((entity, replacement) : rest) target =
  unescape_recursive rest substituted
  where
    -- The full pattern for one entity, e.g. "&amp;".
    entity_regex = "&" ++ entity ++ ";"
    substituted  = subRegex (mkRegex entity_regex) target replacement
73 to = (snd replacement)