1 -- |Application-specific XML functions.
7 import Text.Regex (mkRegex, subRegex)
-- |Extracts the 'CharData' carried by a single piece of 'Content'.
-- A 'CString' contributes its character data directly, while a
-- 'CRef' (an XML entity reference) contributes the result of
-- applying 'verbatim' to the reference. Any other kind of content
-- produces 'Nothing'.
get_char_data :: Content -> Maybe CharData
get_char_data content =
  case content of
    CString _ chars -> Just chars
    CRef entity     -> Just (verbatim entity) -- Entities.
    _               -> Nothing
-- |A 'CFilter' that selects every <status> element found directly
-- beneath a <statuses> element.
all_statuses :: CFilter
all_statuses = statuses_element /> status_element
  where
    statuses_element = tag "statuses"
    status_element   = tag "status"
-- |Finds the text of the <id> element contained within some other
-- content. Called unique_id here because status_id is used elsewhere.
--
-- Applied to a piece of content, it keeps that content, descends to
-- its <id> child elements, and yields their text children.
unique_id :: CFilter
unique_id = keep /> tag "id" /> txt
-- |A 'CFilter' yielding the text children of each <created_at>
-- element that is a child of the content it is applied to.
status_created_at :: CFilter
status_created_at = keep /> created_at_element /> txt
  where
    created_at_element = tag "created_at"
-- |A 'CFilter' yielding the text children of each <text> element
-- that is a child of the content it is applied to.
status_text :: CFilter
status_text = keep /> text_element /> txt
  where
    text_element = tag "text"
-- |A 'CFilter' yielding each <user> element (the element itself,
-- not just its text) that is a child of the content it is applied to.
status_user :: CFilter
status_user = keep /> user_element
  where
    user_element = tag "user"
-- |A 'CFilter' yielding the text children of each <screen_name>
-- element that is a child of the content it is applied to.
user_screen_name :: CFilter
user_screen_name = keep /> screen_name_element /> txt
  where
    screen_name_element = tag "screen_name"
-- |A list of tuples whose first entry is a regular expression
-- matching the name of an XML entity (the surrounding ampersand and
-- semicolon are added by 'unescape_recursive'), and whose second
-- entry is the ASCII text that replaces that entity.
52 xml_entities :: [(String, String)]
53 xml_entities = [("[lr]dquo", "\""),
-- |Substitutes every XML entity described by 'xml_entities' within
-- the given string, delegating the work to 'unescape_recursive'.
replace_entities :: String -> String
replace_entities = unescape_recursive xml_entities
-- |The recursive function which does the real work for
-- 'replace_entities'.
--
-- Walks the list of @(pattern, replacement)@ pairs in order. For each
-- pair, the pattern is wrapped in @&@ and @;@, compiled with
-- 'mkRegex', and every match in the target is substituted with the
-- replacement via 'subRegex'. The output of one substitution is the
-- input to the next; once the list is exhausted the accumulated
-- string is returned.
unescape_recursive :: [(String, String)] -> String -> String
unescape_recursive [] target = target
unescape_recursive ((name, replacement) : rest) target =
  unescape_recursive rest (subRegex entity_regex target replacement)
  where
    -- The table stores bare entity names (themselves regular
    -- expressions); the delimiters are added here.
    entity_regex = mkRegex ("&" ++ name ++ ";")
-- |The list of tests covering this file's XML functions; currently
-- only 'test_replace_entities'.
xml_tests :: [Test]
xml_tests = [ test_replace_entities ]
-- |Checks that 'replace_entities' turns a string written with XML
-- entity references into its plain ASCII equivalent.
test_replace_entities :: Test
test_replace_entities =
  TestCase $ assertEqual "All entities are replaced correctly." expected_text actual_text
  where
    -- The input must spell its punctuation as entity references
    -- (rather than as the literal characters they stand for);
    -- otherwise there is nothing for 'replace_entities' to replace.
    -- NOTE(review): assumes 'xml_entities' has entries covering quot,
    -- hellip, ldquo/rdquo, rsquo, mdash and ndash -- confirm against
    -- the table.
    actual_text = replace_entities "&quot;The moon is gay&hellip;&hellip;&quot; said <insert the current president of the United States of America>. &ldquo;It&rsquo;s OK&mdash;&ndash;he&rsquo;s not a real doctor.&rdquo;"
    expected_text = "\"The moon is gay......\" said <insert the current president of the United States of America>. \"It's OK--he's not a real doctor.\""