6 import Test.Tasty ( TestTree, testGroup )
7 import Test.Tasty.HUnit ( (@?=), testCase )
8 import Text.HTML.TagSoup.Entity ( lookupEntity )
11 -- | Replace (x)html entities in a 'String'. To do this, we search
12 -- through the string looking for ampersands which may indicate the
13 -- beginning of an entity.
15 -- If we find one and there's a semicolon after it, we attempt to
16 -- look up the identifier that we found between the ampersand and
17 -- the semicolon. If an entity is found, we replace the ampersand,
18 -- semicolon, and everything in between with the entity. However if
19 -- no corresponding entity is found, we leave everything alone.
23 -- >>> replace_entities "Hello, world!"
26 -- >>> replace_entities "Hello; world!"
29 -- >>> replace_entities "Hello, world & other worlds!"
30 -- "Hello, world & other worlds!"
32 -- >>> replace_entities "Hello, world & other worlds; hello indeed!"
33 -- "Hello, world & other worlds; hello indeed!"
35 -- >>> putStrLn $ replace_entities "Hello world—I guess"
36 -- Hello world—I guess
38 replace_entities :: String -> String
39 replace_entities [] = []
40 replace_entities ('&':xs) =
41 let (b, a) = break (== ';') xs in
42 case (lookupEntity b, a) of
43 (Just s, ';':as) -> s ++ replace_entities as
44 _ -> '&' : replace_entities xs
45 replace_entities (x:xs) = x : replace_entities xs
48 html_tests :: TestTree
50 testGroup "HTML Tests" [ test_replace_entities ]
53 test_replace_entities :: TestTree
54 test_replace_entities =
55 testCase description $ actual @?= expected
57 description = "all entities are replaced correctly."
60 ""The moon is gay……" " ++
61 "said <insert the current president of the " ++
62 "United States of America>. “It’s " ++
63 "OK—–he’s not a real doctor.”"
65 "\"The moon is gay……\" said <insert " ++
66 "the current president of the United States of America>. " ++
67 "“It’s OK—–he’s not a real doctor.”"