]> gitweb.michael.orlitzky.com - dead/halcyon.git/blob - src/Html.hs
Clean up a bunch of code and comments.
[dead/halcyon.git] / src / Html.hs
1 module Html (
2 html_tests,
3 replace_entities )
4 where
5
6 import Test.Tasty ( TestTree, testGroup )
7 import Test.Tasty.HUnit ( (@?=), testCase )
8 import Text.HTML.TagSoup.Entity ( lookupEntity )
9
10
11 -- | Replace (x)html entities in a 'String'. To do this, we search
12 -- through the string looking for ampersands which may indicate the
13 -- beginning of an entity.
14 --
15 -- If we find one and there's a semicolon after it, we attempt to
16 -- look up the identifier that we found between the ampersand and
17 -- the semicolon. If an entity is found, we replace the ampersand,
18 -- semicolon, and everything in between with the entity. However if
19 -- no corresponding entity is found, we leave everything alone.
20 --
21 -- Examples:
22 --
23 -- >>> replace_entities "Hello, world!"
24 -- "Hello, world!"
25 --
26 -- >>> replace_entities "Hello; world!"
27 -- "Hello; world!"
28 --
29 -- >>> replace_entities "Hello, world & other worlds!"
30 -- "Hello, world & other worlds!"
31 --
32 -- >>> replace_entities "Hello, world & other worlds; hello indeed!"
33 -- "Hello, world & other worlds; hello indeed!"
34 --
35 -- >>> putStrLn $ replace_entities "Hello world—I guess"
36 -- Hello world—I guess
37 --
38 replace_entities :: String -> String
39 replace_entities [] = []
40 replace_entities ('&':xs) =
41 let (b, a) = break (== ';') xs in
42 case (lookupEntity b, a) of
43 (Just s, ';':as) -> s ++ replace_entities as
44 _ -> '&' : replace_entities xs
45 replace_entities (x:xs) = x : replace_entities xs
46
47
48 html_tests :: TestTree
49 html_tests =
50 testGroup "HTML Tests" [ test_replace_entities ]
51
52
53 test_replace_entities :: TestTree
54 test_replace_entities =
55 testCase description $ actual @?= expected
56 where
57 description = "all entities are replaced correctly."
58 actual =
59 replace_entities $
60 ""The moon is gay……" " ++
61 "said <insert the current president of the " ++
62 "United States of America>. “It’s " ++
63 "OK—–he’s not a real doctor.”"
64 expected =
65 "\"The moon is gay……\" said <insert " ++
66 "the current president of the United States of America>. " ++
67 "“It’s OK—–he’s not a real doctor.”"