module Html (
html_tests,
replace_entities )
where
import Test.Tasty ( TestTree, testGroup )
import Test.Tasty.HUnit ( (@?=), testCase )
import Text.HTML.TagSoup.Entity ( lookupEntity )
-- | Replace (x)html entities in a 'String'. To do this, we search
-- through the string looking for ampersands which may indicate the
-- beginning of an entity.
--
-- If we find one and there's a semicolon after it, we attempt to
-- look up the identifier that we found between the ampersand and
-- the semicolon. If an entity is found, we replace the ampersand,
-- semicolon, and everything in between with the entity. However if
-- no corresponding entity is found, we leave everything alone.
--
-- Examples:
--
-- >>> replace_entities "Hello, world!"
-- "Hello, world!"
--
-- >>> replace_entities "Hello; world!"
-- "Hello; world!"
--
-- >>> replace_entities "Hello, world & other worlds!"
-- "Hello, world & other worlds!"
--
-- >>> replace_entities "Hello, world & other worlds; hello indeed!"
-- "Hello, world & other worlds; hello indeed!"
--
-- >>> putStrLn $ replace_entities "Hello world—I guess"
-- Hello world—I guess
--
replace_entities :: String -> String
replace_entities [] = []
replace_entities ('&':xs) =
let (b, a) = break (== ';') xs in
case (lookupEntity b, a) of
(Just s, ';':as) -> s ++ replace_entities as
_ -> '&' : replace_entities xs
replace_entities (x:xs) = x : replace_entities xs
html_tests :: TestTree
html_tests =
testGroup "HTML Tests" [ test_replace_entities ]
test_replace_entities :: TestTree
test_replace_entities =
testCase description $ actual @?= expected
where
description = "all entities are replaced correctly."
actual =
replace_entities $
""The moon is gay……" " ++
"said <insert the current president of the " ++
"United States of America>. “It’s " ++
"OK—–he’s not a real doctor.”"
expected =
"\"The moon is gay……\" said . " ++
"“It’s OK—–he’s not a real doctor.”"