module Html ( html_tests, replace_entities ) where import Test.Tasty ( TestTree, testGroup ) import Test.Tasty.HUnit ( (@?=), testCase ) import Text.HTML.TagSoup.Entity ( lookupEntity ) -- | Replace (x)html entities in a 'String'. To do this, we search -- through the string looking for ampersands which may indicate the -- beginning of an entity. -- -- If we find one and there's a semicolon after it, we attempt to -- look up the identifier that we found between the ampersand and -- the semicolon. If an entity is found, we replace the ampersand, -- semicolon, and everything in between with the entity. However if -- no corresponding entity is found, we leave everything alone. -- -- Examples: -- -- >>> replace_entities "Hello, world!" -- "Hello, world!" -- -- >>> replace_entities "Hello; world!" -- "Hello; world!" -- -- >>> replace_entities "Hello, world & other worlds!" -- "Hello, world & other worlds!" -- -- >>> replace_entities "Hello, world & other worlds; hello indeed!" -- "Hello, world & other worlds; hello indeed!" -- -- >>> putStrLn $ replace_entities "Hello world—I guess" -- Hello world—I guess -- replace_entities :: String -> String replace_entities [] = [] replace_entities ('&':xs) = let (b, a) = break (== ';') xs in case (lookupEntity b, a) of (Just s, ';':as) -> s ++ replace_entities as _ -> '&' : replace_entities xs replace_entities (x:xs) = x : replace_entities xs html_tests :: TestTree html_tests = testGroup "HTML Tests" [ test_replace_entities ] test_replace_entities :: TestTree test_replace_entities = testCase description $ actual @?= expected where description = "all entities are replaced correctly." actual = replace_entities $ ""The moon is gay……" " ++ "said <insert the current president of the " ++ "United States of America>. “It’s " ++ "OK—–he’s not a real doctor.”" expected = "\"The moon is gay……\" said . " ++ "“It’s OK—–he’s not a real doctor.”"