-module Html
+module Html (
+ html_tests,
+ replace_entities )
where
-import Test.HUnit
-import Text.HTML.TagSoup.Entity (lookupEntity)
+import Test.Tasty ( TestTree, testGroup )
+import Test.Tasty.HUnit ( (@?=), testCase )
+import Text.HTML.TagSoup.Entity ( lookupEntity )
+
+-- | Replace (x)html entities in a 'String'. To do this, we search
+-- through the string looking for ampersands which may indicate the
+-- beginning of an entity.
+--
+-- If we find one and there's a semicolon after it, we attempt to
+-- look up the identifier that we found between the ampersand and
+-- the semicolon. If an entity is found, we replace the ampersand,
+-- semicolon, and everything in between with the entity. However if
+-- no corresponding entity is found, we leave everything alone.
+--
+-- Examples:
+--
+-- >>> replace_entities "Hello, world!"
+-- "Hello, world!"
+--
+-- >>> replace_entities "Hello; world!"
+-- "Hello; world!"
+--
+-- >>> replace_entities "Hello, world & other worlds!"
+-- "Hello, world & other worlds!"
+--
+-- >>> replace_entities "Hello, world & other worlds; hello indeed!"
+-- "Hello, world & other worlds; hello indeed!"
+--
+-- >>> putStrLn $ replace_entities "Hello world—I guess"
+-- Hello world—I guess
+--
replace_entities :: String -> String
replace_entities [] = []
replace_entities ('&':xs) =
let (b, a) = break (== ';') xs in
case (lookupEntity b, a) of
- (Just c, ';':as) -> c : replace_entities as
+ (Just s, ';':as) -> s ++ replace_entities as
_ -> '&' : replace_entities xs
replace_entities (x:xs) = x : replace_entities xs
-html_tests :: [Test]
-html_tests = [ test_replace_entities ]
+html_tests :: TestTree
+html_tests =
+ testGroup "HTML Tests" [ test_replace_entities ]
+
-test_replace_entities :: Test
+test_replace_entities :: TestTree
test_replace_entities =
- TestCase $ assertEqual description expected_text actual_text
- where
- description = "All entities are replaced correctly."
- actual_text =
- replace_entities $
- ""The moon is gay……" " ++
- "said <insert the current president of the " ++
- "United States of America>. “It’s " ++
- "OK—–he’s not a real doctor.”"
- expected_text =
- "\"The moon is gay……\" said <insert " ++
- "the current president of the United States of America>. " ++
- "“It’s OK—–he’s not a real doctor.”"
+ testCase description $ actual @?= expected
+ where
+ description = "all entities are replaced correctly."
+ actual =
+ replace_entities $
+ ""The moon is gay……" " ++
+ "said <insert the current president of the " ++
+ "United States of America>. “It’s " ++
+ "OK—–he’s not a real doctor.”"
+ expected =
+ "\"The moon is gay……\" said <insert " ++
+ "the current president of the United States of America>. " ++
+ "“It’s OK—–he’s not a real doctor.”"