+
+
+
+--
+-- Misc
+--
+
+image_srcs :: (ArrowXml a) => a XmlTree URL
+image_srcs =
+ css "img"
+ >>>
+ getAttrValue "src"
+
+make_image_srcs_absolute :: (ArrowXml a) => a XmlTree XmlTree
+make_image_srcs_absolute =
+ processTopDown (make_srcs_absolute `when` is_image)
+ where
+ change_src :: (ArrowXml a) => a XmlTree XmlTree
+ change_src =
+ changeAttrValue try_make_absolute_url
+
+ make_srcs_absolute :: (ArrowXml a) => a XmlTree XmlTree
+ make_srcs_absolute =
+ processAttrl $ change_src `when` hasName "src"
+
+
+--
+-- Tests
+--
+
+test_preprocess_links :: Assertion
+test_preprocess_links = do
+ actual_xml' <- runX $ input_xml >>> preprocess >>> css "body"
+ let actual_xml = actual_xml' !! 0
+
+ expected_xml' <- runX $ expected_xml'' >>> css "body"
+ let expected_xml = expected_xml' !! 0
+
+ assertEqual
+ "Links are replaced with spans"
+ expected_xml
+ actual_xml
+ where
+ input_html = "<body><a href=\"#\">Hello, world!</a></body>"
+ input_xml = parseHtml input_html
+ expected_html = "<body><span>Hello, world!</span></body>"
+ expected_xml'' = parseHtml expected_html
+
+
+test_absolve_images :: Assertion
+test_absolve_images = do
+ actual_xml' <- runX $ input_xml >>> preprocess >>> css "body"
+ let actual_xml = actual_xml' !! 0
+
+ expected_xml' <- runX $ expected_xml'' >>> css "body"
+ let expected_xml = expected_xml' !! 0
+
+ assertEqual
+ "Image srcs are made absolute"
+ expected_xml
+ actual_xml
+ where
+ input_html =
+ "<body>" ++
+ "<img src=\"/images/2012/example.jpg\" />" ++
+ "</body>"
+ input_xml = parseHtml input_html
+ expected_html =
+ "<body>" ++
+ "<img src=\"https://lwn.net/images/2012/example.jpg\" />" ++
+ "</body>"
+ expected_xml'' = parseHtml expected_html
+
+
+test_comments_removed :: Assertion
+test_comments_removed = do
+ actual_xml' <- runX $ input_xml >>> preprocess >>> css "body"
+ let actual_xml = actual_xml' !! 0
+
+ expected_xml' <- runX $ expected_xml'' >>> css "body"
+ let expected_xml = expected_xml' !! 0
+
+ assertEqual
+ "Comment links are removed"
+ expected_xml
+ actual_xml
+ where
+ input_html =
+ "<body><p>" ++
+ "<a href=\"/Articles/501490/#Comments\">Comments (6 posted)</a>" ++
+ "</p></body>"
+ input_xml = parseHtml input_html
+
+ expected_html = "<body><p></p></body>"
+ expected_xml'' = parseHtml expected_html
+
+
+
+page_tests :: Test
+page_tests =
+ testGroup "Page Tests" [
+ testCase "Links are replaced with spans" test_preprocess_links,
+ testCase "Image srcs are made absolute" test_absolve_images,
+ testCase "Comment links are removed" test_comments_removed ]