+xhtml_to_epub epmd xhtml = do
+ stylesheet <- construct_stylesheet
+ writeEPUB
+ (Just stylesheet)
+ []
+ my_writer_options
+ (read_html xhtml)
+ where
+ my_writer_options = defaultWriterOptions { writerEPUBMetadata = epmd }
+ read_html = readHtml defaultParserState
+
+
+--
+-- Tests
+--
+
+test_preprocess_links :: Assertion
+test_preprocess_links = do
+ actual_xml' <- runX $ input_xml >>> preprocess >>> css "body"
+ let actual_xml = actual_xml' !! 0
+
+ expected_xml' <- runX $ expected_xml'' >>> css "body"
+ let expected_xml = expected_xml' !! 0
+
+ assertEqual
+ "Links are replaced with spans"
+ expected_xml
+ actual_xml
+ where
+ input_html = "<body><a href=\"#\">Hello, world!</a></body>"
+ input_xml = parseHtml input_html
+ expected_html = "<body><span>Hello, world!</span></body>"
+ expected_xml'' = parseHtml expected_html
+
+
+test_absolve_images :: Assertion
+test_absolve_images = do
+ actual_xml' <- runX $ input_xml >>> preprocess >>> css "body"
+ let actual_xml = actual_xml' !! 0
+
+ expected_xml' <- runX $ expected_xml'' >>> css "body"
+ let expected_xml = expected_xml' !! 0
+
+ assertEqual
+ "Image srcs are made absolute"
+ expected_xml
+ actual_xml
+ where
+ input_html =
+ "<body>" ++
+ "<img src=\"/images/2012/example.jpg\" />" ++
+ "</body>"
+ input_xml = parseHtml input_html
+ expected_html =
+ "<body>" ++
+ "<img src=\"https://lwn.net/images/2012/example.jpg\" />" ++
+ "</body>"
+ expected_xml'' = parseHtml expected_html
+
+
+test_comments_removed :: Assertion
+test_comments_removed = do
+ actual_xml' <- runX $ input_xml >>> preprocess >>> css "body"
+ let actual_xml = actual_xml' !! 0
+
+ expected_xml' <- runX $ expected_xml'' >>> css "body"
+ let expected_xml = expected_xml' !! 0
+
+ assertEqual
+ "Comment links are removed"
+ expected_xml
+ actual_xml
+ where
+ input_html =
+ "<body><p>" ++
+ "<a href=\"/Articles/501490/#Comments\">Comments (6 posted)</a>" ++
+ "</p></body>"
+ input_xml = parseHtml input_html
+
+ expected_html = "<body><p></p></body>"
+ expected_xml'' = parseHtml expected_html
+
+
+test_full_story_urls_parsed :: Assertion
+test_full_story_urls_parsed = do
+ actual <- runX $ actual'
+
+ assertEqual
+ "Full Story URLs are parsed"
+ expected
+ actual
+ where
+ expected = ["/Articles/500738/", "/Articles/501837/"]
+
+ full_story_html =
+ concat ["<p>",
+ "<a href=\"/Articles/500738/\">Full Story</a> ",
+ "(<a href=\"/Articles/500738/#Comments\">comments: 49</a>)",
+ "<p>",
+ "<a href=\"/Articles/501837/\">Full Story</a> ",
+ "(<a href=\"/Articles/501837/#Comments\">comments: none</a>)",
+ "<p>"]
+
+ xml = parseHtml full_story_html
+ actual' = xml >>> full_story_urls
+
+page_tests :: Test
+page_tests =
+ testGroup "Page Tests" [
+ testCase "Links are replaced with spans" test_preprocess_links,
+ testCase "Image srcs are made absolute" test_absolve_images,
+ testCase "Comment links are removed" test_comments_removed,
+ testCase "Full Story URLs are parsed" test_full_story_urls_parsed ]