From ce53d0d1ff76cc45f1f66504976a0549999677c0 Mon Sep 17 00:00:00 2001
From: Michael Orlitzky
Date: Fri, 13 Jul 2012 22:43:46 -0400
Subject: [PATCH] Remove full story paragraphs if we aren't going to use them.

---
 src/LWN/Page.hs  | 4 +++-
 src/LWN/XHTML.hs | 6 ++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/LWN/Page.hs b/src/LWN/Page.hs
index 3dccf46..ccc2590 100644
--- a/src/LWN/Page.hs
+++ b/src/LWN/Page.hs
@@ -58,6 +58,7 @@ import LWN.XHTML (
   is_image,
   preprocess,
   remove_byline,
+  remove_full_story_paragraphs,
   remove_title,
   to_xhtml,
   to_xml,
@@ -196,7 +197,8 @@ parse cfg xml = do
               story_map <- download_full_stories cfg xml
               return $ xml >>> insert_full_stories story_map
             else do
-              return xml
+              -- Get rid of them if we don't want them.
+              return $ xml >>> remove_full_story_paragraphs
 
   let clean_xml = fs_xml >>> preprocess
   image_map <- download_images clean_xml
diff --git a/src/LWN/XHTML.hs b/src/LWN/XHTML.hs
index 5d3bbd1..c54bada 100644
--- a/src/LWN/XHTML.hs
+++ b/src/LWN/XHTML.hs
@@ -9,6 +9,7 @@ module LWN.XHTML (
   parse_lwn,
   preprocess,
   remove_byline,
+  remove_full_story_paragraphs,
   remove_title,
   to_xhtml,
   to_xml,
@@ -90,6 +91,11 @@ is_link =
   isElem >>> hasName "a"
 
 
+remove_full_story_paragraphs :: (ArrowXml a) => a XmlTree XmlTree
+remove_full_story_paragraphs =
+  processTopDown $ none `when` full_story_paragraph
+
+
 remove_comment_links :: (ArrowXml a) => a XmlTree XmlTree
 remove_comment_links =
   processTopDown $ kill_comments `when` is_link
-- 
2.44.2