X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FLWN%2FPage.hs;h=30c54b550a530777eb9ccfd818b16785c46cc0ef;hb=da4d3ce666bd8fc7a0011ecf495714bb55f0127d;hp=3dccf466fea98f49a8ee42dcf44a2cdebe66a42c;hpb=d7eb43dabd083ff2e12e9cfbf9bc2b6fcaa55e0b;p=dead%2Flwn-epub.git diff --git a/src/LWN/Page.hs b/src/LWN/Page.hs index 3dccf46..30c54b5 100644 --- a/src/LWN/Page.hs +++ b/src/LWN/Page.hs @@ -34,6 +34,7 @@ import Text.XML.HXT.Core ( getChildren, getText, hasName, + none, processAttrl, processTopDown, this, @@ -58,6 +59,7 @@ import LWN.XHTML ( is_image, preprocess, remove_byline, + remove_full_story_paragraphs, remove_title, to_xhtml, to_xml, @@ -127,8 +129,8 @@ insert_full_stories story_map = lookup_func :: (ArrowXml a) => URL -> a XmlTree XmlTree lookup_func href = case Map.lookup href story_map of - -- Leave it alone if we don't have the full story. - Nothing -> this + -- Drop the paragraph if we don't have the contents. + Nothing -> none Just v -> to_xml v article_xml :: (ArrowXml a) => a XmlTree XmlTree @@ -196,7 +198,8 @@ parse cfg xml = do story_map <- download_full_stories cfg xml return $ xml >>> insert_full_stories story_map else do - return xml + -- Get rid of them if we don't want them. + return $ xml >>> remove_full_story_paragraphs let clean_xml = fs_xml >>> preprocess image_map <- download_images clean_xml