X-Git-Url: http://gitweb.michael.orlitzky.com/?p=dead%2Flwn-epub.git;a=blobdiff_plain;f=src%2FLWN%2FXHTML.hs;h=f1a91c249b41608e764f15ca7e8922f06afa2a7c;hp=5d3bbd17327a8eaf20bec29152ed30de9aa9c7df;hb=b054d19e638a4983f88243e09f1aa474ae405998;hpb=68bab2c1cc272e99c94447afb93678e2b4d4bc72 diff --git a/src/LWN/XHTML.hs b/src/LWN/XHTML.hs index 5d3bbd1..f1a91c2 100644 --- a/src/LWN/XHTML.hs +++ b/src/LWN/XHTML.hs @@ -9,12 +9,14 @@ module LWN.XHTML ( parse_lwn, preprocess, remove_byline, + remove_full_story_paragraphs, remove_title, to_xhtml, to_xml, xml_from_contents) where +import Data.String.Utils (replace) import Text.HandsomeSoup (css) import Text.XML.HXT.Core ( (>>>), @@ -24,6 +26,7 @@ import Text.XML.HXT.Core ( SysConfigList, XmlTree, changeAttrValue, + changeText, deep, getAttrValue, hasAttrValue, @@ -83,12 +86,32 @@ preprocess = remove_comment_links >>> replace_links_with_spans + >>> + replace_double_newline_in_pre is_link :: (ArrowXml a) => a XmlTree XmlTree is_link = isElem >>> hasName "a" +is_pre :: (ArrowXml a) => a XmlTree XmlTree +is_pre = + isElem + >>> + hasName "pre" + +replace_double_newline_in_pre :: (ArrowXml a) => a XmlTree XmlTree +replace_double_newline_in_pre = + processTopDown $ fix_it `when` is_pre + where + fix_it :: (ArrowXml a) => a XmlTree XmlTree + fix_it = + changeText $ replace "\n\n" "

" + +remove_full_story_paragraphs :: (ArrowXml a) => a XmlTree XmlTree +remove_full_story_paragraphs = + processTopDown $ none `when` full_story_paragraph + remove_comment_links :: (ArrowXml a) => a XmlTree XmlTree remove_comment_links =