X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;ds=sidebyside;f=src%2FLWN%2FXHTML.hs;h=f1a91c249b41608e764f15ca7e8922f06afa2a7c;hb=3903bd0cecc36d39b444d8cc1ab94b7df0ec1d36;hp=a2f103fa0d83b7ae433f239036e02456eb6e08f3;hpb=fc0052e451aa03675ebd9a128dfa46573b9357d7;p=dead%2Flwn-epub.git
diff --git a/src/LWN/XHTML.hs b/src/LWN/XHTML.hs
index a2f103f..f1a91c2 100644
--- a/src/LWN/XHTML.hs
+++ b/src/LWN/XHTML.hs
@@ -9,14 +9,15 @@ module LWN.XHTML (
parse_lwn,
preprocess,
remove_byline,
+ remove_full_story_paragraphs,
remove_title,
to_xhtml,
to_xml,
xml_from_contents)
where
+import Data.String.Utils (replace)
import Text.HandsomeSoup (css)
-import Text.Regex.Posix ((=~))
import Text.XML.HXT.Core (
(>>>),
(/>),
@@ -25,6 +26,7 @@ import Text.XML.HXT.Core (
SysConfigList,
XmlTree,
changeAttrValue,
+ changeText,
deep,
getAttrValue,
hasAttrValue,
@@ -84,12 +86,32 @@ preprocess =
remove_comment_links
>>>
replace_links_with_spans
+ >>>
+ replace_double_newline_in_pre
is_link :: (ArrowXml a) => a XmlTree XmlTree
is_link =
isElem >>> hasName "a"
+-- | Pass through only element nodes whose tag name is @pre@; every
+-- other node is filtered out.
+is_pre :: (ArrowXml a) => a XmlTree XmlTree
+is_pre =
+  isElem >>> hasName "pre"
+
+-- | Walk the tree top-down and apply @fix_it@ to every node selected
+-- by 'is_pre'. @fix_it@ rewrites text via 'changeText', substituting
+-- each occurrence of two consecutive newlines.
+--
+-- NOTE(review): HXT documents 'changeText' as editing text nodes,
+-- whereas 'is_pre' selects the @pre@ element itself. This may leave
+-- the element's text children untouched -- confirm, and wrap the edit
+-- in @processChildren@ if that is the case.
+replace_double_newline_in_pre :: (ArrowXml a) => a XmlTree XmlTree
+replace_double_newline_in_pre =
+ processTopDown $ fix_it `when` is_pre
+ where
+ fix_it :: (ArrowXml a) => a XmlTree XmlTree
+ fix_it =
+ -- NOTE(review): the replacement literal below is split across two
+ -- lines, which is not a valid Haskell string. It looks garbled by
+ -- extraction; restore it from the original commit -- TODO confirm.
+ changeText $ replace "\n\n" "
"
+
+-- | Traverse the document top-down and drop every node accepted by
+-- @full_story_paragraph@ (defined outside this hunk); all other
+-- nodes are passed through unchanged.
+remove_full_story_paragraphs :: (ArrowXml a) => a XmlTree XmlTree
+remove_full_story_paragraphs =
+  processTopDown (ifA full_story_paragraph none this)
+
remove_comment_links :: (ArrowXml a) => a XmlTree XmlTree
remove_comment_links =
@@ -163,7 +185,7 @@ full_story_link =
hasName "a"
>>>
ifA
- (this /> hasText (=~ "Full Story"))
+ (this /> hasText (== "Full Story"))
this
none