From b054d19e638a4983f88243e09f1aa474ae405998 Mon Sep 17 00:00:00 2001
From: Michael Orlitzky
Date: Fri, 13 Jul 2012 23:38:31 -0400
Subject: [PATCH] Replace double newlines with "<br /><br />" in <pre> tags.
 Supply a stylesheet (based on the default) to writeEPUB. Set white-space:
 pre-line for <pre> elements.

---
 src/LWN/Page.hs  | 34 +++++++++++++++++++++++++++-------
 src/LWN/XHTML.hs | 17 +++++++++++++++++
 2 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/src/LWN/Page.hs b/src/LWN/Page.hs
index 30c54b5..cdcd1a6 100644
--- a/src/LWN/Page.hs
+++ b/src/LWN/Page.hs
@@ -20,7 +20,9 @@ import Text.Pandoc (
   defaultWriterOptions,
   readHtml,
   writeEPUB,
-  writerEPUBMetadata)
+  writerEPUBMetadata,
+  writerUserDataDir)
+import Text.Pandoc.Shared ( readDataFile )
 import Text.XML.HXT.Core (
   ArrowXml,
   IOSArrow,
@@ -113,6 +115,20 @@ instance XHTML Page where
 
 
 
+-- | Stolen from writeEPUB.
+default_stylesheet :: IO String
+default_stylesheet =
+  -- This comes with Pandoc, I guess.
+  readDataFile (writerUserDataDir defaultWriterOptions) "epub.css"
+
+
+construct_stylesheet :: IO String
+construct_stylesheet = do
+  defaults <- default_stylesheet
+  -- Allow word-wrapping in <pre> elements.
+  let my_additions = "\n" ++ "pre { white-space: pre-wrap; }" ++ "\n"
+  return $ defaults ++ my_additions
+
 page_from_url :: Cfg -> URL -> IO (Maybe Page)
 page_from_url cfg url = do
   contents <- get_article_contents cfg url  
@@ -401,12 +417,16 @@ epublish obj handle = do
   hClose handle
 
 xhtml_to_epub :: String -> String -> IO B.ByteString
-xhtml_to_epub epmd =
-   write_epub . read_html
-   where
-     my_writer_options = defaultWriterOptions { writerEPUBMetadata = epmd }
-     write_epub = writeEPUB Nothing [] my_writer_options
-     read_html  = readHtml defaultParserState
+xhtml_to_epub epmd xhtml = do
+  stylesheet <- construct_stylesheet
+  writeEPUB
+    (Just stylesheet)
+    []
+    my_writer_options
+    (read_html xhtml)
+  where
+    my_writer_options = defaultWriterOptions { writerEPUBMetadata = epmd }
+    read_html  = readHtml defaultParserState
 
 
 --
diff --git a/src/LWN/XHTML.hs b/src/LWN/XHTML.hs
index c54bada..f1a91c2 100644
--- a/src/LWN/XHTML.hs
+++ b/src/LWN/XHTML.hs
@@ -16,6 +16,7 @@ module LWN.XHTML (
   xml_from_contents)
 where
 
+import Data.String.Utils (replace)
 import Text.HandsomeSoup (css)
 import Text.XML.HXT.Core (
   (>>>),
@@ -25,6 +26,7 @@ import Text.XML.HXT.Core (
   SysConfigList,
   XmlTree,
   changeAttrValue,
+  changeText,
   deep,
   getAttrValue,
   hasAttrValue,
@@ -84,12 +86,27 @@ preprocess =
   remove_comment_links
   >>>
   replace_links_with_spans
+  >>>
+  replace_double_newline_in_pre
 
 
 is_link :: (ArrowXml a) => a XmlTree XmlTree
 is_link =
   isElem >>> hasName "a"
 
+is_pre :: (ArrowXml a) => a XmlTree XmlTree
+is_pre =
+  isElem
+  >>>
+  hasName "pre"
+
+replace_double_newline_in_pre :: (ArrowXml a) => a XmlTree XmlTree
+replace_double_newline_in_pre =
+  processTopDown $ fix_it `when` is_pre
+  where
+    fix_it :: (ArrowXml a) => a XmlTree XmlTree
+    fix_it =
+      changeText $ replace "\n\n" "<br /><br />"
 
 remove_full_story_paragraphs :: (ArrowXml a) => a XmlTree XmlTree
 remove_full_story_paragraphs =
-- 
2.43.2