X-Git-Url: http://gitweb.michael.orlitzky.com/?p=dead%2Flwn-epub.git;a=blobdiff_plain;f=src%2FLWN%2FPage.hs;h=3d53284c4b66e9c67ff222d19982ce4e2ced45d2;hp=c61bf6a5c028dd55e41c71a75748426790dd9885;hb=6a7cfdf0880ee5c5367e794babb30fa7eac22f39;hpb=22c3f63a91361fddfe8315d7e7d5daef42700957;ds=sidebyside

diff --git a/src/LWN/Page.hs b/src/LWN/Page.hs
index c61bf6a..3d53284 100644
--- a/src/LWN/Page.hs
+++ b/src/LWN/Page.hs
@@ -20,7 +20,9 @@ import Text.Pandoc (
   defaultWriterOptions,
   readHtml,
   writeEPUB,
-  writerEPUBMetadata)
+  writerEPUBMetadata,
+  writerUserDataDir)
+import Text.Pandoc.Shared ( readDataFile )
 import Text.XML.HXT.Core (
   ArrowXml,
   IOSArrow,
@@ -34,6 +36,7 @@ import Text.XML.HXT.Core (
   getChildren,
   getText,
   hasName,
+  none,
   processAttrl,
   processTopDown,
   this,
@@ -42,7 +45,7 @@ import Text.XML.HXT.Core (
   when)
 import Text.HandsomeSoup (css, parseHtml)
 
-import Configuration (Cfg)
+import Configuration (Cfg, full_stories)
 import LWN.Article
 import LWN.HTTP (
   ImageMap,
@@ -58,6 +61,7 @@ import LWN.XHTML (
   is_image,
   preprocess,
   remove_byline,
+  remove_full_story_paragraphs,
   remove_title,
   to_xhtml,
   to_xml,
@@ -111,6 +115,20 @@ instance XHTML Page where
 
 
 
+-- | Stolen from writeEPUB.
+default_stylesheet :: IO String
+default_stylesheet =
+  -- This comes with Pandoc, I guess.
+  readDataFile (writerUserDataDir defaultWriterOptions) "epub.css"
+
+
+construct_stylesheet :: IO String
+construct_stylesheet = do
+  defaults <- default_stylesheet
+  -- Allow word-wrapping in <pre> elements.
+  let my_additions = "\n" ++ "pre { white-space: pre-wrap; }" ++ "\n"
+  return $ defaults ++ my_additions
+
 page_from_url :: Cfg -> URL -> IO (Maybe Page)
 page_from_url cfg url = do
   contents <- get_article_contents cfg url  
@@ -127,14 +145,14 @@ insert_full_stories story_map =
     lookup_func :: (ArrowXml a) => URL -> a XmlTree XmlTree
     lookup_func href =
       case Map.lookup href story_map of
-        -- Leave it alone if we don't have the full story.
-        Nothing -> this
+        -- Drop the paragraph if we don't have the contents.
+        Nothing -> none
         Just v -> to_xml v
 
     article_xml :: (ArrowXml a) => a XmlTree XmlTree
     article_xml =
       lookup_func
-      $<
+      $< -- From HXT's Control.Arrow.ArrowList
       (this /> full_story_link >>> getAttrValue "href")
                    
 replace_remote_img_srcs :: (ArrowXml a) => ImageMap -> a XmlTree XmlTree
@@ -192,8 +210,12 @@ download_full_stories cfg xml = do
 
 parse :: Cfg -> IOSArrow XmlTree XmlTree -> IO (Maybe Page)
 parse cfg xml = do
-  story_map <- download_full_stories cfg xml
-  let fs_xml = xml >>> insert_full_stories story_map
+  fs_xml <- if (full_stories cfg) then do
+             story_map <- download_full_stories cfg xml
+             return $ xml >>> insert_full_stories story_map
+           else do
+             -- Get rid of them if we don't want them.
+             return $ xml >>> remove_full_story_paragraphs
 
   let clean_xml = fs_xml >>> preprocess
   image_map <- download_images clean_xml
@@ -261,8 +283,6 @@ ap_parse_articles xml = do
   parsed_byline   <- parse_byline xml
   parsed_body     <- ap_parse_body xml
 
-  putStrLn $ fromJust parsed_headline
-
   if (isNothing parsed_headline) || (isNothing parsed_body)
   then return []
   else do
@@ -282,9 +302,9 @@ fp_parse :: IOSArrow XmlTree XmlTree -> IO (Maybe Page)
 fp_parse xml = do
     hl <- parse_headline xml
     parsed_articles <- fp_parse_articles xml
-    case parsed_articles of
-      []          -> return Nothing
-      x -> return $ Just $ FullPage (fromJust hl) x
+    return $ case parsed_articles of
+      [] -> Nothing
+      x  -> Just $ FullPage (fromJust hl) x
 
 
 
@@ -397,12 +417,16 @@ epublish obj handle = do
   hClose handle
 
 xhtml_to_epub :: String -> String -> IO B.ByteString
-xhtml_to_epub epmd =
-   write_epub . read_html
-   where
-     my_writer_options = defaultWriterOptions { writerEPUBMetadata = epmd }
-     write_epub = writeEPUB Nothing [] my_writer_options
-     read_html  = readHtml defaultParserState
+xhtml_to_epub epmd xhtml = do
+  stylesheet <- construct_stylesheet
+  writeEPUB
+    (Just stylesheet)
+    []
+    my_writer_options
+    (read_html xhtml)
+  where
+    my_writer_options = defaultWriterOptions { writerEPUBMetadata = epmd }
+    read_html  = readHtml defaultParserState
 
 
 --