X-Git-Url: http://gitweb.michael.orlitzky.com/?p=dead%2Flwn-epub.git;a=blobdiff_plain;f=src%2FLWN%2FPage.hs;h=3d53284c4b66e9c67ff222d19982ce4e2ced45d2;hp=49faa0bb4d6d1b0c77c0b4c6eb8d00e1a9261c50;hb=6a7cfdf0880ee5c5367e794babb30fa7eac22f39;hpb=5ac7dd7f301ba633b38d7bf2361044d25204bb6e diff --git a/src/LWN/Page.hs b/src/LWN/Page.hs index 49faa0b..3d53284 100644 --- a/src/LWN/Page.hs +++ b/src/LWN/Page.hs @@ -20,7 +20,9 @@ import Text.Pandoc ( defaultWriterOptions, readHtml, writeEPUB, - writerEPUBMetadata) + writerEPUBMetadata, + writerUserDataDir) +import Text.Pandoc.Shared ( readDataFile ) import Text.XML.HXT.Core ( ArrowXml, IOSArrow, @@ -34,6 +36,7 @@ import Text.XML.HXT.Core ( getChildren, getText, hasName, + none, processAttrl, processTopDown, this, @@ -42,7 +45,7 @@ import Text.XML.HXT.Core ( when) import Text.HandsomeSoup (css, parseHtml) -import Configuration (Cfg) +import Configuration (Cfg, full_stories) import LWN.Article import LWN.HTTP ( ImageMap, @@ -58,6 +61,7 @@ import LWN.XHTML ( is_image, preprocess, remove_byline, + remove_full_story_paragraphs, remove_title, to_xhtml, to_xml, @@ -111,6 +115,20 @@ instance XHTML Page where +-- | Stolen from writeEPUB. +default_stylesheet :: IO String +default_stylesheet = + -- This comes with Pandoc, I guess. + readDataFile (writerUserDataDir defaultWriterOptions) "epub.css" + + +construct_stylesheet :: IO String +construct_stylesheet = do + defaults <- default_stylesheet + -- Allow word-wrapping in
 elements.
+  let my_additions = "\n" ++ "pre { white-space: pre-wrap; }" ++ "\n"
+  return $ defaults ++ my_additions
+
 page_from_url :: Cfg -> URL -> IO (Maybe Page)
 page_from_url cfg url = do
   contents <- get_article_contents cfg url  
@@ -127,14 +145,14 @@ insert_full_stories story_map =
     lookup_func :: (ArrowXml a) => URL -> a XmlTree XmlTree
     lookup_func href =
       case Map.lookup href story_map of
-        -- Leave it alone if we don't have the full story.
-        Nothing -> this
+        -- Drop the paragraph if we don't have the contents.
+        Nothing -> none
         Just v -> to_xml v
 
     article_xml :: (ArrowXml a) => a XmlTree XmlTree
     article_xml =
       lookup_func
-      $<
+      $< -- From HXT's Control.Arrow.ArrowList
       (this /> full_story_link >>> getAttrValue "href")
                    
 replace_remote_img_srcs :: (ArrowXml a) => ImageMap -> a XmlTree XmlTree
@@ -192,8 +210,12 @@ download_full_stories cfg xml = do
 
 parse :: Cfg -> IOSArrow XmlTree XmlTree -> IO (Maybe Page)
 parse cfg xml = do
-  story_map <- download_full_stories cfg xml
-  let fs_xml = xml >>> insert_full_stories story_map
+  fs_xml <- if (full_stories cfg) then do
+             story_map <- download_full_stories cfg xml
+             return $ xml >>> insert_full_stories story_map
+           else do
+             -- Get rid of them if we don't want them.
+             return $ xml >>> remove_full_story_paragraphs
 
   let clean_xml = fs_xml >>> preprocess
   image_map <- download_images clean_xml
@@ -280,9 +302,9 @@ fp_parse :: IOSArrow XmlTree XmlTree -> IO (Maybe Page)
 fp_parse xml = do
     hl <- parse_headline xml
     parsed_articles <- fp_parse_articles xml
-    case parsed_articles of
-      []          -> return Nothing
-      x -> return $ Just $ FullPage (fromJust hl) x
+    return $ case parsed_articles of
+      [] -> Nothing
+      x  -> Just $ FullPage (fromJust hl) x
 
 
 
@@ -395,12 +417,16 @@ epublish obj handle = do
   hClose handle
 
 xhtml_to_epub :: String -> String -> IO B.ByteString
-xhtml_to_epub epmd =
-   write_epub . read_html
-   where
-     my_writer_options = defaultWriterOptions { writerEPUBMetadata = epmd }
-     write_epub = writeEPUB Nothing [] my_writer_options
-     read_html  = readHtml defaultParserState
+xhtml_to_epub epmd xhtml = do
+  stylesheet <- construct_stylesheet
+  writeEPUB
+    (Just stylesheet)
+    []
+    my_writer_options
+    (read_html xhtml)
+  where
+    my_writer_options = defaultWriterOptions { writerEPUBMetadata = epmd }
+    read_html  = readHtml defaultParserState
 
 
 --