defaultWriterOptions,
readHtml,
writeEPUB,
- writerEPUBMetadata)
+ writerEPUBMetadata,
+ writerUserDataDir)
+import Text.Pandoc.Shared ( readDataFile )
import Text.XML.HXT.Core (
ArrowXml,
IOSArrow,
getChildren,
getText,
hasName,
+ none,
processAttrl,
processTopDown,
this,
is_image,
preprocess,
remove_byline,
+ remove_full_story_paragraphs,
remove_title,
to_xhtml,
to_xml,
+-- | Stolen from writeEPUB.
+default_stylesheet :: IO String
+default_stylesheet =
+ -- This comes with Pandoc, I guess.
+ readDataFile (writerUserDataDir defaultWriterOptions) "epub.css"
+
+
+construct_stylesheet :: IO String
+construct_stylesheet = do
+ defaults <- default_stylesheet
+ -- Allow word-wrapping in <pre> elements.
+ let my_additions = "\n" ++ "pre { white-space: pre-wrap; }" ++ "\n"
+ return $ defaults ++ my_additions
+
page_from_url :: Cfg -> URL -> IO (Maybe Page)
page_from_url cfg url = do
contents <- get_article_contents cfg url
lookup_func :: (ArrowXml a) => URL -> a XmlTree XmlTree
lookup_func href =
case Map.lookup href story_map of
- -- Leave it alone if we don't have the full story.
- Nothing -> this
+ -- Drop the paragraph if we don't have the contents.
+ Nothing -> none
Just v -> to_xml v
article_xml :: (ArrowXml a) => a XmlTree XmlTree
article_xml =
lookup_func
- $<
+ $< -- From HXT's Control.Arrow.ArrowList
(this /> full_story_link >>> getAttrValue "href")
replace_remote_img_srcs :: (ArrowXml a) => ImageMap -> a XmlTree XmlTree
parse :: Cfg -> IOSArrow XmlTree XmlTree -> IO (Maybe Page)
parse cfg xml = do
- story_map <- download_full_stories cfg xml
- let fs_xml = if (full_stories cfg) then
- xml >>> insert_full_stories story_map
- else
- xml
+ fs_xml <- if (full_stories cfg) then do
+ story_map <- download_full_stories cfg xml
+ return $ xml >>> insert_full_stories story_map
+ else do
+ -- Get rid of them if we don't want them.
+ return $ xml >>> remove_full_story_paragraphs
let clean_xml = fs_xml >>> preprocess
image_map <- download_images clean_xml
fp_parse xml = do
hl <- parse_headline xml
parsed_articles <- fp_parse_articles xml
- case parsed_articles of
- [] -> return Nothing
- x -> return $ Just $ FullPage (fromJust hl) x
+ return $ case parsed_articles of
+ [] -> Nothing
+ x -> Just $ FullPage (fromJust hl) x
hClose handle
xhtml_to_epub :: String -> String -> IO B.ByteString
-xhtml_to_epub epmd =
- write_epub . read_html
- where
- my_writer_options = defaultWriterOptions { writerEPUBMetadata = epmd }
- write_epub = writeEPUB Nothing [] my_writer_options
- read_html = readHtml defaultParserState
+xhtml_to_epub epmd xhtml = do
+ stylesheet <- construct_stylesheet
+ writeEPUB
+ (Just stylesheet)
+ []
+ my_writer_options
+ (read_html xhtml)
+ where
+ my_writer_options = defaultWriterOptions { writerEPUBMetadata = epmd }
+ read_html = readHtml defaultParserState
--