From 6f0e6cbece7e1b1a3c6b43d19eb2f29088af981c Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Sun, 24 Jun 2012 22:37:49 -0400 Subject: [PATCH] Combine ArticlePage and FullPage into one Page type. Refactor the Epublishable class into the new Page type. Remove images before parsing rather than while parsing a full page. Add an output file option. --- src/CommandLine.hs | 26 +++- src/Epublishable.hs | 43 ------ src/LWN/ArticlePage.hs | 101 ------------- src/LWN/FullPage.hs | 210 -------------------------- src/LWN/Page.hs | 329 +++++++++++++++++++++++++++++++++++++++++ src/Main.hs | 57 ++++--- 6 files changed, 383 insertions(+), 383 deletions(-) delete mode 100644 src/Epublishable.hs delete mode 100644 src/LWN/ArticlePage.hs delete mode 100644 src/LWN/FullPage.hs create mode 100644 src/LWN/Page.hs diff --git a/src/CommandLine.hs b/src/CommandLine.hs index d9be034..a61cc30 100644 --- a/src/CommandLine.hs +++ b/src/CommandLine.hs @@ -17,9 +17,12 @@ import System.Console.CmdArgs ( argPos, cmdArgsApply, cmdArgsMode, + def, details, + help, program, typ, + typFile, summary ) @@ -33,7 +36,8 @@ import ExitCodes data Args = - Args { article :: String } + Args { output :: FilePath, + article :: String } deriving (Show, Data, Typeable) @@ -47,9 +51,15 @@ lwn_epub_summary :: String lwn_epub_summary = program_name ++ "-" ++ (showVersion version) +output_help :: String +output_help = "Output file, defaults to stdout" + arg_spec :: Mode (CmdArgs Args) arg_spec = cmdArgsMode $ - Args { article = "" &= argPos 0 &= typ "ARTICLE" } + Args { + output = def &= typFile &= help output_help, + article = def &= argPos 0 &= typ "ARTICLE" + } &= program program_name &= summary lwn_epub_summary &= details [description] @@ -60,6 +70,7 @@ is_missing_arg_error :: String -> Bool is_missing_arg_error s = startswith "Requires at least" s + parse_args :: IO (CmdArgs Args) parse_args = do x <- getArgs @@ -68,14 +79,17 @@ parse_args = do Right result -> return result Left err -> if (is_missing_arg_error err) then + -- Disregard the error message, show help instead. withArgs ["--help"] parse_args else do hPutStrLn stderr err exitWith (ExitFailure exit_args_parse_failed) - -- Disregard the error message, show help instead. +-- | Really get the command-line arguments. This calls 'parse_args' +-- first to replace the default "wrong number of arguments" error, +-- and then runs 'cmdArgsApply' on the result to do what the +-- 'cmdArgs' function usually does. apply_args :: IO Args -apply_args = do - x <- parse_args - cmdArgsApply x +apply_args = + parse_args >>= cmdArgsApply diff --git a/src/Epublishable.hs b/src/Epublishable.hs deleted file mode 100644 index 3ca9b68..0000000 --- a/src/Epublishable.hs +++ /dev/null @@ -1,43 +0,0 @@ -module Epublishable -where - -import Text.Pandoc -import qualified Data.ByteString.Lazy as B (ByteString, writeFile) -import Data.Time (getCurrentTime) -import Data.Tree.NTree.TypeDefs (NTree) -import System.FilePath (normalise) -import Text.XML.HXT.Core (IOSArrow, XNode, XmlTree) - -import XHTML - - -class (XHTML a) => Epublishable a where - parse :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe a) - - title :: a -> String - - metadata :: a -> IO String - metadata obj = do - date <- getCurrentTime - return $ - "http://lwn.net/\n" ++ - "" ++ (show date) ++ "\n" ++ - "en-US\n" ++ - "Copyright Eklektix, Inc.\n" ++ - "" ++ (title obj) ++ "\n" - - epublish :: a -> FilePath -> IO () - epublish obj path = do - let xhtml = to_xhtml obj - epmd <- metadata obj - epub <- xhtml_to_epub epmd xhtml - let normalized_path = normalise path - B.writeFile normalized_path epub - -xhtml_to_epub :: String -> String -> IO B.ByteString -xhtml_to_epub epmd = - write_epub . read_html - where - my_writer_options = defaultWriterOptions { writerEPUBMetadata = epmd } - write_epub = writeEPUB Nothing [] my_writer_options - read_html = readHtml defaultParserState diff --git a/src/LWN/ArticlePage.hs b/src/LWN/ArticlePage.hs deleted file mode 100644 index 5964013..0000000 --- a/src/LWN/ArticlePage.hs +++ /dev/null @@ -1,101 +0,0 @@ -module LWN.ArticlePage -where - -import Data.String.Utils (strip) -import Data.Maybe (fromJust) -import Data.Tree.NTree.TypeDefs (NTree) -import Text.XML.HXT.Core ( - IOSArrow, - XmlTree, - XNode, - (>>>), - (/>), - getText, - runX, - xshow - ) -import Text.HandsomeSoup (css) - -import Epublishable -import LWN.Article -import XHTML - --- | Defines the ArticlePage data type, containing one 'Article'. -data ArticlePage = ArticlePage { article :: Article } - - -instance XHTML ArticlePage where - to_xhtml (ArticlePage a) = - "" ++ - "" ++ - "" ++ - " " ++ - " " ++ (show $ LWN.Article.title a) ++ "" ++ - "" ++ - "" ++ - "
" ++ - (to_xhtml a) ++ - "
" ++ - "" ++ - "" - - -instance Epublishable ArticlePage where - parse xml = do - articles <- parse_articles xml - case articles of - Just [x] -> return $ Just $ ArticlePage x - _ -> return Nothing - - title (ArticlePage x) = show $ LWN.Article.title x - - --- | Takes data from an LWN page and determines whether or not it's a --- single article (as opposed to a page with multiple articles). -is_article_page :: String -> IO Bool -is_article_page _ = return True - - -parse_headline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) -parse_headline xml = do - let element_filter = xml >>> css "div.PageHeadline h1" - let element_text_filter = element_filter /> getText - element_text <- runX element_text_filter - return $ case element_text of - [x] -> Just $ strip x - [] -> Nothing - _ -> error "Found more than one headline." - -parse_byline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) -parse_byline xml = do - let element_filter = xml >>> css "div.Byline" - let element_text_filter = element_filter /> getText - element_text <- runX element_text_filter - return $ case element_text of - [x] -> Just $ strip x - [] -> Nothing - _ -> error "Found more than one byline." - - -parse_body :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) -parse_body xml = do - let element_filter = xml >>> css "div.ArticleText" - let element_html_filter = xshow element_filter - element_html <- runX element_html_filter - return $ case element_html of - [x] -> Just x - [] -> Nothing - _ -> error "Found more than one article." - - -parse_articles :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe [Article]) -parse_articles xml = do - parsed_headline <- parse_headline xml - parsed_byline <- parse_byline xml - parsed_body <- parse_body xml - let title' = Title (fromJust parsed_headline) - let byline' = Byline parsed_byline - let body' = BodyHtml (fromJust parsed_body) - return $ Just $ [Article title' byline' body'] diff --git a/src/LWN/FullPage.hs b/src/LWN/FullPage.hs deleted file mode 100644 index 1ba7910..0000000 --- a/src/LWN/FullPage.hs +++ /dev/null @@ -1,210 +0,0 @@ -module LWN.FullPage -where - -import Data.String.Utils (split, strip) -import Data.Maybe (catMaybes, fromJust) -import Data.Tree.NTree.TypeDefs (NTree) -import Text.XML.HXT.Core ( - ArrowXml, - IOSArrow, - XmlTree, - XNode, - (>>>), - (/>), - (//>), - getChildren, - getText, - hasAttrValue, - hasName, - none, - processTopDown, - runX, - xshow, - when - ) -import Text.HandsomeSoup (css, parseHtml) - -import Epublishable -import LWN.Article -import XHTML - --- | An LWN page with more than one article on it. These require --- different parsing and display functions than the single-article --- pages. -data FullPage = FullPage { headline :: String, - articles :: [Article] } - -articles_xhtml :: FullPage -> String -articles_xhtml fp = concatMap to_xhtml (articles fp) - -instance XHTML FullPage where - to_xhtml fp = - "" ++ - "" ++ - "" ++ - " " ++ - " " ++ (headline fp) ++ "" ++ - "" ++ - "" ++ - "
" ++ - "

" ++ (headline fp) ++ "

" ++ - (articles_xhtml fp) ++ - "
" ++ - "" ++ - "" - -instance Epublishable FullPage where - parse xml = do - hl <- parse_headline xml - parsed_articles <- parse_articles xml - case parsed_articles of - them@(_:_) -> return $ Just $ FullPage (fromJust hl) them - _ -> return Nothing - - title = headline - - -parse_headline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) -parse_headline xml = do - let element_filter = xml >>> css "div.PageHeadline h1" - let element_text_filter = element_filter /> getText - element_text <- runX element_text_filter - return $ case element_text of - [x] -> Just $ strip x - [] -> Nothing - _ -> error "Found more than one headline." - -parse_article_byline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) -parse_article_byline xml = do - let element_filter = xml >>> css "div.FeatureByLine" - let element_text_filter = element_filter /> getText - element_text <- runX element_text_filter - return $ case element_text of - [x] -> Just $ strip x - [] -> Nothing - _ -> error "Found more than one article byline." - - -parse_article_title :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) -parse_article_title xml = do - let element_filter = xml >>> css "h2.SummaryHL" - let element_text_filter = element_filter //> getText - element_text <- runX element_text_filter - return $ case element_text of - [x] -> Just $ strip x - [] -> Nothing - _ -> error "Found more than one article title." - - - -is_title :: (ArrowXml a) => a XmlTree XmlTree -is_title = - (hasName "h2") - >>> - (hasAttrValue "class" (== "SummaryHL")) - - -is_byline :: (ArrowXml a) => a XmlTree XmlTree -is_byline = - (hasName "div") - >>> - (hasAttrValue "class" (== "FeatureByLine")) - - -is_image :: (ArrowXml a) => a XmlTree XmlTree -is_image = - hasName "img" - - -remove_title :: (ArrowXml a) => a XmlTree XmlTree -remove_title = - processTopDown ((none) `when` is_title) - - -remove_byline :: (ArrowXml a) => a XmlTree XmlTree -remove_byline = - processTopDown ((none) `when` is_byline) - - -remove_images :: (ArrowXml a) => a XmlTree XmlTree -remove_images = - processTopDown ((none) `when` is_image) - - - -parse_article_body :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) -parse_article_body xml = do - -- First, delete the article title and byline. - let clean_xml' = xml >>> remove_title >>> remove_byline >>> remove_images - -- The only child of the body element should be a div.lwn-article - -- since we wrapped the article's HTML in that. - let clean_xml = clean_xml' >>> css "body" >>> getChildren - clean_html <- runX . xshow $ clean_xml - return $ case clean_html of - [x] -> Just x - [] -> Nothing - _ -> error "Found more than one article body." - -parse_article :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe Article) -parse_article xml = do - parsed_article_title <- parse_article_title xml - parsed_article_byline <- parse_article_byline xml - parsed_article_body <- parse_article_body xml - let title' = Title $ fromJust parsed_article_title - let byline' = Byline parsed_article_byline - let body' = BodyHtml $ fromJust parsed_article_body - return $ Just $ Article title' byline' body' - -parse_html_article :: String -> IO (Maybe Article) -parse_html_article html = do - let xml = parseHtml $ wrap_in_body_div html - parse_article xml - - --- | In the full page, all of the article titles and bodies are --- wrapped in a div.ArticleText. -parse_bodies :: IOSArrow XmlTree (NTree XNode) -> IOSArrow XmlTree (NTree XNode) -parse_bodies xml = - xml >>> css "div.ArticleText" - - --- Debug, print a string. -print_article :: String -> IO () -print_article s = do - putStrLn "-----------" - putStrLn "- Article -" - putStrLn "-----------" - putStrLn "" - putStrLn s - putStrLn "" - - --- Debug, print an article's body html. -print_body :: Article -> IO () -print_body x = - print_article bh - where - bh' = body_html x - bh = getBodyHtml bh' - - -parse_articles :: IOSArrow XmlTree (NTree XNode) -> IO [Article] -parse_articles xml = do - bodies <- runX . xshow $ parse_bodies xml - let article_separator = "

" - let split_articles'' = split article_separator (concat bodies) - -- The first element will contain the crap before the first . - let split_articles' = tail split_articles'' - -- Put the separator back, it was lost during the split. - let split_articles = map (article_separator ++) split_articles' - --_ <- mapM print_article split_articles - real_articles <- mapM parse_html_article split_articles - let just_articles = catMaybes real_articles - return just_articles - - -wrap_in_body_div :: String -> String -wrap_in_body_div s = - "
" ++ s ++ "
" diff --git a/src/LWN/Page.hs b/src/LWN/Page.hs new file mode 100644 index 0000000..4d61cfb --- /dev/null +++ b/src/LWN/Page.hs @@ -0,0 +1,329 @@ +module LWN.Page +where + +import Text.Pandoc +import Data.Time (getCurrentTime) +import System.IO (Handle) +import qualified Data.ByteString.Lazy as B (ByteString, hPut) +import Data.String.Utils (split, strip) +import Data.Maybe (catMaybes, fromJust, isNothing) +import Data.Tree.NTree.TypeDefs (NTree) +import Text.XML.HXT.Core ( + ArrowXml, + IOSArrow, + XmlTree, + XNode, + (>>>), + (/>), + (//>), + getChildren, + getText, + hasAttrValue, + hasName, + none, + processTopDown, + runX, + xshow, + when + ) +import Text.HandsomeSoup (css, parseHtml) + +import LWN.Article +import XHTML + +data Page = + -- | An LWN page with one article on it. + ArticlePage { article :: Article } | + + -- | An LWN page with more than one article on it. These require + -- different parsing and display functions than the single-article + -- pages. + FullPage { headline :: String, + articles :: [Article] } + + +instance XHTML Page where + to_xhtml (ArticlePage a) = + "" ++ + "" ++ + "" ++ + " " ++ + " " ++ (show $ LWN.Article.title a) ++ "" ++ + "" ++ + "" ++ + "
" ++ + (to_xhtml a) ++ + "
" ++ + "" ++ + "" + + to_xhtml (FullPage hl as) = + "" ++ + "" ++ + "" ++ + " " ++ + " " ++ hl ++ "" ++ + "" ++ + "" ++ + "
" ++ + "

" ++ hl ++ "

" ++ + (concatMap to_xhtml as) ++ + "
" ++ + "" ++ + "" + + + +remove_images :: (ArrowXml a) => a XmlTree XmlTree +remove_images = + processTopDown ((none) `when` is_image) + + +-- | Preprocessing common to both page types. +preprocess :: IOSArrow XmlTree (NTree XNode) -> IOSArrow XmlTree (NTree XNode) +preprocess xml = + xml >>>remove_images + + +parse :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe Page) +parse xml = do + let clean_xml = preprocess xml + appr <- ap_parse clean_xml + fppr <- fp_parse clean_xml + return $ + if (isNothing appr) then + fppr + else + appr + +-- +-- ArticlePage Stuff +-- +ap_parse :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe Page) +ap_parse xml = do + arts <- ap_parse_articles xml + case arts of + Just [x] -> return $ Just $ ArticlePage x + _ -> return Nothing + + +ap_parse_headline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) +ap_parse_headline xml = do + let element_filter = xml >>> css "div.PageHeadline h1" + let element_text_filter = element_filter /> getText + element_text <- runX element_text_filter + return $ case element_text of + [x] -> Just $ strip x + [] -> Nothing + _ -> error "Found more than one headline." + +ap_parse_byline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) +ap_parse_byline xml = do + let element_filter = xml >>> css "div.Byline" + let element_text_filter = element_filter /> getText + element_text <- runX element_text_filter + return $ case element_text of + [x] -> Just $ strip x + [] -> Nothing + _ -> error "Found more than one byline." + + +ap_parse_body :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) +ap_parse_body xml = do + let element_filter = xml >>> css "div.ArticleText" + let element_html_filter = xshow element_filter + element_html <- runX element_html_filter + return $ case element_html of + [x] -> Just x + [] -> Nothing + _ -> error "Found more than one article." + + +ap_parse_articles :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe [Article]) +ap_parse_articles xml = do + parsed_headline <- ap_parse_headline xml + parsed_byline <- ap_parse_byline xml + parsed_body <- ap_parse_body xml + let title' = Title (fromJust parsed_headline) + let byline' = Byline parsed_byline + let body' = BodyHtml (fromJust parsed_body) + return $ Just $ [Article title' byline' body'] + + + + +-- +-- FullPage Stuff +-- + +fp_parse :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe Page) +fp_parse xml = do + hl <- fp_parse_headline xml + parsed_articles <- fp_parse_articles xml + case parsed_articles of + them@(_:_) -> return $ Just $ FullPage (fromJust hl) them + _ -> return Nothing + + + + +fp_parse_headline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) +fp_parse_headline xml = do + let element_filter = xml >>> css "div.PageHeadline h1" + let element_text_filter = element_filter /> getText + element_text <- runX element_text_filter + return $ case element_text of + [x] -> Just $ strip x + [] -> Nothing + _ -> error "Found more than one headline." + +fp_parse_article_byline :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) +fp_parse_article_byline xml = do + let element_filter = xml >>> css "div.FeatureByLine" + let element_text_filter = element_filter /> getText + element_text <- runX element_text_filter + return $ case element_text of + [x] -> Just $ strip x + [] -> Nothing + _ -> error "Found more than one article byline." + + +fp_parse_article_title :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) +fp_parse_article_title xml = do + let element_filter = xml >>> css "h2.SummaryHL" + let element_text_filter = element_filter //> getText + element_text <- runX element_text_filter + return $ case element_text of + [x] -> Just $ strip x + [] -> Nothing + _ -> error "Found more than one article title." + + + +is_title :: (ArrowXml a) => a XmlTree XmlTree +is_title = + (hasName "h2") + >>> + (hasAttrValue "class" (== "SummaryHL")) + + +is_byline :: (ArrowXml a) => a XmlTree XmlTree +is_byline = + (hasName "div") + >>> + (hasAttrValue "class" (== "FeatureByLine")) + + +is_image :: (ArrowXml a) => a XmlTree XmlTree +is_image = + hasName "img" + + +remove_title :: (ArrowXml a) => a XmlTree XmlTree +remove_title = + processTopDown ((none) `when` is_title) + + +remove_byline :: (ArrowXml a) => a XmlTree XmlTree +remove_byline = + processTopDown ((none) `when` is_byline) + + + +fp_parse_article_body :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe String) +fp_parse_article_body xml = do + -- First, delete the article title and byline. + let clean_xml' = xml >>> remove_title >>> remove_byline + -- The only child of the body element should be a div.lwn-article + -- since we wrapped the article's HTML in that. + let clean_xml = clean_xml' >>> css "body" >>> getChildren + clean_html <- runX . xshow $ clean_xml + return $ case clean_html of + [x] -> Just x + [] -> Nothing + _ -> error "Found more than one article body." + +fp_parse_article :: IOSArrow XmlTree (NTree XNode) -> IO (Maybe Article) +fp_parse_article xml = do + parsed_article_title <- fp_parse_article_title xml + parsed_article_byline <- fp_parse_article_byline xml + parsed_article_body <- fp_parse_article_body xml + let title' = Title $ fromJust parsed_article_title + let byline' = Byline parsed_article_byline + let body' = BodyHtml $ fromJust parsed_article_body + return $ Just $ Article title' byline' body' + +parse_html_article :: String -> IO (Maybe Article) +parse_html_article html = do + let xml = parseHtml $ wrap_in_body_div html + fp_parse_article xml + + +-- | In the full page, all of the article titles and bodies are +-- wrapped in a div.ArticleText. +parse_bodies :: IOSArrow XmlTree (NTree XNode) -> IOSArrow XmlTree (NTree XNode) +parse_bodies xml = + xml >>> css "div.ArticleText" + + +fp_parse_articles :: IOSArrow XmlTree (NTree XNode) -> IO [Article] +fp_parse_articles xml = do + bodies <- runX . xshow $ parse_bodies xml + let article_separator = "

" + let split_articles'' = split article_separator (concat bodies) + -- The first element will contain the crap before the first . + let split_articles' = tail split_articles'' + -- Put the separator back, it was lost during the split. + let split_articles = map (article_separator ++) split_articles' + --_ <- mapM print_article split_articles + real_articles <- mapM parse_html_article split_articles + let just_articles = catMaybes real_articles + return just_articles + + +wrap_in_body_div :: String -> String +wrap_in_body_div s = + "
" ++ s ++ "
" + + + + +-- +-- Epublishable stuff +-- + +title :: Page -> String +title (ArticlePage a) = getTitle $ LWN.Article.title a +title (FullPage hl _) = hl + + +metadata :: Page -> IO String +metadata obj = do + date <- getCurrentTime + return $ + "http://lwn.net/\n" ++ + "" ++ (show date) ++ "\n" ++ + "en-US\n" ++ + "Copyright Eklektix, Inc.\n" ++ + "" ++ (LWN.Page.title obj) ++ "\n" + + +epublish :: Page -> Handle -> IO () +epublish obj handle = do + let xhtml = to_xhtml obj + epmd <- metadata obj + epub <- xhtml_to_epub epmd xhtml + B.hPut handle epub + + +xhtml_to_epub :: String -> String -> IO B.ByteString +xhtml_to_epub epmd = + write_epub . read_html + where + my_writer_options = defaultWriterOptions { writerEPUBMetadata = epmd } + write_epub = writeEPUB Nothing [] my_writer_options + read_html = readHtml defaultParserState diff --git a/src/Main.hs b/src/Main.hs index 8687488..373d423 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -2,16 +2,16 @@ module Main where -import Data.Maybe (fromJust) -import System.Console.CmdArgs (cmdArgsRun) - +import System.IO ( + Handle, + IOMode (WriteMode), + openBinaryFile, + stdout + ) import Text.XML.HXT.Core - import CommandLine (Args(..), apply_args) -import Epublishable -import LWN.ArticlePage -import LWN.FullPage +import LWN.Page my_read :: String -> IOStateArrow s b XmlTree my_read = @@ -20,24 +20,35 @@ my_read = withInputEncoding utf8, withWarnings no ] +-- | If we're given an empty path, return a handle to +-- 'stdout'. Otherwise, open the given file and return a read/write +-- handle to that. +get_output_handle :: FilePath -> IO Handle +get_output_handle path = + if (null path) then + return stdout + else + openBinaryFile path WriteMode + + +-- | Convert the given article to either a URL or a filesystem +-- path. If the given article exists on the filesystem, we assume +-- it's a file. Otherwise, we check to see if it's a URL. Failing +-- that, we try to construct a URL from what we're given and do our +-- best. +real_article_path :: String -> IO String +real_article_path = return . id + main :: IO () main = do Args{..} <- apply_args - print article - - -- let article_html = my_read "test/fixtures/501317-article.html" - -- ioap <- parse article_html - -- let article_page :: ArticlePage = fromJust $ ioap - -- epublish article_page "single_article.epub" - - -- let page_html = my_read "test/fixtures/500848-page.html" - -- ioap_f <- parse page_html - -- let full_page :: FullPage = fromJust $ ioap_f - -- epublish full_page "full_page.epub" + output_handle <- get_output_handle output + input_path <- real_article_path article + let html = my_read input_path + result <- parse html - -- let bigpage_html = my_read "test/fixtures/50844-bigpage.html" - -- ioap_bp <- parse bigpage_html - -- let bigpage :: FullPage = fromJust $ ioap_bp - -- epublish bigpage "bigpage.epub" + case result of + Just stuff -> epublish stuff output_handle + Nothing -> return () - -- putStrLn "Done." + putStrLn "Done." -- 2.44.2