X-Git-Url: http://gitweb.michael.orlitzky.com/?p=dead%2Flwn-epub.git;a=blobdiff_plain;f=src%2FLWN%2FPage.hs;h=3027eaef6c83e76067a744acf98be3a8b6b8579f;hp=2bbe21ae8e918ed49d658cac3fe05b0aa4905843;hb=2953924e2016393a1ffb9e2e82b4c90c8c57dfd3;hpb=2bf48a15ded4501a127bdfe5b29eaf01acf6576e diff --git a/src/LWN/Page.hs b/src/LWN/Page.hs index 2bbe21a..3027eae 100644 --- a/src/LWN/Page.hs +++ b/src/LWN/Page.hs @@ -3,19 +3,28 @@ module LWN.Page where -import qualified Data.Map as Map +import qualified Data.Map as Map (lookup) import Data.Time (getCurrentTime) -import System.IO (Handle, hClose, hFlush) import qualified Data.ByteString.Lazy as B (ByteString, hPut) import Data.String.Utils (split, strip) import Data.Maybe (catMaybes, fromJust, isNothing) +import Prelude hiding (readFile) +import System.Directory (doesFileExist) +import System.IO (Handle, hClose, hFlush, hPutStrLn, stderr) +import System.IO.UTF8 (readFile) import Test.HUnit (Assertion, assertEqual) import Test.Framework (Test, testGroup) import Test.Framework.Providers.HUnit (testCase) -import Text.Pandoc +import Text.Pandoc ( + defaultParserState, + defaultWriterOptions, + readHtml, + writeEPUB, + writerEPUBMetadata) import Text.XML.HXT.Core ( ArrowXml, IOSArrow, + IOStateArrow, XmlTree, (>>>), (/>), @@ -34,31 +43,66 @@ import Text.XML.HXT.Core ( runX, setElemName, xshow, - when - ) + when) import Text.HandsomeSoup (css, parseHtml) +import Configuration (Cfg, password, use_account, username) import LWN.Article -import LWN.HTTP (save_image) +import LWN.HTTP ( + ImageMap, + download_image_urls, + get_page, + log_in, + make_cookie_jar) import LWN.URI (URL, try_make_absolute_url) +import LWN.XHTML (XHTML, parse_lwn, to_xhtml) import Misc (contains) -import XHTML --- Map absolute image URLs to local system file paths where the image --- referenced by the URL is stored. -type ImageMap = Map.Map URL FilePath -download_image_urls :: [URL] -> IO ImageMap -download_image_urls image_urls = do - files <- mapM save_image image_urls - let pairs = zip image_urls files - return $ foldl my_insert empty_map pairs - where - empty_map = Map.empty :: ImageMap +-- | Try to parse the given article using HXT. We try a few different +-- methods; if none of them work, we return 'Nothing'. +get_xml_from_article :: Cfg -> URL -> IO (Maybe (IOStateArrow s b XmlTree)) +get_xml_from_article cfg article_name = do + my_article <- real_article_path article_name + is_file <- doesFileExist my_article + case is_file of + True -> do + contents <- readFile my_article + return $ Just $ parse_lwn contents + False -> do + -- Download the URL and try to parse it. + if use_account cfg then do + -- use_account would be false if these fromJusts would fail. + cj <- make_cookie_jar + li_result <- log_in cj + (fromJust $ username cfg) + (fromJust $ password cfg) + + case li_result of + Left err -> do + let msg = "Failed to log in. " ++ err + hPutStrLn stderr msg + Right response_body -> do + hPutStrLn stderr response_body + + html <- get_page (Just cj) my_article + + case html of + Left err -> do + let msg = "Failed to retrieve page. " ++ err + hPutStrLn stderr msg + return Nothing + Right h -> return $ Just $ parse_lwn h + else do + html <- get_page Nothing my_article + case html of + Left err -> do + let msg = "Failed to retrieve page. " ++ err + hPutStrLn stderr msg + return Nothing + Right h -> return $ Just $ parse_lwn h + - my_insert :: ImageMap -> (URL, Maybe FilePath) -> ImageMap - my_insert dict (_, Nothing) = dict - my_insert dict (k, Just v) = Map.insert k v dict -- Should be called *after* preprocessing. download_images :: IOSArrow XmlTree XmlTree -> IO ImageMap @@ -89,9 +133,7 @@ instance XHTML Page where "