X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FLWN%2FPage.hs;h=cdcd1a602e1add49f729f5a5f7263eebae2e69a5;hb=9522dae5ae266206b10e5517215e9d7143bf3df8;hp=2bbe21ae8e918ed49d658cac3fe05b0aa4905843;hpb=2bf48a15ded4501a127bdfe5b29eaf01acf6576e;p=dead%2Flwn-epub.git diff --git a/src/LWN/Page.hs b/src/LWN/Page.hs index 2bbe21a..cdcd1a6 100644 --- a/src/LWN/Page.hs +++ b/src/LWN/Page.hs @@ -3,20 +3,31 @@ module LWN.Page where -import qualified Data.Map as Map +import Control.Concurrent.ParallelIO (parallel) +import qualified Data.Map as Map (lookup) import Data.Time (getCurrentTime) -import System.IO (Handle, hClose, hFlush) import qualified Data.ByteString.Lazy as B (ByteString, hPut) import Data.String.Utils (split, strip) +import qualified Data.Map as Map (Map, empty, insert) import Data.Maybe (catMaybes, fromJust, isNothing) +import Prelude hiding (readFile) +import System.IO (Handle, hClose, hFlush) import Test.HUnit (Assertion, assertEqual) import Test.Framework (Test, testGroup) import Test.Framework.Providers.HUnit (testCase) -import Text.Pandoc +import Text.Pandoc ( + defaultParserState, + defaultWriterOptions, + readHtml, + writeEPUB, + writerEPUBMetadata, + writerUserDataDir) +import Text.Pandoc.Shared ( readDataFile ) import Text.XML.HXT.Core ( ArrowXml, IOSArrow, XmlTree, + ($<), (>>>), (/>), (//>), @@ -24,47 +35,39 @@ import Text.XML.HXT.Core ( getAttrValue, getChildren, getText, - hasAttrValue, hasName, - isElem, - mkName, none, processAttrl, processTopDown, + this, runX, - setElemName, xshow, - when - ) + when) import Text.HandsomeSoup (css, parseHtml) +import Configuration (Cfg, full_stories) import LWN.Article -import LWN.HTTP (save_image) -import LWN.URI (URL, try_make_absolute_url) -import Misc (contains) -import XHTML - --- Map absolute image URLs to local system file paths where the image --- referenced by the URL is stored. -type ImageMap = Map.Map URL FilePath - -download_image_urls :: [URL] -> IO ImageMap -download_image_urls image_urls = do - files <- mapM save_image image_urls - let pairs = zip image_urls files - return $ foldl my_insert empty_map pairs - where - empty_map = Map.empty :: ImageMap +import LWN.HTTP ( + ImageMap, + download_image_urls, + get_article_contents) +import LWN.URI (URL) +import LWN.XHTML ( + XHTML, + full_story_urls, + image_srcs, + full_story_link, + full_story_paragraph, + is_image, + preprocess, + remove_byline, + remove_full_story_paragraphs, + remove_title, + to_xhtml, + to_xml, + xml_from_contents) - my_insert :: ImageMap -> (URL, Maybe FilePath) -> ImageMap - my_insert dict (_, Nothing) = dict - my_insert dict (k, Just v) = Map.insert k v dict --- Should be called *after* preprocessing. -download_images :: IOSArrow XmlTree XmlTree -> IO ImageMap -download_images xml = do - image_urls <- runX $ xml >>> image_srcs - download_image_urls image_urls data Page = @@ -89,9 +92,7 @@ instance XHTML Page where "