From 22c3f63a91361fddfe8315d7e7d5daef42700957 Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Sun, 8 Jul 2012 23:31:37 -0400 Subject: [PATCH] Use parallel-io instead of mapM to download images and full stories. --- lwn-epub.cabal | 1 + src/LWN/HTTP.hs | 3 ++- src/LWN/Page.hs | 4 ++-- src/Main.hs | 4 ++++ 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/lwn-epub.cabal b/lwn-epub.cabal index c9a505c..08ee1bf 100644 --- a/lwn-epub.cabal +++ b/lwn-epub.cabal @@ -24,6 +24,7 @@ executable lwn-epub MissingH == 1.1.*, network == 2.3.*, pandoc == 1.9.*, + parallel-io == 0.3.*, regex-posix == 0.95.*, temporary == 1.*, test-framework == 0.6.*, diff --git a/src/LWN/HTTP.hs b/src/LWN/HTTP.hs index 63079fd..ef4fc5a 100644 --- a/src/LWN/HTTP.hs +++ b/src/LWN/HTTP.hs @@ -3,6 +3,7 @@ module LWN.HTTP where +import Control.Concurrent.ParallelIO (parallel) import qualified Data.ByteString as B (hPut) import qualified Data.Map as Map (Map, empty, insert) import Data.Maybe (fromJust, isNothing) @@ -177,7 +178,7 @@ type ImageMap = Map.Map URL FilePath download_image_urls :: [URL] -> IO ImageMap download_image_urls image_urls = do - files <- mapM save_image image_urls + files <- parallel $ map save_image image_urls let pairs = zip image_urls files return $ foldl my_insert empty_map pairs where diff --git a/src/LWN/Page.hs b/src/LWN/Page.hs index f0ada7c..c61bf6a 100644 --- a/src/LWN/Page.hs +++ b/src/LWN/Page.hs @@ -3,6 +3,7 @@ module LWN.Page where +import Control.Concurrent.ParallelIO (parallel) import qualified Data.Map as Map (lookup) import Data.Time (getCurrentTime) import qualified Data.ByteString.Lazy as B (ByteString, hPut) @@ -172,7 +173,7 @@ type StoryMap = Map.Map URL Article -- These come *before* preprocessing. download_full_story_urls :: Cfg -> [URL] -> IO StoryMap download_full_story_urls cfg story_urls = do - pages <- mapM (page_from_url cfg) story_urls + pages <- parallel $ map (page_from_url cfg) story_urls let pairs = zip story_urls pages return $ foldl my_insert empty_map pairs where @@ -352,7 +353,6 @@ fp_parse_articles xml = do [] -> [] -- Put the separator back, it was lost during the split. let split_articles = map (article_separator ++) split_articles' - --_ <- mapM print_article split_articles real_articles <- mapM parse_html_article split_articles let just_articles = catMaybes real_articles return just_articles diff --git a/src/Main.hs b/src/Main.hs index a1230d6..3ce1eef 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -2,6 +2,7 @@ module Main where +import Control.Concurrent.ParallelIO (stopGlobalPool) import System.Directory (doesFileExist) import System.IO ( Handle, @@ -47,3 +48,6 @@ main = do Nothing -> do _ <- show_help return () + + -- Necessary, for some reason. + stopGlobalPool -- 2.44.2