module LWN.HTTP
where
+import Control.Concurrent.ParallelIO (parallel)
import qualified Data.ByteString as B (hPut)
import qualified Data.Map as Map (Map, empty, insert)
+import Data.Maybe (fromJust, isNothing)
import Network.Curl (
CurlCode(..),
CurlOption(..),
import Network.Curl.Download (openURI)
import System.Directory (doesFileExist, getTemporaryDirectory)
import System.IO (hClose, hPutStrLn, stderr)
+import qualified System.IO.UTF8 as Utf8 (readFile)
import System.IO.Temp (openBinaryTempFile, openTempFile)
+import qualified Configuration as C (Cfg(..))
+import LWN.Article (real_article_path)
import LWN.URI (URL, filename)
login_url :: URL
default_curl_opts =
[ -- The Global cache is not thread-friendly.
CurlDNSUseGlobalCache False,
-
+
-- And we don't want to use a DNS cache anyway.
CurlDNSCacheTimeout 0,
CurlCookieJar cookie_jar,
CurlPost True,
CurlPostFields post_data ]
-
+
curl_opts :: [CurlOption]
curl_opts = default_curl_opts ++ post_opts
download_image_urls :: [URL] -> IO ImageMap
download_image_urls image_urls = do
- files <- mapM save_image image_urls
+ files <- parallel $ map save_image image_urls
let pairs = zip image_urls files
return $ foldl my_insert empty_map pairs
where
my_insert :: ImageMap -> (URL, Maybe FilePath) -> ImageMap
my_insert dict (_, Nothing) = dict
my_insert dict (k, Just v) = Map.insert k v dict
+
+
+
+
+
+get_login_cookie :: C.Cfg -> IO C.Cfg
+get_login_cookie cfg
+ | isNothing (C.username cfg) = return cfg
+ | isNothing (C.password cfg) = return cfg
+ | otherwise = do
+ let uname = fromJust $ C.username cfg
+ let pword = fromJust $ C.password cfg
+ cj <- make_cookie_jar
+ li_result <- log_in cj uname pword
+
+ case li_result of
+ Left err -> do
+ let msg = "Failed to log in. " ++ err
+ hPutStrLn stderr msg
+ Right response_body -> do
+ hPutStrLn stderr response_body
+
+ return $ cfg { C.cookie_jar = Just cj }
+
+
+-- | Try to parse the given article using HXT. We try a few different
+-- methods; if none of them work, we return 'Nothing'.
+get_article_contents :: C.Cfg -> URL -> IO (Maybe String)
+get_article_contents cfg article_name = do
+ my_article <- real_article_path article_name
+ is_file <- doesFileExist my_article
+ case is_file of
+ True -> do
+ contents <- Utf8.readFile my_article
+ return $ Just $ contents
+ False -> do
+ -- Download the URL and try to parse it.
+ html <- get_page (C.cookie_jar cfg) my_article
+
+ case html of
+ Left err -> do
+ let msg = "Failed to retrieve page. " ++ err
+ hPutStrLn stderr msg
+ return Nothing
+ Right h -> return $ Just h