module LWN.HTTP
where
+import Control.Concurrent.ParallelIO (parallel)
import qualified Data.ByteString as B (hPut)
-
+import qualified Data.Map as Map (Map, empty, insert)
+import Data.Maybe (fromJust, isNothing)
import Network.Curl (
CurlCode(..),
CurlOption(..),
CurlResponse,
- URLString,
do_curl_,
initialize,
respBody,
respCurlCode,
- withCurlDo
+ withCurlDo
)
import Network.Curl.Download (openURI)
import System.Directory (doesFileExist, getTemporaryDirectory)
-import System.IO (hClose, hPutStrLn, stderr, stdout)
+import System.IO (hClose, hPutStrLn, stderr)
+import qualified System.IO.UTF8 as Utf8 (readFile)
import System.IO.Temp (openBinaryTempFile, openTempFile)
-import LWN.URI (filename)
+import qualified Configuration as C (Cfg(..))
+import LWN.Article (real_article_path)
+import LWN.URI (URL, filename)
-login_url :: URLString
+login_url :: URL
login_url = "https://lwn.net/login"
username_field :: String
default_curl_opts =
[ -- The Global cache is not thread-friendly.
CurlDNSUseGlobalCache False,
-
+
-- And we don't want to use a DNS cache anyway.
CurlDNSCacheTimeout 0,
CurlFollowLocation True,
-- Give it a little time...
- CurlTimeout 45,
-
- -- For debugging.
- CurlVerbose True ]
+ CurlTimeout 45 ]
make_cookie_jar :: IO FilePath
hClose out_handle -- We just want to create it for now.
return out_path
-get_page :: Maybe FilePath -> URLString -> IO (Maybe String)
+get_page :: Maybe FilePath -> URL -> IO (Either String String)
get_page cookie_file url =
withCurlDo $ do
- hPutStrLn stdout ("Getting page: " ++ url ++ "...")
-
-- Create a curl instance.
curl <- initialize
-- Perform the request, and get back a CurlResponse object.
-- The cast is needed to specify how we would like our headers
-- and body returned (Strings).
- putStrLn "Curl options:"
- print curl_opts
-
resp <- do_curl_ curl url curl_opts :: IO CurlResponse
-- Pull out the response code as a CurlCode.
let code = respCurlCode resp
- case code of
- CurlOK -> return $ Just (respBody resp)
- error_code -> do
- hPutStrLn stderr ("HTTP Error: " ++ (show error_code))
+ return $
+ case code of
+ CurlOK -> Right (respBody resp)
+ error_code -> Left ("HTTP Error: " ++ (show error_code))
-- If an error occurred, we want to dump as much information as
-- possible. If this becomes a problem, we can use respGetInfo to
-- query the response object for more information
- return Nothing
where
get_opts =
case cookie_file of
curl_opts = default_curl_opts ++ get_opts
-log_in :: FilePath -> String -> String -> IO Bool
+-- | Log in using curl. Store the resulting session cookies in the
+-- supplied file.
+log_in :: FilePath -> String -> String -> IO (Either String String)
log_in cookie_jar username password =
withCurlDo $ do
- hPutStrLn stdout ("Logging " ++ username ++ " in...")
-
-- Create a curl instance.
curl <- initialize
-- Pull out the response code as a CurlCode.
let code = respCurlCode resp
- case code of
- CurlOK -> return True
- error_code -> do
- hPutStrLn stderr ("HTTP Error: " ++ (show error_code))
+ return $
+ case code of
+ CurlOK -> Right (respBody resp)
+ error_code -> Left $ "HTTP Error: " ++ (show error_code)
-- If an error occurred, we want to dump as much information as
-- possible. If this becomes a problem, we can use respGetInfo to
-- query the response object for more information
- return False
where
post_submit :: String
post_submit = submit_field ++ "=Log+In"
CurlCookieJar cookie_jar,
CurlPost True,
CurlPostFields post_data ]
-
+
curl_opts :: [CurlOption]
curl_opts = default_curl_opts ++ post_opts
-- We need to be able to parse the filename out of the URL
-- so that when we stick our image in the document, the reader
-- knows that type (jpg, png, etc.) it is.
-save_image :: URLString -> IO (Maybe FilePath)
+save_image :: URL -> IO (Maybe FilePath)
save_image url = do
it_exists <- doesFileExist url
if it_exists then do
Right bs -> do
B.hPut out_handle bs
return $ Just out_path
+
+
+
+
+-- | Map absolute image URLs to local system file paths where the
+-- image referenced by the URL is stored.
+type ImageMap = Map.Map URL FilePath
+
+-- | Download every image in the list (in parallel) and build a map
+-- from each URL to the local file it was saved under. URLs whose
+-- download failed (i.e. 'save_image' returned Nothing) are simply
+-- left out of the resulting map.
+download_image_urls :: [URL] -> IO ImageMap
+download_image_urls image_urls = do
+  local_files <- parallel $ map save_image image_urls
+  return $ foldl add_result (Map.empty :: ImageMap) (zip image_urls local_files)
+  where
+    -- Fold one download result into the map, skipping failures.
+    add_result :: ImageMap -> (URL, Maybe FilePath) -> ImageMap
+    add_result dict (url, result) =
+      case result of
+        Nothing   -> dict
+        Just path -> Map.insert url path dict
+
+
+
+
+
+-- | If both a username and a password are present in the config, log
+-- in with curl and store the resulting session cookies in a
+-- freshly-created cookie jar, returning an updated config pointing at
+-- that jar. If either credential is missing, the config is returned
+-- unchanged.
+get_login_cookie :: C.Cfg -> IO C.Cfg
+get_login_cookie cfg =
+  -- Match both credentials at once; this replaces the partial
+  -- isNothing/fromJust combination with a total pattern match.
+  case (C.username cfg, C.password cfg) of
+    (Just uname, Just pword) -> do
+      cj <- make_cookie_jar
+      li_result <- log_in cj uname pword
+
+      case li_result of
+        Left err ->
+          hPutStrLn stderr ("Failed to log in. " ++ err)
+        Right response_body ->
+          -- NOTE(review): this dumps the entire login response body to
+          -- stderr; looks like leftover debugging output — confirm.
+          hPutStrLn stderr response_body
+
+      -- NOTE(review): the jar is attached to the config even when the
+      -- login failed, so callers may receive an empty cookie jar.
+      return $ cfg { C.cookie_jar = Just cj }
+    _ -> return cfg
+
+
+-- | Retrieve the raw contents of the given article. If the (resolved)
+-- article name refers to an existing local file, read it as UTF-8;
+-- otherwise treat it as a URL and download it with 'get_page', using
+-- the session cookie jar from the config (if any). Returns 'Nothing'
+-- if the download fails.
+get_article_contents :: C.Cfg -> URL -> IO (Maybe String)
+get_article_contents cfg article_name = do
+  my_article <- real_article_path article_name
+  is_file <- doesFileExist my_article
+  -- Booleans want if/then/else, not a case on True/False.
+  if is_file
+    then do
+      contents <- Utf8.readFile my_article
+      return (Just contents)
+    else do
+      -- Not a local file: download the URL instead.
+      html <- get_page (C.cookie_jar cfg) my_article
+      case html of
+        Left err -> do
+          hPutStrLn stderr ("Failed to retrieve page. " ++ err)
+          return Nothing
+        Right contents -> return (Just contents)