{-# LANGUAGE DoAndIfThenElse #-}

-- | HTTP helpers built on curl: fetching pages, logging in (storing
--   the session cookies in a cookie jar), and saving images to
--   temporary files.
module LWN.HTTP
where

import qualified Data.ByteString as B (hPut)
import Data.Char (intToDigit, isAlphaNum, isAscii, ord, toUpper)
import qualified Data.Map as Map (Map, empty, insert)
import Network.Curl (
  CurlCode(..),
  CurlOption(..),
  CurlResponse,
  do_curl_,
  initialize,
  respBody,
  respCurlCode,
  withCurlDo )
import Network.Curl.Download (openURI)
import System.Directory (doesFileExist, getTemporaryDirectory)
import System.IO (hClose, hPutStrLn, stderr)
import System.IO.Temp (openBinaryTempFile, openTempFile)

import LWN.URI (URL, filename)

-- | The URL that the login form is posted to.
login_url :: URL
login_url = "https://lwn.net/login"

-- | Name of the form field carrying the username.
username_field :: String
username_field = "Username"

-- | Name of the form field carrying the password.
password_field :: String
password_field = "Password"

-- | Name of the form field for the submit button.
submit_field :: String
submit_field = "submit"


-- | Options applied to every request made by this module.
default_curl_opts :: [CurlOption]
default_curl_opts =
  [ -- The Global cache is not thread-friendly.
    CurlDNSUseGlobalCache False,

    -- And we don't want to use a DNS cache anyway.
    CurlDNSCacheTimeout 0,

    -- Follow redirects.
    CurlFollowLocation True,

    -- Give it a little time...
    CurlTimeout 45 ]


-- | Create an empty file in the system temporary directory to be used
--   as a cookie jar, and return its path.
make_cookie_jar :: IO FilePath
make_cookie_jar = do
  temp_dir <- getTemporaryDirectory
  let file_name_template = "lwn-epub-cookies.txt"
  (out_path, out_handle) <- openTempFile temp_dir file_name_template
  hClose out_handle -- We just want to create it for now.
  return out_path


-- | Percent-encode a string for use as a name or value in an
--   @application/x-www-form-urlencoded@ request body.
--
--   ASCII letters, ASCII digits and the characters @-_.~@ are passed
--   through untouched. Every other character is converted to its
--   UTF-8 bytes, each of which is written as @%XX@ with upper-case
--   hexadecimal digits (so a space becomes @%20@).
url_encode :: String -> String
url_encode = concatMap encode_char
  where
    encode_char :: Char -> String
    encode_char c
      | is_unreserved c = [c]
      | otherwise = concatMap percent (utf8_bytes (ord c))

    is_unreserved :: Char -> Bool
    is_unreserved c = (isAscii c && isAlphaNum c) || c `elem` "-_.~"

    -- Render one byte (0-255) as a percent escape.
    percent :: Int -> String
    percent b = ['%', hex_digit (b `div` 16), hex_digit (b `mod` 16)]

    hex_digit :: Int -> Char
    hex_digit = toUpper . intToDigit

    -- The UTF-8 encoding of a code point, written with div/mod so
    -- that no additional imports are needed.
    utf8_bytes :: Int -> [Int]
    utf8_bytes n
      | n < 0x80 = [n]
      | n < 0x800 =
          [ 0xC0 + n `div` 0x40
          , continuation n ]
      | n < 0x10000 =
          [ 0xE0 + n `div` 0x1000
          , continuation (n `div` 0x40)
          , continuation n ]
      | otherwise =
          [ 0xF0 + n `div` 0x40000
          , continuation (n `div` 0x1000)
          , continuation (n `div` 0x40)
          , continuation n ]

    -- A UTF-8 continuation byte carrying the low six bits of its
    -- argument.
    continuation :: Int -> Int
    continuation m = 0x80 + m `mod` 0x40


-- | Perform a single curl request against the given URL with the
--   given options. Returns the response body on success, or a
--   message containing the curl error code on failure.
perform_request :: URL -> [CurlOption] -> IO (Either String String)
perform_request url opts =
  withCurlDo $ do
    -- Create a curl instance.
    curl <- initialize

    -- Perform the request, and get back a CurlResponse object.
    -- The cast is needed to specify how we would like our headers
    -- and body returned (Strings).
    resp <- do_curl_ curl url opts :: IO CurlResponse

    -- Only the CurlCode is reported on failure. If that turns out
    -- not to be enough information, respGetInfo can be used to
    -- query the response object for more.
    return $
      case respCurlCode resp of
        CurlOK -> Right (respBody resp)
        error_code -> Left ("HTTP Error: " ++ show error_code)


-- | Fetch the page at the given URL. If a cookie file is supplied,
--   it is passed to curl via 'CurlCookieFile'.
get_page :: Maybe FilePath -> URL -> IO (Either String String)
get_page cookie_file url =
  perform_request url curl_opts
  where
    get_opts :: [CurlOption]
    get_opts =
      case cookie_file of
        Nothing -> []
        Just cookies -> [ CurlCookieFile cookies ]

    curl_opts :: [CurlOption]
    curl_opts = default_curl_opts ++ get_opts


-- | Log in using curl. Store the resulting session cookies in the
--   supplied file.
--
--   The username and password are percent-encoded before being
--   placed in the form body, so credentials containing characters
--   such as @&@, @=@, @+@ or @%@ are transmitted intact.
log_in :: FilePath -> String -> String -> IO (Either String String)
log_in cookie_jar username password =
  perform_request login_url curl_opts
  where
    -- The submit value is already in encoded form ("Log In").
    post_submit :: String
    post_submit = submit_field ++ "=Log+In"

    post_username :: String
    post_username = username_field ++ "=" ++ url_encode username

    post_password :: String
    post_password = password_field ++ "=" ++ url_encode password

    post_data :: [String]
    post_data = [post_username, post_password, post_submit]

    post_opts :: [CurlOption]
    post_opts =
      [ CurlCookieSession True,
        CurlCookieJar cookie_jar,
        CurlPost True,
        CurlPostFields post_data ]

    curl_opts :: [CurlOption]
    curl_opts = default_curl_opts ++ post_opts


-- | Save the image at 'url'. Saves to a temporary file, and
--   returns the path to that file if successful. Otherwise,
--   returns 'Nothing'.
--
--   We need to be able to parse the filename out of the URL
--   so that when we stick our image in the document, the reader
--   knows what type (jpg, png, etc.) it is.
save_image :: URL -> IO (Maybe FilePath)
save_image url = do
  -- A "URL" that names an existing file is already local, so it is
  -- returned unchanged rather than downloaded.
  it_exists <- doesFileExist url
  if it_exists
    then return (Just url)
    else
      case filename url of
        -- Without a filename we cannot name the temporary file.
        Nothing -> return Nothing
        Just file -> do
          -- Download before creating the temporary file, so that a
          -- failed download leaves neither an open handle nor an
          -- empty file behind.
          result <- openURI url
          case result of
            Left err -> do
              hPutStrLn stderr ("HTTP Error: " ++ err)
              return Nothing
            Right bs -> do
              temp_dir <- getTemporaryDirectory
              (out_path, out_handle) <- openBinaryTempFile temp_dir file
              B.hPut out_handle bs
              -- Close the handle so the data is flushed to disk and
              -- the descriptor is released before the path is used.
              hClose out_handle
              return (Just out_path)


-- | Map absolute image URLs to local system file paths where the
--   image referenced by the URL is stored.
type ImageMap = Map.Map URL FilePath

-- | Save each of the given image URLs with 'save_image' and collect
--   the successful ones into an 'ImageMap'. URLs whose image could
--   not be saved are left out of the map. If a URL occurs more than
--   once, the path from its last occurrence is the one kept.
download_image_urls :: [URL] -> IO ImageMap
download_image_urls image_urls = do
  files <- mapM save_image image_urls
  let pairs = zip image_urls files
  return $ foldl my_insert empty_map pairs
  where
    empty_map :: ImageMap
    empty_map = Map.empty

    -- Add a URL to the map only if its image was actually saved.
    my_insert :: ImageMap -> (URL, Maybe FilePath) -> ImageMap
    my_insert dict (_, Nothing) = dict
    my_insert dict (k, Just v) = Map.insert k v dict