From: Michael Orlitzky Date: Tue, 26 Jun 2012 20:24:38 +0000 (-0400) Subject: Add curl-fu for logging in and retrieving pages. X-Git-Tag: v0.0.1~44 X-Git-Url: http://gitweb.michael.orlitzky.com/?p=dead%2Flwn-epub.git;a=commitdiff_plain;h=241e9b547e48d4af7eb37ca6989775c0d09152bc Add curl-fu for logging in and retrieving pages. --- diff --git a/src/LWN/HTTP.hs b/src/LWN/HTTP.hs new file mode 100644 index 0000000..8b4c14d --- /dev/null +++ b/src/LWN/HTTP.hs @@ -0,0 +1,120 @@ +module LWN.HTTP +where + +import Network.Curl ( + CurlCode(..), + CurlOption(..), + CurlResponse, + URLString, + do_curl_, + initialize, + respBody, + respCurlCode, + withCurlDo + ) +import System.IO (hPutStrLn, stderr) + +login_url :: URLString +login_url = "https://lwn.net/login" + +username_field :: String +username_field = "Username" + +password_field :: String +password_field = "Password" + +submit_field :: String +submit_field = "submit" + + +default_curl_opts :: [CurlOption] +default_curl_opts = + [ -- The Global cache is not thread-friendly. + CurlDNSUseGlobalCache False, + + -- And we don't want to use a DNS cache anyway. + CurlDNSCacheTimeout 0, + + -- Give it a little time... + CurlTimeout 45, + + -- And let us know when things go wrong. + CurlVerbose True ] + + + +get_page :: Maybe FilePath -> URLString -> IO (Maybe String) +get_page cookie_jar url = + withCurlDo $ do + -- Create a curl instance. + curl <- initialize + + -- Perform the request, and get back a CurlResponse object. + -- The cast is needed to specify how we would like our headers + -- and body returned (Strings). + resp <- do_curl_ curl login_url curl_opts :: IO CurlResponse + + -- Pull out the response code as a CurlCode. + let code = respCurlCode resp + + case code of + CurlOK -> return $ Just (respBody resp) + error_code -> do + hPutStrLn stderr ("HTTP Error: " ++ (show error_code)) + -- If an error occurred, we want to dump as much information as + -- possible. If this becomes a problem, we can use respGetInfo to + -- query the response object for more information + return Nothing + where + get_opts = + case cookie_jar of + Nothing -> [] + Just cookies -> [ CurlCookieJar cookies ] + + curl_opts = default_curl_opts ++ get_opts + + +log_in :: FilePath -> String -> String -> IO Bool +log_in cookie_jar username password = + withCurlDo $ do + -- Create a curl instance. + curl <- initialize + + -- Perform the request, and get back a CurlResponse object. + -- The cast is needed to specify how we would like our headers + -- and body returned (Strings). + resp <- do_curl_ curl login_url curl_opts :: IO CurlResponse + + -- Pull out the response code as a CurlCode. + let code = respCurlCode resp + + case code of + CurlOK -> return True + error_code -> do + hPutStrLn stderr ("HTTP Error: " ++ (show error_code)) + -- If an error occurred, we want to dump as much information as + -- possible. If this becomes a problem, we can use respGetInfo to + -- query the response object for more information + return False + where + post_submit :: String + post_submit = submit_field ++ "=Log+In" + + post_username :: String + post_username = username_field ++ "=" ++ username + + post_password :: String + post_password = password_field ++ "=" ++ password + + post_data :: [String] + post_data = [post_username, post_password] + + post_opts :: [CurlOption] + post_opts = + [ CurlCookieSession True, + CurlCookieJar cookie_jar, + CurlPost True, + CurlPostFields post_data ] + + curl_opts :: [CurlOption] + curl_opts = default_curl_opts ++ post_opts