Only fetch login cookies once.
[dead/lwn-epub.git] / src / LWN / HTTP.hs
1 {-# LANGUAGE DoAndIfThenElse #-}
2
3 module LWN.HTTP
4 where
5
6 import qualified Data.ByteString as B (hPut)
7 import qualified Data.Map as Map (Map, empty, insert)
8 import Data.Maybe (fromJust, isNothing)
9 import Network.Curl (
10 CurlCode(..),
11 CurlOption(..),
12 CurlResponse,
13 do_curl_,
14 initialize,
15 respBody,
16 respCurlCode,
17 withCurlDo
18 )
19 import Network.Curl.Download (openURI)
20 import System.Directory (doesFileExist, getTemporaryDirectory)
21 import System.IO (hClose, hPutStrLn, stderr)
22 import qualified System.IO.UTF8 as Utf8 (readFile)
23 import System.IO.Temp (openBinaryTempFile, openTempFile)
24
25 import qualified Configuration as C (Cfg(..))
26 import LWN.Article (real_article_path)
27 import LWN.URI (URL, filename)
28
29 login_url :: URL
30 login_url = "https://lwn.net/login"
31
32 username_field :: String
33 username_field = "Username"
34
35 password_field :: String
36 password_field = "Password"
37
38 submit_field :: String
39 submit_field = "submit"
40
41
42 default_curl_opts :: [CurlOption]
43 default_curl_opts =
44 [ -- The Global cache is not thread-friendly.
45 CurlDNSUseGlobalCache False,
46
47 -- And we don't want to use a DNS cache anyway.
48 CurlDNSCacheTimeout 0,
49
50 -- Follow redirects.
51 CurlFollowLocation True,
52
53 -- Give it a little time...
54 CurlTimeout 45 ]
55
56
57 make_cookie_jar :: IO FilePath
58 make_cookie_jar = do
59 temp_dir <- getTemporaryDirectory
60 let file_name_template = "lwn-epub-cookies.txt"
61 (out_path, out_handle) <- openTempFile temp_dir file_name_template
62 hClose out_handle -- We just want to create it for now.
63 return out_path
64
65 get_page :: Maybe FilePath -> URL -> IO (Either String String)
66 get_page cookie_file url =
67 withCurlDo $ do
68 -- Create a curl instance.
69 curl <- initialize
70
71 -- Perform the request, and get back a CurlResponse object.
72 -- The cast is needed to specify how we would like our headers
73 -- and body returned (Strings).
74 resp <- do_curl_ curl url curl_opts :: IO CurlResponse
75
76 -- Pull out the response code as a CurlCode.
77 let code = respCurlCode resp
78
79 return $
80 case code of
81 CurlOK -> Right (respBody resp)
82 error_code -> Left ("HTTP Error: " ++ (show error_code))
83 -- If an error occurred, we want to dump as much information as
84 -- possible. If this becomes a problem, we can use respGetInfo to
85 -- query the response object for more information
86 where
87 get_opts =
88 case cookie_file of
89 Nothing -> []
90 Just cookies -> [ CurlCookieFile cookies ]
91
92 curl_opts = default_curl_opts ++ get_opts
93
94
95 -- | Log in using curl. Store the resulting session cookies in the
96 -- supplied file.
97 log_in :: FilePath -> String -> String -> IO (Either String String)
98 log_in cookie_jar username password =
99 withCurlDo $ do
100 -- Create a curl instance.
101 curl <- initialize
102
103 -- Perform the request, and get back a CurlResponse object.
104 -- The cast is needed to specify how we would like our headers
105 -- and body returned (Strings).
106 resp <- do_curl_ curl login_url curl_opts :: IO CurlResponse
107
108 -- Pull out the response code as a CurlCode.
109 let code = respCurlCode resp
110
111 return $
112 case code of
113 CurlOK -> Right (respBody resp)
114 error_code -> Left $ "HTTP Error: " ++ (show error_code)
115 -- If an error occurred, we want to dump as much information as
116 -- possible. If this becomes a problem, we can use respGetInfo to
117 -- query the response object for more information
118 where
119 post_submit :: String
120 post_submit = submit_field ++ "=Log+In"
121
122 post_username :: String
123 post_username = username_field ++ "=" ++ username
124
125 post_password :: String
126 post_password = password_field ++ "=" ++ password
127
128 post_data :: [String]
129 post_data = [post_username, post_password, post_submit]
130
131 post_opts :: [CurlOption]
132 post_opts =
133 [ CurlCookieSession True,
134 CurlCookieJar cookie_jar,
135 CurlPost True,
136 CurlPostFields post_data ]
137
138 curl_opts :: [CurlOption]
139 curl_opts = default_curl_opts ++ post_opts
140
141
142 -- | Save the image at 'url'. Saves to a temporary file, and
143 -- returns the path to that file if successful. Otherwise,
144 -- returns 'Nothing'.
145 --
146 -- We need to be able to parse the filename out of the URL
147 -- so that when we stick our image in the document, the reader
148 -- knows that type (jpg, png, etc.) it is.
149 save_image :: URL -> IO (Maybe FilePath)
150 save_image url = do
151 it_exists <- doesFileExist url
152 if it_exists then do
153 -- It's local, just use it.
154 return $ Just url
155 else do
156 let fn = filename url
157 case fn of
158 Nothing -> return Nothing
159 Just file -> do
160 temp_dir <- getTemporaryDirectory
161 (out_path, out_handle) <- openBinaryTempFile temp_dir file
162 result <- openURI url
163 case result of
164 Left err -> do
165 hPutStrLn stderr ("HTTP Error: " ++ err)
166 return Nothing
167 Right bs -> do
168 B.hPut out_handle bs
169 return $ Just out_path
170
171
172
173
174 -- | Map absolute image URLs to local system file paths where the
175 -- image referenced by the URL is stored.
176 type ImageMap = Map.Map URL FilePath
177
178 download_image_urls :: [URL] -> IO ImageMap
179 download_image_urls image_urls = do
180 files <- mapM save_image image_urls
181 let pairs = zip image_urls files
182 return $ foldl my_insert empty_map pairs
183 where
184 empty_map = Map.empty :: ImageMap
185
186 my_insert :: ImageMap -> (URL, Maybe FilePath) -> ImageMap
187 my_insert dict (_, Nothing) = dict
188 my_insert dict (k, Just v) = Map.insert k v dict
189
190
191
192
193
194 get_login_cookie :: C.Cfg -> IO C.Cfg
195 get_login_cookie cfg
196 | isNothing (C.username cfg) = return cfg
197 | isNothing (C.password cfg) = return cfg
198 | otherwise = do
199 let uname = fromJust $ C.username cfg
200 let pword = fromJust $ C.password cfg
201 cj <- make_cookie_jar
202 li_result <- log_in cj uname pword
203
204 case li_result of
205 Left err -> do
206 let msg = "Failed to log in. " ++ err
207 hPutStrLn stderr msg
208 Right response_body -> do
209 hPutStrLn stderr response_body
210
211 return $ cfg { C.cookie_jar = Just cj }
212
213
214 -- | Try to parse the given article using HXT. We try a few different
215 -- methods; if none of them work, we return 'Nothing'.
216 get_article_contents :: C.Cfg -> URL -> IO (Maybe String)
217 get_article_contents cfg article_name = do
218 my_article <- real_article_path article_name
219 is_file <- doesFileExist my_article
220 case is_file of
221 True -> do
222 contents <- Utf8.readFile my_article
223 return $ Just $ contents
224 False -> do
225 -- Download the URL and try to parse it.
226 html <- get_page (C.cookie_jar cfg) my_article
227
228 case html of
229 Left err -> do
230 let msg = "Failed to retrieve page. " ++ err
231 hPutStrLn stderr msg
232 return Nothing
233 Right h -> return $ Just h