]> gitweb.michael.orlitzky.com - dead/lwn-epub.git/blob - src/LWN/HTTP.hs
7da6ed1488d97114247091476c26b57ace5cc7d8
[dead/lwn-epub.git] / src / LWN / HTTP.hs
1 {-# LANGUAGE DoAndIfThenElse #-}
2
3 module LWN.HTTP
4 where
5
6 import qualified Data.ByteString as B (hPut)
7 import qualified Data.Map as Map (Map, empty, insert)
8 import Data.Maybe (fromJust)
9 import Network.Curl (
10 CurlCode(..),
11 CurlOption(..),
12 CurlResponse,
13 do_curl_,
14 initialize,
15 respBody,
16 respCurlCode,
17 withCurlDo
18 )
19 import Network.Curl.Download (openURI)
20 import System.Directory (doesFileExist, getTemporaryDirectory)
21 import System.IO (hClose, hPutStrLn, stderr)
22 import qualified System.IO.UTF8 as Utf8 (readFile)
23 import System.IO.Temp (openBinaryTempFile, openTempFile)
24
25 import qualified Configuration as C (Cfg, password, use_account, username)
26 import LWN.Article (real_article_path)
27 import LWN.URI (URL, filename)
28
29 login_url :: URL
30 login_url = "https://lwn.net/login"
31
32 username_field :: String
33 username_field = "Username"
34
35 password_field :: String
36 password_field = "Password"
37
38 submit_field :: String
39 submit_field = "submit"
40
41
42 default_curl_opts :: [CurlOption]
43 default_curl_opts =
44 [ -- The Global cache is not thread-friendly.
45 CurlDNSUseGlobalCache False,
46
47 -- And we don't want to use a DNS cache anyway.
48 CurlDNSCacheTimeout 0,
49
50 -- Follow redirects.
51 CurlFollowLocation True,
52
53 -- Give it a little time...
54 CurlTimeout 45 ]
55
56
57 make_cookie_jar :: IO FilePath
58 make_cookie_jar = do
59 temp_dir <- getTemporaryDirectory
60 let file_name_template = "lwn-epub-cookies.txt"
61 (out_path, out_handle) <- openTempFile temp_dir file_name_template
62 hClose out_handle -- We just want to create it for now.
63 return out_path
64
65 get_page :: Maybe FilePath -> URL -> IO (Either String String)
66 get_page cookie_file url =
67 withCurlDo $ do
68 -- Create a curl instance.
69 curl <- initialize
70
71 -- Perform the request, and get back a CurlResponse object.
72 -- The cast is needed to specify how we would like our headers
73 -- and body returned (Strings).
74 resp <- do_curl_ curl url curl_opts :: IO CurlResponse
75
76 -- Pull out the response code as a CurlCode.
77 let code = respCurlCode resp
78
79 return $
80 case code of
81 CurlOK -> Right (respBody resp)
82 error_code -> Left ("HTTP Error: " ++ (show error_code))
83 -- If an error occurred, we want to dump as much information as
84 -- possible. If this becomes a problem, we can use respGetInfo to
85 -- query the response object for more information
86 where
87 get_opts =
88 case cookie_file of
89 Nothing -> []
90 Just cookies -> [ CurlCookieFile cookies ]
91
92 curl_opts = default_curl_opts ++ get_opts
93
94
95 -- | Log in using curl. Store the resulting session cookies in the
96 -- supplied file.
97 log_in :: FilePath -> String -> String -> IO (Either String String)
98 log_in cookie_jar username password =
99 withCurlDo $ do
100 -- Create a curl instance.
101 curl <- initialize
102
103 -- Perform the request, and get back a CurlResponse object.
104 -- The cast is needed to specify how we would like our headers
105 -- and body returned (Strings).
106 resp <- do_curl_ curl login_url curl_opts :: IO CurlResponse
107
108 -- Pull out the response code as a CurlCode.
109 let code = respCurlCode resp
110
111 return $
112 case code of
113 CurlOK -> Right (respBody resp)
114 error_code -> Left $ "HTTP Error: " ++ (show error_code)
115 -- If an error occurred, we want to dump as much information as
116 -- possible. If this becomes a problem, we can use respGetInfo to
117 -- query the response object for more information
118 where
119 post_submit :: String
120 post_submit = submit_field ++ "=Log+In"
121
122 post_username :: String
123 post_username = username_field ++ "=" ++ username
124
125 post_password :: String
126 post_password = password_field ++ "=" ++ password
127
128 post_data :: [String]
129 post_data = [post_username, post_password, post_submit]
130
131 post_opts :: [CurlOption]
132 post_opts =
133 [ CurlCookieSession True,
134 CurlCookieJar cookie_jar,
135 CurlPost True,
136 CurlPostFields post_data ]
137
138 curl_opts :: [CurlOption]
139 curl_opts = default_curl_opts ++ post_opts
140
141
142 -- | Save the image at 'url'. Saves to a temporary file, and
143 -- returns the path to that file if successful. Otherwise,
144 -- returns 'Nothing'.
145 --
146 -- We need to be able to parse the filename out of the URL
147 -- so that when we stick our image in the document, the reader
148 -- knows that type (jpg, png, etc.) it is.
149 save_image :: URL -> IO (Maybe FilePath)
150 save_image url = do
151 it_exists <- doesFileExist url
152 if it_exists then do
153 -- It's local, just use it.
154 return $ Just url
155 else do
156 let fn = filename url
157 case fn of
158 Nothing -> return Nothing
159 Just file -> do
160 temp_dir <- getTemporaryDirectory
161 (out_path, out_handle) <- openBinaryTempFile temp_dir file
162 result <- openURI url
163 case result of
164 Left err -> do
165 hPutStrLn stderr ("HTTP Error: " ++ err)
166 return Nothing
167 Right bs -> do
168 B.hPut out_handle bs
169 return $ Just out_path
170
171
172
173
174 -- | Map absolute image URLs to local system file paths where the
175 -- image referenced by the URL is stored.
176 type ImageMap = Map.Map URL FilePath
177
178 download_image_urls :: [URL] -> IO ImageMap
179 download_image_urls image_urls = do
180 files <- mapM save_image image_urls
181 let pairs = zip image_urls files
182 return $ foldl my_insert empty_map pairs
183 where
184 empty_map = Map.empty :: ImageMap
185
186 my_insert :: ImageMap -> (URL, Maybe FilePath) -> ImageMap
187 my_insert dict (_, Nothing) = dict
188 my_insert dict (k, Just v) = Map.insert k v dict
189
190
191 -- | Try to parse the given article using HXT. We try a few different
192 -- methods; if none of them work, we return 'Nothing'.
193 get_article_contents :: C.Cfg -> URL -> IO (Maybe String)
194 get_article_contents cfg article_name = do
195 my_article <- real_article_path article_name
196 is_file <- doesFileExist my_article
197 case is_file of
198 True -> do
199 contents <- Utf8.readFile my_article
200 return $ Just $ contents
201 False -> do
202 -- Download the URL and try to parse it.
203 if C.use_account cfg then do
204 -- use_account would be false if these fromJusts would fail.
205 cj <- make_cookie_jar
206 li_result <- log_in cj
207 (fromJust $ C.username cfg)
208 (fromJust $ C.password cfg)
209
210 case li_result of
211 Left err -> do
212 let msg = "Failed to log in. " ++ err
213 hPutStrLn stderr msg
214 Right response_body -> do
215 hPutStrLn stderr response_body
216
217 html <- get_page (Just cj) my_article
218
219 case html of
220 Left err -> do
221 let msg = "Failed to retrieve page. " ++ err
222 hPutStrLn stderr msg
223 return Nothing
224 Right h -> return $ Just h
225 else do
226 html <- get_page Nothing my_article
227 case html of
228 Left err -> do
229 let msg = "Failed to retrieve page. " ++ err
230 hPutStrLn stderr msg
231 return Nothing
232 Right h -> return $ Just h