]> gitweb.michael.orlitzky.com - dead/lwn-epub.git/blob - src/LWN/HTTP.hs
Replace the curl routines with http-conduit ones.
[dead/lwn-epub.git] / src / LWN / HTTP.hs
1 {-# LANGUAGE DoAndIfThenElse #-}
2
3 module LWN.HTTP
4 where
5
6 import Control.Concurrent.ParallelIO (parallel)
7 import qualified Data.ByteString.Char8 as C (ByteString, pack)
8 import qualified Data.ByteString.Lazy as L (ByteString, hPut)
9 import Data.ByteString.Lazy.UTF8 (toString)
10 import qualified Data.Map as Map (Map, empty, insert)
11 import Data.Maybe (fromJust, isNothing)
12 import Data.Time (getCurrentTime)
13 import Network.HTTP.Conduit (
14 CookieJar,
15 Response(..),
16 Request(..),
17 httpLbs,
18 insertCookiesIntoRequest,
19 parseUrl,
20 updateCookieJar,
21 urlEncodedBody,
22 withManager
23 )
24 import Network.HTTP.Types (status200, status302)
25 import Network.HTTP.Types.Method (methodPost)
26 import System.Directory (doesFileExist, getTemporaryDirectory)
27 import System.IO (hPutStrLn, stderr)
28 import qualified System.IO.UTF8 as Utf8 (readFile)
29 import System.IO.Temp (openBinaryTempFile)
30
31 -- Also grab the empty cookie jar from Configuration since we'll
32 -- use it in a few places.
33 import qualified Configuration as C (Cfg(..), cj_empty)
34 import LWN.Article (real_article_path)
35 import LWN.URI (URL, filename)
36
37 -- | The type of response we get back from http-conduit's httpLbs.
38 type LBSResponse = Response L.ByteString
39
40
41 login_url :: URL
42 login_url = "https://lwn.net/login"
43
44 username_field :: C.ByteString
45 username_field = C.pack "Username"
46
47 password_field :: C.ByteString
48 password_field = C.pack "Password"
49
50 submit_field :: C.ByteString
51 submit_field = C.pack "submit"
52
53
54 -- | Get the requested URL as a L.ByteString, or return the response
55 -- as an error. Use the given cookie jar for the request.
56 get_page :: CookieJar -> URL -> IO (Either LBSResponse L.ByteString)
57 get_page cj url = do
58 init_req <- parseUrl url
59 let req' = init_req { checkStatus = \_ _ -> Nothing }
60 now <- getCurrentTime
61 let (req, _) = insertCookiesIntoRequest req' cj now
62 resp <- withManager $ httpLbs req
63
64 return $ if (responseStatus resp) == status200 then
65 Right (responseBody resp)
66 else
67 Left resp
68
69
70 -- | Log in using curl. Store the resulting session cookies in the
71 -- supplied file.
72 log_in :: String -> String -> IO (Either LBSResponse CookieJar)
73 log_in username password = do
74 init_req <- parseUrl login_url
75 let req' = init_req { method = methodPost,
76 checkStatus = \_ _ -> Nothing,
77 redirectCount = 0 }
78 let req = urlEncodedBody post_data req'
79
80 resp <- withManager $ httpLbs req
81
82 -- The login page redirects. If we follow it, we lose our cookies.
83 if (responseStatus resp) == status302 then do
84 now <- getCurrentTime
85 let (cj,_) = updateCookieJar resp req now C.cj_empty
86 return $ Right cj
87 else do
88 return $ Left resp
89
90 where
91 post_submit :: (C.ByteString, C.ByteString)
92 post_submit = (submit_field, C.pack "Log+In")
93
94 post_username :: (C.ByteString, C.ByteString)
95 post_username = (username_field, C.pack username)
96
97 post_password :: (C.ByteString, C.ByteString)
98 post_password = (password_field, C.pack password)
99
100 post_data :: [(C.ByteString, C.ByteString)]
101 post_data = [post_username, post_password, post_submit]
102
103
104
105 -- | Save the image at 'url'. Saves to a temporary file, and
106 -- returns the path to that file if successful. Otherwise,
107 -- returns 'Nothing'.
108 --
109 -- We need to be able to parse the filename out of the URL
110 -- so that when we stick our image in the document, the reader
111 -- knows that type (jpg, png, etc.) it is.
112 save_image :: URL -> IO (Maybe FilePath)
113 save_image url = do
114 it_exists <- doesFileExist url
115 if it_exists then do
116 -- It's local, just use it.
117 return $ Just url
118 else do
119 let fn = filename url
120 case fn of
121 Nothing -> return Nothing
122 Just file -> do
123 temp_dir <- getTemporaryDirectory
124 (out_path, out_handle) <- openBinaryTempFile temp_dir file
125 -- We don't need to be logged in to get the images, so use an
126 -- empty cookie jar.
127 result <- get_page C.cj_empty url
128 case result of
129 Left err -> do
130 hPutStrLn stderr $ "Failed to retrieve image. " ++
131 "Server response:\n" ++ (show err)
132 return Nothing
133 Right bs -> do
134 L.hPut out_handle bs
135 return $ Just out_path
136
137
138
139
140 -- | Map absolute image URLs to local system file paths where the
141 -- image referenced by the URL is stored.
142 type ImageMap = Map.Map URL FilePath
143
144 download_image_urls :: [URL] -> IO ImageMap
145 download_image_urls image_urls = do
146 files <- parallel $ map save_image image_urls
147 let pairs = zip image_urls files
148 return $ foldl my_insert empty_map pairs
149 where
150 empty_map = Map.empty :: ImageMap
151
152 my_insert :: ImageMap -> (URL, Maybe FilePath) -> ImageMap
153 my_insert dict (_, Nothing) = dict
154 my_insert dict (k, Just v) = Map.insert k v dict
155
156
157
158
159
160 get_login_cookie :: C.Cfg -> IO C.Cfg
161 get_login_cookie cfg
162 | isNothing (C.username cfg) = return cfg
163 | isNothing (C.password cfg) = return cfg
164 | otherwise = do
165 let uname = fromJust $ C.username cfg
166 let pword = fromJust $ C.password cfg
167 li_result <- log_in uname pword
168
169 case li_result of
170 Left err -> do
171 let msg = "Failed to log in. Server response:\n" ++ (show err)
172 hPutStrLn stderr msg
173 return cfg
174 Right cj -> return $ cfg { C.cookie_jar = cj }
175
176
177 -- | Try to parse the given article using HXT. We try a few different
178 -- methods; if none of them work, we return 'Nothing'.
179 get_article_contents :: C.Cfg -> URL -> IO (Maybe String)
180 get_article_contents cfg article_name = do
181 my_article <- real_article_path article_name
182 is_file <- doesFileExist my_article
183 case is_file of
184 True -> do
185 contents <- Utf8.readFile my_article
186 return $ Just $ contents
187 False -> do
188 -- Download the URL.
189 html <- get_page (C.cookie_jar cfg) my_article
190
191 case html of
192 Left err -> do
193 let msg = "Failed to retrieve article. " ++
194 "Server response:\n" ++ (show err)
195 hPutStrLn stderr msg
196 return Nothing
197 Right lbs_article ->
198 return $ Just (toString lbs_article)