Source reorganization and cleanup.
[dead/lwn-epub.git] / src / LWN / HTTP.hs
1 {-# LANGUAGE DoAndIfThenElse #-}
2
3 module LWN.HTTP
4 where
5
6 import qualified Data.ByteString as B (hPut)
7 import qualified Data.Map as Map (Map, empty, insert)
8 import Network.Curl (
9 CurlCode(..),
10 CurlOption(..),
11 CurlResponse,
12 do_curl_,
13 initialize,
14 respBody,
15 respCurlCode,
16 withCurlDo
17 )
18 import Network.Curl.Download (openURI)
19 import System.Directory (doesFileExist, getTemporaryDirectory)
20 import System.IO (hClose, hPutStrLn, stderr)
21 import System.IO.Temp (openBinaryTempFile, openTempFile)
22
23 import LWN.URI (URL, filename)
24
25 login_url :: URL
26 login_url = "https://lwn.net/login"
27
28 username_field :: String
29 username_field = "Username"
30
31 password_field :: String
32 password_field = "Password"
33
34 submit_field :: String
35 submit_field = "submit"
36
37
38 default_curl_opts :: [CurlOption]
39 default_curl_opts =
40 [ -- The Global cache is not thread-friendly.
41 CurlDNSUseGlobalCache False,
42
43 -- And we don't want to use a DNS cache anyway.
44 CurlDNSCacheTimeout 0,
45
46 -- Follow redirects.
47 CurlFollowLocation True,
48
49 -- Give it a little time...
50 CurlTimeout 45 ]
51
52
53 make_cookie_jar :: IO FilePath
54 make_cookie_jar = do
55 temp_dir <- getTemporaryDirectory
56 let file_name_template = "lwn-epub-cookies.txt"
57 (out_path, out_handle) <- openTempFile temp_dir file_name_template
58 hClose out_handle -- We just want to create it for now.
59 return out_path
60
61 get_page :: Maybe FilePath -> URL -> IO (Either String String)
62 get_page cookie_file url =
63 withCurlDo $ do
64 -- Create a curl instance.
65 curl <- initialize
66
67 -- Perform the request, and get back a CurlResponse object.
68 -- The cast is needed to specify how we would like our headers
69 -- and body returned (Strings).
70 resp <- do_curl_ curl url curl_opts :: IO CurlResponse
71
72 -- Pull out the response code as a CurlCode.
73 let code = respCurlCode resp
74
75 return $
76 case code of
77 CurlOK -> Right (respBody resp)
78 error_code -> Left ("HTTP Error: " ++ (show error_code))
79 -- If an error occurred, we want to dump as much information as
80 -- possible. If this becomes a problem, we can use respGetInfo to
81 -- query the response object for more information
82 where
83 get_opts =
84 case cookie_file of
85 Nothing -> []
86 Just cookies -> [ CurlCookieFile cookies ]
87
88 curl_opts = default_curl_opts ++ get_opts
89
90
91 -- | Log in using curl. Store the resulting session cookies in the
92 -- supplied file.
93 log_in :: FilePath -> String -> String -> IO (Either String String)
94 log_in cookie_jar username password =
95 withCurlDo $ do
96 -- Create a curl instance.
97 curl <- initialize
98
99 -- Perform the request, and get back a CurlResponse object.
100 -- The cast is needed to specify how we would like our headers
101 -- and body returned (Strings).
102 resp <- do_curl_ curl login_url curl_opts :: IO CurlResponse
103
104 -- Pull out the response code as a CurlCode.
105 let code = respCurlCode resp
106
107 return $
108 case code of
109 CurlOK -> Right (respBody resp)
110 error_code -> Left $ "HTTP Error: " ++ (show error_code)
111 -- If an error occurred, we want to dump as much information as
112 -- possible. If this becomes a problem, we can use respGetInfo to
113 -- query the response object for more information
114 where
115 post_submit :: String
116 post_submit = submit_field ++ "=Log+In"
117
118 post_username :: String
119 post_username = username_field ++ "=" ++ username
120
121 post_password :: String
122 post_password = password_field ++ "=" ++ password
123
124 post_data :: [String]
125 post_data = [post_username, post_password, post_submit]
126
127 post_opts :: [CurlOption]
128 post_opts =
129 [ CurlCookieSession True,
130 CurlCookieJar cookie_jar,
131 CurlPost True,
132 CurlPostFields post_data ]
133
134 curl_opts :: [CurlOption]
135 curl_opts = default_curl_opts ++ post_opts
136
137
138 -- | Save the image at 'url'. Saves to a temporary file, and
139 -- returns the path to that file if successful. Otherwise,
140 -- returns 'Nothing'.
141 --
142 -- We need to be able to parse the filename out of the URL
143 -- so that when we stick our image in the document, the reader
144 -- knows that type (jpg, png, etc.) it is.
145 save_image :: URL -> IO (Maybe FilePath)
146 save_image url = do
147 it_exists <- doesFileExist url
148 if it_exists then do
149 -- It's local, just use it.
150 return $ Just url
151 else do
152 let fn = filename url
153 case fn of
154 Nothing -> return Nothing
155 Just file -> do
156 temp_dir <- getTemporaryDirectory
157 (out_path, out_handle) <- openBinaryTempFile temp_dir file
158 result <- openURI url
159 case result of
160 Left err -> do
161 hPutStrLn stderr ("HTTP Error: " ++ err)
162 return Nothing
163 Right bs -> do
164 B.hPut out_handle bs
165 return $ Just out_path
166
167
168
169
170 -- | Map absolute image URLs to local system file paths where the
171 -- image referenced by the URL is stored.
172 type ImageMap = Map.Map URL FilePath
173
174 download_image_urls :: [URL] -> IO ImageMap
175 download_image_urls image_urls = do
176 files <- mapM save_image image_urls
177 let pairs = zip image_urls files
178 return $ foldl my_insert empty_map pairs
179 where
180 empty_map = Map.empty :: ImageMap
181
182 my_insert :: ImageMap -> (URL, Maybe FilePath) -> ImageMap
183 my_insert dict (_, Nothing) = dict
184 my_insert dict (k, Just v) = Map.insert k v dict