1 {-# LANGUAGE DoAndIfThenElse #-}
6 import Control.Concurrent.ParallelIO (parallel)
7 import qualified Data.ByteString as B (hPut)
8 import qualified Data.Map as Map (Map, empty, insert)
9 import Data.Maybe (fromJust, isNothing)
20 import Network.Curl.Download (openURI)
21 import System.Directory (doesFileExist, getTemporaryDirectory)
22 import System.IO (hClose, hPutStrLn, stderr)
23 import qualified System.IO.UTF8 as Utf8 (readFile)
24 import System.IO.Temp (openBinaryTempFile, openTempFile)
26 import qualified Configuration as C (Cfg(..))
27 import LWN.Article (real_article_path)
28 import LWN.URI (URL, filename)
31 login_url = "https://lwn.net/login"
33 username_field :: String
34 username_field = "Username"
36 password_field :: String
37 password_field = "Password"
39 submit_field :: String
40 submit_field = "submit"
43 default_curl_opts :: [CurlOption]
45 [ -- The Global cache is not thread-friendly.
46 CurlDNSUseGlobalCache False,
48 -- And we don't want to use a DNS cache anyway.
49 CurlDNSCacheTimeout 0,
52 CurlFollowLocation True,
54 -- Give it a little time...
58 make_cookie_jar :: IO FilePath
60 temp_dir <- getTemporaryDirectory
61 let file_name_template = "lwn-epub-cookies.txt"
62 (out_path, out_handle) <- openTempFile temp_dir file_name_template
63 hClose out_handle -- We just want to create it for now.
66 get_page :: Maybe FilePath -> URL -> IO (Either String String)
67 get_page cookie_file url =
69 -- Create a curl instance.
72 -- Perform the request, and get back a CurlResponse object.
73 -- The cast is needed to specify how we would like our headers
74 -- and body returned (Strings).
75 resp <- do_curl_ curl url curl_opts :: IO CurlResponse
77 -- Pull out the response code as a CurlCode.
78 let code = respCurlCode resp
82 CurlOK -> Right (respBody resp)
83 error_code -> Left ("HTTP Error: " ++ (show error_code))
84 -- If an error occurred, we want to dump as much information as
85 -- possible. If this becomes a problem, we can use respGetInfo to
86 -- query the response object for more information
91 Just cookies -> [ CurlCookieFile cookies ]
93 curl_opts = default_curl_opts ++ get_opts
96 -- | Log in using curl. Store the resulting session cookies in the
98 log_in :: FilePath -> String -> String -> IO (Either String String)
99 log_in cookie_jar username password =
101 -- Create a curl instance.
104 -- Perform the request, and get back a CurlResponse object.
105 -- The cast is needed to specify how we would like our headers
106 -- and body returned (Strings).
107 resp <- do_curl_ curl login_url curl_opts :: IO CurlResponse
109 -- Pull out the response code as a CurlCode.
110 let code = respCurlCode resp
114 CurlOK -> Right (respBody resp)
115 error_code -> Left $ "HTTP Error: " ++ (show error_code)
116 -- If an error occurred, we want to dump as much information as
117 -- possible. If this becomes a problem, we can use respGetInfo to
118 -- query the response object for more information
120 post_submit :: String
121 post_submit = submit_field ++ "=Log+In"
123 post_username :: String
124 post_username = username_field ++ "=" ++ username
126 post_password :: String
127 post_password = password_field ++ "=" ++ password
129 post_data :: [String]
130 post_data = [post_username, post_password, post_submit]
132 post_opts :: [CurlOption]
134 [ CurlCookieSession True,
135 CurlCookieJar cookie_jar,
137 CurlPostFields post_data ]
139 curl_opts :: [CurlOption]
140 curl_opts = default_curl_opts ++ post_opts
143 -- | Save the image at 'url'. Saves to a temporary file, and
144 -- returns the path to that file if successful. Otherwise,
145 -- returns 'Nothing'.
147 -- We need to be able to parse the filename out of the URL
148 -- so that when we stick our image in the document, the reader
149 -- knows that type (jpg, png, etc.) it is.
150 save_image :: URL -> IO (Maybe FilePath)
152 it_exists <- doesFileExist url
154 -- It's local, just use it.
157 let fn = filename url
159 Nothing -> return Nothing
161 temp_dir <- getTemporaryDirectory
162 (out_path, out_handle) <- openBinaryTempFile temp_dir file
163 result <- openURI url
166 hPutStrLn stderr ("HTTP Error: " ++ err)
170 return $ Just out_path
175 -- | Map absolute image URLs to local system file paths where the
176 -- image referenced by the URL is stored.
177 type ImageMap = Map.Map URL FilePath
179 download_image_urls :: [URL] -> IO ImageMap
180 download_image_urls image_urls = do
181 files <- parallel $ map save_image image_urls
182 let pairs = zip image_urls files
183 return $ foldl my_insert empty_map pairs
185 empty_map = Map.empty :: ImageMap
187 my_insert :: ImageMap -> (URL, Maybe FilePath) -> ImageMap
188 my_insert dict (_, Nothing) = dict
189 my_insert dict (k, Just v) = Map.insert k v dict
195 get_login_cookie :: C.Cfg -> IO C.Cfg
197 | isNothing (C.username cfg) = return cfg
198 | isNothing (C.password cfg) = return cfg
200 let uname = fromJust $ C.username cfg
201 let pword = fromJust $ C.password cfg
202 cj <- make_cookie_jar
203 li_result <- log_in cj uname pword
207 let msg = "Failed to log in. " ++ err
209 Right response_body -> do
210 hPutStrLn stderr response_body
212 return $ cfg { C.cookie_jar = Just cj }
215 -- | Try to parse the given article using HXT. We try a few different
216 -- methods; if none of them work, we return 'Nothing'.
217 get_article_contents :: C.Cfg -> URL -> IO (Maybe String)
218 get_article_contents cfg article_name = do
219 my_article <- real_article_path article_name
220 is_file <- doesFileExist my_article
223 contents <- Utf8.readFile my_article
224 return $ Just $ contents
226 -- Download the URL and try to parse it.
227 html <- get_page (C.cookie_jar cfg) my_article
231 let msg = "Failed to retrieve page. " ++ err
234 Right h -> return $ Just h