1 {-# LANGUAGE DoAndIfThenElse #-}
6 import qualified Data.ByteString as B (hPut)
7 import qualified Data.Map as Map (Map, empty, insert)
8 import Data.Maybe (fromJust)
19 import Network.Curl.Download (openURI)
20 import System.Directory (doesFileExist, getTemporaryDirectory)
21 import System.IO (hClose, hPutStrLn, stderr)
22 import qualified System.IO.UTF8 as Utf8 (readFile)
23 import System.IO.Temp (openBinaryTempFile, openTempFile)
25 import qualified Configuration as C (Cfg, password, use_account, username)
26 import LWN.Article (real_article_path)
27 import LWN.URI (URL, filename)
30 login_url = "https://lwn.net/login"
32 username_field :: String
33 username_field = "Username"
35 password_field :: String
36 password_field = "Password"
38 submit_field :: String
39 submit_field = "submit"
42 default_curl_opts :: [CurlOption]
44 [ -- The Global cache is not thread-friendly.
45 CurlDNSUseGlobalCache False,
47 -- And we don't want to use a DNS cache anyway.
48 CurlDNSCacheTimeout 0,
51 CurlFollowLocation True,
53 -- Give it a little time...
57 make_cookie_jar :: IO FilePath
59 temp_dir <- getTemporaryDirectory
60 let file_name_template = "lwn-epub-cookies.txt"
61 (out_path, out_handle) <- openTempFile temp_dir file_name_template
62 hClose out_handle -- We just want to create it for now.
65 get_page :: Maybe FilePath -> URL -> IO (Either String String)
66 get_page cookie_file url =
68 -- Create a curl instance.
71 -- Perform the request, and get back a CurlResponse object.
72 -- The cast is needed to specify how we would like our headers
73 -- and body returned (Strings).
74 resp <- do_curl_ curl url curl_opts :: IO CurlResponse
76 -- Pull out the response code as a CurlCode.
77 let code = respCurlCode resp
81 CurlOK -> Right (respBody resp)
82 error_code -> Left ("HTTP Error: " ++ (show error_code))
83 -- If an error occurred, we want to dump as much information as
84 -- possible. If this becomes a problem, we can use respGetInfo to
85 -- query the response object for more information
90 Just cookies -> [ CurlCookieFile cookies ]
92 curl_opts = default_curl_opts ++ get_opts
95 -- | Log in using curl. Store the resulting session cookies in the
97 log_in :: FilePath -> String -> String -> IO (Either String String)
98 log_in cookie_jar username password =
100 -- Create a curl instance.
103 -- Perform the request, and get back a CurlResponse object.
104 -- The cast is needed to specify how we would like our headers
105 -- and body returned (Strings).
106 resp <- do_curl_ curl login_url curl_opts :: IO CurlResponse
108 -- Pull out the response code as a CurlCode.
109 let code = respCurlCode resp
113 CurlOK -> Right (respBody resp)
114 error_code -> Left $ "HTTP Error: " ++ (show error_code)
115 -- If an error occurred, we want to dump as much information as
116 -- possible. If this becomes a problem, we can use respGetInfo to
117 -- query the response object for more information
119 post_submit :: String
120 post_submit = submit_field ++ "=Log+In"
122 post_username :: String
123 post_username = username_field ++ "=" ++ username
125 post_password :: String
126 post_password = password_field ++ "=" ++ password
128 post_data :: [String]
129 post_data = [post_username, post_password, post_submit]
131 post_opts :: [CurlOption]
133 [ CurlCookieSession True,
134 CurlCookieJar cookie_jar,
136 CurlPostFields post_data ]
138 curl_opts :: [CurlOption]
139 curl_opts = default_curl_opts ++ post_opts
142 -- | Save the image at 'url'. Saves to a temporary file, and
143 -- returns the path to that file if successful. Otherwise,
144 -- returns 'Nothing'.
146 -- We need to be able to parse the filename out of the URL
147 -- so that when we stick our image in the document, the reader
148 -- knows that type (jpg, png, etc.) it is.
149 save_image :: URL -> IO (Maybe FilePath)
151 it_exists <- doesFileExist url
153 -- It's local, just use it.
156 let fn = filename url
158 Nothing -> return Nothing
160 temp_dir <- getTemporaryDirectory
161 (out_path, out_handle) <- openBinaryTempFile temp_dir file
162 result <- openURI url
165 hPutStrLn stderr ("HTTP Error: " ++ err)
169 return $ Just out_path
174 -- | Map absolute image URLs to local system file paths where the
175 -- image referenced by the URL is stored.
176 type ImageMap = Map.Map URL FilePath
178 download_image_urls :: [URL] -> IO ImageMap
179 download_image_urls image_urls = do
180 files <- mapM save_image image_urls
181 let pairs = zip image_urls files
182 return $ foldl my_insert empty_map pairs
184 empty_map = Map.empty :: ImageMap
186 my_insert :: ImageMap -> (URL, Maybe FilePath) -> ImageMap
187 my_insert dict (_, Nothing) = dict
188 my_insert dict (k, Just v) = Map.insert k v dict
191 -- | Try to parse the given article using HXT. We try a few different
192 -- methods; if none of them work, we return 'Nothing'.
193 get_article_contents :: C.Cfg -> URL -> IO (Maybe String)
194 get_article_contents cfg article_name = do
195 my_article <- real_article_path article_name
196 is_file <- doesFileExist my_article
199 contents <- Utf8.readFile my_article
200 return $ Just $ contents
202 -- Download the URL and try to parse it.
203 if C.use_account cfg then do
204 -- use_account would be false if these fromJusts would fail.
205 cj <- make_cookie_jar
206 li_result <- log_in cj
207 (fromJust $ C.username cfg)
208 (fromJust $ C.password cfg)
212 let msg = "Failed to log in. " ++ err
214 Right response_body -> do
215 hPutStrLn stderr response_body
217 html <- get_page (Just cj) my_article
221 let msg = "Failed to retrieve page. " ++ err
224 Right h -> return $ Just h
226 html <- get_page Nothing my_article
229 let msg = "Failed to retrieve page. " ++ err
232 Right h -> return $ Just h