1 {-# LANGUAGE DoAndIfThenElse #-}
6 import qualified Data.ByteString as B (hPut)
7 import qualified Data.Map as Map (Map, empty, insert)
8 import Data.Maybe (fromJust, isNothing)
19 import Network.Curl.Download (openURI)
20 import System.Directory (doesFileExist, getTemporaryDirectory)
21 import System.IO (hClose, hPutStrLn, stderr)
22 import qualified System.IO.UTF8 as Utf8 (readFile)
23 import System.IO.Temp (openBinaryTempFile, openTempFile)
25 import qualified Configuration as C (Cfg(..))
26 import LWN.Article (real_article_path)
27 import LWN.URI (URL, filename)
30 login_url = "https://lwn.net/login"
32 username_field :: String
33 username_field = "Username"
35 password_field :: String
36 password_field = "Password"
38 submit_field :: String
39 submit_field = "submit"
42 default_curl_opts :: [CurlOption]
44 [ -- The Global cache is not thread-friendly.
45 CurlDNSUseGlobalCache False,
47 -- And we don't want to use a DNS cache anyway.
48 CurlDNSCacheTimeout 0,
51 CurlFollowLocation True,
53 -- Give it a little time...
57 make_cookie_jar :: IO FilePath
59 temp_dir <- getTemporaryDirectory
60 let file_name_template = "lwn-epub-cookies.txt"
61 (out_path, out_handle) <- openTempFile temp_dir file_name_template
62 hClose out_handle -- We just want to create it for now.
65 get_page :: Maybe FilePath -> URL -> IO (Either String String)
66 get_page cookie_file url =
68 -- Create a curl instance.
71 -- Perform the request, and get back a CurlResponse object.
72 -- The cast is needed to specify how we would like our headers
73 -- and body returned (Strings).
74 resp <- do_curl_ curl url curl_opts :: IO CurlResponse
76 -- Pull out the response code as a CurlCode.
77 let code = respCurlCode resp
81 CurlOK -> Right (respBody resp)
82 error_code -> Left ("HTTP Error: " ++ (show error_code))
83 -- If an error occurred, we want to dump as much information as
84 -- possible. If this becomes a problem, we can use respGetInfo to
85 -- query the response object for more information
90 Just cookies -> [ CurlCookieFile cookies ]
92 curl_opts = default_curl_opts ++ get_opts
95 -- | Log in using curl. Store the resulting session cookies in the
97 log_in :: FilePath -> String -> String -> IO (Either String String)
98 log_in cookie_jar username password =
100 -- Create a curl instance.
103 -- Perform the request, and get back a CurlResponse object.
104 -- The cast is needed to specify how we would like our headers
105 -- and body returned (Strings).
106 resp <- do_curl_ curl login_url curl_opts :: IO CurlResponse
108 -- Pull out the response code as a CurlCode.
109 let code = respCurlCode resp
113 CurlOK -> Right (respBody resp)
114 error_code -> Left $ "HTTP Error: " ++ (show error_code)
115 -- If an error occurred, we want to dump as much information as
116 -- possible. If this becomes a problem, we can use respGetInfo to
117 -- query the response object for more information
119 post_submit :: String
120 post_submit = submit_field ++ "=Log+In"
122 post_username :: String
123 post_username = username_field ++ "=" ++ username
125 post_password :: String
126 post_password = password_field ++ "=" ++ password
128 post_data :: [String]
129 post_data = [post_username, post_password, post_submit]
131 post_opts :: [CurlOption]
133 [ CurlCookieSession True,
134 CurlCookieJar cookie_jar,
136 CurlPostFields post_data ]
138 curl_opts :: [CurlOption]
139 curl_opts = default_curl_opts ++ post_opts
142 -- | Save the image at 'url'. Saves to a temporary file, and
143 -- returns the path to that file if successful. Otherwise,
144 -- returns 'Nothing'.
146 -- We need to be able to parse the filename out of the URL
147 -- so that when we stick our image in the document, the reader
148 -- knows that type (jpg, png, etc.) it is.
149 save_image :: URL -> IO (Maybe FilePath)
151 it_exists <- doesFileExist url
153 -- It's local, just use it.
156 let fn = filename url
158 Nothing -> return Nothing
160 temp_dir <- getTemporaryDirectory
161 (out_path, out_handle) <- openBinaryTempFile temp_dir file
162 result <- openURI url
165 hPutStrLn stderr ("HTTP Error: " ++ err)
169 return $ Just out_path
174 -- | Map absolute image URLs to local system file paths where the
175 -- image referenced by the URL is stored.
176 type ImageMap = Map.Map URL FilePath
178 download_image_urls :: [URL] -> IO ImageMap
179 download_image_urls image_urls = do
180 files <- mapM save_image image_urls
181 let pairs = zip image_urls files
182 return $ foldl my_insert empty_map pairs
184 empty_map = Map.empty :: ImageMap
186 my_insert :: ImageMap -> (URL, Maybe FilePath) -> ImageMap
187 my_insert dict (_, Nothing) = dict
188 my_insert dict (k, Just v) = Map.insert k v dict
194 get_login_cookie :: C.Cfg -> IO C.Cfg
196 | isNothing (C.username cfg) = return cfg
197 | isNothing (C.password cfg) = return cfg
199 let uname = fromJust $ C.username cfg
200 let pword = fromJust $ C.password cfg
201 cj <- make_cookie_jar
202 li_result <- log_in cj uname pword
206 let msg = "Failed to log in. " ++ err
208 Right response_body -> do
209 hPutStrLn stderr response_body
211 return $ cfg { C.cookie_jar = Just cj }
214 -- | Try to parse the given article using HXT. We try a few different
215 -- methods; if none of them work, we return 'Nothing'.
216 get_article_contents :: C.Cfg -> URL -> IO (Maybe String)
217 get_article_contents cfg article_name = do
218 my_article <- real_article_path article_name
219 is_file <- doesFileExist my_article
222 contents <- Utf8.readFile my_article
223 return $ Just $ contents
225 -- Download the URL and try to parse it.
226 html <- get_page (C.cookie_jar cfg) my_article
230 let msg = "Failed to retrieve page. " ++ err
233 Right h -> return $ Just h