From: Michael Orlitzky Date: Tue, 26 Jun 2012 23:52:27 +0000 (-0400) Subject: Implement URL filename parsing (with tests) and the save_image function. X-Git-Tag: v0.0.1~42 X-Git-Url: http://gitweb.michael.orlitzky.com/?p=dead%2Flwn-epub.git;a=commitdiff_plain;h=79eb04e3c84bd9514659d0d52c2e862959647aa6 Implement URL filename parsing (with tests) and the save_image function. --- diff --git a/src/LWN/HTTP.hs b/src/LWN/HTTP.hs index 8b4c14d..c74248b 100644 --- a/src/LWN/HTTP.hs +++ b/src/LWN/HTTP.hs @@ -1,6 +1,7 @@ module LWN.HTTP where +import qualified Data.ByteString as B (hPut) import Network.Curl ( CurlCode(..), CurlOption(..), @@ -12,7 +13,12 @@ import Network.Curl ( respCurlCode, withCurlDo ) +import Network.Curl.Download (openURI) +import System.Directory (getTemporaryDirectory) import System.IO (hPutStrLn, stderr) +import System.IO.Temp (openBinaryTempFile) + +import LWN.URI (filename) login_url :: URLString login_url = "https://lwn.net/login" @@ -118,3 +124,28 @@ log_in cookie_jar username password = curl_opts :: [CurlOption] curl_opts = default_curl_opts ++ post_opts + + +-- | Save the image at 'url'. Saves to a temporary file, and +-- returns the path to that file if successful. Otherwise, +-- returns 'Nothing'. +-- +-- We need to be able to parse the filename out of the URL +-- so that when we stick our image in the document, the reader +-- knows that type (jpg, png, etc.) it is. +save_image :: URLString -> IO (Maybe FilePath) +save_image url = do + let fn = filename url + case fn of + Nothing -> return Nothing + Just file -> do + temp_dir <- getTemporaryDirectory + (out_path, out_handle) <- openBinaryTempFile temp_dir file + result <- openURI url + case result of + Left err -> do + hPutStrLn stderr ("HTTP Error: " ++ err) + return Nothing + Right bs -> do + B.hPut out_handle bs + return $ Just out_path diff --git a/src/LWN/URI.hs b/src/LWN/URI.hs index 61ea6ca..0d01e19 100644 --- a/src/LWN/URI.hs +++ b/src/LWN/URI.hs @@ -2,9 +2,11 @@ module LWN.URI where import Data.Maybe (fromJust) +import Data.String.Utils (split) import Network.URI ( URI, parseAbsoluteURI, + parseURIReference, uriAuthority, uriPath, uriPort, @@ -101,6 +103,23 @@ is_lwn_url s = parse_result = parseAbsoluteURI s + +filename :: URL -> Maybe String +filename url = + case parse_result of + Nothing -> Nothing + Just uri -> + let components = split "/" (uriPath uri) in + -- Reverse them so that the filename comes first for easier + -- pattern-matching. + let reverse_components = reverse components in + case reverse_components of + [] -> Nothing + (x:xs) -> Just x + where + parse_result = parseURIReference url + + -- | A List of LWN URLs to use during testing. lwn_urls :: [URL] lwn_urls = [ proto ++ www ++ "lwn.net" ++ path ++ bigpage ++ query | @@ -130,9 +149,39 @@ test_https_uris_matched = url = "https://lwn.net/Articles/500844/bigpage" uri = fromJust $ parseAbsoluteURI url + +test_bare_filename_parsed :: Assertion +test_bare_filename_parsed = + assertEqual "Filename is 'example.jpg'" "example.jpg" actual_result + where + url = "example.jpg" + actual_result = fromJust $ filename url + +test_absolute_filename_parsed :: Assertion +test_absolute_filename_parsed = + assertEqual "Filename is 'example.jpg'" "example.jpg" actual_result + where + url = "http://lwn.net/one/two/example.jpg" + actual_result = fromJust $ filename url + +test_relative_filename_parsed :: Assertion +test_relative_filename_parsed = + assertEqual "Filename is 'example.jpg'" "example.jpg" actual_result + where + url = "/one/two/example.jpg" + actual_result = fromJust $ filename url + + uri_tests :: Test uri_tests = testGroup "URI Tests" [ - testCase "HTTP URIs matched" test_http_uris_matched, - testCase "HTTPS URIs matched" test_https_uris_matched, - testCase "LWN URLs matched" test_lwn_urls_matched ] + + testGroup "URI Matching" [ + testCase "HTTP URIs matched" test_http_uris_matched, + testCase "HTTPS URIs matched" test_https_uris_matched, + testCase "LWN URLs matched" test_lwn_urls_matched ], + + testGroup "Filename Parsing" [ + testCase "Bare filename parsed" test_bare_filename_parsed, + testCase "Absolute filename parsed" test_absolute_filename_parsed, + testCase "Relative filename parsed" test_relative_filename_parsed ] ]