module LWN.URI where import Data.List (isSuffixOf) import Data.Maybe (fromJust) import Data.String.Utils (split) import Network.URI ( URI(..), URIAuth(..), parseAbsoluteURI, parseURIReference, relativeTo, uriRegName ) import Test.HUnit (Assertion, assertEqual) import Test.Framework (Test, testGroup) import Test.Framework.Providers.HUnit (testCase) import Text.Regex.Posix ((=~)) -- Distinguish between URLs (Strings) and URIs as provided by the -- Network.URI module. type URL = String -- | Is this URI's scheme plain HTTP? http :: URI -> Bool http uri = (uriScheme uri) == "http:" -- | Is this URI's scheme (secure) HTTPS? https :: URI -> Bool https uri = (uriScheme uri) == "https:" -- | Does this URI use an HTTP-compatible port? http_port :: URI -> Bool http_port uri = case parse_result of Nothing -> False Just auth -> (uriPort auth) `elem` ["", ":80"] where parse_result = uriAuthority uri make_https :: URL -> URL make_https url = case parse_result of Nothing -> url -- Shrug? Just uri -> if http uri then show $ uri { uriScheme = "https:" } else url -- Leave non-http URLs alone. where parse_result = parseURIReference url add_trailing_slash :: URL -> URL add_trailing_slash url = case parse_result of Nothing -> url -- Shrug? Just uri -> let old_path = uriPath uri in if (isSuffixOf "/" old_path) || (isSuffixOf "bigpage" old_path) then -- It already had a trailing slash, or it's a 'bigpage' URL. -- Trailing slashes after 'bigpage' don't work. url else show $ uri { uriPath = old_path ++ "/" } where parse_result = parseURIReference url -- | Does this URI use an HTTPS-compatible port? https_port :: URI -> Bool https_port uri = case parse_result of Nothing -> False Just auth -> (uriPort auth) `elem` ["", ":443"] where parse_result = uriAuthority uri -- | Does this URL have one of the LWN hostnames? lwn_host :: URI -> Bool lwn_host uri = case parse_result of Nothing -> False Just auth -> (uriRegName auth) `elem` ["lwn.net", "www.lwn.net"] where parse_result = uriAuthority uri -- | Is the protocol portion of this URI one of the ones that LWN -- uses? lwn_proto :: URI -> Bool lwn_proto u = ((http u) && (http_port u)) || ((https u) && (https_port u)) -- | Is the server/port to which we're connecting an LWN server? lwn_server :: URI -> Bool lwn_server u = lwn_proto u && lwn_host u -- | Is this URI's path for an LWN article? lwn_article_path :: URI -> Bool lwn_article_path uri = path =~ "^/current(/bigpage)?/?$" || path =~ "^/Articles/[0-9]+(/bigpage)?/?$" where path = uriPath uri -- | Is this URI's query one that the LWN uses? The only query string -- that the LWN articles use is the printable page one. lwn_query :: URI -> Bool lwn_query uri = query `elem` ["", "?format=printable"] where query = uriQuery uri -- | Combine all of the other URI tests to determine if this 'URL' -- belongs to an LWN article. is_lwn_url :: URL -> Bool is_lwn_url s = case parse_result of Nothing -> False Just uri -> (lwn_server uri) && (lwn_article_path uri) && (lwn_query uri) where parse_result = parseAbsoluteURI s filename :: URL -> Maybe String filename url = case parse_result of Nothing -> Nothing Just uri -> let components = split "/" (uriPath uri) in -- Reverse them so that the filename comes first for easier -- pattern-matching. let reverse_components = reverse components in case reverse_components of [] -> Nothing (x:_) -> Just x where parse_result = parseURIReference url make_absolute_uri :: URI -> Maybe URI make_absolute_uri relative_uri = relativeTo relative_uri base_uri where base_auth = URIAuth { uriUserInfo = "", uriRegName = "lwn.net", uriPort = "" } base_uri = URI { uriScheme = "https:", uriAuthority = Just base_auth, uriPath = "/", uriQuery = "", uriFragment = "" } make_absolute_url :: URL -> Maybe URL make_absolute_url relative_url = case parse_result of Nothing -> Nothing Just relative_uri -> let abs_result = make_absolute_uri relative_uri in case abs_result of Nothing -> Nothing Just abs_uri -> Just $ show abs_uri where parse_result = parseURIReference relative_url -- | Like 'make_absolute_url', except returns its input instead of -- 'Nothing' if the absolution fails. try_make_absolute_url :: URL -> URL try_make_absolute_url url = case make_absolute_url url of Nothing -> url Just abs_url -> abs_url -- | A List of LWN URLs to use during testing. lwn_urls :: [URL] lwn_urls = [ proto ++ www ++ "lwn.net" ++ path ++ bigpage ++ query | proto <- ["http://", "https://"], www <- ["", "www."], bigpage <- ["", "/bigpage"], query <- ["", "?format=printable"], path <- [ "/current", "/Articles/500844", "/Articles/502371" ] ] test_lwn_urls_matched :: Assertion test_lwn_urls_matched = assertEqual "All LWN URLs matched" True (all is_lwn_url lwn_urls) test_http_uris_matched :: Assertion test_http_uris_matched = assertEqual (url ++ " is HTTP") True (http uri) where url = "http://lwn.net/Articles/500844/bigpage" uri = fromJust $ parseAbsoluteURI url test_https_uris_matched :: Assertion test_https_uris_matched = assertEqual (url ++ " is HTTPS") True (https uri) where url = "https://lwn.net/Articles/500844/bigpage" uri = fromJust $ parseAbsoluteURI url test_bare_filename_parsed :: Assertion test_bare_filename_parsed = assertEqual "Filename is 'example.jpg'" "example.jpg" actual_result where url = "example.jpg" actual_result = fromJust $ filename url test_absolute_filename_parsed :: Assertion test_absolute_filename_parsed = assertEqual "Filename is 'example.jpg'" "example.jpg" actual_result where url = "http://lwn.net/one/two/example.jpg" actual_result = fromJust $ filename url test_relative_filename_parsed :: Assertion test_relative_filename_parsed = assertEqual "Filename is 'example.jpg'" "example.jpg" actual_result where url = "/one/two/example.jpg" actual_result = fromJust $ filename url test_empty_url_conversion :: Assertion test_empty_url_conversion = assertEqual "'' converted to lwn.net" expected actual where expected = "https://lwn.net/" actual = fromJust $ make_absolute_url "" test_normal_url_conversion :: Assertion test_normal_url_conversion = assertEqual "Image URL is made absolute" expected actual where url = "/images/2012/lcj-coughlan-lattimer-sm.jpg" expected = "https://lwn.net/images/2012/lcj-coughlan-lattimer-sm.jpg" actual = fromJust $ make_absolute_url url test_make_https :: Assertion test_make_https = assertEqual "HTTP URL is made HTTPS" expected actual where url = "http://lwn.net/current" expected = "https://lwn.net/current" actual = make_https url test_add_trailing_slash :: Assertion test_add_trailing_slash = assertEqual "Trailing slashes get added" expected actual where url = "https://lwn.net/current" expected = "https://lwn.net/current/" actual = add_trailing_slash url uri_tests :: Test uri_tests = testGroup "URI Tests" [ testGroup "URI Matching" [ testCase "HTTP URIs matched" test_http_uris_matched, testCase "HTTPS URIs matched" test_https_uris_matched, testCase "LWN URLs matched" test_lwn_urls_matched ], testGroup "Filename Parsing" [ testCase "Bare filename parsed" test_bare_filename_parsed, testCase "Absolute filename parsed" test_absolute_filename_parsed, testCase "Relative filename parsed" test_relative_filename_parsed ], testGroup "Relative -> Absolute Conversion" [ testCase "Empty URL converted to lwn.net" test_empty_url_conversion, testCase "Normal URL made absolute" test_normal_url_conversion ], testGroup "URL Mangling" [ testCase "HTTP URLs are made HTTPS" test_make_https, testCase "Trailing slashes get added" test_add_trailing_slash ] ]