module LWN.URI where import Data.Maybe (fromJust) import Network.URI ( URI, parseAbsoluteURI, uriAuthority, uriPath, uriPort, uriQuery, uriRegName, uriScheme ) import Test.HUnit (Assertion, assertEqual) import Test.Framework (Test, testGroup) import Test.Framework.Providers.HUnit (testCase) import Text.Regex.Posix -- Distinguish between URLs (Strings) and URIs as provided by the -- Network.URI module. type URL = String -- | Is this URI's scheme plain HTTP? http :: URI -> Bool http uri = (uriScheme uri) == "http:" -- | Is this URI's scheme (secure) HTTPS? https :: URI -> Bool https uri = (uriScheme uri) == "https:" -- | Does this URI use an HTTP-compatible port? http_port :: URI -> Bool http_port uri = case parse_result of Nothing -> False Just auth -> (uriPort auth) `elem` ["", ":80"] where parse_result = uriAuthority uri -- | Does this URI use an HTTPS-compatible port? https_port :: URI -> Bool https_port uri = case parse_result of Nothing -> False Just auth -> (uriPort auth) `elem` ["", ":443"] where parse_result = uriAuthority uri -- | Does this URL have one of the LWN hostnames? lwn_host :: URI -> Bool lwn_host uri = case parse_result of Nothing -> False Just auth -> (uriRegName auth) `elem` ["lwn.net", "www.lwn.net"] where parse_result = uriAuthority uri -- | Is the protocol portion of this URI one of the ones that LWN -- uses? lwn_proto :: URI -> Bool lwn_proto u = ((http u) && (http_port u)) || ((https u) && (https_port u)) -- | Is the server/port to which we're connecting an LWN server? lwn_server :: URI -> Bool lwn_server u = lwn_proto u && lwn_host u -- | Is this URI's path for an LWN article? lwn_article_path :: URI -> Bool lwn_article_path uri = path =~ "^/current(/bigpage)?/?$" || path =~ "^/Articles/[0-9]+(/bigpage)?/?$" where path = uriPath uri -- | Is this URI's query one that the LWN uses? The only query string -- that the LWN articles use is the printable page one. lwn_query :: URI -> Bool lwn_query uri = query `elem` ["", "?format=printable"] where query = uriQuery uri -- | Combine all of the other URI tests to determine if this 'URL' -- belongs to an LWN article. is_lwn_url :: URL -> Bool is_lwn_url s = case parse_result of Nothing -> False Just uri -> (lwn_server uri) && (lwn_article_path uri) && (lwn_query uri) where parse_result = parseAbsoluteURI s -- | A List of LWN URLs to use during testing. lwn_urls :: [URL] lwn_urls = [ proto ++ www ++ "lwn.net" ++ path ++ bigpage ++ query | proto <- ["http://", "https://"], www <- ["", "www."], bigpage <- ["", "/bigpage"], query <- ["", "?format=printable"], path <- [ "/current", "/Articles/500844", "/Articles/502371" ] ] test_lwn_urls_matched :: Assertion test_lwn_urls_matched = assertEqual "All LWN URLs matched" True (all is_lwn_url lwn_urls) test_http_uris_matched :: Assertion test_http_uris_matched = assertEqual (url ++ " is HTTP") True (http uri) where url = "http://lwn.net/Articles/500844/bigpage" uri = fromJust $ parseAbsoluteURI url test_https_uris_matched :: Assertion test_https_uris_matched = assertEqual (url ++ " is HTTPS") True (https uri) where url = "https://lwn.net/Articles/500844/bigpage" uri = fromJust $ parseAbsoluteURI url uri_tests :: Test uri_tests = testGroup "URI Tests" [ testCase "HTTP URIs matched" test_http_uris_matched, testCase "HTTPS URIs matched" test_https_uris_matched, testCase "LWN URLs matched" test_lwn_urls_matched ]