4 import Data.List (isSuffixOf)
5 import Data.Maybe (fromJust)
6 import Data.String.Utils (split)
15 import Test.HUnit (Assertion, assertEqual)
16 import Test.Framework (Test, testGroup)
17 import Test.Framework.Providers.HUnit (testCase)
18 import Text.Regex.Posix ((=~))
20 -- Distinguish between URLs (Strings) and URIs as provided by the
21 -- Network.URI module.
25 -- | Is this URI's scheme plain HTTP?
27 http uri = (uriScheme uri) == "http:"
29 -- | Is this URI's scheme (secure) HTTPS?
31 https uri = (uriScheme uri) == "https:"
33 -- | Does this URI use an HTTP-compatible port?
34 http_port :: URI -> Bool
39 (uriPort auth) `elem` ["", ":80"]
41 parse_result = uriAuthority uri
44 make_https :: URL -> URL
47 Nothing -> url -- Shrug?
50 show $ uri { uriScheme = "https:" }
52 url -- Leave non-http URLs alone.
54 parse_result = parseURIReference url
57 add_trailing_slash :: URL -> URL
58 add_trailing_slash url =
60 Nothing -> url -- Shrug?
62 let old_path = uriPath uri in
63 if (isSuffixOf "/" old_path) || (isSuffixOf "bigpage" old_path) then
64 -- It already had a trailing slash, or it's a 'bigpage' URL.
65 -- Trailing slashes after 'bigpage' don't work.
68 show $ uri { uriPath = old_path ++ "/" }
70 parse_result = parseURIReference url
73 -- | Does this URI use an HTTPS-compatible port?
74 https_port :: URI -> Bool
79 (uriPort auth) `elem` ["", ":443"]
81 parse_result = uriAuthority uri
84 -- | Does this URI have one of the LWN hostnames?
85 lwn_host :: URI -> Bool
90 (uriRegName auth) `elem` ["lwn.net", "www.lwn.net"]
92 parse_result = uriAuthority uri
95 -- | Is the protocol portion of this URI one of the ones that LWN
97 lwn_proto :: URI -> Bool
99 ((http u) && (http_port u)) || ((https u) && (https_port u))
101 -- | Is the server/port to which we're connecting an LWN server?
102 lwn_server :: URI -> Bool
104 lwn_proto u && lwn_host u
106 -- | Is this URI's path for an LWN article?
107 lwn_article_path :: URI -> Bool
108 lwn_article_path uri =
109 path =~ "^/current(/bigpage)?/?$" ||
110 path =~ "^/Articles/[0-9]+(/bigpage)?/?$"
114 -- | Is this URI's query one that LWN uses? The only query string
115 -- that LWN articles use is the printable-page one.
116 lwn_query :: URI -> Bool
118 query `elem` ["", "?format=printable"]
122 -- | Combine all of the other URI tests to determine if this 'URL'
123 -- belongs to an LWN article.
124 is_lwn_url :: URL -> Bool
128 Just uri -> (lwn_server uri) && (lwn_article_path uri) && (lwn_query uri)
130 parse_result = parseAbsoluteURI s
134 filename :: URL -> Maybe String
139 let components = split "/" (uriPath uri) in
140 -- Reverse them so that the filename comes first for easier
142 let reverse_components = reverse components in
143 case reverse_components of
147 parse_result = parseURIReference url
151 make_absolute_uri :: URI -> Maybe URI
152 make_absolute_uri relative_uri =
153 relativeTo relative_uri base_uri
155 base_auth = URIAuth { uriUserInfo = "",
156 uriRegName = "lwn.net",
158 base_uri = URI { uriScheme = "https:",
159 uriAuthority = Just base_auth,
165 make_absolute_url :: URL -> Maybe URL
166 make_absolute_url relative_url =
170 let abs_result = make_absolute_uri relative_uri in
173 Just abs_uri -> Just $ show abs_uri
175 parse_result = parseURIReference relative_url
177 -- | Like 'make_absolute_url', except that it returns its input instead
178 -- of 'Nothing' if the conversion to an absolute URL fails.
179 try_make_absolute_url :: URL -> URL
180 try_make_absolute_url url =
181 case make_absolute_url url of
183 Just abs_url -> abs_url
185 -- | A list of LWN URLs to use during testing.
187 lwn_urls = [ proto ++ www ++ "lwn.net" ++ path ++ bigpage ++ query |
188 proto <- ["http://", "https://"],
190 bigpage <- ["", "/bigpage"],
191 query <- ["", "?format=printable"],
192 path <- [ "/current",
194 "/Articles/502371" ] ]
-- | Assert that 'is_lwn_url' returns 'True' for every element of
-- 'lwn_urls'. Because the check is a single 'all' over the list, a
-- failure reports only that some URL was rejected, not which one.
196 test_lwn_urls_matched :: Assertion
197 test_lwn_urls_matched =
198 assertEqual "All LWN URLs matched" True (all is_lwn_url lwn_urls)
200 test_http_uris_matched :: Assertion
201 test_http_uris_matched =
202 assertEqual (url ++ " is HTTP") True (http uri)
204 url = "http://lwn.net/Articles/500844/bigpage"
205 uri = fromJust $ parseAbsoluteURI url
207 test_https_uris_matched :: Assertion
208 test_https_uris_matched =
209 assertEqual (url ++ " is HTTPS") True (https uri)
211 url = "https://lwn.net/Articles/500844/bigpage"
212 uri = fromJust $ parseAbsoluteURI url
215 test_bare_filename_parsed :: Assertion
216 test_bare_filename_parsed =
217 assertEqual "Filename is 'example.jpg'" "example.jpg" actual_result
220 actual_result = fromJust $ filename url
222 test_absolute_filename_parsed :: Assertion
223 test_absolute_filename_parsed =
224 assertEqual "Filename is 'example.jpg'" "example.jpg" actual_result
226 url = "http://lwn.net/one/two/example.jpg"
227 actual_result = fromJust $ filename url
229 test_relative_filename_parsed :: Assertion
230 test_relative_filename_parsed =
231 assertEqual "Filename is 'example.jpg'" "example.jpg" actual_result
233 url = "/one/two/example.jpg"
234 actual_result = fromJust $ filename url
237 test_empty_url_conversion :: Assertion
238 test_empty_url_conversion =
239 assertEqual "'' converted to lwn.net" expected actual
241 expected = "https://lwn.net/"
242 actual = fromJust $ make_absolute_url ""
245 test_normal_url_conversion :: Assertion
246 test_normal_url_conversion =
247 assertEqual "Image URL is made absolute" expected actual
249 url = "/images/2012/lcj-coughlan-lattimer-sm.jpg"
250 expected = "https://lwn.net/images/2012/lcj-coughlan-lattimer-sm.jpg"
251 actual = fromJust $ make_absolute_url url
255 test_make_https :: Assertion
257 assertEqual "HTTP URL is made HTTPS" expected actual
259 url = "http://lwn.net/current"
260 expected = "https://lwn.net/current"
261 actual = make_https url
264 test_add_trailing_slash :: Assertion
265 test_add_trailing_slash =
266 assertEqual "Trailing slashes get added" expected actual
268 url = "https://lwn.net/current"
269 expected = "https://lwn.net/current/"
270 actual = add_trailing_slash url
275 testGroup "URI Tests" [
277 testGroup "URI Matching" [
278 testCase "HTTP URIs matched" test_http_uris_matched,
279 testCase "HTTPS URIs matched" test_https_uris_matched,
280 testCase "LWN URLs matched" test_lwn_urls_matched ],
282 testGroup "Filename Parsing" [
283 testCase "Bare filename parsed" test_bare_filename_parsed,
284 testCase "Absolute filename parsed" test_absolute_filename_parsed,
285 testCase "Relative filename parsed" test_relative_filename_parsed ],
287 testGroup "Relative -> Absolute Conversion" [
288 testCase "Empty URL converted to lwn.net" test_empty_url_conversion,
289 testCase "Normal URL made absolute" test_normal_url_conversion ],
291 testGroup "URL Mangling" [
292 testCase "HTTP URLs are made HTTPS" test_make_https,
293 testCase "Trailing slashes get added" test_add_trailing_slash ]