]> gitweb.michael.orlitzky.com - dead/lwn-epub.git/blob - src/LWN/URI.hs
3a21413dbf3feab89967ada01da9e13c9bb61531
[dead/lwn-epub.git] / src / LWN / URI.hs
1 module LWN.URI
2 where
3
import Data.Char (toLower)
import Data.List (isSuffixOf)
import Data.Maybe (fromJust)
import Data.String.Utils (split)
import Network.URI (
  URI(..),
  URIAuth(..),
  parseAbsoluteURI,
  parseURIReference,
  relativeTo,
  uriRegName,
  uriToString
  )
import Test.HUnit (Assertion, assertEqual)
import Test.Framework (Test, testGroup)
import Test.Framework.Providers.HUnit (testCase)
import Text.Regex.Posix ((=~))
19
-- | A URL held as a plain 'String', as opposed to the structured
--   'URI' values provided by the Network.URI module.
type URL = String
23
24
-- | Is this URI's scheme plain HTTP?
--
--   URI schemes are case-insensitive, so the scheme is lowercased
--   before it is compared; \"HTTP://lwn.net/\" counts as HTTP too.
--   Note that the 'uriScheme' field carries its trailing colon.
http :: URI -> Bool
http uri = map toLower (uriScheme uri) == "http:"
28
-- | Is this URI's scheme (secure) HTTPS?
--
--   URI schemes are case-insensitive, so the scheme is lowercased
--   before it is compared; \"HTTPS://lwn.net/\" counts as HTTPS too.
--   Note that the 'uriScheme' field carries its trailing colon.
https :: URI -> Bool
https uri = map toLower (uriScheme uri) == "https:"
32
-- | Does this URI use an HTTP-compatible port? That means either no
--   explicit port at all, or port 80. A URI without an authority
--   component never qualifies.
http_port :: URI -> Bool
http_port uri =
  maybe False port_ok (uriAuthority uri)
  where
    -- The 'uriPort' field includes its leading colon.
    port_ok auth = uriPort auth `elem` ["", ":80"]
42
43
-- | Rewrite a plain-HTTP 'URL' so that it uses the HTTPS scheme.
--
--   URLs that cannot be parsed, and URLs whose scheme is not HTTP,
--   are returned untouched.
--
--   The result is rendered with @uriToString id@ rather than 'show':
--   the 'Show' instance for 'URI' replaces any password in the
--   userinfo with \"...\", which would silently corrupt the URL.
make_https :: URL -> URL
make_https url =
  case parseURIReference url of
    Just uri
      | http uri -> uriToString id (uri { uriScheme = "https:" }) ""
    -- Unparseable, or not HTTP: leave it alone.
    _ -> url
55
56
57
-- | Append a \"/\" to the path component of a 'URL' unless the path
--   already ends with one.
--
--   URLs that cannot be parsed, and URLs whose path already has a
--   trailing slash, are returned untouched.
--
--   The result is rendered with @uriToString id@ rather than 'show':
--   the 'Show' instance for 'URI' replaces any password in the
--   userinfo with \"...\", which would silently corrupt the URL.
add_trailing_slash :: URL -> URL
add_trailing_slash url =
  case parseURIReference url of
    Nothing -> url
    Just uri
      | "/" `isSuffixOf` old_path -> url
      | otherwise -> uriToString id (uri { uriPath = old_path ++ "/" }) ""
      where
        old_path = uriPath uri
70
71
-- | Does this URI use an HTTPS-compatible port? That means either no
--   explicit port at all, or port 443. A URI without an authority
--   component never qualifies.
https_port :: URI -> Bool
https_port uri =
  maybe False port_ok (uriAuthority uri)
  where
    -- The 'uriPort' field includes its leading colon.
    port_ok auth = uriPort auth `elem` ["", ":443"]
81
82
-- | Does this URI have one of the LWN hostnames?
--
--   Hostnames are case-insensitive, so the registered name is
--   lowercased before it is compared against the known LWN hosts.
--   A URI without an authority component never matches.
lwn_host :: URI -> Bool
lwn_host uri =
  case uriAuthority uri of
    Nothing -> False
    Just auth -> map toLower (uriRegName auth) `elem` lwn_hostnames
  where
    lwn_hostnames = ["lwn.net", "www.lwn.net"]
92
93
-- | Is the protocol portion of this URI one that LWN uses? That is,
--   HTTP on an HTTP-compatible port, or HTTPS on an HTTPS-compatible
--   port.
lwn_proto :: URI -> Bool
lwn_proto u
  -- The two scheme tests are mutually exclusive, so at most one of
  -- the first two guards can fire.
  | http u = http_port u
  | https u = https_port u
  | otherwise = False
99
-- | Is the server/port to which we're connecting an LWN server? Both
--   the protocol check and the hostname check have to pass.
lwn_server :: URI -> Bool
lwn_server u = and [lwn_proto u, lwn_host u]
104
-- | Is this URI's path for an LWN article? The path must match one
--   of the two article patterns: the current issue, or a numbered
--   article, each optionally followed by \"/bigpage\" and an
--   optional trailing slash.
lwn_article_path :: URI -> Bool
lwn_article_path uri =
  any (\pattern -> uriPath uri =~ pattern) article_patterns
  where
    article_patterns :: [String]
    article_patterns = [ "^/current(/bigpage)?/?$",
                         "^/Articles/[0-9]+(/bigpage)?/?$" ]
112
-- | Is this URI's query one that LWN uses? Only two are accepted: no
--   query string at all, and the printable-page one.
lwn_query :: URI -> Bool
lwn_query uri =
  null q || q == "?format=printable"
  where
    -- The 'uriQuery' field includes its leading question mark.
    q = uriQuery uri
120
-- | Combine all of the other URI tests to determine if this 'URL'
--   belongs to an LWN article. A 'URL' that does not parse as an
--   absolute URI is never an LWN URL.
is_lwn_url :: URL -> Bool
is_lwn_url s =
  maybe False passes_all (parseAbsoluteURI s)
  where
    -- Checked in order: server, then path, then query string.
    passes_all uri = all ($ uri) [lwn_server, lwn_article_path, lwn_query]
130
131
132
-- | Extract the filename (the last \"/\"-separated component of the
--   path) from a 'URL'.
--
--   Returns 'Nothing' if the 'URL' cannot be parsed, if its path is
--   empty, or if its path ends in a slash. In that last case the
--   final component is the empty string, which is not a usable
--   filename, so it is reported as 'Nothing' rather than @Just \"\"@.
filename :: URL -> Maybe String
filename url = do
  uri <- parseURIReference url
  -- Reverse the components so that the filename comes first for
  -- easier pattern-matching.
  case reverse (split "/" (uriPath uri)) of
    (x:_) | not (null x) -> Just x
    _ -> Nothing
147
148
149
-- | Resolve a (possibly relative) 'URI' against the LWN root,
--   @https://lwn.net/@, by handing both to 'relativeTo'. The result
--   is whatever 'relativeTo' produces for that pair.
make_absolute_uri :: URI -> Maybe URI
make_absolute_uri relative_uri =
  relative_uri `relativeTo` lwn_root
  where
    -- Fields, in order: user info, registered name, port.
    lwn_auth = URIAuth "" "lwn.net" ""
    -- Fields, in order: scheme, authority, path, query, fragment.
    lwn_root = URI "https:" (Just lwn_auth) "/" "" ""
162
163
-- | Make a (possibly relative) 'URL' absolute with respect to the
--   LWN root; see 'make_absolute_uri'.
--
--   Returns 'Nothing' if the 'URL' cannot be parsed, or if
--   'make_absolute_uri' fails.
--
--   The result is rendered with @uriToString id@ rather than 'show':
--   the 'Show' instance for 'URI' replaces any password in the
--   userinfo with \"...\", which would silently corrupt the URL.
make_absolute_url :: URL -> Maybe URL
make_absolute_url relative_url = do
  relative_uri <- parseURIReference relative_url
  abs_uri <- make_absolute_uri relative_uri
  return (uriToString id abs_uri "")
175
-- | Like 'make_absolute_url', except that the input is handed back
--   unchanged, instead of 'Nothing', when the conversion fails.
try_make_absolute_url :: URL -> URL
try_make_absolute_url url = maybe url id (make_absolute_url url)
183
-- | A list of LWN URLs to use during testing: every combination of
--   scheme, optional \"www.\" prefix, article path, optional
--   \"/bigpage\" suffix, and optional printable query string.
lwn_urls :: [URL]
lwn_urls = do
  proto   <- ["http://", "https://"]
  www     <- ["", "www."]
  bigpage <- ["", "/bigpage"]
  query   <- ["", "?format=printable"]
  path    <- ["/current", "/Articles/500844", "/Articles/502371"]
  return $ concat [proto, www, "lwn.net", path, bigpage, query]
194
-- | Every URL in 'lwn_urls' should be recognized by 'is_lwn_url'.
test_lwn_urls_matched :: Assertion
test_lwn_urls_matched =
  assertEqual "All LWN URLs matched" True every_url_matches
  where
    every_url_matches = and (map is_lwn_url lwn_urls)
198
-- | An @http://@ article URL should be recognized as HTTP.
test_http_uris_matched :: Assertion
test_http_uris_matched =
  let url = "http://lwn.net/Articles/500844/bigpage"
      uri = fromJust (parseAbsoluteURI url)
  in
    assertEqual (url ++ " is HTTP") True (http uri)
205
-- | An @https://@ article URL should be recognized as HTTPS.
test_https_uris_matched :: Assertion
test_https_uris_matched =
  let url = "https://lwn.net/Articles/500844/bigpage"
      uri = fromJust (parseAbsoluteURI url)
  in
    assertEqual (url ++ " is HTTPS") True (https uri)
212
213
-- | A URL that is nothing but a filename should yield that filename.
test_bare_filename_parsed :: Assertion
test_bare_filename_parsed =
  let parsed = fromJust (filename "example.jpg")
  in
    assertEqual "Filename is 'example.jpg'" "example.jpg" parsed
220
-- | The filename should be extracted from a fully-qualified URL.
test_absolute_filename_parsed :: Assertion
test_absolute_filename_parsed =
  let parsed = fromJust (filename "http://lwn.net/one/two/example.jpg")
  in
    assertEqual "Filename is 'example.jpg'" "example.jpg" parsed
227
-- | The filename should be extracted from a host-relative path.
test_relative_filename_parsed :: Assertion
test_relative_filename_parsed =
  let parsed = fromJust (filename "/one/two/example.jpg")
  in
    assertEqual "Filename is 'example.jpg'" "example.jpg" parsed
234
235
-- | Making the empty URL absolute should give the LWN root.
test_empty_url_conversion :: Assertion
test_empty_url_conversion =
  let converted = fromJust (make_absolute_url "")
  in
    assertEqual "'' converted to lwn.net" "https://lwn.net/" converted
242
243
-- | A host-relative image path should be made absolute under the
--   LWN root.
test_normal_url_conversion :: Assertion
test_normal_url_conversion =
  let image_path = "/images/2012/lcj-coughlan-lattimer-sm.jpg"
      wanted = "https://lwn.net/images/2012/lcj-coughlan-lattimer-sm.jpg"
      converted = fromJust (make_absolute_url image_path)
  in
    assertEqual "Image URL is made absolute" wanted converted
251
252
253
-- | 'make_https' should swap an HTTP scheme for HTTPS.
test_make_https :: Assertion
test_make_https =
  let wanted = "https://lwn.net/current"
      converted = make_https "http://lwn.net/current"
  in
    assertEqual "HTTP URL is made HTTPS" wanted converted
261
262
-- | 'add_trailing_slash' should append a slash to a path that lacks
--   one.
test_add_trailing_slash :: Assertion
test_add_trailing_slash =
  let wanted = "https://lwn.net/current/"
      converted = add_trailing_slash "https://lwn.net/current"
  in
    assertEqual "Trailing slashes get added" wanted converted
270
271
-- | The complete test tree for this module, with the individual
--   assertions grouped by the area they exercise.
uri_tests :: Test
uri_tests =
  testGroup "URI Tests" [ matching, filenames, conversion, mangling ]
  where
    matching =
      testGroup "URI Matching" [
        testCase "HTTP URIs matched" test_http_uris_matched,
        testCase "HTTPS URIs matched" test_https_uris_matched,
        testCase "LWN URLs matched" test_lwn_urls_matched ]

    filenames =
      testGroup "Filename Parsing" [
        testCase "Bare filename parsed" test_bare_filename_parsed,
        testCase "Absolute filename parsed" test_absolute_filename_parsed,
        testCase "Relative filename parsed" test_relative_filename_parsed ]

    conversion =
      testGroup "Relative -> Absolute Conversion" [
        testCase "Empty URL converted to lwn.net" test_empty_url_conversion,
        testCase "Normal URL made absolute" test_normal_url_conversion ]

    mangling =
      testGroup "URL Mangling" [
        testCase "HTTP URLs are made HTTPS" test_make_https,
        testCase "Trailing slashes get added" test_add_trailing_slash ]