]> gitweb.michael.orlitzky.com - dead/lwn-epub.git/blob - src/LWN/URI.hs
Bump a few dependencies, fix compilation failures.
[dead/lwn-epub.git] / src / LWN / URI.hs
1 module LWN.URI
2 where
3
import Data.Char (toLower)
import Data.List (isSuffixOf)
import Data.Maybe (fromJust)
import Data.String.Utils (split)
import Network.URI (
  URI(..),
  URIAuth(..),
  parseAbsoluteURI,
  parseURIReference,
  relativeTo,
  uriRegName,
  uriToString
  )
import Test.HUnit (Assertion, assertEqual)
import Test.Framework (Test, testGroup)
import Test.Framework.Providers.HUnit (testCase)
import Text.Regex.Posix ((=~))
19
-- | A URL in its textual form. The alias exists to tell plain
--   'String' URLs apart from the parsed 'URI' values provided by the
--   Network.URI module.
type URL = String
23
24
-- | Is this URI's scheme plain HTTP?
--
--   The comparison ignores case: URI schemes are case-insensitive
--   (RFC 3986, section 3.1), so a URI written with an upper-case
--   scheme such as @HTTP:@ is recognized too. Note that the value of
--   'uriScheme' includes the trailing colon.
http :: URI -> Bool
http uri = map toLower (uriScheme uri) == "http:"
28
-- | Is this URI's scheme (secure) HTTPS?
--
--   The comparison ignores case: URI schemes are case-insensitive
--   (RFC 3986, section 3.1), so a URI written with an upper-case
--   scheme such as @HTTPS:@ is recognized too. Note that the value of
--   'uriScheme' includes the trailing colon.
https :: URI -> Bool
https uri = map toLower (uriScheme uri) == "https:"
32
-- | Does this URI use an HTTP-compatible port? That is the case when
--   the port is either left out entirely or is given as 80. A URI
--   without an authority component never qualifies.
http_port :: URI -> Bool
http_port uri = maybe False port_ok (uriAuthority uri)
  where
    -- 'uriPort' keeps its leading colon, hence ":80".
    port_ok auth = uriPort auth `elem` ["", ":80"]
42
43
-- | Rewrite a plain-HTTP URL so that it uses HTTPS instead.
--
--   URLs that cannot be parsed, and URLs that 'http' does not
--   recognize as plain HTTP, are returned unchanged.
--
--   The result is rendered with @uriToString id@ rather than 'show':
--   the 'Show' instance for 'URI' replaces a password in the
--   user-info part with @...@, which would silently corrupt any URL
--   that carries credentials.
make_https :: URL -> URL
make_https url =
  case parseURIReference url of
    Just uri | http uri ->
      uriToString id (uri { uriScheme = "https:" }) ""
    -- Unparseable, or not plain HTTP: leave the input alone.
    _ -> url
55
56
-- | Append a trailing slash to the path of a URL.
--
--   The input is returned unchanged when it cannot be parsed, when
--   its path already ends with a slash, or when its path ends with
--   @bigpage@ (trailing slashes after 'bigpage' don't work).
--
--   The result is rendered with @uriToString id@ rather than 'show':
--   the 'Show' instance for 'URI' replaces a password in the
--   user-info part with @...@, which would silently corrupt any URL
--   that carries credentials.
add_trailing_slash :: URL -> URL
add_trailing_slash url =
  case parseURIReference url of
    Nothing -> url
    Just uri
      | needs_no_slash (uriPath uri) -> url
      | otherwise ->
          uriToString id (uri { uriPath = uriPath uri ++ "/" }) ""
  where
    -- Only the path is inspected, so a query string or fragment
    -- after the path does not get in the way.
    needs_no_slash path =
      "/" `isSuffixOf` path || "bigpage" `isSuffixOf` path
71
72
-- | Does this URI use an HTTPS-compatible port? That is the case when
--   the port is either left out entirely or is given as 443. A URI
--   without an authority component never qualifies.
https_port :: URI -> Bool
https_port uri = maybe False port_ok (uriAuthority uri)
  where
    -- 'uriPort' keeps its leading colon, hence ":443".
    port_ok auth = uriPort auth `elem` ["", ":443"]
82
83
-- | Does this URI have one of the LWN hostnames (@lwn.net@ or
--   @www.lwn.net@)?
--
--   The host is compared case-insensitively, because the host part
--   of a URI is case-insensitive (RFC 3986, section 3.2.2). A URI
--   without an authority component has no host and never qualifies.
lwn_host :: URI -> Bool
lwn_host uri =
  case uriAuthority uri of
    Nothing -> False
    Just auth ->
      map toLower (uriRegName auth) `elem` ["lwn.net", "www.lwn.net"]
93
94
-- | Is the protocol portion of this URI one that LWN uses? That
--   means a scheme together with a port that is compatible with it:
--   HTTP with an HTTP port, or HTTPS with an HTTPS port.
lwn_proto :: URI -> Bool
lwn_proto u = any agrees [(http, http_port), (https, https_port)]
  where
    -- Both the scheme test and its matching port test must pass.
    agrees (scheme_ok, port_ok) = scheme_ok u && port_ok u
100
-- | Is the server/port to which we're connecting an LWN server? Both
--   the protocol check ('lwn_proto') and the hostname check
--   ('lwn_host') have to succeed.
lwn_server :: URI -> Bool
lwn_server u = all ($ u) [lwn_proto, lwn_host]
105
-- | Is this URI's path for an LWN article? Two shapes are accepted,
--   each with an optional @/bigpage@ suffix and an optional trailing
--   slash: @/current@, and @/Articles/@ followed by digits.
lwn_article_path :: URI -> Bool
lwn_article_path uri = any matches article_regexes
  where
    matches :: String -> Bool
    matches regex = uriPath uri =~ regex

    article_regexes :: [String]
    article_regexes =
      [ "^/current(/bigpage)?/?$",
        "^/Articles/[0-9]+(/bigpage)?/?$" ]
113
-- | Is this URI's query one that LWN uses? An absent query is fine;
--   otherwise the only query string that the LWN articles use is the
--   one for the printable page.
lwn_query :: URI -> Bool
lwn_query uri =
  case uriQuery uri of
    "" -> True
    "?format=printable" -> True
    _ -> False
121
-- | Decide whether or not this 'URL' belongs to an LWN article by
--   combining the server, path, and query tests. A string that does
--   not parse as an absolute URI is never an LWN URL.
is_lwn_url :: URL -> Bool
is_lwn_url s = maybe False is_article (parseAbsoluteURI s)
  where
    is_article uri =
      all ($ uri) [lwn_server, lwn_article_path, lwn_query]
131
132
133
-- | Extract the filename from a URL, that is, the last
--   slash-separated component of its path. Returns 'Nothing' if the
--   URL does not parse as a URI reference, or if splitting its path
--   produces no components at all.
--
--   Known bug (noted by the original author): this doesn't work on
--   unicode paths -- presumably because the parser rejects them;
--   TODO confirm.
filename :: URL -> Maybe String
filename url = do
  uri <- parseURIReference url
  case split "/" (uriPath uri) of
    [] -> Nothing
    -- The list is non-empty here, so 'last' is safe.
    components -> Just (last components)
149
150
151
-- | Interpret a URI relative to the root of the LWN site,
--   @https://lwn.net/@, using 'relativeTo'.
make_absolute_uri :: URI -> URI
make_absolute_uri relative_uri = relative_uri `relativeTo` lwn_root
  where
    -- The site root: HTTPS, host lwn.net, no user info, no explicit
    -- port, and nothing after the path "/".
    lwn_root =
      URI { uriScheme = "https:",
            uriAuthority = Just lwn_auth,
            uriPath = "/",
            uriQuery = "",
            uriFragment = "" }

    lwn_auth =
      URIAuth { uriUserInfo = "",
                uriRegName = "lwn.net",
                uriPort = "" }
164
165
-- | Make a (possibly relative) URL absolute by resolving it with
--   'make_absolute_uri'. Returns 'Nothing' if the URL does not parse
--   as a URI reference.
--
--   The result is rendered with @uriToString id@ rather than 'show':
--   the 'Show' instance for 'URI' replaces a password in the
--   user-info part with @...@, which would silently corrupt any URL
--   that carries credentials.
make_absolute_url :: URL -> Maybe URL
make_absolute_url relative_url =
  fmap render (parseURIReference relative_url)
  where
    render relative_uri =
      uriToString id (make_absolute_uri relative_uri) ""
174
-- | A forgiving variant of 'make_absolute_url': when the conversion
--   fails and yields 'Nothing', the input is handed back untouched.
try_make_absolute_url :: URL -> URL
try_make_absolute_url url = maybe url id (make_absolute_url url)
182
-- | A list of LWN URLs to use during testing. It contains every
--   combination of scheme, optional \"www.\" prefix, article path,
--   optional \"/bigpage\" suffix, and optional printable-page query.
lwn_urls :: [URL]
lwn_urls = do
  -- The generators are drawn in this order to fix the ordering of
  -- the resulting list.
  proto <- ["http://", "https://"]
  www <- ["", "www."]
  bigpage <- ["", "/bigpage"]
  query <- ["", "?format=printable"]
  path <- ["/current", "/Articles/500844", "/Articles/502371"]
  return $ concat [proto, www, "lwn.net", path, bigpage, query]
193
-- | Every URL in 'lwn_urls' should be recognized by 'is_lwn_url'.
--
--   The unrecognized URLs are compared against the empty list
--   (instead of comparing a single 'Bool' against 'True') so that a
--   failure report names the offending URLs.
test_lwn_urls_matched :: Assertion
test_lwn_urls_matched =
  assertEqual "All LWN URLs matched" [] unmatched
  where
    unmatched = filter (not . is_lwn_url) lwn_urls
197
-- | A plain-HTTP article URL should be recognized by 'http'.
--
--   The parse result is compared as a 'Maybe' so that a parse
--   failure shows up as an ordinary test failure rather than as an
--   exception thrown by a partial function.
test_http_uris_matched :: Assertion
test_http_uris_matched =
  assertEqual (url ++ " is HTTP") (Just True) (fmap http parsed)
  where
    url = "http://lwn.net/Articles/500844/bigpage"
    parsed = parseAbsoluteURI url
204
-- | An HTTPS article URL should be recognized by 'https'.
--
--   The parse result is compared as a 'Maybe' so that a parse
--   failure shows up as an ordinary test failure rather than as an
--   exception thrown by a partial function.
test_https_uris_matched :: Assertion
test_https_uris_matched =
  assertEqual (url ++ " is HTTPS") (Just True) (fmap https parsed)
  where
    url = "https://lwn.net/Articles/500844/bigpage"
    parsed = parseAbsoluteURI url
211
212
-- | A bare filename, with no directories in front of it, should be
--   returned as-is by 'filename'.
--
--   The result is compared as a 'Maybe' so that a 'Nothing' shows up
--   as an ordinary test failure rather than as an exception thrown
--   by a partial function.
test_bare_filename_parsed :: Assertion
test_bare_filename_parsed =
  assertEqual "Filename is 'example.jpg'" (Just "example.jpg") actual
  where
    url = "example.jpg"
    actual = filename url
219
-- | The filename should be extracted from an absolute URL.
--
--   The result is compared as a 'Maybe' so that a 'Nothing' shows up
--   as an ordinary test failure rather than as an exception thrown
--   by a partial function.
test_absolute_filename_parsed :: Assertion
test_absolute_filename_parsed =
  assertEqual "Filename is 'example.jpg'" (Just "example.jpg") actual
  where
    url = "http://lwn.net/one/two/example.jpg"
    actual = filename url
226
-- | The filename should be extracted from a URL that consists of a
--   path only.
--
--   The result is compared as a 'Maybe' so that a 'Nothing' shows up
--   as an ordinary test failure rather than as an exception thrown
--   by a partial function.
test_relative_filename_parsed :: Assertion
test_relative_filename_parsed =
  assertEqual "Filename is 'example.jpg'" (Just "example.jpg") actual
  where
    url = "/one/two/example.jpg"
    actual = filename url
233
234
-- | Making the empty URL absolute should produce the LWN site root.
--
--   The result is compared as a 'Maybe' so that a 'Nothing' shows up
--   as an ordinary test failure rather than as an exception thrown
--   by a partial function.
test_empty_url_conversion :: Assertion
test_empty_url_conversion =
  assertEqual "'' converted to lwn.net" expected actual
  where
    expected = Just "https://lwn.net/"
    actual = make_absolute_url ""
241
242
-- | A site-relative image URL should be made absolute by prefixing
--   it with the LWN scheme and host.
--
--   The result is compared as a 'Maybe' so that a 'Nothing' shows up
--   as an ordinary test failure rather than as an exception thrown
--   by a partial function.
test_normal_url_conversion :: Assertion
test_normal_url_conversion =
  assertEqual "Image URL is made absolute" expected actual
  where
    url = "/images/2012/lcj-coughlan-lattimer-sm.jpg"
    expected =
      Just "https://lwn.net/images/2012/lcj-coughlan-lattimer-sm.jpg"
    actual = make_absolute_url url
250
251
252
-- | 'make_https' should swap the scheme of a plain-HTTP URL for
--   HTTPS and leave the rest of the URL as it was.
test_make_https :: Assertion
test_make_https =
  assertEqual "HTTP URL is made HTTPS" "https://lwn.net/current" $
    make_https "http://lwn.net/current"
260
261
-- | 'add_trailing_slash' should append a slash to a URL whose path
--   does not yet end with one.
test_add_trailing_slash :: Assertion
test_add_trailing_slash =
  assertEqual "Trailing slashes get added" "https://lwn.net/current/" $
    add_trailing_slash "https://lwn.net/current"
269
270
-- | All of the tests in this module, collected into one group with a
--   subgroup per topic.
uri_tests :: Test
uri_tests =
  testGroup "URI Tests" [matching, filenames, conversion, mangling]
  where
    matching =
      testGroup "URI Matching"
        [ testCase "HTTP URIs matched" test_http_uris_matched,
          testCase "HTTPS URIs matched" test_https_uris_matched,
          testCase "LWN URLs matched" test_lwn_urls_matched ]

    filenames =
      testGroup "Filename Parsing"
        [ testCase "Bare filename parsed" test_bare_filename_parsed,
          testCase "Absolute filename parsed" test_absolute_filename_parsed,
          testCase "Relative filename parsed" test_relative_filename_parsed ]

    conversion =
      testGroup "Relative -> Absolute Conversion"
        [ testCase "Empty URL converted to lwn.net" test_empty_url_conversion,
          testCase "Normal URL made absolute" test_normal_url_conversion ]

    mangling =
      testGroup "URL Mangling"
        [ testCase "HTTP URLs are made HTTPS" test_make_https,
          testCase "Trailing slashes get added" test_add_trailing_slash ]