Add some tests for the article URL construction.
[dead/lwn-epub.git] / src / LWN / URI.hs
1 module LWN.URI
2 where
3
4 import Data.Maybe (fromJust)
5 import Data.String.Utils (split)
6 import Network.URI (
7 URI(..),
8 URIAuth(..),
9 parseAbsoluteURI,
10 parseURIReference,
11 relativeTo,
12 uriRegName
13 )
14 import Test.HUnit (Assertion, assertEqual)
15 import Test.Framework (Test, testGroup)
16 import Test.Framework.Providers.HUnit (testCase)
17 import Text.Regex.Posix ((=~))
18
-- | A URL kept as a plain 'String'. The separate name distinguishes
--   such strings from the parsed URI values provided by the
--   Network.URI module.
type URL = String
22
23
-- | True when the scheme component of the URI is exactly @"http:"@
--   (the comparison includes the trailing colon).
http :: URI -> Bool
http = (== "http:") . uriScheme
27
-- | True when the scheme component of the URI is exactly @"https:"@
--   (the comparison includes the trailing colon).
https :: URI -> Bool
https = (== "https:") . uriScheme
31
-- | Is the port of this URI acceptable for plain HTTP? That means
--   either no port at all (@""@) or an explicit @":80"@. A URI
--   without an authority component never qualifies.
http_port :: URI -> Bool
http_port uri =
  maybe False acceptable (uriAuthority uri)
  where
    acceptable auth = uriPort auth `elem` ["", ":80"]
41
42
-- | Switch the scheme of an @http:@ URL to @https:@. A URL that
--   cannot be parsed, or whose scheme is anything other than
--   @http:@, is handed back unchanged. The upgraded URL is produced
--   by calling 'show' on the modified URI.
make_https :: URL -> URL
make_https url =
  maybe url upgrade (parseURIReference url)
  where
    upgrade uri
      | http uri  = show (uri { uriScheme = "https:" })
      | otherwise = url -- Non-http URLs are left alone.
54
55
-- | Is the port of this URI acceptable for HTTPS? That means either
--   no port at all (@""@) or an explicit @":443"@. A URI without an
--   authority component never qualifies.
https_port :: URI -> Bool
https_port uri =
  acceptable (uriAuthority uri)
  where
    acceptable (Just auth) = uriPort auth `elem` ["", ":443"]
    acceptable Nothing     = False
65
66
-- | Is the host name of this URI @lwn.net@ or @www.lwn.net@? A URI
--   without an authority component is never an LWN host.
lwn_host :: URI -> Bool
lwn_host uri =
  maybe False known_host (uriAuthority uri)
  where
    known_host auth = uriRegName auth `elem` ["lwn.net", "www.lwn.net"]
76
77
-- | Does the scheme\/port combination of this URI match what LWN
--   serves? Plain HTTP must pass 'http_port', HTTPS must pass
--   'https_port', and every other scheme is rejected.
lwn_proto :: URI -> Bool
lwn_proto u
  | http u    = http_port u
  | https u   = https_port u
  | otherwise = False
83
-- | Does this URI point at an LWN server? Both the protocol check
--   ('lwn_proto') and the host name check ('lwn_host') must pass.
lwn_server :: URI -> Bool
lwn_server u =
  all ($ u) [lwn_proto, lwn_host]
88
-- | Does the path of this URI look like an LWN article? Accepted
--   paths are @\/current@ and @\/Articles\/\<digits\>@, each optionally
--   followed by @\/bigpage@ and an optional trailing slash.
lwn_article_path :: URI -> Bool
lwn_article_path uri =
  any (uriPath uri =~) article_patterns
  where
    article_patterns =
      [ "^/current(/bigpage)?/?$"
      , "^/Articles/[0-9]+(/bigpage)?/?$"
      ]
96
-- | Is the query string of this URI one that LWN articles use? Only
--   the empty query and the printable-page query
--   (@?format=printable@) are accepted.
lwn_query :: URI -> Bool
lwn_query =
  (`elem` accepted) . uriQuery
  where
    accepted = ["", "?format=printable"]
104
-- | Decide whether a 'URL' refers to an LWN article. The string must
--   parse as an absolute URI and then pass the server, article path
--   and query checks; a string that does not parse is not an LWN
--   URL.
is_lwn_url :: URL -> Bool
is_lwn_url s =
  maybe False article (parseAbsoluteURI s)
  where
    article uri = lwn_server uri && lwn_article_path uri && lwn_query uri
114
115
116
-- | Extract the file name from a 'URL', i.e. the last
--   @\/@-separated component of its path.
--
--   Returns 'Nothing' when the URL cannot be parsed, when its path
--   is empty, or when the path ends in a slash (there is no file
--   name to speak of in that case, so an empty string is never
--   returned).
filename :: URL -> Maybe String
filename url = do
  uri <- parseURIReference url
  -- Reverse the components so that the candidate file name comes
  -- first and can be pattern-matched directly.
  case reverse (split "/" (uriPath uri)) of
    (name@(_:_) : _) -> Just name
    _                -> Nothing
131
132
133
-- | Resolve a URI against the fixed base @https:\/\/lwn.net\/@. The
--   result is whatever 'relativeTo' returns for the given URI and
--   that base; the 'Maybe' in the signature is the one produced by
--   'relativeTo' itself.
make_absolute_uri :: URI -> Maybe URI
make_absolute_uri relative_uri =
  relative_uri `relativeTo` lwn_root
  where
    -- No user info and no explicit port, just the host name.
    lwn_auth = URIAuth "" "lwn.net" ""
    -- Scheme, authority, path, query, fragment.
    lwn_root = URI "https:" (Just lwn_auth) "/" "" ""
146
147
-- | String-level wrapper around 'make_absolute_uri': parse the
--   'URL', make it absolute, and render the result with 'show'.
--   Yields 'Nothing' if either the parse or the conversion fails.
make_absolute_url :: URL -> Maybe URL
make_absolute_url relative_url =
  fmap show (parseURIReference relative_url >>= make_absolute_uri)
159
-- | Like 'make_absolute_url', but falls back to returning the input
--   unchanged when the conversion yields 'Nothing'.
try_make_absolute_url :: URL -> URL
try_make_absolute_url url =
  maybe url id (make_absolute_url url)
167
-- | LWN article URLs used by the tests: every combination of scheme,
--   optional @www.@ prefix, article path, optional @\/bigpage@
--   suffix and optional printable-page query.
lwn_urls :: [URL]
lwn_urls = do
  scheme   <- ["http://", "https://"]
  prefix   <- ["", "www."]
  big      <- ["", "/bigpage"]
  qstring  <- ["", "?format=printable"]
  article  <- ["/current", "/Articles/500844", "/Articles/502371"]
  return (concat [scheme, prefix, "lwn.net", article, big, qstring])
178
-- | Every URL in 'lwn_urls' must be accepted by 'is_lwn_url'. The
--   assertion compares the list of rejected URLs with the empty
--   list, so that a failure reports exactly which URLs were not
--   matched.
test_lwn_urls_matched :: Assertion
test_lwn_urls_matched =
  assertEqual "All LWN URLs matched" [] unmatched
  where
    unmatched = filter (not . is_lwn_url) lwn_urls
182
-- | An @http:\/\/@ article URL must be recognized by 'http'. The
--   parse result is compared as a 'Maybe' so that a parse failure is
--   reported as an assertion mismatch rather than an exception.
test_http_uris_matched :: Assertion
test_http_uris_matched =
  assertEqual (url ++ " is HTTP") (Just True) (fmap http parsed)
  where
    url = "http://lwn.net/Articles/500844/bigpage"
    parsed = parseAbsoluteURI url
189
-- | An @https:\/\/@ article URL must be recognized by 'https'. The
--   parse result is compared as a 'Maybe' so that a parse failure is
--   reported as an assertion mismatch rather than an exception.
test_https_uris_matched :: Assertion
test_https_uris_matched =
  assertEqual (url ++ " is HTTPS") (Just True) (fmap https parsed)
  where
    url = "https://lwn.net/Articles/500844/bigpage"
    parsed = parseAbsoluteURI url
196
197
-- | A URL consisting of nothing but a file name yields that file
--   name. The 'Maybe' result of 'filename' is compared directly, so
--   a 'Nothing' shows up as an assertion mismatch.
test_bare_filename_parsed :: Assertion
test_bare_filename_parsed =
  assertEqual "Filename is 'example.jpg'" (Just "example.jpg") actual_result
  where
    url = "example.jpg"
    actual_result = filename url
204
-- | The file name is extracted from an absolute URL with a
--   multi-component path. The 'Maybe' result of 'filename' is
--   compared directly, so a 'Nothing' shows up as an assertion
--   mismatch.
test_absolute_filename_parsed :: Assertion
test_absolute_filename_parsed =
  assertEqual "Filename is 'example.jpg'" (Just "example.jpg") actual_result
  where
    url = "http://lwn.net/one/two/example.jpg"
    actual_result = filename url
211
-- | The file name is extracted from a host-less URL that starts
--   with a slash. The 'Maybe' result of 'filename' is compared
--   directly, so a 'Nothing' shows up as an assertion mismatch.
test_relative_filename_parsed :: Assertion
test_relative_filename_parsed =
  assertEqual "Filename is 'example.jpg'" (Just "example.jpg") actual_result
  where
    url = "/one/two/example.jpg"
    actual_result = filename url
218
219
-- | Making the empty URL absolute must produce the LWN root URL.
--   The 'Maybe' result of 'make_absolute_url' is compared directly,
--   so a 'Nothing' shows up as an assertion mismatch.
test_empty_url_conversion :: Assertion
test_empty_url_conversion =
  assertEqual "'' converted to lwn.net" expected actual
  where
    expected = Just "https://lwn.net/"
    actual = make_absolute_url ""
226
227
-- | A host-less image path must be turned into an absolute
--   @https:\/\/lwn.net@ URL. The 'Maybe' result of
--   'make_absolute_url' is compared directly, so a 'Nothing' shows
--   up as an assertion mismatch.
test_normal_url_conversion :: Assertion
test_normal_url_conversion =
  assertEqual "Image URL is made absolute" expected actual
  where
    url = "/images/2012/lcj-coughlan-lattimer-sm.jpg"
    expected = Just "https://lwn.net/images/2012/lcj-coughlan-lattimer-sm.jpg"
    actual = make_absolute_url url
235
236
-- | The complete test tree for this module: URI matching, file name
--   parsing, and relative-to-absolute URL conversion.
uri_tests :: Test
uri_tests =
  testGroup "URI Tests" [matching, filenames, conversions]
  where
    matching =
      testGroup "URI Matching"
        [ testCase "HTTP URIs matched" test_http_uris_matched
        , testCase "HTTPS URIs matched" test_https_uris_matched
        , testCase "LWN URLs matched" test_lwn_urls_matched
        ]

    filenames =
      testGroup "Filename Parsing"
        [ testCase "Bare filename parsed" test_bare_filename_parsed
        , testCase "Absolute filename parsed" test_absolute_filename_parsed
        , testCase "Relative filename parsed" test_relative_filename_parsed
        ]

    conversions =
      testGroup "Relative -> Absolute Conversion"
        [ testCase "Empty URL converted to lwn.net" test_empty_url_conversion
        , testCase "Normal URL made absolute" test_normal_url_conversion
        ]