Add config file parsing.
[dead/lwn-epub.git] / src / LWN / URI.hs
1 module LWN.URI
2 where
3
4 import Data.Maybe (fromJust)
5 import Data.String.Utils (split)
6 import Network.URI (
7 URI(..),
8 URIAuth(..),
9 parseAbsoluteURI,
10 parseURIReference,
11 relativeTo,
12 uriRegName
13 )
14 import Test.HUnit (Assertion, assertEqual)
15 import Test.Framework (Test, testGroup)
16 import Test.Framework.Providers.HUnit (testCase)
17 import Text.Regex.Posix ((=~))
18
19 -- Distinguish between URLs (Strings) and URIs as provided by the
20 -- Network.URI module.
21 type URL = String
22
23
24 -- | Is this URI's scheme plain HTTP?
25 http :: URI -> Bool
26 http uri = (uriScheme uri) == "http:"
27
28 -- | Is this URI's scheme (secure) HTTPS?
29 https :: URI -> Bool
30 https uri = (uriScheme uri) == "https:"
31
32 -- | Does this URI use an HTTP-compatible port?
33 http_port :: URI -> Bool
34 http_port uri =
35 case parse_result of
36 Nothing -> False
37 Just auth ->
38 (uriPort auth) `elem` ["", ":80"]
39 where
40 parse_result = uriAuthority uri
41
42
43 -- | Does this URI use an HTTPS-compatible port?
44 https_port :: URI -> Bool
45 https_port uri =
46 case parse_result of
47 Nothing -> False
48 Just auth ->
49 (uriPort auth) `elem` ["", ":443"]
50 where
51 parse_result = uriAuthority uri
52
53
54 -- | Does this URL have one of the LWN hostnames?
55 lwn_host :: URI -> Bool
56 lwn_host uri =
57 case parse_result of
58 Nothing -> False
59 Just auth ->
60 (uriRegName auth) `elem` ["lwn.net", "www.lwn.net"]
61 where
62 parse_result = uriAuthority uri
63
64
65 -- | Is the protocol portion of this URI one of the ones that LWN
66 -- uses?
67 lwn_proto :: URI -> Bool
68 lwn_proto u =
69 ((http u) && (http_port u)) || ((https u) && (https_port u))
70
71 -- | Is the server/port to which we're connecting an LWN server?
72 lwn_server :: URI -> Bool
73 lwn_server u =
74 lwn_proto u && lwn_host u
75
76 -- | Is this URI's path for an LWN article?
77 lwn_article_path :: URI -> Bool
78 lwn_article_path uri =
79 path =~ "^/current(/bigpage)?/?$" ||
80 path =~ "^/Articles/[0-9]+(/bigpage)?/?$"
81 where
82 path = uriPath uri
83
84 -- | Is this URI's query one that the LWN uses? The only query string
85 -- that the LWN articles use is the printable page one.
86 lwn_query :: URI -> Bool
87 lwn_query uri =
88 query `elem` ["", "?format=printable"]
89 where
90 query = uriQuery uri
91
92 -- | Combine all of the other URI tests to determine if this 'URL'
93 -- belongs to an LWN article.
94 is_lwn_url :: URL -> Bool
95 is_lwn_url s =
96 case parse_result of
97 Nothing -> False
98 Just uri -> (lwn_server uri) && (lwn_article_path uri) && (lwn_query uri)
99 where
100 parse_result = parseAbsoluteURI s
101
102
103
104 filename :: URL -> Maybe String
105 filename url =
106 case parse_result of
107 Nothing -> Nothing
108 Just uri ->
109 let components = split "/" (uriPath uri) in
110 -- Reverse them so that the filename comes first for easier
111 -- pattern-matching.
112 let reverse_components = reverse components in
113 case reverse_components of
114 [] -> Nothing
115 (x:_) -> Just x
116 where
117 parse_result = parseURIReference url
118
119
120
121 make_absolute_uri :: URI -> Maybe URI
122 make_absolute_uri relative_uri =
123 relativeTo relative_uri base_uri
124 where
125 base_auth = URIAuth { uriUserInfo = "",
126 uriRegName = "lwn.net",
127 uriPort = "" }
128 base_uri = URI { uriScheme = "https:",
129 uriAuthority = Just base_auth,
130 uriPath = "/",
131 uriQuery = "",
132 uriFragment = "" }
133
134
135 make_absolute_url :: URL -> Maybe URL
136 make_absolute_url relative_url =
137 case parse_result of
138 Nothing -> Nothing
139 Just relative_uri ->
140 let abs_result = make_absolute_uri relative_uri in
141 case abs_result of
142 Nothing -> Nothing
143 Just abs_uri -> Just $ show abs_uri
144 where
145 parse_result = parseURIReference relative_url
146
147 -- | Like 'make_absolute_url', except returns its input instead of
148 -- 'Nothing' if the absolution fails.
149 try_make_absolute_url :: URL -> URL
150 try_make_absolute_url url =
151 case make_absolute_url url of
152 Nothing -> url
153 Just abs_url -> abs_url
154
155 -- | A List of LWN URLs to use during testing.
156 lwn_urls :: [URL]
157 lwn_urls = [ proto ++ www ++ "lwn.net" ++ path ++ bigpage ++ query |
158 proto <- ["http://", "https://"],
159 www <- ["", "www."],
160 bigpage <- ["", "/bigpage"],
161 query <- ["", "?format=printable"],
162 path <- [ "/current",
163 "/Articles/500844",
164 "/Articles/502371" ] ]
165
166 test_lwn_urls_matched :: Assertion
167 test_lwn_urls_matched =
168 assertEqual "All LWN URLs matched" True (all is_lwn_url lwn_urls)
169
170 test_http_uris_matched :: Assertion
171 test_http_uris_matched =
172 assertEqual (url ++ " is HTTP") True (http uri)
173 where
174 url = "http://lwn.net/Articles/500844/bigpage"
175 uri = fromJust $ parseAbsoluteURI url
176
177 test_https_uris_matched :: Assertion
178 test_https_uris_matched =
179 assertEqual (url ++ " is HTTPS") True (https uri)
180 where
181 url = "https://lwn.net/Articles/500844/bigpage"
182 uri = fromJust $ parseAbsoluteURI url
183
184
185 test_bare_filename_parsed :: Assertion
186 test_bare_filename_parsed =
187 assertEqual "Filename is 'example.jpg'" "example.jpg" actual_result
188 where
189 url = "example.jpg"
190 actual_result = fromJust $ filename url
191
192 test_absolute_filename_parsed :: Assertion
193 test_absolute_filename_parsed =
194 assertEqual "Filename is 'example.jpg'" "example.jpg" actual_result
195 where
196 url = "http://lwn.net/one/two/example.jpg"
197 actual_result = fromJust $ filename url
198
199 test_relative_filename_parsed :: Assertion
200 test_relative_filename_parsed =
201 assertEqual "Filename is 'example.jpg'" "example.jpg" actual_result
202 where
203 url = "/one/two/example.jpg"
204 actual_result = fromJust $ filename url
205
206
207 test_empty_url_conversion :: Assertion
208 test_empty_url_conversion =
209 assertEqual "'' converted to lwn.net" expected actual
210 where
211 expected = "https://lwn.net/"
212 actual = fromJust $ make_absolute_url ""
213
214
215 test_normal_url_conversion :: Assertion
216 test_normal_url_conversion =
217 assertEqual "Image URL is made absolute" expected actual
218 where
219 url = "/images/2012/lcj-coughlan-lattimer-sm.jpg"
220 expected = "https://lwn.net/images/2012/lcj-coughlan-lattimer-sm.jpg"
221 actual = fromJust $ make_absolute_url url
222
223
224 uri_tests :: Test
225 uri_tests =
226 testGroup "URI Tests" [
227
228 testGroup "URI Matching" [
229 testCase "HTTP URIs matched" test_http_uris_matched,
230 testCase "HTTPS URIs matched" test_https_uris_matched,
231 testCase "LWN URLs matched" test_lwn_urls_matched ],
232
233 testGroup "Filename Parsing" [
234 testCase "Bare filename parsed" test_bare_filename_parsed,
235 testCase "Absolute filename parsed" test_absolute_filename_parsed,
236 testCase "Relative filename parsed" test_relative_filename_parsed ],
237
238 testGroup "Relative -> Absolute Conversion" [
239 testCase "Empty URL converted to lwn.net" test_empty_url_conversion,
240 testCase "Normal URL made absolute" test_normal_url_conversion ]]