Begin working on image replacemenent.
[dead/lwn-epub.git] / src / LWN / URI.hs
1 module LWN.URI
2 where
3
4 import Data.Maybe (fromJust)
5 import Data.String.Utils (split)
6 import Network.URI (
7 URI(..),
8 parseAbsoluteURI,
9 parseURIReference,
10 relativeTo,
11 uriRegName,
12 )
13 import Test.HUnit (Assertion, assertEqual)
14 import Test.Framework (Test, testGroup)
15 import Test.Framework.Providers.HUnit (testCase)
16 import Text.Regex.Posix
17
18 -- Distinguish between URLs (Strings) and URIs as provided by the
19 -- Network.URI module.
20 type URL = String
21
22
23 -- | Is this URI's scheme plain HTTP?
24 http :: URI -> Bool
25 http uri = (uriScheme uri) == "http:"
26
27 -- | Is this URI's scheme (secure) HTTPS?
28 https :: URI -> Bool
29 https uri = (uriScheme uri) == "https:"
30
31 -- | Does this URI use an HTTP-compatible port?
32 http_port :: URI -> Bool
33 http_port uri =
34 case parse_result of
35 Nothing -> False
36 Just auth ->
37 (uriPort auth) `elem` ["", ":80"]
38 where
39 parse_result = uriAuthority uri
40
41
42 -- | Does this URI use an HTTPS-compatible port?
43 https_port :: URI -> Bool
44 https_port uri =
45 case parse_result of
46 Nothing -> False
47 Just auth ->
48 (uriPort auth) `elem` ["", ":443"]
49 where
50 parse_result = uriAuthority uri
51
52
53 -- | Does this URL have one of the LWN hostnames?
54 lwn_host :: URI -> Bool
55 lwn_host uri =
56 case parse_result of
57 Nothing -> False
58 Just auth ->
59 (uriRegName auth) `elem` ["lwn.net", "www.lwn.net"]
60 where
61 parse_result = uriAuthority uri
62
63
64 -- | Is the protocol portion of this URI one of the ones that LWN
65 -- uses?
66 lwn_proto :: URI -> Bool
67 lwn_proto u =
68 ((http u) && (http_port u)) || ((https u) && (https_port u))
69
70 -- | Is the server/port to which we're connecting an LWN server?
71 lwn_server :: URI -> Bool
72 lwn_server u =
73 lwn_proto u && lwn_host u
74
75 -- | Is this URI's path for an LWN article?
76 lwn_article_path :: URI -> Bool
77 lwn_article_path uri =
78 path =~ "^/current(/bigpage)?/?$" ||
79 path =~ "^/Articles/[0-9]+(/bigpage)?/?$"
80 where
81 path = uriPath uri
82
83 -- | Is this URI's query one that the LWN uses? The only query string
84 -- that the LWN articles use is the printable page one.
85 lwn_query :: URI -> Bool
86 lwn_query uri =
87 query `elem` ["", "?format=printable"]
88 where
89 query = uriQuery uri
90
91 -- | Combine all of the other URI tests to determine if this 'URL'
92 -- belongs to an LWN article.
93 is_lwn_url :: URL -> Bool
94 is_lwn_url s =
95 case parse_result of
96 Nothing -> False
97 Just uri -> (lwn_server uri) && (lwn_article_path uri) && (lwn_query uri)
98 where
99 parse_result = parseAbsoluteURI s
100
101
102
103 filename :: URL -> Maybe String
104 filename url =
105 case parse_result of
106 Nothing -> Nothing
107 Just uri ->
108 let components = split "/" (uriPath uri) in
109 -- Reverse them so that the filename comes first for easier
110 -- pattern-matching.
111 let reverse_components = reverse components in
112 case reverse_components of
113 [] -> Nothing
114 (x:xs) -> Just x
115 where
116 parse_result = parseURIReference url
117
118
119
120 make_absolute_uri :: URI -> Maybe URI
121 make_absolute_uri relative_uri =
122 relativeTo relative_uri base_uri
123 where
124 base_uri = URI { uriScheme = "https://",
125 uriAuthority = "lwn.net",
126 uriPath = "/",
127 uriQuery = "",
128 uriFragment = "" }
129
130
131 make_absolute_url :: URL -> Maybe URL
132 make_absolute_url relative_url =
133 case parse_result of
134 Nothing -> Nothing
135 Just relative_uri ->
136 let abs_result = make_absolute_uri relative_uri in
137 case abs_result of
138 Nothing -> Nothing
139 Just abs_uri -> show abs_uri
140 where
141 parse_result = parseURIReference url
142
143
144
145 -- | A List of LWN URLs to use during testing.
146 lwn_urls :: [URL]
147 lwn_urls = [ proto ++ www ++ "lwn.net" ++ path ++ bigpage ++ query |
148 proto <- ["http://", "https://"],
149 www <- ["", "www."],
150 bigpage <- ["", "/bigpage"],
151 query <- ["", "?format=printable"],
152 path <- [ "/current",
153 "/Articles/500844",
154 "/Articles/502371" ] ]
155
156 test_lwn_urls_matched :: Assertion
157 test_lwn_urls_matched =
158 assertEqual "All LWN URLs matched" True (all is_lwn_url lwn_urls)
159
160 test_http_uris_matched :: Assertion
161 test_http_uris_matched =
162 assertEqual (url ++ " is HTTP") True (http uri)
163 where
164 url = "http://lwn.net/Articles/500844/bigpage"
165 uri = fromJust $ parseAbsoluteURI url
166
167 test_https_uris_matched :: Assertion
168 test_https_uris_matched =
169 assertEqual (url ++ " is HTTPS") True (https uri)
170 where
171 url = "https://lwn.net/Articles/500844/bigpage"
172 uri = fromJust $ parseAbsoluteURI url
173
174
175 test_bare_filename_parsed :: Assertion
176 test_bare_filename_parsed =
177 assertEqual "Filename is 'example.jpg'" "example.jpg" actual_result
178 where
179 url = "example.jpg"
180 actual_result = fromJust $ filename url
181
182 test_absolute_filename_parsed :: Assertion
183 test_absolute_filename_parsed =
184 assertEqual "Filename is 'example.jpg'" "example.jpg" actual_result
185 where
186 url = "http://lwn.net/one/two/example.jpg"
187 actual_result = fromJust $ filename url
188
189 test_relative_filename_parsed :: Assertion
190 test_relative_filename_parsed =
191 assertEqual "Filename is 'example.jpg'" "example.jpg" actual_result
192 where
193 url = "/one/two/example.jpg"
194 actual_result = fromJust $ filename url
195
196
197 test_empty_url_conversion :: Assertion
198 test_empty_url_conversion =
199 assertEqual "'' converted to lwn.net" expected actual
200 where
201 expected = "https://lwn.net/"
202 actual = fromJust $ make_absolute_url ""
203
204
205 test_normal_url_conversion :: Assertion
206 test_normal_url_conversion =
207 assertEqual "Image URL is made absolute" expected actual
208 where
209 url = "/images/2012/lcj-coughlan-lattimer-sm.jpg"
210 expected = "https://lwn.net/images/2012/lcj-coughlan-lattimer-sm.jpg"
211 actual = fromJust $ make_absolute_url url
212
213
214 uri_tests :: Test
215 uri_tests =
216 testGroup "URI Tests" [
217
218 testGroup "URI Matching" [
219 testCase "HTTP URIs matched" test_http_uris_matched,
220 testCase "HTTPS URIs matched" test_https_uris_matched,
221 testCase "LWN URLs matched" test_lwn_urls_matched ],
222
223 testGroup "Filename Parsing" [
224 testCase "Bare filename parsed" test_bare_filename_parsed,
225 testCase "Absolute filename parsed" test_absolute_filename_parsed,
226 testCase "Relative filename parsed" test_relative_filename_parsed ],
227
228 testGroup "Relative -> Absolute Conversion" [
229 testCase "Empty URL converted to lwn.net" test_empty_url_conversion,
230 testCase "Normal URL made absolute" test_normal_url_conversion ]]