]> gitweb.michael.orlitzky.com - dead/lwn-epub.git/blob - src/LWN/URI.hs
Implement URL filename parsing (with tests) and the save_image function.
[dead/lwn-epub.git] / src / LWN / URI.hs
1 module LWN.URI
2 where
3
4 import Data.Maybe (fromJust)
5 import Data.String.Utils (split)
6 import Network.URI (
7 URI,
8 parseAbsoluteURI,
9 parseURIReference,
10 uriAuthority,
11 uriPath,
12 uriPort,
13 uriQuery,
14 uriRegName,
15 uriScheme
16 )
17 import Test.HUnit (Assertion, assertEqual)
18 import Test.Framework (Test, testGroup)
19 import Test.Framework.Providers.HUnit (testCase)
20 import Text.Regex.Posix
21
22 -- Distinguish between URLs (Strings) and URIs as provided by the
23 -- Network.URI module.
24 type URL = String
25
26
27 -- | Is this URI's scheme plain HTTP?
28 http :: URI -> Bool
29 http uri = (uriScheme uri) == "http:"
30
31 -- | Is this URI's scheme (secure) HTTPS?
32 https :: URI -> Bool
33 https uri = (uriScheme uri) == "https:"
34
35 -- | Does this URI use an HTTP-compatible port?
36 http_port :: URI -> Bool
37 http_port uri =
38 case parse_result of
39 Nothing -> False
40 Just auth ->
41 (uriPort auth) `elem` ["", ":80"]
42 where
43 parse_result = uriAuthority uri
44
45
46 -- | Does this URI use an HTTPS-compatible port?
47 https_port :: URI -> Bool
48 https_port uri =
49 case parse_result of
50 Nothing -> False
51 Just auth ->
52 (uriPort auth) `elem` ["", ":443"]
53 where
54 parse_result = uriAuthority uri
55
56
57 -- | Does this URL have one of the LWN hostnames?
58 lwn_host :: URI -> Bool
59 lwn_host uri =
60 case parse_result of
61 Nothing -> False
62 Just auth ->
63 (uriRegName auth) `elem` ["lwn.net", "www.lwn.net"]
64 where
65 parse_result = uriAuthority uri
66
67
68 -- | Is the protocol portion of this URI one of the ones that LWN
69 -- uses?
70 lwn_proto :: URI -> Bool
71 lwn_proto u =
72 ((http u) && (http_port u)) || ((https u) && (https_port u))
73
74 -- | Is the server/port to which we're connecting an LWN server?
75 lwn_server :: URI -> Bool
76 lwn_server u =
77 lwn_proto u && lwn_host u
78
79 -- | Is this URI's path for an LWN article?
80 lwn_article_path :: URI -> Bool
81 lwn_article_path uri =
82 path =~ "^/current(/bigpage)?/?$" ||
83 path =~ "^/Articles/[0-9]+(/bigpage)?/?$"
84 where
85 path = uriPath uri
86
87 -- | Is this URI's query one that the LWN uses? The only query string
88 -- that the LWN articles use is the printable page one.
89 lwn_query :: URI -> Bool
90 lwn_query uri =
91 query `elem` ["", "?format=printable"]
92 where
93 query = uriQuery uri
94
95 -- | Combine all of the other URI tests to determine if this 'URL'
96 -- belongs to an LWN article.
97 is_lwn_url :: URL -> Bool
98 is_lwn_url s =
99 case parse_result of
100 Nothing -> False
101 Just uri -> (lwn_server uri) && (lwn_article_path uri) && (lwn_query uri)
102 where
103 parse_result = parseAbsoluteURI s
104
105
106
107 filename :: URL -> Maybe String
108 filename url =
109 case parse_result of
110 Nothing -> Nothing
111 Just uri ->
112 let components = split "/" (uriPath uri) in
113 -- Reverse them so that the filename comes first for easier
114 -- pattern-matching.
115 let reverse_components = reverse components in
116 case reverse_components of
117 [] -> Nothing
118 (x:xs) -> Just x
119 where
120 parse_result = parseURIReference url
121
122
123 -- | A List of LWN URLs to use during testing.
124 lwn_urls :: [URL]
125 lwn_urls = [ proto ++ www ++ "lwn.net" ++ path ++ bigpage ++ query |
126 proto <- ["http://", "https://"],
127 www <- ["", "www."],
128 bigpage <- ["", "/bigpage"],
129 query <- ["", "?format=printable"],
130 path <- [ "/current",
131 "/Articles/500844",
132 "/Articles/502371" ] ]
133
134 test_lwn_urls_matched :: Assertion
135 test_lwn_urls_matched =
136 assertEqual "All LWN URLs matched" True (all is_lwn_url lwn_urls)
137
138 test_http_uris_matched :: Assertion
139 test_http_uris_matched =
140 assertEqual (url ++ " is HTTP") True (http uri)
141 where
142 url = "http://lwn.net/Articles/500844/bigpage"
143 uri = fromJust $ parseAbsoluteURI url
144
145 test_https_uris_matched :: Assertion
146 test_https_uris_matched =
147 assertEqual (url ++ " is HTTPS") True (https uri)
148 where
149 url = "https://lwn.net/Articles/500844/bigpage"
150 uri = fromJust $ parseAbsoluteURI url
151
152
153 test_bare_filename_parsed :: Assertion
154 test_bare_filename_parsed =
155 assertEqual "Filename is 'example.jpg'" "example.jpg" actual_result
156 where
157 url = "example.jpg"
158 actual_result = fromJust $ filename url
159
160 test_absolute_filename_parsed :: Assertion
161 test_absolute_filename_parsed =
162 assertEqual "Filename is 'example.jpg'" "example.jpg" actual_result
163 where
164 url = "http://lwn.net/one/two/example.jpg"
165 actual_result = fromJust $ filename url
166
167 test_relative_filename_parsed :: Assertion
168 test_relative_filename_parsed =
169 assertEqual "Filename is 'example.jpg'" "example.jpg" actual_result
170 where
171 url = "/one/two/example.jpg"
172 actual_result = fromJust $ filename url
173
174
175 uri_tests :: Test
176 uri_tests =
177 testGroup "URI Tests" [
178
179 testGroup "URI Matching" [
180 testCase "HTTP URIs matched" test_http_uris_matched,
181 testCase "HTTPS URIs matched" test_https_uris_matched,
182 testCase "LWN URLs matched" test_lwn_urls_matched ],
183
184 testGroup "Filename Parsing" [
185 testCase "Bare filename parsed" test_bare_filename_parsed,
186 testCase "Absolute filename parsed" test_absolute_filename_parsed,
187 testCase "Relative filename parsed" test_relative_filename_parsed ] ]