Add indentation.
[dead/lwn-epub.git] / src / LWN / URI.hs
1 module LWN.URI
2 where
3
4 import Data.Maybe (fromJust)
5 import Network.URI (
6 URI,
7 parseAbsoluteURI,
8 uriAuthority,
9 uriPath,
10 uriPort,
11 uriQuery,
12 uriRegName,
13 uriScheme
14 )
15 import Test.HUnit (Assertion, assertEqual)
16 import Test.Framework (Test, testGroup)
17 import Test.Framework.Providers.HUnit (testCase)
18 import Text.Regex.Posix
19
20 -- Distinguish between URLs (Strings) and URIs as provided by the
21 -- Network.URI module.
22 type URL = String
23
24
25 -- | Is this URI's scheme plain HTTP?
26 http :: URI -> Bool
27 http uri = (uriScheme uri) == "http:"
28
29 -- | Is this URI's scheme (secure) HTTPS?
30 https :: URI -> Bool
31 https uri = (uriScheme uri) == "https:"
32
33 -- | Does this URI use an HTTP-compatible port?
34 http_port :: URI -> Bool
35 http_port uri =
36 case parse_result of
37 Nothing -> False
38 Just auth ->
39 (uriPort auth) `elem` ["", ":80"]
40 where
41 parse_result = uriAuthority uri
42
43
44 -- | Does this URI use an HTTPS-compatible port?
45 https_port :: URI -> Bool
46 https_port uri =
47 case parse_result of
48 Nothing -> False
49 Just auth ->
50 (uriPort auth) `elem` ["", ":443"]
51 where
52 parse_result = uriAuthority uri
53
54
55 -- | Does this URL have one of the LWN hostnames?
56 lwn_host :: URI -> Bool
57 lwn_host uri =
58 case parse_result of
59 Nothing -> False
60 Just auth ->
61 (uriRegName auth) `elem` ["lwn.net", "www.lwn.net"]
62 where
63 parse_result = uriAuthority uri
64
65
66 -- | Is the protocol portion of this URI one of the ones that LWN
67 -- uses?
68 lwn_proto :: URI -> Bool
69 lwn_proto u =
70 ((http u) && (http_port u)) || ((https u) && (https_port u))
71
72 -- | Is the server/port to which we're connecting an LWN server?
73 lwn_server :: URI -> Bool
74 lwn_server u =
75 lwn_proto u && lwn_host u
76
77 -- | Is this URI's path for an LWN article?
78 lwn_article_path :: URI -> Bool
79 lwn_article_path uri =
80 path =~ "^/current(/bigpage)?/?$" ||
81 path =~ "^/Articles/[0-9]+(/bigpage)?/?$"
82 where
83 path = uriPath uri
84
85 -- | Is this URI's query one that the LWN uses? The only query string
86 -- that the LWN articles use is the printable page one.
87 lwn_query :: URI -> Bool
88 lwn_query uri =
89 query `elem` ["", "?format=printable"]
90 where
91 query = uriQuery uri
92
93 -- | Combine all of the other URI tests to determine if this 'URL'
94 -- belongs to an LWN article.
95 is_lwn_url :: URL -> Bool
96 is_lwn_url s =
97 case parse_result of
98 Nothing -> False
99 Just uri -> (lwn_server uri) && (lwn_article_path uri) && (lwn_query uri)
100 where
101 parse_result = parseAbsoluteURI s
102
103
104 -- | A List of LWN URLs to use during testing.
105 lwn_urls :: [URL]
106 lwn_urls = [ proto ++ www ++ "lwn.net" ++ path ++ bigpage ++ query |
107 proto <- ["http://", "https://"],
108 www <- ["", "www."],
109 bigpage <- ["", "/bigpage"],
110 query <- ["", "?format=printable"],
111 path <- [ "/current",
112 "/Articles/500844",
113 "/Articles/502371" ] ]
114
115 test_lwn_urls_matched :: Assertion
116 test_lwn_urls_matched =
117 assertEqual "All LWN URLs matched" True (all is_lwn_url lwn_urls)
118
119 test_http_uris_matched :: Assertion
120 test_http_uris_matched =
121 assertEqual (url ++ " is HTTP") True (http uri)
122 where
123 url = "http://lwn.net/Articles/500844/bigpage"
124 uri = fromJust $ parseAbsoluteURI url
125
126 test_https_uris_matched :: Assertion
127 test_https_uris_matched =
128 assertEqual (url ++ " is HTTPS") True (https uri)
129 where
130 url = "https://lwn.net/Articles/500844/bigpage"
131 uri = fromJust $ parseAbsoluteURI url
132
133 uri_tests :: Test
134 uri_tests =
135 testGroup "URI Tests" [
136 testCase "HTTP URIs matched" test_http_uris_matched,
137 testCase "HTTPS URIs matched" test_https_uris_matched,
138 testCase "LWN URLs matched" test_lwn_urls_matched ]