Finish moving all of the DNS name components under Network.DNS.RBL.Domain.
[dead/harbl.git] / harbl / src / Network / DNS / RBL / Domain / Subdomain.hs
1 {-# LANGUAGE DoAndIfThenElse #-}
2
3 -- | This module contains the 'Subdomain' type and a Parsec parser to
4 -- parse one. We don't export its constructor because then you could
5 -- do something dumb like stick a period inside one.
6 --
7 -- These are defined in RFC1035, Section 2.3.1, \"Preferred name
8 -- syntax\" <https://tools.ietf.org/html/rfc1035#section-2.3.1>:
9 --
10 -- <subdomain> ::= <label> | <subdomain> "." <label>
11 --
12 module Network.DNS.RBL.Domain.Subdomain (
13 Subdomain,
14 subdomain )
15 where
16
17 import Text.Parsec ( (<|>), char, try )
18 import Text.Parsec.String ( Parser )
19
20 import Network.DNS.RBL.Domain.Label ( Label, label )
21 import Network.DNS.RBL.Pretty ( Pretty(..) )
22 import Network.DNS.RBL.Reversible ( Reversible(..) )
23
24
25 -- | The data type representing a \"subdomain\" from RFC1035. We have
26 -- reversed the order of the subdomain and label in the second
27 -- option (from the grammar), however. This is explained in the
28 -- 'subdomain' parser.
29 --
30 --
31 -- ==== _Examples_
32 --
33 -- >>> import Text.Parsec ( parse )
34 --
35 -- >>> let (Right r) = parse label "" "x"
36 -- >>> SubdomainSingleLabel r
37 -- SubdomainSingleLabel (Label (LetDigLetter (Letter 'x')) Nothing)
38 --
39 data Subdomain =
40 SubdomainSingleLabel Label |
41 SubdomainMultipleLabel Label Subdomain
42 deriving (Eq, Show)
43
44
45 -- | Pretty-print a 'Subdomain'. We print additional \".\" characters
46 -- between our labels when there are more than one of them.
47 --
48 instance Pretty Subdomain where
49 pretty_show (SubdomainSingleLabel l) = pretty_show l
50 pretty_show (SubdomainMultipleLabel l s) =
51 (pretty_show l) ++ "." ++ (pretty_show s)
52
53
54 instance Reversible Subdomain where
55 -- | Reverse the labels of the given subdomain.
56 --
57 -- ==== _Examples_
58 --
59 -- >>> import Text.Parsec ( parse )
60 --
61 -- Standard usage:
62 --
63 -- >>> let (Right r) = parse subdomain "" "com"
64 -- >>> pretty_print $ backwards r
65 -- com
66 --
67 -- >>> let (Right r) = parse subdomain "" "example.com"
68 -- >>> pretty_print $ backwards r
69 -- com.example
70 --
71 -- >>> let (Right r) = parse subdomain "" "www.example.com"
72 -- >>> pretty_print $ backwards r
73 -- com.example.www
74 --
75 -- >>> let (Right r) = parse subdomain "" "new.www.example.com"
76 -- >>> pretty_print $ backwards r
77 -- com.example.www.new
78 --
79
80 -- It's easy to reverse a single label...
81 backwards s@(SubdomainSingleLabel _) = s
82
83 -- For multiple labels we have two cases. The first is where we have
84 -- exactly two labels, and we just need to swap them.
85 backwards (SubdomainMultipleLabel l (SubdomainSingleLabel m)) =
86 SubdomainMultipleLabel m (SubdomainSingleLabel l)
87
88 -- And now the hard case. See the 'LdhStr' implementation for an
89 -- explanation.
90 --
91 backwards (SubdomainMultipleLabel l s) = build (SubdomainSingleLabel l) s
92 where
93 -- Build up the first Subdomain on the left by peeling off the
94 -- leading elements of the second Subdomain.
95 build :: Subdomain -> Subdomain -> Subdomain
96 build dst (SubdomainSingleLabel final) = SubdomainMultipleLabel final dst
97 build dst (SubdomainMultipleLabel leading rest) =
98 build (SubdomainMultipleLabel leading dst) rest
99
100
101
102 -- | Parse a 'Subdomain'. Here is the RFC1035 grammar for reference:
103 --
104 -- <subdomain> ::= <label> | <subdomain> "." <label>
105 --
106 -- In contrast with the grammar, we have reversed the order of the
107 -- subdomain and label to prevent infinite recursion. The second
108 -- option (subdomain + label) is obviously more specific, we we need
109 -- to try it first. This presents a problem: we're trying to parse a
110 -- subdomain in terms of a subdomain! The given grammar represents
111 -- subdomains how we like to think of them; from right to left. But
112 -- it's better to parse from left to right, so we pick off the
113 -- leading label and then recurse into the definition of subdomain.
114 --
115 -- According to RFC1034 <https://tools.ietf.org/html/rfc1034>
116 -- Section 3.1, \"3.1. Name space specifications and terminology\"
117 -- two neighboring labels in a DNS name cannot be equal:
118 --
119 -- Each node has a label, which is zero to 63 octets in length. Brother
120 -- nodes may not have the same label, although the same label can be used
121 -- for nodes which are not brothers. One label is reserved, and that is
122 -- the null (i.e., zero length) label used for the root.
123 --
124 -- We enforce this restriction, but the result is usually that we
125 -- only parse the part of the subdomain leading up to the repeated
126 -- label.
127 --
128 -- ==== _Examples_
129 --
130 -- >>> import Text.Parsec ( parse, parseTest )
131 --
132 -- Make sure we can parse a single character:
133 --
134 -- >>> parseTest subdomain "a"
135 -- SubdomainSingleLabel (Label (LetDigLetter (Letter 'a')) Nothing)
136 --
137 -- >>> pretty_print $ parse subdomain "" "example.com"
138 -- example.com
139 --
140 -- >>> pretty_print $ parse subdomain "" "www.example.com"
141 -- www.example.com
142 --
143 -- We reject a subdomain with equal neighbors, but this leads to
144 -- only the single first label being parsed instead:
145 --
146 -- >>> pretty_print $ parse subdomain "" "www.www.example.com"
147 -- www
148 --
149 -- But not one with a repeated but non-neighboring label:
150 --
151 -- >>> pretty_print $ parse subdomain "" "www.example.www.com"
152 -- www.example.www.com
153 --
154 subdomain :: Parser Subdomain
155 subdomain = try both <|> just_one
156 where
157 both :: Parser Subdomain
158 both = do
159 l <- label
160 _ <- char '.'
161 s <- subdomain
162 let result = SubdomainMultipleLabel l s
163 if (subdomain_has_equal_neighbors result)
164 then fail "subdomain cannot have equal neighboring labels"
165 else return result
166
167 just_one :: Parser Subdomain
168 just_one = fmap SubdomainSingleLabel label
169
170
171
172 -- | Retrieve a list of labels contained in a 'Subdomain'.
173 --
174 -- ==== _Examples_
175 --
176 -- >>> import Text.Parsec ( parse )
177 --
178 -- >>> let (Right r) = parse subdomain "" "a"
179 -- >>> pretty_print $ subdomain_labels r
180 -- ["a"]
181 --
182 -- >>> let (Right r) = parse subdomain "" "example.com"
183 -- >>> pretty_print $ subdomain_labels r
184 -- ["example","com"]
185 --
186 -- >>> let (Right r) = parse subdomain "" "www.example.com"
187 -- >>> pretty_print $ subdomain_labels r
188 -- ["www","example","com"]
189 --
190 subdomain_labels :: Subdomain -> [Label]
191 subdomain_labels (SubdomainSingleLabel l) = [l]
192 subdomain_labels (SubdomainMultipleLabel l s) = l : (subdomain_labels s)
193
194
195 -- | Return a list of pairs of neighboring labels in a subdomain.
196 --
197 -- ==== _Examples_
198 --
199 -- >>> import Text.Parsec ( parse )
200 -- >>> let (Right r) = parse subdomain "" "www.example.com"
201 -- >>> pretty_print $ subdomain_label_neighbors r
202 -- ["(\"www\",\"example\")","(\"example\",\"com\")"]
203 --
204 subdomain_label_neighbors :: Subdomain -> [(Label,Label)]
205 subdomain_label_neighbors s =
206 zip ls (tail ls)
207 where
208 ls = subdomain_labels s
209
210
211 -- | Return @True@ if the subdomain has any two equal neighboring
212 -- labels, and @False@ otherwise.
213 --
214 -- ==== _Examples_
215 --
216 -- >>> import Text.Parsec ( parse )
217 --
218 -- >>> let (Right r) = parse subdomain "" "www.example.com"
219 -- >>> subdomain_has_equal_neighbors r
220 -- False
221 --
222 -- >>> let (Right l) = parse label "" "www"
223 -- >>> let (Right s) = parse subdomain "" "www.example.com"
224 -- >>> let bad_subdomain = SubdomainMultipleLabel l s
225 -- >>> subdomain_has_equal_neighbors bad_subdomain
226 -- True
227 --
228 subdomain_has_equal_neighbors :: Subdomain -> Bool
229 subdomain_has_equal_neighbors s =
230 or [ x == y | (x,y) <- subdomain_label_neighbors s ]