]> gitweb.michael.orlitzky.com - dead/harbl.git/blob - src/Domain.hs
Add the Domain module, work in progress.
[dead/harbl.git] / src / Domain.hs
1 -- | The 'Domain' data type and its parser. A 'Domain' represents a
2 -- name in the domain name system (DNS) as described by
3 -- RFC1035. In particular, we enforce the restrictions from Section
4 -- 2.3.1 \"Preferred name syntax\". See for example,
5 --
6 -- <https://tools.ietf.org/html/rfc1035#section-2.3.1>
7 --
8 module Domain
9 where
10
11 import Data.Char ( toLower )
12 import Text.Parsec (
13 ParseError,
14 (<|>),
15 alphaNum,
16 char,
17 eof,
18 many1,
19 option,
20 optionMaybe,
21 parse,
22 string,
23 try,
24 unexpected )
25 import qualified Text.Parsec as Parsec ( digit, letter)
26 import Text.Parsec.String ( Parser )
27
28 newtype Domain = Domain String deriving Show
29
30
31 -- | The derived instance of 'Eq' for domain names is incorrect. All
32 -- comparisons are currently made case-insensitively. The following is
33 -- an excerpt from RFC1035:
34 --
35 -- 2.3.3. Character Case
36 --
37 -- For all parts of the DNS that are part of the official
38 -- protocol, all comparisons between character strings (e.g.,
39 -- labels, domain names, etc.) are done in a case-insensitive
40 -- manner...
41 --
42 -- So to compare two DNS names, we compare their lower-case
43 -- counterparts.
44 --
45 instance Eq Domain where
46 (Domain d1) == (Domain d2) =
47 (map toLower d1) == (map toLower d2)
48
49
50 -- * Digits
51 newtype Digit = Digit Char deriving (Eq, Show)
52
53 digit :: Parser Digit
54 digit = fmap Digit Parsec.digit
55
56
57 -- * Letters
58 newtype Letter = Letter Char deriving (Eq, Show)
59
60 letter :: Parser Letter
61 letter = fmap Letter Parsec.letter
62
63
64 -- * Letters/Digits
65 data LetDig = LetDigLetter Letter | LetDigDigit Digit deriving (Eq, Show)
66
67 let_dig :: Parser LetDig
68 let_dig = (fmap LetDigLetter letter) <|> (fmap LetDigDigit digit)
69
70
71 -- * Hyphens
72 newtype Hyphen = Hyphen Char deriving (Eq, Show)
73
74 hyphen :: Parser Hyphen
75 hyphen = fmap Hyphen (char '-')
76
77
78 -- * Letter, Digit, or Hyphen.
79
80 data LetDigHyp = LetDigHypLetDig LetDig
81 | LetDigHypHyphen Hyphen
82 deriving (Eq, Show)
83
84
85 -- | The following is the simplest type in the domain grammar that
86 -- isn't already implemented for us.
87 --
88 -- <let-dig> ::= <letter> | <digit>
89 --
90 -- ==== _Examples_
91 --
92 -- >>> import Text.Parsec ( parseTest )
93 --
94 -- Letters, digits, and hyphens are all parsed:
95 --
96 -- >>> parseTest let_dig_hyp "a"
97 -- LetDigHypLetDig (LetDigLetter (Letter 'a'))
98 --
99 -- >>> parseTest let_dig_hyp "7"
100 -- LetDigHypLetDig (LetDigDigit (Digit '7'))
101 --
102 -- >>> parseTest let_dig_hyp "-"
103 -- LetDigHypHyphen (Hyphen '-')
104 --
105 -- However, an underscore (for example) is not:
106 --
107 -- >>> parseTest let_dig_hyp "_"
108 -- parse error at (line 1, column 1):
109 -- unexpected "_"
110 -- expecting letter, digit or "-"
111 --
112 let_dig_hyp :: Parser LetDigHyp
113 let_dig_hyp =
114 parse_letdig <|> parse_hyphen
115 where
116 parse_letdig :: Parser LetDigHyp
117 parse_letdig = fmap LetDigHypLetDig let_dig
118
119 parse_hyphen :: Parser LetDigHyp
120 parse_hyphen = fmap LetDigHypHyphen hyphen
121
122
123 -- * Letter/Digit/Hyphen strings
124
125 -- | A string of letters, digits, and hyphens from the RFC1035 grammar:
126 --
127 -- <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
128 --
129 -- These are represented as either a single instance of a
130 -- 'LetDigHyp', or a string of them (recursive).
131 --
132 -- ==== _Examples_
133 --
134 -- >>> import Text.Parsec ( parseTest )
135 --
136 -- Single letters, digits, and hyphens are parsed:
137 --
138 -- >>> parseTest ldh_str "a"
139 -- LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a')))
140 --
141 -- >>> parseTest ldh_str "0"
142 -- LdhStrSingleLdh (LetDigHypLetDig (LetDigDigit (Digit '0')))
143 --
144 -- >>> parseTest ldh_str "-"
145 -- LdhStrSingleLdh (LetDigHypHyphen (Hyphen '-'))
146 --
147 -- As well as strings of them:
148 --
149 -- LdhStr (LetDigHypLetter (Letter 'a')) (LdhStr (LetDigHypDigit (Digit '0')) (LdhStr (LetDigHypHyphen (Hyphen '-')) (LdhStrSingleLdh (LetDigHypLetter (Letter 'b')))))
150 --
151 data LdhStr =
152 LdhStrSingleLdh LetDigHyp | LdhStrMultipleLdh LetDigHyp LdhStr
153 deriving (Eq, Show)
154
155 ldh_str :: Parser LdhStr
156 ldh_str = try both <|> just_one
157 where
158 both :: Parser LdhStr
159 both = do
160 ldh1 <- let_dig_hyp
161 ldh_tail <- ldh_str
162 return $ LdhStrMultipleLdh ldh1 ldh_tail
163
164 just_one :: Parser LdhStr
165 just_one = fmap LdhStrSingleLdh let_dig_hyp
166
167
168
169 -- | A version of 'last' that works on a 'LdhStr' rather than a
170 -- list. That is, it returns the last 'LetDigHyp' in the
171 -- string. Since 'LdhStr' contains at least one character, there's
172 -- no \"nil\" case here.
173 --
174 -- ==== _Examples_
175 --
176 -- >>> let (Right r) = parse ldh_str "" "a"
177 -- >>> last_ldh_str r
178 -- LetDigHypLetDig (LetDigLetter (Letter 'a'))
179 --
180 -- >>> let (Right r) = parse ldh_str "" "abc-def"
181 -- >>> last_ldh_str r
182 -- LetDigHypLetDig (LetDigLetter (Letter 'f'))
183 --
184 last_ldh_str :: LdhStr -> LetDigHyp
185 last_ldh_str (LdhStrSingleLdh x) = x
186 last_ldh_str (LdhStrMultipleLdh _ x) = last_ldh_str x
187
188
189 -- | A version of 'init' that works on a 'LdhStr' rather than a
190 -- list. That is, it returns everything /except/ the last character in
191 -- the string.
192 --
193 -- Since an 'LdhStr' must contain at least one character, this might
194 -- not be opssible (when the input is of length one). So, we return
195 -- a 'Maybe' value.
196 --
197 -- ==== _Examples_
198 --
199 -- >>> let (Right r) = parse ldh_str "" "a"
200 -- >>> init_ldh_str r
201 -- Nothing
202 --
203 -- >>> let (Right r) = parse ldh_str "" "ab"
204 -- >>> init_ldh_str r
205 -- Just (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))))
206 --
207 -- >>> let (Right r) = parse ldh_str "" "abc-def"
208 -- >>> init_ldh_str r
209 -- Just (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'b'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'c'))) (LdhStrMultipleLdh (LetDigHypHyphen (Hyphen '-')) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'd'))) (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'e')))))))))
210 --
211 init_ldh_str :: LdhStr -> Maybe LdhStr
212 init_ldh_str (LdhStrSingleLdh _) = Nothing
213 init_ldh_str (LdhStrMultipleLdh h t) =
214 Just $ case (init_ldh_str t) of
215 -- We just got the second-to-last character, we're done.
216 Nothing -> LdhStrSingleLdh h
217
218 -- There's still more stuff. Recurse.
219 Just rest -> LdhStrMultipleLdh h rest
220
221
222 -- | Compute the length of an 'LdhStr'. It will be at least one, since
223 -- 'LdhStr's are non-empty. And if there's something other than the
224 -- first character present, we simply recurse.
225 --
226 -- ==== _Examples_
227 --
228 -- >>> let (Right r) = parse ldh_str "" "a"
229 -- >>> length_ldh_str r
230 -- 1
231 --
232 -- >>> let (Right r) = parse ldh_str "" "abc-def"
233 -- >>> length_ldh_str r
234 -- 7
235 --
236 length_ldh_str :: LdhStr -> Int
237 length_ldh_str (LdhStrSingleLdh _) = 1
238 length_ldh_str (LdhStrMultipleLdh _ t) = 1 + (length_ldh_str t)
239
240 -- * Letter/Digit/Hyphen string followed by a trailing Letter/Digit
241
242 -- | This type isn't explicitly part of the grammar, but it's what
243 -- shows up in the square brackets of,
244 --
245 -- <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
246 --
247 -- The ldh-str is optional, but if one is present, we must also have
248 -- a trailing let-dig to prevent the name from ending with a
249 -- hyphen. This can be represented with a @Maybe LdhStrLetDig@,
250 -- which is why we're about to define it.
251 --
252 data LdhStrLetDig = LdhStrLetDig (Maybe LdhStr) LetDig
253 deriving (Eq, Show)
254
255 -- | Parse an 'LdhStrLetDig'. This isn't in the grammar, but we might
256 -- as well define the parser for it independently since we gave it
257 -- its own data type.
258 --
259 -- ==== _Examples_
260 --
261 -- >>> import Text.Parsec ( parseTest )
262 --
263 -- Make sure we can parse a single character:
264 --
265 -- >>> parseTest ldh_str_let_dig "a"
266 -- LdhStrLetDig Nothing (LetDigLetter (Letter 'a'))
267 --
268 -- And longer strings:
269 --
270 -- >>> parseTest ldh_str_let_dig "ab"
271 -- LdhStrLetDig (Just (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))))) (LetDigLetter (Letter 'b'))
272 --
273 -- >>> parseTest ldh_str_let_dig "-b"
274 -- LdhStrLetDig (Just (LdhStrSingleLdh (LetDigHypHyphen (Hyphen '-')))) (LetDigLetter (Letter 'b'))
275 --
276 -- >>> parseTest ldh_str_let_dig "b-"
277 -- parse error at (line 1, column 3):
278 -- label cannot end with a hyphen
279 --
280 ldh_str_let_dig :: Parser LdhStrLetDig
281 ldh_str_let_dig = do
282 -- This will happily eat up the trailing let-dig...
283 full_ldh <- ldh_str
284
285 -- So we have to go back and see what happened.
286 case (last_ldh_str full_ldh) of
287 (LetDigHypHyphen _) -> fail "label cannot end with a hyphen"
288 (LetDigHypLetDig ld) ->
289 -- Ok, the label didn't end with a hyphen; now we need to split
290 -- off the last letter/digit so we can pack it into our return
291 -- type separately.
292 return $ case (init_ldh_str full_ldh) of
293 -- We only parsed one letter/digit. This can happen
294 -- if the label contains two characters. For example,
295 -- if we try to parse the label "ab", then the "a"
296 -- will be eaten by the label parser, and this
297 -- function will be left with only "b".
298 Nothing -> LdhStrLetDig Nothing ld
299
300 -- Usual case: there's was some leading let-dig-hyp junk,
301 -- return it too.
302 leading_ldhs -> LdhStrLetDig leading_ldhs ld
303
304
305
306 -- | Compute the length of a 'LdhStrLetDig'. It's at least one, since
307 -- the let-dig at the end is always there. And when there's an
308 -- ldh-str too, we add its length to one.
309 --
310 -- ==== _Examples_
311 --
312 -- >>> let (Right r) = parse ldh_str_let_dig "" "a"
313 -- >>> length_ldh_str_let_dig r
314 -- 1
315 --
316 -- >>> let (Right r) = parse ldh_str_let_dig "" "abc-def"
317 -- >>> length_ldh_str_let_dig r
318 -- 7
319 --
320 length_ldh_str_let_dig :: LdhStrLetDig -> Int
321 length_ldh_str_let_dig (LdhStrLetDig Nothing _) = 1
322 length_ldh_str_let_dig (LdhStrLetDig (Just ldhstring) _) =
323 1 + (length_ldh_str ldhstring)
324
325
326 -- * Labels
327
328 -- | The label type from the RFC1035 grammar:
329 --
330 -- <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
331 --
332 data Label = Label Letter (Maybe LdhStrLetDig)
333 deriving (Eq, Show)
334
335
336 -- | Parse a 'Label'.
337 --
338 -- In addition to the grammar, there's another restriction on
339 -- labels: their length must be 63 characters or less. Quoting
340 -- Section 2.3.1, \"Preferred name syntax\", of RFC1035:
341 --
342 -- The labels must follow the rules for ARPANET host names. They
343 -- must start with a letter, end with a letter or digit, and have
344 -- as interior characters only letters, digits, and hyphen. There
345 -- are also some restrictions on the length. Labels must be 63
346 -- characters or less.
347 --
348 -- We check this only after we have successfully parsed a label.
349 --
350 -- ==== _Examples_
351 --
352 -- >>> import Text.Parsec ( parseTest )
353 --
354 -- Make sure we can parse a single character:
355 --
356 -- >>> parseTest label "a"
357 -- Label (Letter 'a') Nothing
358 --
359 -- And longer strings:
360 --
361 -- >>> parseTest label "abc-def"
362 -- Label (Letter 'a') (Just (LdhStrLetDig (Just (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'b'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'c'))) (LdhStrMultipleLdh (LetDigHypHyphen (Hyphen '-')) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'd'))) (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'e'))))))))) (LetDigLetter (Letter 'f'))))
363 --
364 -- But not anything ending in a hyphen:
365 --
366 -- >>> parseTest label "abc-"
367 -- parse error at (line 1, column 5):
368 -- label cannot end with a hyphen
369 --
370 -- Or anything over 63 characters:
371 --
372 -- >>> parseTest label (['a'..'z'] ++ ['a'..'z'] ++ ['a'..'z'])
373 -- parse error at (line 1, column 79):
374 -- labels must be 63 or fewer characters
375 --
376 -- However, /exactly/ 63 characters is acceptable:
377 --
378 -- TODO
379 --
380 label :: Parser Label
381 label = do
382 l <- letter -- Guaranteed to be there
383 maybe_s <- optionMaybe ldh_str_let_dig -- Might not be there
384 case maybe_s of
385 -- It can only be one character long, from the letter...
386 Nothing -> return $ Label l maybe_s
387
388 -- The letter gives us one character, so we check that the rest is
389 -- less than 62 characters long. But in the error message we need
390 -- to report 63.
391 Just s -> if (length_ldh_str_let_dig s) <= 62
392 then return $ Label l maybe_s
393 else fail "labels must be 63 or fewer characters"
394
395
396 domain :: Parser Domain
397 domain = undefined
398
399 --subdomain :: Parser Subdomain
400 --subdomain = undefined