]> gitweb.michael.orlitzky.com - dead/harbl.git/blob - src/Domain.hs
Add the no-equal-neighbors restriction for labels.
[dead/harbl.git] / src / Domain.hs
1 -- | The 'Domain' data type and its parser. A 'Domain' represents a
2 -- name in the domain name system (DNS) as described by
3 -- RFC1035. In particular, we enforce the restrictions from Section
4 -- 2.3.1 \"Preferred name syntax\". See for example,
5 --
6 -- <https://tools.ietf.org/html/rfc1035#section-2.3.1>
7 --
8 -- We basically work with strings and characters everywhere, even
9 -- though this isn't really correct. The length specifications in
10 -- the RFCs are all in terms of octets, so really a ByteString.Char8
11 -- would be more appropriate. With strings, for example, we could
12 -- have a unicode mumbo jumbo character that takes up two bytes
13 -- (octets).
14 --
15 module Domain (
16 UserDomain,
17 user_domain )
18 where
19
20 import Data.Char ( toLower )
21 import Text.Parsec (
22 ParseError,
23 (<|>),
24 alphaNum,
25 char,
26 eof,
27 many1,
28 option,
29 optionMaybe,
30 parse,
31 string,
32 try,
33 unexpected )
34 import qualified Text.Parsec as Parsec ( digit, letter)
35 import Text.Parsec.String ( Parser )
36
37 import Pretty ( Pretty(..) )
38
39 -- * Digits
40
41 -- | A wrapper around a digit character.
42 --
43 newtype Digit = Digit Char deriving (Eq, Show)
44 instance Pretty Digit where pretty_show (Digit d) = [d]
45
46 -- | Parse a single digit, but wrap it in our 'Digit' type.
47 --
48 digit :: Parser Digit
49 digit = fmap Digit Parsec.digit
50
51
52 -- * Letters
53
54 -- | A wrapper around a letter character.
55 --
56 newtype Letter = Letter Char deriving (Show)
57 instance Pretty Letter where pretty_show (Letter l) = [l]
58
59
60 -- | Parse a single letter, but wrap it in our 'Letter' type.
61 --
62 letter :: Parser Letter
63 letter = fmap Letter Parsec.letter
64
65 -- | The derived instance of 'Eq' for letters is incorrect. All
66 -- comparisons should be made case-insensitively. The following
67 -- is an excerpt from RFC1035:
68 --
69 -- 2.3.3. Character Case
70 --
71 -- For all parts of the DNS that are part of the official
72 -- protocol, all comparisons between character strings (e.g.,
73 -- labels, domain names, etc.) are done in a case-insensitive
74 -- manner...
75 --
76 -- Since each part of DNS name is composed of our custom types, it
77 -- suffices to munge the equality for 'Letter'. RFC4343
78 -- <https://tools.ietf.org/html/rfc4343> clarifies the
79 -- case-insensitivity rules, but the fact that we're treating DNS
80 -- names as strings makes most of those problems go away (in
81 -- exchange for new ones).
82 --
83 instance Eq Letter where
84 (Letter l1) == (Letter l2) = (toLower l1) == (toLower l2)
85
86 -- * Letters/Digits
87
88 -- | A sum type representing either a letter or a digit.
89 --
90 data LetDig =
91 LetDigLetter Letter |
92 LetDigDigit Digit
93 deriving (Eq, Show)
94
95 instance Pretty LetDig where
96 pretty_show (LetDigLetter l) = pretty_show l
97 pretty_show (LetDigDigit d) = pretty_show d
98
99 -- | Parse a letter or a digit and wrap it in our 'LetDig' type.
100 --
101 let_dig :: Parser LetDig
102 let_dig = (fmap LetDigLetter letter) <|> (fmap LetDigDigit digit)
103
104
105 -- * Hyphens
106
107 -- | A wrapper around a single hyphen character.
108 --
109 newtype Hyphen = Hyphen Char deriving (Eq, Show)
110 instance Pretty Hyphen where pretty_show (Hyphen h) = [h]
111
112 -- | Parse a single hyphen and wrap it in our 'Hyphen' type.
113 --
114 hyphen :: Parser Hyphen
115 hyphen = fmap Hyphen (char '-')
116
117
118 -- * Letter, Digit, or Hyphen.
119
120 -- | A sum type representing a letter, digit, or hyphen.
121 --
122 data LetDigHyp =
123 LetDigHypLetDig LetDig |
124 LetDigHypHyphen Hyphen
125 deriving (Eq, Show)
126
127 instance Pretty LetDigHyp where
128 pretty_show (LetDigHypLetDig ld) = pretty_show ld
129 pretty_show (LetDigHypHyphen h) = pretty_show h
130
131
132 -- | The following is the simplest type in the domain grammar that
133 -- isn't already implemented for us.
134 --
135 -- <let-dig> ::= <letter> | <digit>
136 --
137 -- ==== _Examples_
138 --
139 -- >>> import Text.Parsec ( parseTest )
140 --
141 -- Letters, digits, and hyphens are all parsed:
142 --
143 -- >>> parseTest let_dig_hyp "a"
144 -- LetDigHypLetDig (LetDigLetter (Letter 'a'))
145 --
146 -- >>> parseTest let_dig_hyp "7"
147 -- LetDigHypLetDig (LetDigDigit (Digit '7'))
148 --
149 -- >>> parseTest let_dig_hyp "-"
150 -- LetDigHypHyphen (Hyphen '-')
151 --
152 -- However, an underscore (for example) is not:
153 --
154 -- >>> parseTest let_dig_hyp "_"
155 -- parse error at (line 1, column 1):
156 -- unexpected "_"
157 -- expecting letter, digit or "-"
158 --
159 let_dig_hyp :: Parser LetDigHyp
160 let_dig_hyp =
161 parse_letdig <|> parse_hyphen
162 where
163 parse_letdig :: Parser LetDigHyp
164 parse_letdig = fmap LetDigHypLetDig let_dig
165
166 parse_hyphen :: Parser LetDigHyp
167 parse_hyphen = fmap LetDigHypHyphen hyphen
168
169
170 -- * Letter/Digit/Hyphen strings
171
172 -- | A string of letters, digits, and hyphens from the RFC1035 grammar:
173 --
174 -- <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
175 --
176 -- These are represented as either a single instance of a
177 -- 'LetDigHyp', or a string of them (recursive).
178 --
179 data LdhStr =
180 LdhStrSingleLdh LetDigHyp |
181 LdhStrMultipleLdh LetDigHyp LdhStr
182 deriving (Eq, Show)
183
184 instance Pretty LdhStr where
185 pretty_show (LdhStrSingleLdh ldh) = pretty_show ldh
186 pretty_show (LdhStrMultipleLdh ldh s) = (pretty_show ldh) ++ (pretty_show s)
187
188 -- | Parse a string of letters, digits, and hyphens (an 'LdhStr').
189 --
190 -- ==== _Examples_
191 --
192 -- >>> import Text.Parsec ( parseTest )
193 --
194 -- Single letters, digits, and hyphens are parsed:
195 --
196 -- >>> parseTest ldh_str "a"
197 -- LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a')))
198 --
199 -- >>> parseTest ldh_str "0"
200 -- LdhStrSingleLdh (LetDigHypLetDig (LetDigDigit (Digit '0')))
201 --
202 -- >>> parseTest ldh_str "-"
203 -- LdhStrSingleLdh (LetDigHypHyphen (Hyphen '-'))
204 --
205 -- As well as strings of them:
206 --
207 -- >>> pretty_print $ parse ldh_str "" "a0-b"
208 -- a0-b
209 --
210 ldh_str :: Parser LdhStr
211 ldh_str = try both <|> just_one
212 where
213 both :: Parser LdhStr
214 both = do
215 ldh1 <- let_dig_hyp
216 ldh_tail <- ldh_str
217 return $ LdhStrMultipleLdh ldh1 ldh_tail
218
219 just_one :: Parser LdhStr
220 just_one = fmap LdhStrSingleLdh let_dig_hyp
221
222
223
224 -- | A version of 'last' that works on a 'LdhStr' rather than a
225 -- list. That is, it returns the last 'LetDigHyp' in the
226 -- string. Since 'LdhStr' contains at least one character, there's
227 -- no \"nil\" case here.
228 --
229 -- ==== _Examples_
230 --
231 -- >>> let (Right r) = parse ldh_str "" "a"
232 -- >>> last_ldh_str r
233 -- LetDigHypLetDig (LetDigLetter (Letter 'a'))
234 --
235 -- >>> let (Right r) = parse ldh_str "" "abc-def"
236 -- >>> last_ldh_str r
237 -- LetDigHypLetDig (LetDigLetter (Letter 'f'))
238 --
239 last_ldh_str :: LdhStr -> LetDigHyp
240 last_ldh_str (LdhStrSingleLdh x) = x
241 last_ldh_str (LdhStrMultipleLdh _ x) = last_ldh_str x
242
243
244 -- | A version of 'init' that works on a 'LdhStr' rather than a
245 -- list. That is, it returns everything /except/ the last character in
246 -- the string.
247 --
248 -- Since an 'LdhStr' must contain at least one character, this might
249 -- not be opssible (when the input is of length one). So, we return
250 -- a 'Maybe' value.
251 --
252 -- ==== _Examples_
253 --
254 -- >>> let (Right r) = parse ldh_str "" "a"
255 -- >>> init_ldh_str r
256 -- Nothing
257 --
258 -- >>> let (Right r) = parse ldh_str "" "ab"
259 -- >>> init_ldh_str r
260 -- Just (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))))
261 --
262 -- >>> let (Right r) = parse ldh_str "" "abc-def"
263 -- >>> init_ldh_str r
264 -- Just (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'b'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'c'))) (LdhStrMultipleLdh (LetDigHypHyphen (Hyphen '-')) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'd'))) (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'e')))))))))
265 --
266 init_ldh_str :: LdhStr -> Maybe LdhStr
267 init_ldh_str (LdhStrSingleLdh _) = Nothing
268 init_ldh_str (LdhStrMultipleLdh h t) =
269 Just $ case (init_ldh_str t) of
270 -- We just got the second-to-last character, we're done.
271 Nothing -> LdhStrSingleLdh h
272
273 -- There's still more stuff. Recurse.
274 Just rest -> LdhStrMultipleLdh h rest
275
276
277 -- | Compute the length of an 'LdhStr'. It will be at least one, since
278 -- 'LdhStr's are non-empty. And if there's something other than the
279 -- first character present, we simply recurse.
280 --
281 -- ==== _Examples_
282 --
283 -- >>> let (Right r) = parse ldh_str "" "a"
284 -- >>> length_ldh_str r
285 -- 1
286 --
287 -- >>> let (Right r) = parse ldh_str "" "abc-def"
288 -- >>> length_ldh_str r
289 -- 7
290 --
291 length_ldh_str :: LdhStr -> Int
292 length_ldh_str (LdhStrSingleLdh _) = 1
293 length_ldh_str (LdhStrMultipleLdh _ t) = 1 + (length_ldh_str t)
294
295 -- * Letter/Digit/Hyphen string followed by a trailing Letter/Digit
296
297 -- | This type isn't explicitly part of the grammar, but it's what
298 -- shows up in the square brackets of,
299 --
300 -- <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
301 --
302 -- The ldh-str is optional, but if one is present, we must also have
303 -- a trailing let-dig to prevent the name from ending with a
304 -- hyphen. This can be represented with a @Maybe LdhStrLetDig@,
305 -- which is why we're about to define it.
306 --
307 data LdhStrLetDig = LdhStrLetDig (Maybe LdhStr) LetDig
308 deriving (Eq, Show)
309
310 instance Pretty LdhStrLetDig where
311 pretty_show (LdhStrLetDig Nothing ld) = pretty_show ld
312 pretty_show (LdhStrLetDig (Just s) ld) = (pretty_show s) ++ (pretty_show ld)
313
314 -- | Parse an 'LdhStrLetDig'. This isn't in the grammar, but we might
315 -- as well define the parser for it independently since we gave it
316 -- its own data type.
317 --
318 -- ==== _Examples_
319 --
320 -- >>> import Text.Parsec ( parseTest )
321 --
322 -- Make sure we can parse a single character:
323 --
324 -- >>> parseTest ldh_str_let_dig "a"
325 -- LdhStrLetDig Nothing (LetDigLetter (Letter 'a'))
326 --
327 -- And longer strings:
328 --
329 -- >>> pretty_print $ parse ldh_str_let_dig "" "ab"
330 -- ab
331 --
332 -- >>> pretty_print $ parse ldh_str_let_dig "" "-b"
333 -- -b
334 --
335 -- >>> parseTest ldh_str_let_dig "b-"
336 -- parse error at (line 1, column 3):
337 -- label cannot end with a hyphen
338 --
339 ldh_str_let_dig :: Parser LdhStrLetDig
340 ldh_str_let_dig = do
341 -- This will happily eat up the trailing let-dig...
342 full_ldh <- ldh_str
343
344 -- So we have to go back and see what happened.
345 case (last_ldh_str full_ldh) of
346 (LetDigHypHyphen _) -> fail "label cannot end with a hyphen"
347 (LetDigHypLetDig ld) ->
348 -- Ok, the label didn't end with a hyphen; now we need to split
349 -- off the last letter/digit so we can pack it into our return
350 -- type separately.
351 return $ case (init_ldh_str full_ldh) of
352 -- We only parsed one letter/digit. This can happen
353 -- if the label contains two characters. For example,
354 -- if we try to parse the label "ab", then the "a"
355 -- will be eaten by the label parser, and this
356 -- function will be left with only "b".
357 Nothing -> LdhStrLetDig Nothing ld
358
359 -- Usual case: there's was some leading let-dig-hyp junk,
360 -- return it too.
361 leading_ldhs -> LdhStrLetDig leading_ldhs ld
362
363
364
365 -- | Compute the length of a 'LdhStrLetDig'. It's at least one, since
366 -- the let-dig at the end is always there. And when there's an
367 -- ldh-str too, we add its length to one.
368 --
369 -- ==== _Examples_
370 --
371 -- >>> let (Right r) = parse ldh_str_let_dig "" "a"
372 -- >>> length_ldh_str_let_dig r
373 -- 1
374 --
375 -- >>> let (Right r) = parse ldh_str_let_dig "" "abc-def"
376 -- >>> length_ldh_str_let_dig r
377 -- 7
378 --
379 length_ldh_str_let_dig :: LdhStrLetDig -> Int
380 length_ldh_str_let_dig (LdhStrLetDig Nothing _) = 1
381 length_ldh_str_let_dig (LdhStrLetDig (Just ldhstring) _) =
382 1 + (length_ldh_str ldhstring)
383
384
385 -- * Labels
386
387 -- | The label type from the RFC1035 grammar:
388 --
389 -- <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
390 --
391 -- We allow the slightly more general syntax from RFC1123, Section 2.1:
392 --
393 -- The syntax of a legal Internet host name was specified in RFC-952
394 -- [DNS:4]. One aspect of host name syntax is hereby changed: the
395 -- restriction on the first character is relaxed to allow either a
396 -- letter or a digit. Host software MUST support this more liberal
397 -- syntax.
398 --
399 data Label = Label Letter (Maybe LdhStrLetDig)
400 deriving (Eq, Show)
401
402 instance Pretty Label where
403 pretty_show (Label l Nothing) = pretty_show l
404 pretty_show (Label l (Just s)) = (pretty_show l) ++ (pretty_show s)
405
406 -- | Parse a 'Label'.
407 --
408 -- In addition to the grammar, there's another restriction on
409 -- labels: their length must be 63 characters or less. Quoting
410 -- Section 2.3.1, \"Preferred name syntax\", of RFC1035:
411 --
412 -- The labels must follow the rules for ARPANET host names. They
413 -- must start with a letter, end with a letter or digit, and have
414 -- as interior characters only letters, digits, and hyphen. There
415 -- are also some restrictions on the length. Labels must be 63
416 -- characters or less.
417 --
418 -- We check this only after we have successfully parsed a label.
419 --
420 -- ==== _Examples_
421 --
422 -- >>> import Text.Parsec ( parseTest )
423 --
424 -- Make sure we can parse a single character:
425 --
426 -- >>> parseTest label "a"
427 -- Label (Letter 'a') Nothing
428 --
429 -- And longer strings:
430 --
431 -- >>> pretty_print $ parse label "" "abc-def"
432 -- abc-def
433 --
434 -- But not anything ending in a hyphen:
435 --
436 -- >>> parseTest label "abc-"
437 -- parse error at (line 1, column 5):
438 -- label cannot end with a hyphen
439 --
440 -- Or anything over 63 characters:
441 --
442 -- >>> parseTest label (['a'..'z'] ++ ['a'..'z'] ++ ['a'..'z'])
443 -- parse error at (line 1, column 79):
444 -- labels must be 63 or fewer characters
445 --
446 -- However, /exactly/ 63 characters is acceptable:
447 --
448 -- >>> pretty_print $ parse label "" (replicate 63 'x')
449 -- xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
450 --
451 label :: Parser Label
452 label = do
453 l <- letter -- Guaranteed to be there
454 maybe_s <- optionMaybe ldh_str_let_dig -- Might not be there
455 case maybe_s of
456 -- It can only be one character long, from the letter...
457 Nothing -> return $ Label l maybe_s
458
459 -- The letter gives us one character, so we check that the rest is
460 -- less than 62 characters long. But in the error message we need
461 -- to report 63.
462 Just s -> if (length_ldh_str_let_dig s) <= 62
463 then return $ Label l maybe_s
464 else fail "labels must be 63 or fewer characters"
465
466
467
468 -- * Subdomains
469
470
471 -- | The data type representing a \"subdomain\" from RFC1035:
472 --
473 -- <subdomain> ::= <label> | <subdomain> "." <label>
474 --
475 -- We have reversed the order of the subdomain and label in the
476 -- second option, however. This is explained in 'subdomain'.
477 --
478 data Subdomain =
479 SubdomainSingleLabel Label |
480 SubdomainMultipleLabel Label Subdomain
481 deriving (Eq, Show)
482
483
484
485 instance Pretty Subdomain where
486 pretty_show (SubdomainSingleLabel l) = pretty_show l
487 pretty_show (SubdomainMultipleLabel l s) =
488 (pretty_show l) ++ "." ++ (pretty_show s)
489
490 -- | Parse an RFC1035 \"subdomain\". The given grammar is,
491 --
492 -- <subdomain> ::= <label> | <subdomain> "." <label>
493 --
494 -- However, we have reversed the order of the subdomain and label to
495 -- prevent infinite recursion. The second option (subdomain + label)
496 -- is obviously more specific, we we need to try it first. This
497 -- presents a problem: we're trying to parse a subdomain in terms of
498 -- a subdomain! The given grammar represents subdomains how we like
499 -- to think of them; from right to left. But it's better to parse
500 -- from left to right, so we pick off the leading label and then
501 -- recurse into the definition of subdomain.
502 --
503 -- According to RFC1034, Section 3.1, two neighboring labels in a
504 -- DNS name cannot be equal:
505 --
506 -- Each node has a label, which is zero to 63 octets in length. Brother
507 -- nodes may not have the same label, although the same label can be used
508 -- for nodes which are not brothers. One label is reserved, and that is
509 -- the null (i.e., zero length) label used for the root.
510 --
511 -- We enforce this restriction, but the result is usually that we
512 -- only parse the part of the subdomain leading up to the repeated
513 -- label.
514 --
515 -- ==== _Examples_
516 --
517 -- >>> import Text.Parsec ( parseTest )
518 --
519 -- Make sure we can parse a single character:
520 --
521 -- >>> parseTest subdomain "a"
522 -- SubdomainSingleLabel (Label (Letter 'a') Nothing)
523 --
524 -- >>> pretty_print $ parse subdomain "" "example.com"
525 -- example.com
526 --
527 -- >>> pretty_print $ parse subdomain "" "www.example.com"
528 -- www.example.com
529 --
530 -- We reject a subdomain with equal neighbors, but this leads to
531 -- only the single first label being parsed instead:
532 --
533 -- >>> pretty_print $ parse subdomain "" "www.www.example.com"
534 -- www
535 --
536 -- But not one with a repeated but non-neighboring label:
537 --
538 -- >>> pretty_print $ parse subdomain "" "www.example.www.com"
539 -- www.example.www.com
540 --
541 subdomain :: Parser Subdomain
542 subdomain = try both <|> just_one
543 where
544 both :: Parser Subdomain
545 both = do
546 l <- label
547 char '.'
548 s <- subdomain
549 let result = SubdomainMultipleLabel l s
550 if (subdomain_has_equal_neighbors result)
551 then fail "subdomain cannot have equal neighboring labels"
552 else return result
553
554 just_one :: Parser Subdomain
555 just_one = fmap SubdomainSingleLabel label
556
557
558
559 -- | Retrieve a list of labels contained in a 'Subdomain'.
560 --
561 -- ==== _Examples_
562 --
563 -- >>> let (Right r) = parse subdomain "" "a"
564 -- >>> pretty_print $ subdomain_labels r
565 -- ["a"]
566 --
567 -- >>> let (Right r) = parse subdomain "" "example.com"
568 -- >>> pretty_print $ subdomain_labels r
569 -- ["example","com"]
570 --
571 -- >>> let (Right r) = parse subdomain "" "www.example.com"
572 -- >>> pretty_print $ subdomain_labels r
573 -- ["www","example","com"]
574 --
575 subdomain_labels :: Subdomain -> [Label]
576 subdomain_labels (SubdomainSingleLabel l) = [l]
577 subdomain_labels (SubdomainMultipleLabel l s) = l : (subdomain_labels s)
578
579
580 -- | Return a list of pairs of neighboring labels in a subdomain.
581 --
582 -- ==== _Examples_
583 --
584 -- >>> let (Right r) = parse subdomain "" "www.example.com"
585 -- >>> pretty_print $ subdomain_label_neighbors r
586 -- ["(\"www\",\"example\")","(\"example\",\"com\")"]
587 --
588 subdomain_label_neighbors :: Subdomain -> [(Label,Label)]
589 subdomain_label_neighbors s =
590 zip ls (tail ls)
591 where
592 ls = subdomain_labels s
593
594
595 -- | Return @True@ if the subdomain has any two equal neighboring
596 -- labels, and @False@ otherwise.
597 --
598 -- ==== _Examples_
599 --
600 -- >>> let (Right r) = parse subdomain "" "www.example.com"
601 -- >>> subdomain_has_equal_neighbors r
602 -- False
603 --
604 -- >>> let (Right l) = parse label "" "www"
605 -- >>> let (Right s) = parse subdomain "" "www.example.com"
606 -- >>> let bad_subdomain = SubdomainMultipleLabel l s
607 -- >>> subdomain_has_equal_neighbors bad_subdomain
608 -- True
609 --
610 subdomain_has_equal_neighbors :: Subdomain -> Bool
611 subdomain_has_equal_neighbors s =
612 or [ x == y | (x,y) <- subdomain_label_neighbors s ]
613
614
615
616 -- * Domains
617
618 -- | An RFC1035 domain. According to RFC1035 a domain can be either a
619 -- subdomain or \" \", which according to RFC2181
620 -- <https://tools.ietf.org/html/rfc2181#section-11> means the root:
621 --
622 -- The zero length full name is defined as representing the root
623 -- of the DNS tree, and is typically written and displayed as
624 -- \".\".
625 --
626 -- We let the 'Domain' type remain true to those RFCs, even though
627 -- they don't support an absolute domain name of e.g. a single dot.
628 -- We have one more data type 'UserDomain' which handles the possibility
629 -- of an absolute path.
630 --
631 data Domain =
632 DomainName Subdomain |
633 DomainRoot
634 deriving (Eq, Show)
635
636 instance Pretty Domain where
637 pretty_show DomainRoot = ""
638 pretty_show (DomainName s) = pretty_show s
639
640 -- | Parse an RFC1035 \"domain\"
641 --
642 -- ==== _Examples_
643 --
644 -- >>> import Text.Parsec ( parseTest )
645 --
646 -- Make sure we can parse a single character:
647 --
648 -- >>> parseTest domain "a"
649 -- DomainName (SubdomainSingleLabel (Label (Letter 'a') Nothing))
650 --
651 -- And the empty domain:
652 --
653 -- >>> parseTest domain ""
654 -- DomainRoot
655 --
656 -- We will in fact parse the \"empty\" domain off the front of
657 -- pretty much anything:
658 --
659 -- >>> parseTest domain "8===D"
660 -- DomainRoot
661 --
662 -- Equality of domains is case-insensitive:
663 --
664 -- >>> let (Right r1) = parse domain "" "example.com"
665 -- >>> let (Right r2) = parse domain "" "ExaMPle.coM"
666 -- >>> r1 == r2
667 -- True
668 --
669 -- A single dot IS parsed as the root, but the dot isn't consumed:
670 --
671 -- >>> parseTest domain "."
672 -- DomainRoot
673 --
674 -- Anything over 255 characters is an error, so the root will be
675 -- parsed:
676 --
677 -- >>> let big_l1 = replicate 63 'x'
678 -- >>> let big_l2 = replicate 63 'y' -- Avoid equal neighboring labels!
679 -- >>> let big_labels = big_l1 ++ "." ++ big_l2 ++ "."
680 -- >>> let big_subdomain = concat $ replicate 3 big_labels
681 -- >>> parseTest domain big_subdomain
682 -- DomainRoot
683 --
684 -- But exactly 255 is allowed:
685 --
686 -- >>> import Data.List ( intercalate )
687 -- >>> let l1 = replicate 63 'w'
688 -- >>> let l2 = replicate 63 'x'
689 -- >>> let l3 = replicate 63 'y'
690 -- >>> let l4 = replicate 63 'z'
691 -- >>> let big_subdomain = intercalate "." [l1,l2,l3,l4]
692 -- >>> let (Right r) = parse domain "" big_subdomain
693 -- >>> length (pretty_show r)
694 -- 255
695 --
696 domain :: Parser Domain
697 domain = try parse_subdomain <|> parse_empty
698 where
699 parse_subdomain :: Parser Domain
700 parse_subdomain = do
701 s <- subdomain
702 if (length $ pretty_show s) <= 255
703 then return $ DomainName s
704 else fail "subdomains can be at most 255 characters"
705
706 parse_empty :: Parser Domain
707 parse_empty = string "" >> return DomainRoot
708
709
710
711 -- * User domains
712
713 -- | This type helps clarify some murkiness in the DNS \"domain\" name
714 -- specification. In RFC1034, it is acknowledged that a domain name
715 -- input with a trailing \".\" will represent an absolute domain
716 -- name (i.e. with respect to the DNS root). However, the grammar in
717 -- RFC1035 disallows a trailing dot.
718 --
719 -- This makes some sense: within the DNS, everything knows its
720 -- position in the tree. The relative/absolute distinction only
721 -- makes sense on the client side, where a user's resolver might
722 -- decide to append some suffix to a relative
723 -- request. Unfortunately, that's where we live. So we have to deal
724 -- with the possibility of having a trailing dot at the end of any
725 -- domain name.
726 --
727 data UserDomain =
728 UserDomainRelative Domain |
729 UserDomainAbsolute Domain
730 deriving (Eq, Show)
731
732 instance Pretty UserDomain where
733 pretty_show (UserDomainRelative d) = pretty_show d
734 pretty_show (UserDomainAbsolute d) = (pretty_show d) ++ "."
735
736
737 -- | Parse a 'UserDomain'. This is what we'll be using to read user
738 -- input, since it supports both relative and absolute domain names
739 -- (unlike the implicitly-absolute 'Domain').
740 --
741 -- ==== _Examples_
742 --
743 -- >>> import Text.Parsec ( parseTest )
744 --
745 -- We can really parse the root now!
746 --
747 -- >>> parseTest user_domain "."
748 -- UserDomainAbsolute DomainRoot
749 --
750 -- But multiple dots aren't (only the first):
751 --
752 -- >>> pretty_print $ parse user_domain "" ".."
753 -- .
754 --
755 -- We can also optionally have a trailing dot at the end of a
756 -- non-empty name:
757 --
758 -- >>> pretty_print $ parse user_domain "" "www.example.com"
759 -- www.example.com
760 --
761 -- >>> pretty_print $ parse user_domain "" "www.example.com."
762 -- www.example.com.
763 --
764 -- A \"relative root\" can also be parsed, letting the user's
765 -- resolver deal with it:
766 --
767 -- >>> parseTest user_domain ""
768 -- UserDomainRelative DomainRoot
769 --
770 user_domain :: Parser UserDomain
771 user_domain = try absolute <|> relative
772 where
773 absolute :: Parser UserDomain
774 absolute = do
775 d <- domain
776 r <- char '.'
777 return $ UserDomainAbsolute d
778
779 relative :: Parser UserDomain
780 relative = fmap UserDomainRelative domain