]> gitweb.michael.orlitzky.com - dead/harbl.git/blob - src/Network/DNS/RBL/Domain.hs
Add a .ghci file.
[dead/harbl.git] / src / Network / DNS / RBL / Domain.hs
1 {-# LANGUAGE DoAndIfThenElse #-}
2
3 -- | The 'Domain' data type and its parser. A 'Domain' represents a
4 -- name in the domain name system (DNS) as described by
5 -- RFC1035. In particular, we enforce the restrictions from Section
6 -- 2.3.1 \"Preferred name syntax\". See for example,
7 --
8 -- <https://tools.ietf.org/html/rfc1035#section-2.3.1>
9 --
10 -- We basically work with strings and characters everywhere, even
11 -- though this isn't really correct. The length specifications in
12 -- the RFCs are all in terms of octets, so really a ByteString.Char8
13 -- would be more appropriate. With strings, for example, we could
14 -- have a unicode mumbo jumbo character that takes up two bytes
15 -- (octets).
16 --
17 module Network.DNS.RBL.Domain (
18 UserDomain,
19 user_domain )
20 where
21
22 import Data.Char ( toLower )
23 import Text.Parsec (
24 (<|>),
25 char,
26 optionMaybe,
27 string,
28 try )
29 import qualified Text.Parsec as Parsec ( digit, letter)
30 import Text.Parsec.String ( Parser )
31
32 import Network.DNS.RBL.Pretty ( Pretty(..) )
33
34 -- * Digits
35
36 -- | A wrapper around a digit character.
37 --
38 newtype Digit = Digit Char deriving (Eq, Show)
39 instance Pretty Digit where pretty_show (Digit d) = [d]
40
41 -- | Parse a single digit, but wrap it in our 'Digit' type.
42 --
43 digit :: Parser Digit
44 digit = fmap Digit Parsec.digit
45
46
47 -- * Letters
48
49 -- | A wrapper around a letter character.
50 --
51 newtype Letter = Letter Char deriving (Show)
52 instance Pretty Letter where pretty_show (Letter l) = [l]
53
54
55 -- | Parse a single letter, but wrap it in our 'Letter' type.
56 --
57 letter :: Parser Letter
58 letter = fmap Letter Parsec.letter
59
60 -- | The derived instance of 'Eq' for letters is incorrect. All
61 -- comparisons should be made case-insensitively. The following
62 -- is an excerpt from RFC1035:
63 --
64 -- 2.3.3. Character Case
65 --
66 -- For all parts of the DNS that are part of the official
67 -- protocol, all comparisons between character strings (e.g.,
68 -- labels, domain names, etc.) are done in a case-insensitive
69 -- manner...
70 --
71 -- Since each part of DNS name is composed of our custom types, it
72 -- suffices to munge the equality for 'Letter'. RFC4343
73 -- <https://tools.ietf.org/html/rfc4343> clarifies the
74 -- case-insensitivity rules, but the fact that we're treating DNS
75 -- names as strings makes most of those problems go away (in
76 -- exchange for new ones).
77 --
78 instance Eq Letter where
79 (Letter l1) == (Letter l2) = (toLower l1) == (toLower l2)
80
81 -- * Letters/Digits
82
83 -- | A sum type representing either a letter or a digit.
84 --
85 data LetDig =
86 LetDigLetter Letter |
87 LetDigDigit Digit
88 deriving (Eq, Show)
89
90 instance Pretty LetDig where
91 pretty_show (LetDigLetter l) = pretty_show l
92 pretty_show (LetDigDigit d) = pretty_show d
93
94 -- | Parse a letter or a digit and wrap it in our 'LetDig' type.
95 --
96 let_dig :: Parser LetDig
97 let_dig = (fmap LetDigLetter letter) <|> (fmap LetDigDigit digit)
98
99
100 -- * Hyphens
101
102 -- | A wrapper around a single hyphen character.
103 --
104 newtype Hyphen = Hyphen Char deriving (Eq, Show)
105 instance Pretty Hyphen where pretty_show (Hyphen h) = [h]
106
107 -- | Parse a single hyphen and wrap it in our 'Hyphen' type.
108 --
109 hyphen :: Parser Hyphen
110 hyphen = fmap Hyphen (char '-')
111
112
113 -- * Letter, Digit, or Hyphen.
114
115 -- | A sum type representing a letter, digit, or hyphen.
116 --
117 data LetDigHyp =
118 LetDigHypLetDig LetDig |
119 LetDigHypHyphen Hyphen
120 deriving (Eq, Show)
121
122 instance Pretty LetDigHyp where
123 pretty_show (LetDigHypLetDig ld) = pretty_show ld
124 pretty_show (LetDigHypHyphen h) = pretty_show h
125
126
127 -- | The following is the simplest type in the domain grammar that
128 -- isn't already implemented for us.
129 --
130 -- <let-dig> ::= <letter> | <digit>
131 --
132 -- ==== _Examples_
133 --
134 -- >>> import Text.Parsec ( parseTest )
135 --
136 -- Letters, digits, and hyphens are all parsed:
137 --
138 -- >>> parseTest let_dig_hyp "a"
139 -- LetDigHypLetDig (LetDigLetter (Letter 'a'))
140 --
141 -- >>> parseTest let_dig_hyp "7"
142 -- LetDigHypLetDig (LetDigDigit (Digit '7'))
143 --
144 -- >>> parseTest let_dig_hyp "-"
145 -- LetDigHypHyphen (Hyphen '-')
146 --
147 -- However, an underscore (for example) is not:
148 --
149 -- >>> parseTest let_dig_hyp "_"
150 -- parse error at (line 1, column 1):
151 -- unexpected "_"
152 -- expecting letter, digit or "-"
153 --
154 let_dig_hyp :: Parser LetDigHyp
155 let_dig_hyp =
156 parse_letdig <|> parse_hyphen
157 where
158 parse_letdig :: Parser LetDigHyp
159 parse_letdig = fmap LetDigHypLetDig let_dig
160
161 parse_hyphen :: Parser LetDigHyp
162 parse_hyphen = fmap LetDigHypHyphen hyphen
163
164
165 -- * Letter/Digit/Hyphen strings
166
167 -- | A string of letters, digits, and hyphens from the RFC1035 grammar:
168 --
169 -- <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
170 --
171 -- These are represented as either a single instance of a
172 -- 'LetDigHyp', or a string of them (recursive).
173 --
174 data LdhStr =
175 LdhStrSingleLdh LetDigHyp |
176 LdhStrMultipleLdh LetDigHyp LdhStr
177 deriving (Eq, Show)
178
179 instance Pretty LdhStr where
180 pretty_show (LdhStrSingleLdh ldh) = pretty_show ldh
181 pretty_show (LdhStrMultipleLdh ldh s) = (pretty_show ldh) ++ (pretty_show s)
182
183 -- | Parse a string of letters, digits, and hyphens (an 'LdhStr').
184 --
185 -- ==== _Examples_
186 --
187 -- >>> import Text.Parsec ( parseTest )
188 --
189 -- Single letters, digits, and hyphens are parsed:
190 --
191 -- >>> parseTest ldh_str "a"
192 -- LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a')))
193 --
194 -- >>> parseTest ldh_str "0"
195 -- LdhStrSingleLdh (LetDigHypLetDig (LetDigDigit (Digit '0')))
196 --
197 -- >>> parseTest ldh_str "-"
198 -- LdhStrSingleLdh (LetDigHypHyphen (Hyphen '-'))
199 --
200 -- As well as strings of them:
201 --
202 -- >>> import Text.Parsec ( parse )
203 -- >>> pretty_print $ parse ldh_str "" "a0-b"
204 -- a0-b
205 --
206 ldh_str :: Parser LdhStr
207 ldh_str = try both <|> just_one
208 where
209 both :: Parser LdhStr
210 both = do
211 ldh1 <- let_dig_hyp
212 ldh_tail <- ldh_str
213 return $ LdhStrMultipleLdh ldh1 ldh_tail
214
215 just_one :: Parser LdhStr
216 just_one = fmap LdhStrSingleLdh let_dig_hyp
217
218
219
220 -- | A version of 'last' that works on a 'LdhStr' rather than a
221 -- list. That is, it returns the last 'LetDigHyp' in the
222 -- string. Since 'LdhStr' contains at least one character, there's
223 -- no \"nil\" case here.
224 --
225 -- ==== _Examples_
226 --
227 -- >>> import Text.Parsec ( parse )
228 --
229 -- >>> let (Right r) = parse ldh_str "" "a"
230 -- >>> last_ldh_str r
231 -- LetDigHypLetDig (LetDigLetter (Letter 'a'))
232 --
233 -- >>> let (Right r) = parse ldh_str "" "abc-def"
234 -- >>> last_ldh_str r
235 -- LetDigHypLetDig (LetDigLetter (Letter 'f'))
236 --
237 last_ldh_str :: LdhStr -> LetDigHyp
238 last_ldh_str (LdhStrSingleLdh x) = x
239 last_ldh_str (LdhStrMultipleLdh _ x) = last_ldh_str x
240
241
242 -- | A version of 'init' that works on a 'LdhStr' rather than a
243 -- list. That is, it returns everything /except/ the last character in
244 -- the string.
245 --
246 -- Since an 'LdhStr' must contain at least one character, this might
247 -- not be opssible (when the input is of length one). So, we return
248 -- a 'Maybe' value.
249 --
250 -- ==== _Examples_
251 --
252 -- >>> import Text.Parsec ( parse )
253 --
254 -- >>> let (Right r) = parse ldh_str "" "a"
255 -- >>> init_ldh_str r
256 -- Nothing
257 --
258 -- >>> let (Right r) = parse ldh_str "" "ab"
259 -- >>> init_ldh_str r
260 -- Just (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))))
261 --
262 -- >>> let (Right r) = parse ldh_str "" "abc-def"
263 -- >>> init_ldh_str r
264 -- Just (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'b'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'c'))) (LdhStrMultipleLdh (LetDigHypHyphen (Hyphen '-')) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'd'))) (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'e')))))))))
265 --
266 init_ldh_str :: LdhStr -> Maybe LdhStr
267 init_ldh_str (LdhStrSingleLdh _) = Nothing
268 init_ldh_str (LdhStrMultipleLdh h t) =
269 Just $ case (init_ldh_str t) of
270 -- We just got the second-to-last character, we're done.
271 Nothing -> LdhStrSingleLdh h
272
273 -- There's still more stuff. Recurse.
274 Just rest -> LdhStrMultipleLdh h rest
275
276
277 -- | Compute the length of an 'LdhStr'. It will be at least one, since
278 -- 'LdhStr's are non-empty. And if there's something other than the
279 -- first character present, we simply recurse.
280 --
281 -- ==== _Examples_
282 --
283 -- >>> import Text.Parsec ( parse )
284 --
285 -- >>> let (Right r) = parse ldh_str "" "a"
286 -- >>> length_ldh_str r
287 -- 1
288 --
289 -- >>> let (Right r) = parse ldh_str "" "abc-def"
290 -- >>> length_ldh_str r
291 -- 7
292 --
293 length_ldh_str :: LdhStr -> Int
294 length_ldh_str (LdhStrSingleLdh _) = 1
295 length_ldh_str (LdhStrMultipleLdh _ t) = 1 + (length_ldh_str t)
296
297 -- * Letter/Digit/Hyphen string followed by a trailing Letter/Digit
298
299 -- | This type isn't explicitly part of the grammar, but it's what
300 -- shows up in the square brackets of,
301 --
302 -- <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
303 --
304 -- The ldh-str is optional, but if one is present, we must also have
305 -- a trailing let-dig to prevent the name from ending with a
306 -- hyphen. This can be represented with a @Maybe LdhStrLetDig@,
307 -- which is why we're about to define it.
308 --
309 data LdhStrLetDig = LdhStrLetDig (Maybe LdhStr) LetDig
310 deriving (Eq, Show)
311
312 instance Pretty LdhStrLetDig where
313 pretty_show (LdhStrLetDig Nothing ld) = pretty_show ld
314 pretty_show (LdhStrLetDig (Just s) ld) = (pretty_show s) ++ (pretty_show ld)
315
316 -- | Parse an 'LdhStrLetDig'. This isn't in the grammar, but we might
317 -- as well define the parser for it independently since we gave it
318 -- its own data type.
319 --
320 -- ==== _Examples_
321 --
322 -- >>> import Text.Parsec ( parse, parseTest )
323 --
324 -- Make sure we can parse a single character:
325 --
326 -- >>> parseTest ldh_str_let_dig "a"
327 -- LdhStrLetDig Nothing (LetDigLetter (Letter 'a'))
328 --
329 -- And longer strings:
330 --
331 -- >>> pretty_print $ parse ldh_str_let_dig "" "ab"
332 -- ab
333 --
334 -- >>> pretty_print $ parse ldh_str_let_dig "" "-b"
335 -- -b
336 --
337 -- >>> parseTest ldh_str_let_dig "b-"
338 -- parse error at (line 1, column 3):
339 -- label cannot end with a hyphen
340 --
341 ldh_str_let_dig :: Parser LdhStrLetDig
342 ldh_str_let_dig = do
343 -- This will happily eat up the trailing let-dig...
344 full_ldh <- ldh_str
345
346 -- So we have to go back and see what happened.
347 case (last_ldh_str full_ldh) of
348 (LetDigHypHyphen _) -> fail "label cannot end with a hyphen"
349 (LetDigHypLetDig ld) ->
350 -- Ok, the label didn't end with a hyphen; now we need to split
351 -- off the last letter/digit so we can pack it into our return
352 -- type separately.
353 return $ case (init_ldh_str full_ldh) of
354 -- We only parsed one letter/digit. This can happen
355 -- if the label contains two characters. For example,
356 -- if we try to parse the label "ab", then the "a"
357 -- will be eaten by the label parser, and this
358 -- function will be left with only "b".
359 Nothing -> LdhStrLetDig Nothing ld
360
361 -- Usual case: there's was some leading let-dig-hyp junk,
362 -- return it too.
363 leading_ldhs -> LdhStrLetDig leading_ldhs ld
364
365
366
367 -- | Compute the length of a 'LdhStrLetDig'. It's at least one, since
368 -- the let-dig at the end is always there. And when there's an
369 -- ldh-str too, we add its length to one.
370 --
371 -- ==== _Examples_
372 --
373 -- >>> import Text.Parsec ( parse )
374 --
375 -- >>> let (Right r) = parse ldh_str_let_dig "" "a"
376 -- >>> length_ldh_str_let_dig r
377 -- 1
378 --
379 -- >>> let (Right r) = parse ldh_str_let_dig "" "abc-def"
380 -- >>> length_ldh_str_let_dig r
381 -- 7
382 --
383 length_ldh_str_let_dig :: LdhStrLetDig -> Int
384 length_ldh_str_let_dig (LdhStrLetDig Nothing _) = 1
385 length_ldh_str_let_dig (LdhStrLetDig (Just ldhstring) _) =
386 1 + (length_ldh_str ldhstring)
387
388
389 -- * Labels
390
391 -- | The label type from the RFC1035 grammar:
392 --
393 -- <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
394 --
395 -- We allow the slightly more general syntax from RFC1123, Section 2.1:
396 --
397 -- The syntax of a legal Internet host name was specified in RFC-952
398 -- [DNS:4]. One aspect of host name syntax is hereby changed: the
399 -- restriction on the first character is relaxed to allow either a
400 -- letter or a digit. Host software MUST support this more liberal
401 -- syntax.
402 --
403 data Label = Label Letter (Maybe LdhStrLetDig)
404 deriving (Eq, Show)
405
406 instance Pretty Label where
407 pretty_show (Label l Nothing) = pretty_show l
408 pretty_show (Label l (Just s)) = (pretty_show l) ++ (pretty_show s)
409
410 -- | Parse a 'Label'.
411 --
412 -- In addition to the grammar, there's another restriction on
413 -- labels: their length must be 63 characters or less. Quoting
414 -- Section 2.3.1, \"Preferred name syntax\", of RFC1035:
415 --
416 -- The labels must follow the rules for ARPANET host names. They
417 -- must start with a letter, end with a letter or digit, and have
418 -- as interior characters only letters, digits, and hyphen. There
419 -- are also some restrictions on the length. Labels must be 63
420 -- characters or less.
421 --
422 -- We check this only after we have successfully parsed a label.
423 --
424 -- ==== _Examples_
425 --
426 -- >>> import Text.Parsec ( parse, parseTest )
427 --
428 -- Make sure we can parse a single character:
429 --
430 -- >>> parseTest label "a"
431 -- Label (Letter 'a') Nothing
432 --
433 -- And longer strings:
434 --
435 -- >>> pretty_print $ parse label "" "abc-def"
436 -- abc-def
437 --
438 -- But not anything ending in a hyphen:
439 --
440 -- >>> parseTest label "abc-"
441 -- parse error at (line 1, column 5):
442 -- label cannot end with a hyphen
443 --
444 -- Or anything over 63 characters:
445 --
446 -- >>> parseTest label (['a'..'z'] ++ ['a'..'z'] ++ ['a'..'z'])
447 -- parse error at (line 1, column 79):
448 -- labels must be 63 or fewer characters
449 --
450 -- However, /exactly/ 63 characters is acceptable:
451 --
452 -- >>> pretty_print $ parse label "" (replicate 63 'x')
453 -- xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
454 --
455 label :: Parser Label
456 label = do
457 l <- letter -- Guaranteed to be there
458 maybe_s <- optionMaybe ldh_str_let_dig -- Might not be there
459 case maybe_s of
460 -- It can only be one character long, from the letter...
461 Nothing -> return $ Label l maybe_s
462
463 -- The letter gives us one character, so we check that the rest is
464 -- less than 62 characters long. But in the error message we need
465 -- to report 63.
466 Just s -> if (length_ldh_str_let_dig s) <= 62
467 then return $ Label l maybe_s
468 else fail "labels must be 63 or fewer characters"
469
470
471
472 -- * Subdomains
473
474
475 -- | The data type representing a \"subdomain\" from RFC1035:
476 --
477 -- <subdomain> ::= <label> | <subdomain> "." <label>
478 --
479 -- We have reversed the order of the subdomain and label in the
480 -- second option, however. This is explained in 'subdomain'.
481 --
482 data Subdomain =
483 SubdomainSingleLabel Label |
484 SubdomainMultipleLabel Label Subdomain
485 deriving (Eq, Show)
486
487
488
489 instance Pretty Subdomain where
490 pretty_show (SubdomainSingleLabel l) = pretty_show l
491 pretty_show (SubdomainMultipleLabel l s) =
492 (pretty_show l) ++ "." ++ (pretty_show s)
493
494 -- | Parse an RFC1035 \"subdomain\". The given grammar is,
495 --
496 -- <subdomain> ::= <label> | <subdomain> "." <label>
497 --
498 -- However, we have reversed the order of the subdomain and label to
499 -- prevent infinite recursion. The second option (subdomain + label)
500 -- is obviously more specific, we we need to try it first. This
501 -- presents a problem: we're trying to parse a subdomain in terms of
502 -- a subdomain! The given grammar represents subdomains how we like
503 -- to think of them; from right to left. But it's better to parse
504 -- from left to right, so we pick off the leading label and then
505 -- recurse into the definition of subdomain.
506 --
507 -- According to RFC1034, Section 3.1, two neighboring labels in a
508 -- DNS name cannot be equal:
509 --
510 -- Each node has a label, which is zero to 63 octets in length. Brother
511 -- nodes may not have the same label, although the same label can be used
512 -- for nodes which are not brothers. One label is reserved, and that is
513 -- the null (i.e., zero length) label used for the root.
514 --
515 -- We enforce this restriction, but the result is usually that we
516 -- only parse the part of the subdomain leading up to the repeated
517 -- label.
518 --
519 -- ==== _Examples_
520 --
521 -- >>> import Text.Parsec ( parse, parseTest )
522 --
523 -- Make sure we can parse a single character:
524 --
525 -- >>> parseTest subdomain "a"
526 -- SubdomainSingleLabel (Label (Letter 'a') Nothing)
527 --
528 -- >>> pretty_print $ parse subdomain "" "example.com"
529 -- example.com
530 --
531 -- >>> pretty_print $ parse subdomain "" "www.example.com"
532 -- www.example.com
533 --
534 -- We reject a subdomain with equal neighbors, but this leads to
535 -- only the single first label being parsed instead:
536 --
537 -- >>> pretty_print $ parse subdomain "" "www.www.example.com"
538 -- www
539 --
540 -- But not one with a repeated but non-neighboring label:
541 --
542 -- >>> pretty_print $ parse subdomain "" "www.example.www.com"
543 -- www.example.www.com
544 --
545 subdomain :: Parser Subdomain
546 subdomain = try both <|> just_one
547 where
548 both :: Parser Subdomain
549 both = do
550 l <- label
551 _ <- char '.'
552 s <- subdomain
553 let result = SubdomainMultipleLabel l s
554 if (subdomain_has_equal_neighbors result)
555 then fail "subdomain cannot have equal neighboring labels"
556 else return result
557
558 just_one :: Parser Subdomain
559 just_one = fmap SubdomainSingleLabel label
560
561
562
563 -- | Retrieve a list of labels contained in a 'Subdomain'.
564 --
565 -- ==== _Examples_
566 --
567 -- >>> import Text.Parsec ( parse )
568 --
569 -- >>> let (Right r) = parse subdomain "" "a"
570 -- >>> pretty_print $ subdomain_labels r
571 -- ["a"]
572 --
573 -- >>> let (Right r) = parse subdomain "" "example.com"
574 -- >>> pretty_print $ subdomain_labels r
575 -- ["example","com"]
576 --
577 -- >>> let (Right r) = parse subdomain "" "www.example.com"
578 -- >>> pretty_print $ subdomain_labels r
579 -- ["www","example","com"]
580 --
581 subdomain_labels :: Subdomain -> [Label]
582 subdomain_labels (SubdomainSingleLabel l) = [l]
583 subdomain_labels (SubdomainMultipleLabel l s) = l : (subdomain_labels s)
584
585
586 -- | Return a list of pairs of neighboring labels in a subdomain.
587 --
588 -- ==== _Examples_
589 --
590 -- >>> import Text.Parsec ( parse )
591 -- >>> let (Right r) = parse subdomain "" "www.example.com"
592 -- >>> pretty_print $ subdomain_label_neighbors r
593 -- ["(\"www\",\"example\")","(\"example\",\"com\")"]
594 --
595 subdomain_label_neighbors :: Subdomain -> [(Label,Label)]
596 subdomain_label_neighbors s =
597 zip ls (tail ls)
598 where
599 ls = subdomain_labels s
600
601
602 -- | Return @True@ if the subdomain has any two equal neighboring
603 -- labels, and @False@ otherwise.
604 --
605 -- ==== _Examples_
606 --
607 -- >>> import Text.Parsec ( parse )
608 --
609 -- >>> let (Right r) = parse subdomain "" "www.example.com"
610 -- >>> subdomain_has_equal_neighbors r
611 -- False
612 --
613 -- >>> let (Right l) = parse label "" "www"
614 -- >>> let (Right s) = parse subdomain "" "www.example.com"
615 -- >>> let bad_subdomain = SubdomainMultipleLabel l s
616 -- >>> subdomain_has_equal_neighbors bad_subdomain
617 -- True
618 --
619 subdomain_has_equal_neighbors :: Subdomain -> Bool
620 subdomain_has_equal_neighbors s =
621 or [ x == y | (x,y) <- subdomain_label_neighbors s ]
622
623
624
625 -- * Domains
626
627 -- | An RFC1035 domain. According to RFC1035 a domain can be either a
628 -- subdomain or \" \", which according to RFC2181
629 -- <https://tools.ietf.org/html/rfc2181#section-11> means the root:
630 --
631 -- The zero length full name is defined as representing the root
632 -- of the DNS tree, and is typically written and displayed as
633 -- \".\".
634 --
635 -- We let the 'Domain' type remain true to those RFCs, even though
636 -- they don't support an absolute domain name of e.g. a single dot.
637 -- We have one more data type 'UserDomain' which handles the possibility
638 -- of an absolute path.
639 --
640 data Domain =
641 DomainName Subdomain |
642 DomainRoot
643 deriving (Eq, Show)
644
645 instance Pretty Domain where
646 pretty_show DomainRoot = ""
647 pretty_show (DomainName s) = pretty_show s
648
649 -- | Parse an RFC1035 \"domain\"
650 --
651 -- ==== _Examples_
652 --
653 -- >>> import Text.Parsec ( parse, parseTest )
654 --
655 -- Make sure we can parse a single character:
656 --
657 -- >>> parseTest domain "a"
658 -- DomainName (SubdomainSingleLabel (Label (Letter 'a') Nothing))
659 --
660 -- And the empty domain:
661 --
662 -- >>> parseTest domain ""
663 -- DomainRoot
664 --
665 -- We will in fact parse the \"empty\" domain off the front of
666 -- pretty much anything:
667 --
668 -- >>> parseTest domain "8===D"
669 -- DomainRoot
670 --
671 -- Equality of domains is case-insensitive:
672 --
673 -- >>> let (Right r1) = parse domain "" "example.com"
674 -- >>> let (Right r2) = parse domain "" "ExaMPle.coM"
675 -- >>> r1 == r2
676 -- True
677 --
678 -- A single dot IS parsed as the root, but the dot isn't consumed:
679 --
680 -- >>> parseTest domain "."
681 -- DomainRoot
682 --
683 -- Anything over 255 characters is an error, so the root will be
684 -- parsed:
685 --
686 -- >>> let big_l1 = replicate 63 'x'
687 -- >>> let big_l2 = replicate 63 'y' -- Avoid equal neighboring labels!
688 -- >>> let big_labels = big_l1 ++ "." ++ big_l2 ++ "."
689 -- >>> let big_subdomain = concat $ replicate 3 big_labels
690 -- >>> parseTest domain big_subdomain
691 -- DomainRoot
692 --
693 -- But exactly 255 is allowed:
694 --
695 -- >>> import Data.List ( intercalate )
696 -- >>> let l1 = replicate 63 'w'
697 -- >>> let l2 = replicate 63 'x'
698 -- >>> let l3 = replicate 63 'y'
699 -- >>> let l4 = replicate 63 'z'
700 -- >>> let big_subdomain = intercalate "." [l1,l2,l3,l4]
701 -- >>> let (Right r) = parse domain "" big_subdomain
702 -- >>> length (pretty_show r)
703 -- 255
704 --
705 domain :: Parser Domain
706 domain = try parse_subdomain <|> parse_empty
707 where
708 parse_subdomain :: Parser Domain
709 parse_subdomain = do
710 s <- subdomain
711 if length (pretty_show s) <= 255
712 then return $ DomainName s
713 else fail "subdomains can be at most 255 characters"
714
715 parse_empty :: Parser Domain
716 parse_empty = string "" >> return DomainRoot
717
718
719
720 -- * User domains
721
722 -- | This type helps clarify some murkiness in the DNS \"domain\" name
723 -- specification. In RFC1034, it is acknowledged that a domain name
724 -- input with a trailing \".\" will represent an absolute domain
725 -- name (i.e. with respect to the DNS root). However, the grammar in
726 -- RFC1035 disallows a trailing dot.
727 --
728 -- This makes some sense: within the DNS, everything knows its
729 -- position in the tree. The relative/absolute distinction only
730 -- makes sense on the client side, where a user's resolver might
731 -- decide to append some suffix to a relative
732 -- request. Unfortunately, that's where we live. So we have to deal
733 -- with the possibility of having a trailing dot at the end of any
734 -- domain name.
735 --
736 data UserDomain =
737 UserDomainRelative Domain |
738 UserDomainAbsolute Domain
739 deriving (Eq, Show)
740
741 instance Pretty UserDomain where
742 pretty_show (UserDomainRelative d) = pretty_show d
743 pretty_show (UserDomainAbsolute d) = (pretty_show d) ++ "."
744
745
746 -- | Parse a 'UserDomain'. This is what we'll be using to read user
747 -- input, since it supports both relative and absolute domain names
748 -- (unlike the implicitly-absolute 'Domain').
749 --
750 -- ==== _Examples_
751 --
752 -- >>> import Text.Parsec ( parse, parseTest )
753 --
754 -- We can really parse the root now!
755 --
756 -- >>> parseTest user_domain "."
757 -- UserDomainAbsolute DomainRoot
758 --
759 -- But multiple dots aren't (only the first):
760 --
761 -- >>> pretty_print $ parse user_domain "" ".."
762 -- .
763 --
764 -- We can also optionally have a trailing dot at the end of a
765 -- non-empty name:
766 --
767 -- >>> pretty_print $ parse user_domain "" "www.example.com"
768 -- www.example.com
769 --
770 -- >>> pretty_print $ parse user_domain "" "www.example.com."
771 -- www.example.com.
772 --
773 -- A \"relative root\" can also be parsed, letting the user's
774 -- resolver deal with it:
775 --
776 -- >>> parseTest user_domain ""
777 -- UserDomainRelative DomainRoot
778 --
779 user_domain :: Parser UserDomain
780 user_domain = try absolute <|> relative
781 where
782 absolute :: Parser UserDomain
783 absolute = do
784 d <- domain
785 _ <- char '.'
786 return $ UserDomainAbsolute d
787
788 relative :: Parser UserDomain
789 relative = fmap UserDomainRelative domain