Add the Network.DNS.RBL.Host module.
[dead/harbl.git] / harbl / src / Network / DNS / RBL / Domain.hs
1 {-# LANGUAGE DoAndIfThenElse #-}
2
3 -- | The 'Domain' data type and its parser. A 'Domain' represents a
4 -- name in the domain name system (DNS) as described by
5 -- RFC1035. In particular, we enforce the restrictions from Section
6 -- 2.3.1 \"Preferred name syntax\". See for example,
7 --
8 -- <https://tools.ietf.org/html/rfc1035#section-2.3.1>
9 --
10 -- We basically work with strings and characters everywhere, even
11 -- though this isn't really correct. The length specifications in
12 -- the RFCs are all in terms of octets, so really a ByteString.Char8
13 -- would be more appropriate. With strings, for example, we could
14 -- have a unicode mumbo jumbo character that takes up two bytes
15 -- (octets).
16 --
17 module Network.DNS.RBL.Domain (
18 Domain,
19 domain )
20 where
21
22 import Data.Char ( toLower )
23 import Text.Parsec (
24 (<|>),
25 char,
26 optionMaybe,
27 string,
28 try )
29 import qualified Text.Parsec as Parsec ( digit, letter)
30 import Text.Parsec.String ( Parser )
31
32 import Network.DNS.RBL.Pretty ( Pretty(..) )
33
34 -- * Digits
35
-- | A single digit character, kept in a type of its own so that it
--   cannot be mixed up with the other pieces of the domain grammar.
--
newtype Digit = Digit Char
  deriving (Eq, Show)

-- | A 'Digit' is displayed as the one character that it wraps.
instance Pretty Digit where
  pretty_show (Digit c) = c : []
40
-- | Parser for one digit. Whatever character Parsec's own digit
--   parser accepts is wrapped in the 'Digit' constructor.
--
digit :: Parser Digit
digit = Digit `fmap` Parsec.digit
45
46
47 -- * Letters
48
-- | A single letter character. Only 'Show' is derived here; the
--   'Eq' instance is written by hand elsewhere in this module.
--
newtype Letter = Letter Char
  deriving (Show)

-- | A 'Letter' is displayed as the one character that it wraps.
instance Pretty Letter where
  pretty_show (Letter c) = c : []
53
54
-- | Parse a single letter, but wrap it in our 'Letter' type.
--
--   RFC1035 (Section 2.3.1) defines a letter as one of the 52
--   characters @A@-@Z@ and @a@-@z@. Parsec's own letter parser also
--   accepts non-ASCII alphabetic characters, so we check the range
--   ourselves and reject anything else. The whole thing is wrapped
--   in 'try' so that a rejected character is not consumed, and the
--   alternatives in e.g. 'let_dig' still get their chance.
--
--   For ASCII input (and for input that isn't a letter at all) this
--   behaves exactly like Parsec's letter parser, error messages
--   included.
--
letter :: Parser Letter
letter = try $ do
  c <- Parsec.letter
  if is_ascii_letter c
    then return (Letter c)
    else fail "letters must be ASCII (A-Z or a-z)"
  where
    -- True only for the 52 letters that RFC1035 allows.
    is_ascii_letter :: Char -> Bool
    is_ascii_letter c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
59
-- | Letters are compared without regard to case, which is why the
--   'Eq' instance is not simply derived. RFC1035 says:
--
--     2.3.3. Character Case
--
--     For all parts of the DNS that are part of the official
--     protocol, all comparisons between character strings (e.g.,
--     labels, domain names, etc.) are done in a case-insensitive
--     manner...
--
--   Every other type in this module derives its 'Eq' instance, and
--   all of them are ultimately built out of our custom character
--   types, so adjusting the comparison for 'Letter' is enough to
--   make the larger types compare case-insensitively too. RFC4343
--   <https://tools.ietf.org/html/rfc4343> clarifies the
--   case-insensitivity rules, but since we treat DNS names as
--   strings, most of those problems go away (in exchange for new
--   ones).
--
instance Eq Letter where
  Letter a == Letter b = toLower a == toLower b
80
81 -- * Letters/Digits
82
-- | Either a 'Letter' or a 'Digit'; the @\<let-dig\>@ of the grammar.
--
data LetDig
  = LetDigLetter Letter
  | LetDigDigit Digit
  deriving (Eq, Show)

-- | Display whichever character is inside.
instance Pretty LetDig where
  pretty_show ld =
    case ld of
      LetDigLetter l -> pretty_show l
      LetDigDigit d -> pretty_show d
93
-- | Parser for a 'LetDig': a letter is tried first, and then a
--   digit.
--
let_dig :: Parser LetDig
let_dig = as_letter <|> as_digit
  where
    as_letter :: Parser LetDig
    as_letter = fmap LetDigLetter letter

    as_digit :: Parser LetDig
    as_digit = fmap LetDigDigit digit
98
99
100 -- * Hyphens
101
-- | A hyphen character in a type of its own.
--
newtype Hyphen = Hyphen Char
  deriving (Eq, Show)

-- | A 'Hyphen' is displayed as the one character that it wraps.
instance Pretty Hyphen where
  pretty_show (Hyphen c) = c : []
106
-- | Parser for exactly one @-@ character, wrapped in the 'Hyphen'
--   constructor.
--
hyphen :: Parser Hyphen
hyphen = Hyphen `fmap` char '-'
111
112
113 -- * Letter, Digit, or Hyphen.
114
-- | One character that is a letter, a digit, or a hyphen. The first
--   two cases are shared with 'LetDig'.
--
data LetDigHyp
  = LetDigHypLetDig LetDig
  | LetDigHypHyphen Hyphen
  deriving (Eq, Show)

-- | Display whichever character is inside.
instance Pretty LetDigHyp where
  pretty_show ldh =
    case ldh of
      LetDigHypLetDig ld -> pretty_show ld
      LetDigHypHyphen h -> pretty_show h
125
126
-- | Parse one letter, digit, or hyphen; that is, the production
--
--     <let-dig-hyp> ::= <let-dig> | "-"
--
--   from the RFC1035 grammar. The letter/digit alternative is tried
--   before the hyphen.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parseTest )
--
--   Letters, digits, and hyphens are all parsed:
--
--   >>> parseTest let_dig_hyp "a"
--   LetDigHypLetDig (LetDigLetter (Letter 'a'))
--
--   >>> parseTest let_dig_hyp "7"
--   LetDigHypLetDig (LetDigDigit (Digit '7'))
--
--   >>> parseTest let_dig_hyp "-"
--   LetDigHypHyphen (Hyphen '-')
--
--   However, an underscore (for example) is not:
--
--   >>> parseTest let_dig_hyp "_"
--   parse error at (line 1, column 1):
--   unexpected "_"
--   expecting letter, digit or "-"
--
let_dig_hyp :: Parser LetDigHyp
let_dig_hyp = fmap LetDigHypLetDig let_dig <|> fmap LetDigHypHyphen hyphen
163
164
165 -- * Letter/Digit/Hyphen strings
166
-- | A non-empty run of letters, digits, and hyphens. This mirrors
--   the recursive production from RFC1035,
--
--     <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
--
--   so a value is either one 'LetDigHyp' on its own, or one
--   'LetDigHyp' followed by another 'LdhStr'.
--
data LdhStr
  = LdhStrSingleLdh LetDigHyp
  | LdhStrMultipleLdh LetDigHyp LdhStr
  deriving (Eq, Show)

-- | The characters are displayed one after another, in order.
instance Pretty LdhStr where
  pretty_show str =
    case str of
      LdhStrSingleLdh only -> pretty_show only
      LdhStrMultipleLdh first rest -> pretty_show first ++ pretty_show rest
182
-- | Parser for an 'LdhStr'. The longer alternative (one character
--   followed by another 'LdhStr') is attempted first, under 'try',
--   and we fall back to a single character if that fails.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parseTest )
--
--   Single letters, digits, and hyphens are parsed:
--
--   >>> parseTest ldh_str "a"
--   LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a')))
--
--   >>> parseTest ldh_str "0"
--   LdhStrSingleLdh (LetDigHypLetDig (LetDigDigit (Digit '0')))
--
--   >>> parseTest ldh_str "-"
--   LdhStrSingleLdh (LetDigHypHyphen (Hyphen '-'))
--
--   As well as strings of them:
--
--   >>> import Text.Parsec ( parse )
--   >>> pretty_print $ parse ldh_str "" "a0-b"
--   a0-b
--
ldh_str :: Parser LdhStr
ldh_str = try several <|> single
  where
    -- Exactly one character and nothing after it.
    single :: Parser LdhStr
    single = fmap LdhStrSingleLdh let_dig_hyp

    -- One character, and then (recursively) the rest of the string.
    several :: Parser LdhStr
    several =
      let_dig_hyp >>= \first ->
        fmap (LdhStrMultipleLdh first) ldh_str
217
218
219
-- | The analogue of 'last' for an 'LdhStr': return its final
--   'LetDigHyp'. An 'LdhStr' always holds at least one character,
--   so there is no empty case to worry about.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parse )
--
--   >>> let (Right r) = parse ldh_str "" "a"
--   >>> last_ldh_str r
--   LetDigHypLetDig (LetDigLetter (Letter 'a'))
--
--   >>> let (Right r) = parse ldh_str "" "abc-def"
--   >>> last_ldh_str r
--   LetDigHypLetDig (LetDigLetter (Letter 'f'))
--
last_ldh_str :: LdhStr -> LetDigHyp
last_ldh_str str =
  case str of
    LdhStrSingleLdh only -> only
    LdhStrMultipleLdh _ rest -> last_ldh_str rest
240
241
-- | The analogue of 'init' for an 'LdhStr': return everything
--   /except/ the final character.
--
--   An 'LdhStr' can never be empty, so when the input holds only
--   one character there is nothing left to return. That is why the
--   result is wrapped in 'Maybe'.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parse )
--
--   >>> let (Right r) = parse ldh_str "" "a"
--   >>> init_ldh_str r
--   Nothing
--
--   >>> let (Right r) = parse ldh_str "" "ab"
--   >>> init_ldh_str r
--   Just (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))))
--
--   >>> let (Right r) = parse ldh_str "" "abc-def"
--   >>> init_ldh_str r
--   Just (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'b'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'c'))) (LdhStrMultipleLdh (LetDigHypHyphen (Hyphen '-')) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'd'))) (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'e')))))))))
--
init_ldh_str :: LdhStr -> Maybe LdhStr
init_ldh_str (LdhStrSingleLdh _) = Nothing
init_ldh_str (LdhStrMultipleLdh first rest) =
  -- If the tail has no init, then "first" is the second-to-last
  -- character and it becomes the whole answer. Otherwise we put
  -- "first" back in front of the init of the tail.
  Just $ maybe (LdhStrSingleLdh first)
               (LdhStrMultipleLdh first)
               (init_ldh_str rest)
275
276
-- | Count the characters in an 'LdhStr'. The answer is always at
--   least one because an 'LdhStr' is never empty.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parse )
--
--   >>> let (Right r) = parse ldh_str "" "a"
--   >>> length_ldh_str r
--   1
--
--   >>> let (Right r) = parse ldh_str "" "abc-def"
--   >>> length_ldh_str r
--   7
--
length_ldh_str :: LdhStr -> Int
length_ldh_str = count 1
  where
    -- Walk down the string; the first argument is the number of
    -- characters seen so far, including the current one.
    count :: Int -> LdhStr -> Int
    count n (LdhStrSingleLdh _) = n
    count n (LdhStrMultipleLdh _ rest) = (count $! n + 1) rest
296
297 -- * Letter/Digit/Hyphen string followed by a trailing Letter/Digit
298
-- | The grammar has no name for this type; it is the contents of
--   the outer square brackets in
--
--     <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
--
--   The ldh-str may be absent, but the let-dig that follows it is
--   mandatory, which keeps a name from ending with a hyphen. The
--   whole bracketed part is itself optional, so a label can hold a
--   @Maybe LdhStrLetDig@.
--
data LdhStrLetDig = LdhStrLetDig (Maybe LdhStr) LetDig
  deriving (Eq, Show)

-- | Display the leading ldh-str (when there is one), followed by
--   the final let-dig.
instance Pretty LdhStrLetDig where
  pretty_show (LdhStrLetDig leading final) =
    maybe "" pretty_show leading ++ pretty_show final
315
-- | Parser for an 'LdhStrLetDig'. There is no such production in
--   the grammar, but the type exists, so it gets a parser too.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parse, parseTest )
--
--   Make sure we can parse a single character:
--
--   >>> parseTest ldh_str_let_dig "a"
--   LdhStrLetDig Nothing (LetDigLetter (Letter 'a'))
--
--   And longer strings:
--
--   >>> pretty_print $ parse ldh_str_let_dig "" "ab"
--   ab
--
--   >>> pretty_print $ parse ldh_str_let_dig "" "-b"
--   -b
--
--   >>> parseTest ldh_str_let_dig "b-"
--   parse error at (line 1, column 3):
--   label cannot end with a hyphen
--
ldh_str_let_dig :: Parser LdhStrLetDig
ldh_str_let_dig = do
  -- The ldh-str parser swallows the trailing let-dig as well...
  whole <- ldh_str

  -- ...so we inspect the final character after the fact.
  case last_ldh_str whole of
    LetDigHypHyphen _ -> fail "label cannot end with a hyphen"

    -- The last character is a letter or digit. Split it off from
    -- whatever came before it. When only one character was parsed,
    -- nothing came before it and the first field is Nothing.
    LetDigHypLetDig final ->
      return $ LdhStrLetDig (init_ldh_str whole) final
364
365
366
-- | Count the characters in an 'LdhStrLetDig'. The final let-dig is
--   always present and counts for one; the length of the leading
--   ldh-str, if there is one, is added to that.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parse )
--
--   >>> let (Right r) = parse ldh_str_let_dig "" "a"
--   >>> length_ldh_str_let_dig r
--   1
--
--   >>> let (Right r) = parse ldh_str_let_dig "" "abc-def"
--   >>> length_ldh_str_let_dig r
--   7
--
length_ldh_str_let_dig :: LdhStrLetDig -> Int
length_ldh_str_let_dig (LdhStrLetDig leading _) =
  1 + maybe 0 length_ldh_str leading
387
388
389 -- * Labels
390
-- | A label, based on the RFC1035 production
--
--     <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
--
--   The first character here is a 'LetDig' rather than a letter,
--   because we follow the more liberal syntax of RFC1123, Section
--   2.1:
--
--     The syntax of a legal Internet host name was specified in RFC-952
--     [DNS:4]. One aspect of host name syntax is hereby changed: the
--     restriction on the first character is relaxed to allow either a
--     letter or a digit. Host software MUST support this more liberal
--     syntax.
--
data Label = Label LetDig (Maybe LdhStrLetDig)
  deriving (Eq, Show)

-- | Display the first character, and then the rest of the label if
--   there is any.
instance Pretty Label where
  pretty_show (Label first rest) =
    pretty_show first ++ maybe "" pretty_show rest
409
-- | Parser for a 'Label'.
--
--   Beyond the grammar itself, a label may be at most 63 characters
--   long. From Section 2.3.1, \"Preferred name syntax\", of RFC1035:
--
--     The labels must follow the rules for ARPANET host names. They
--     must start with a letter, end with a letter or digit, and have
--     as interior characters only letters, digits, and hyphen. There
--     are also some restrictions on the length. Labels must be 63
--     characters or less.
--
--   The length is checked only once the label has otherwise been
--   parsed successfully.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parse, parseTest )
--
--   Make sure we can parse a single character:
--
--   >>> parseTest label "a"
--   Label (LetDigLetter (Letter 'a')) Nothing
--
--   And longer strings:
--
--   >>> pretty_print $ parse label "" "abc-def"
--   abc-def
--
--   But not anything ending in a hyphen:
--
--   >>> parseTest label "abc-"
--   parse error at (line 1, column 5):
--   label cannot end with a hyphen
--
--   Or anything over 63 characters:
--
--   >>> parseTest label (['a'..'z'] ++ ['a'..'z'] ++ ['a'..'z'])
--   parse error at (line 1, column 79):
--   labels must be 63 or fewer characters
--
--   However, /exactly/ 63 characters is acceptable:
--
--   >>> pretty_print $ parse label "" (replicate 63 'x')
--   xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
--
--   Ensure that a label can begin with a digit:
--
--   >>> pretty_print $ parse label "" "3com"
--   3com
--
label :: Parser Label
label = do
  first <- let_dig                     -- Always present
  rest <- optionMaybe ldh_str_let_dig  -- May be absent

  -- The first character accounts for one of the 63 allowed, so
  -- whatever follows it may be at most 62 characters long (and is
  -- zero characters long when absent). The error message still
  -- talks about the full label, hence the 63.
  if maybe 0 length_ldh_str_let_dig rest <= 62
    then return (Label first rest)
    else fail "labels must be 63 or fewer characters"
474
475
476
477 -- * Subdomains
478
479
-- | A \"subdomain\" in the sense of RFC1035:
--
--     <subdomain> ::= <label> | <subdomain> "." <label>
--
--   In the second alternative we store the label first and the
--   subdomain second, which is the reverse of the grammar. The
--   reason is given in the documentation for 'subdomain'.
--
data Subdomain
  = SubdomainSingleLabel Label
  | SubdomainMultipleLabel Label Subdomain
  deriving (Eq, Show)

-- | The labels are displayed in order with a dot between them.
instance Pretty Subdomain where
  pretty_show sd =
    case sd of
      SubdomainSingleLabel only -> pretty_show only
      SubdomainMultipleLabel first rest ->
        concat [pretty_show first, ".", pretty_show rest]
498
-- | Parser for an RFC1035 \"subdomain\". The grammar reads,
--
--     <subdomain> ::= <label> | <subdomain> "." <label>
--
--   but we swap the subdomain and the label. The second alternative
--   is the more specific one, so it has to be tried first; but as
--   written it begins with a subdomain, and we would recurse
--   forever. The grammar describes names from right to left, the
--   way we tend to think about them. Parsing goes from left to
--   right, so instead we take the leading label, then a dot, and
--   then recurse for the remainder.
--
--   According to RFC1034, Section 3.1, two neighboring labels in a
--   DNS name cannot be equal:
--
--     Each node has a label, which is zero to 63 octets in length.  Brother
--     nodes may not have the same label, although the same label can be used
--     for nodes which are not brothers.  One label is reserved, and that is
--     the null (i.e., zero length) label used for the root.
--
--   We enforce this, but in practice the result is usually that
--   only the part of the subdomain before the repeated label gets
--   parsed.
--
--   NOTE(review): \"brother nodes\" in that passage most likely
--   means siblings (nodes with the same parent), whereas adjacent
--   labels within one name are parent and child. If so, this check
--   turns away valid names such as @www.www.example.com@ -- confirm
--   against RFC1034 before relying on it.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parse, parseTest )
--
--   Make sure we can parse a single character:
--
--   >>> parseTest subdomain "a"
--   SubdomainSingleLabel (Label (LetDigLetter (Letter 'a')) Nothing)
--
--   >>> pretty_print $ parse subdomain "" "example.com"
--   example.com
--
--   >>> pretty_print $ parse subdomain "" "www.example.com"
--   www.example.com
--
--   We reject a subdomain with equal neighbors, but this leads to
--   only the single first label being parsed instead:
--
--   >>> pretty_print $ parse subdomain "" "www.www.example.com"
--   www
--
--   But not one with a repeated but non-neighboring label:
--
--   >>> pretty_print $ parse subdomain "" "www.example.www.com"
--   www.example.www.com
--
subdomain :: Parser Subdomain
subdomain = try several <|> single
  where
    -- A lone label with nothing after it.
    single :: Parser Subdomain
    single = fmap SubdomainSingleLabel label

    -- A label, a dot, and then (recursively) another subdomain.
    several :: Parser Subdomain
    several = do
      first <- label
      _ <- char '.'
      rest <- subdomain
      reject_equal_neighbors (SubdomainMultipleLabel first rest)

    -- Succeed with the candidate unless two adjacent labels match.
    reject_equal_neighbors :: Subdomain -> Parser Subdomain
    reject_equal_neighbors candidate
      | subdomain_has_equal_neighbors candidate =
          fail "subdomain cannot have equal neighboring labels"
      | otherwise = return candidate
565
566
567
-- | Flatten a 'Subdomain' into the list of its labels, in the same
--   left-to-right order in which they are displayed.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parse )
--
--   >>> let (Right r) = parse subdomain "" "a"
--   >>> pretty_print $ subdomain_labels r
--   ["a"]
--
--   >>> let (Right r) = parse subdomain "" "example.com"
--   >>> pretty_print $ subdomain_labels r
--   ["example","com"]
--
--   >>> let (Right r) = parse subdomain "" "www.example.com"
--   >>> pretty_print $ subdomain_labels r
--   ["www","example","com"]
--
subdomain_labels :: Subdomain -> [Label]
subdomain_labels sd =
  case sd of
    SubdomainSingleLabel only -> [only]
    SubdomainMultipleLabel first rest -> first : subdomain_labels rest
589
590
-- | Pair every label in a subdomain with the label that comes
--   immediately after it. A subdomain with a single label has no
--   such pairs, and the result is then empty.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parse )
--   >>> let (Right r) = parse subdomain "" "www.example.com"
--   >>> pretty_print $ subdomain_label_neighbors r
--   ["(\"www\",\"example\")","(\"example\",\"com\")"]
--
subdomain_label_neighbors :: Subdomain -> [(Label,Label)]
subdomain_label_neighbors s =
  let labels = subdomain_labels s
  in zip labels (drop 1 labels)
605
606
-- | Does the subdomain contain two adjacent labels that are equal?
--   The answer is @True@ if at least one such pair exists, and
--   @False@ otherwise.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parse )
--
--   >>> let (Right r) = parse subdomain "" "www.example.com"
--   >>> subdomain_has_equal_neighbors r
--   False
--
--   >>> let (Right l) = parse label "" "www"
--   >>> let (Right s) = parse subdomain "" "www.example.com"
--   >>> let bad_subdomain = SubdomainMultipleLabel l s
--   >>> subdomain_has_equal_neighbors bad_subdomain
--   True
--
subdomain_has_equal_neighbors :: Subdomain -> Bool
subdomain_has_equal_neighbors s =
  any (uncurry (==)) (subdomain_label_neighbors s)
627
628
629
630 -- * Domains
631
-- | An RFC1035 domain, which is either a subdomain or \" \". The
--   latter, according to RFC2181
--   <https://tools.ietf.org/html/rfc2181#section-11>, is the root:
--
--     The zero length full name is defined as representing the root
--     of the DNS tree, and is typically written and displayed as
--     \".\".
--
--   The 'Domain' type stays faithful to those RFCs, even though
--   they don't support an absolute domain name of e.g. a single dot.
--
data Domain
  = DomainName Subdomain
  | DomainRoot
  deriving (Eq, Show)

-- | The root is displayed as the empty string, and anything else as
--   its subdomain.
instance Pretty Domain where
  pretty_show d =
    case d of
      DomainRoot -> ""
      DomainName s -> pretty_show s
651
-- | Parser for an RFC1035 \"domain\". A subdomain is attempted
--   first, under 'try'; if that fails (or if it displays as more
--   than 255 characters) we fall back to the root, which consumes
--   no input.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parse, parseTest )
--
--   Make sure we can parse a single character:
--
--   >>> pretty_print $ parse domain "" "a"
--   a
--
--   And the empty domain:
--
--   >>> parseTest domain ""
--   DomainRoot
--
--   We will in fact parse the \"empty\" domain off the front of
--   pretty much anything:
--
--   >>> parseTest domain "!not-a-domain"
--   DomainRoot
--
--   Equality of domains is case-insensitive:
--
--   >>> let (Right r1) = parse domain "" "example.com"
--   >>> let (Right r2) = parse domain "" "ExaMPle.coM"
--   >>> r1 == r2
--   True
--
--   A single dot IS parsed as the root, but the dot isn't consumed:
--
--   >>> parseTest domain "."
--   DomainRoot
--
--   Anything over 255 characters is an error, so the root will be
--   parsed:
--
--   >>> let big_l1 = replicate 63 'x'
--   >>> let big_l2 = replicate 63 'y' -- Avoid equal neighboring labels!
--   >>> let big_labels = big_l1 ++ "." ++ big_l2 ++ "."
--   >>> let big_subdomain = concat $ replicate 3 big_labels
--   >>> parseTest domain big_subdomain
--   DomainRoot
--
--   But exactly 255 is allowed:
--
--   >>> import Data.List ( intercalate )
--   >>> let l1 = replicate 63 'w'
--   >>> let l2 = replicate 63 'x'
--   >>> let l3 = replicate 63 'y'
--   >>> let l4 = replicate 63 'z'
--   >>> let big_subdomain = intercalate "." [l1,l2,l3,l4]
--   >>> let (Right r) = parse domain "" big_subdomain
--   >>> length (pretty_show r)
--   255
--
domain :: Parser Domain
domain = try named <|> root
  where
    -- The root: match the empty string, consuming nothing.
    root :: Parser Domain
    root = string "" >> return DomainRoot

    -- A subdomain whose displayed form is short enough.
    named :: Parser Domain
    named = do
      sd <- subdomain
      if length (pretty_show sd) > 255
        then fail "subdomains can be at most 255 characters"
        else return (DomainName sd)