]> gitweb.michael.orlitzky.com - dead/harbl.git/blob - harbl/src/Network/DNS/RBL/Domain.hs
Add the Reversible class.
[dead/harbl.git] / harbl / src / Network / DNS / RBL / Domain.hs
1 {-# LANGUAGE DoAndIfThenElse #-}
2
3 -- | The 'Domain' data type and its parser. A 'Domain' represents a
4 -- name in the domain name system (DNS) as described by
5 -- RFC1035. In particular, we enforce the restrictions from Section
6 -- 2.3.1 \"Preferred name syntax\". See for example,
7 --
8 -- <https://tools.ietf.org/html/rfc1035#section-2.3.1>
9 --
10 -- We basically work with strings and characters everywhere, even
11 -- though this isn't really correct. The length specifications in
12 -- the RFCs are all in terms of octets, so really a ByteString.Char8
13 -- would be more appropriate. With strings, for example, we could
14 -- have a unicode mumbo jumbo character that takes up two bytes
15 -- (octets).
16 --
17 module Network.DNS.RBL.Domain (
18 Domain(..),
19 domain )
20 where
21
22 import Data.Char ( toLower )
23 import Text.Parsec (
24 (<|>),
25 char,
26 optionMaybe,
27 string,
28 try )
29 import qualified Text.Parsec as Parsec ( digit, letter)
30 import Text.Parsec.String ( Parser )
31
32 import Network.DNS.RBL.Pretty ( Pretty(..) )
33 import Network.DNS.RBL.Reversible ( Reversible(..) )
34
35 -- * Digits
36
-- | A newtype around a single digit character.
--
newtype Digit = Digit Char
  deriving (Eq, Show)

-- | A 'Digit' is displayed as the one-character string holding it.
instance Pretty Digit where
  pretty_show (Digit c) = [c]
41
-- | Parser for one digit character; the result is wrapped in our
--   'Digit' type.
--
digit :: Parser Digit
digit = Digit <$> Parsec.digit
46
47
48 -- * Letters
49
-- | A newtype around a single letter character. Only 'Show' is
--   derived; equality is supplied by a hand-written instance.
--
newtype Letter = Letter Char
  deriving (Show)

-- | A 'Letter' is displayed as the one-character string holding it.
instance Pretty Letter where
  pretty_show (Letter c) = [c]
54
55
-- | Parser for one letter character; the result is wrapped in our
--   'Letter' type.
--
letter :: Parser Letter
letter = Letter <$> Parsec.letter
60
-- | Equality on 'Letter' ignores case, which is why the instance is
--   written by hand instead of derived. RFC1035 demands it:
--
--     2.3.3. Character Case
--
--     For all parts of the DNS that are part of the official
--     protocol, all comparisons between character strings (e.g.,
--     labels, domain names, etc.) are done in a case-insensitive
--     manner...
--
--   Every part of a DNS name is built from our custom types, so
--   adjusting the equality for 'Letter' alone is sufficient. RFC4343
--   <https://tools.ietf.org/html/rfc4343> clarifies the
--   case-insensitivity rules, but because we treat DNS names as
--   strings, most of those problems go away (in exchange for new
--   ones).
--
instance Eq Letter where
  Letter a == Letter b = toLower a == toLower b
81
82 -- * Letters/Digits
83
-- | Either a 'Letter' or a 'Digit'.
--
data LetDig
  = LetDigLetter Letter
  | LetDigDigit Digit
  deriving (Eq, Show)

-- | Display whichever character is wrapped inside.
instance Pretty LetDig where
  pretty_show ld =
    case ld of
      LetDigLetter l -> pretty_show l
      LetDigDigit d -> pretty_show d
94
-- | Parser for a single letter or digit, wrapped in our 'LetDig'
--   type. The letter alternative is attempted first.
--
let_dig :: Parser LetDig
let_dig = letter_case <|> digit_case
  where
    letter_case :: Parser LetDig
    letter_case = LetDigLetter <$> letter

    digit_case :: Parser LetDig
    digit_case = LetDigDigit <$> digit
99
100
101 -- * Hyphens
102
-- | A newtype around a single hyphen character.
--
newtype Hyphen = Hyphen Char
  deriving (Eq, Show)

-- | A 'Hyphen' is displayed as the one-character string holding it.
instance Pretty Hyphen where
  pretty_show (Hyphen c) = [c]
107
-- | Parser for exactly one @-@ character; the result is wrapped in
--   our 'Hyphen' type.
--
hyphen :: Parser Hyphen
hyphen = Hyphen <$> char '-'
112
113
114 -- * Letter, Digit, or Hyphen.
115
-- | One character of a label's interior: a letter or digit (via
--   'LetDig'), or a hyphen.
--
data LetDigHyp
  = LetDigHypLetDig LetDig
  | LetDigHypHyphen Hyphen
  deriving (Eq, Show)

-- | Display whichever character is wrapped inside.
instance Pretty LetDigHyp where
  pretty_show ldh =
    case ldh of
      LetDigHypLetDig ld -> pretty_show ld
      LetDigHypHyphen h -> pretty_show h
126
127
-- | Parse a letter, digit, or hyphen and wrap it in our 'LetDigHyp'
-- type. This implements the following rule of the RFC1035 grammar:
--
--   <let-dig-hyp> ::= <let-dig> | "-"
132 --
133 -- ==== _Examples_
134 --
135 -- >>> import Text.Parsec ( parseTest )
136 --
137 -- Letters, digits, and hyphens are all parsed:
138 --
139 -- >>> parseTest let_dig_hyp "a"
140 -- LetDigHypLetDig (LetDigLetter (Letter 'a'))
141 --
142 -- >>> parseTest let_dig_hyp "7"
143 -- LetDigHypLetDig (LetDigDigit (Digit '7'))
144 --
145 -- >>> parseTest let_dig_hyp "-"
146 -- LetDigHypHyphen (Hyphen '-')
147 --
148 -- However, an underscore (for example) is not:
149 --
150 -- >>> parseTest let_dig_hyp "_"
151 -- parse error at (line 1, column 1):
152 -- unexpected "_"
153 -- expecting letter, digit or "-"
154 --
let_dig_hyp :: Parser LetDigHyp
let_dig_hyp =
  -- Letters and digits are attempted first; the hyphen is the
  -- fallback alternative.
  (LetDigHypLetDig <$> let_dig) <|> (LetDigHypHyphen <$> hyphen)
164
165
166 -- * Letter/Digit/Hyphen strings
167
-- | A non-empty string of letters, digits, and hyphens, following
--   the RFC1035 grammar:
--
--     <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
--
--   The representation mirrors the grammar: either one lone
--   'LetDigHyp', or a 'LetDigHyp' followed (recursively) by another
--   'LdhStr'.
--
data LdhStr
  = LdhStrSingleLdh LetDigHyp
  | LdhStrMultipleLdh LetDigHyp LdhStr
  deriving (Eq, Show)

-- | Display the characters of the string in order.
instance Pretty LdhStr where
  pretty_show str =
    case str of
      LdhStrSingleLdh c -> pretty_show c
      LdhStrMultipleLdh c rest -> pretty_show c ++ pretty_show rest
183
184 -- | Parse a string of letters, digits, and hyphens (an 'LdhStr').
185 --
186 -- ==== _Examples_
187 --
188 -- >>> import Text.Parsec ( parseTest )
189 --
190 -- Single letters, digits, and hyphens are parsed:
191 --
192 -- >>> parseTest ldh_str "a"
193 -- LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a')))
194 --
195 -- >>> parseTest ldh_str "0"
196 -- LdhStrSingleLdh (LetDigHypLetDig (LetDigDigit (Digit '0')))
197 --
198 -- >>> parseTest ldh_str "-"
199 -- LdhStrSingleLdh (LetDigHypHyphen (Hyphen '-'))
200 --
201 -- As well as strings of them:
202 --
203 -- >>> import Text.Parsec ( parse )
204 -- >>> pretty_print $ parse ldh_str "" "a0-b"
205 -- a0-b
206 --
ldh_str :: Parser LdhStr
ldh_str = try several <|> single
  where
    -- One character followed by a further (non-empty) ldh-str. This
    -- consumes the leading character before it learns whether
    -- anything follows, hence the 'try' above.
    several :: Parser LdhStr
    several = LdhStrMultipleLdh <$> let_dig_hyp <*> ldh_str

    -- Exactly one character.
    single :: Parser LdhStr
    single = LdhStrSingleLdh <$> let_dig_hyp
218
219
220
221 -- | A version of 'last' that works on a 'LdhStr' rather than a
222 -- list. That is, it returns the last 'LetDigHyp' in the
223 -- string. Since 'LdhStr' contains at least one character, there's
224 -- no \"nil\" case here.
225 --
226 -- ==== _Examples_
227 --
228 -- >>> import Text.Parsec ( parse )
229 --
230 -- >>> let (Right r) = parse ldh_str "" "a"
231 -- >>> last_ldh_str r
232 -- LetDigHypLetDig (LetDigLetter (Letter 'a'))
233 --
234 -- >>> let (Right r) = parse ldh_str "" "abc-def"
235 -- >>> last_ldh_str r
236 -- LetDigHypLetDig (LetDigLetter (Letter 'f'))
237 --
last_ldh_str :: LdhStr -> LetDigHyp
last_ldh_str str =
  case str of
    -- Only one character left, so it must be the final one.
    LdhStrSingleLdh final -> final
    -- Skip the head and keep walking down the string.
    LdhStrMultipleLdh _ rest -> last_ldh_str rest
241
242
243 -- | A version of 'init' that works on a 'LdhStr' rather than a
244 -- list. That is, it returns everything /except/ the last character in
245 -- the string.
246 --
-- Since an 'LdhStr' must contain at least one character, this might
-- not be possible (when the input is of length one). So, we return
-- a 'Maybe' value.
250 --
251 -- ==== _Examples_
252 --
253 -- >>> import Text.Parsec ( parse )
254 --
255 -- >>> let (Right r) = parse ldh_str "" "a"
256 -- >>> init_ldh_str r
257 -- Nothing
258 --
259 -- >>> let (Right r) = parse ldh_str "" "ab"
260 -- >>> init_ldh_str r
261 -- Just (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))))
262 --
263 -- >>> let (Right r) = parse ldh_str "" "abc-def"
264 -- >>> init_ldh_str r
265 -- Just (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'b'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'c'))) (LdhStrMultipleLdh (LetDigHypHyphen (Hyphen '-')) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'd'))) (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'e')))))))))
266 --
init_ldh_str :: LdhStr -> Maybe LdhStr
init_ldh_str str =
  case str of
    -- Dropping the only character would leave nothing behind.
    LdhStrSingleLdh _ -> Nothing

    -- If the tail has no initial segment, then @h@ was the
    -- second-to-last character and stands alone; otherwise @h@ is
    -- prepended to the tail's initial segment.
    LdhStrMultipleLdh h t ->
      Just (maybe (LdhStrSingleLdh h) (LdhStrMultipleLdh h) (init_ldh_str t))
276
277
278 -- | Compute the length of an 'LdhStr'. It will be at least one, since
279 -- 'LdhStr's are non-empty. And if there's something other than the
280 -- first character present, we simply recurse.
281 --
282 -- ==== _Examples_
283 --
284 -- >>> import Text.Parsec ( parse )
285 --
286 -- >>> let (Right r) = parse ldh_str "" "a"
287 -- >>> length_ldh_str r
288 -- 1
289 --
290 -- >>> let (Right r) = parse ldh_str "" "abc-def"
291 -- >>> length_ldh_str r
292 -- 7
293 --
length_ldh_str :: LdhStr -> Int
length_ldh_str str =
  case str of
    LdhStrSingleLdh _ -> 1
    -- Count the head, then whatever follows it.
    LdhStrMultipleLdh _ rest -> length_ldh_str rest + 1
297
298 -- * Letter/Digit/Hyphen string followed by a trailing Letter/Digit
299
-- | Not a named production of the grammar; this is the part inside
--   the outer square brackets of,
--
--     <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
--
--   The ldh-str may be absent, but whenever it is present it has to
--   be followed by a let-dig so that the name cannot end with a
--   hyphen. A label can then carry a @Maybe LdhStrLetDig@, which is
--   the reason this type exists.
--
data LdhStrLetDig = LdhStrLetDig (Maybe LdhStr) LetDig
  deriving (Eq, Show)

-- | Display the optional leading string (if any) followed by the
--   trailing letter/digit.
instance Pretty LdhStrLetDig where
  pretty_show (LdhStrLetDig prefix final) =
    maybe "" pretty_show prefix ++ pretty_show final
316
317 -- | Parse an 'LdhStrLetDig'. This isn't in the grammar, but we might
318 -- as well define the parser for it independently since we gave it
319 -- its own data type.
320 --
321 -- ==== _Examples_
322 --
323 -- >>> import Text.Parsec ( parse, parseTest )
324 --
325 -- Make sure we can parse a single character:
326 --
327 -- >>> parseTest ldh_str_let_dig "a"
328 -- LdhStrLetDig Nothing (LetDigLetter (Letter 'a'))
329 --
330 -- And longer strings:
331 --
332 -- >>> pretty_print $ parse ldh_str_let_dig "" "ab"
333 -- ab
334 --
335 -- >>> pretty_print $ parse ldh_str_let_dig "" "-b"
336 -- -b
337 --
338 -- >>> parseTest ldh_str_let_dig "b-"
339 -- parse error at (line 1, column 3):
340 -- label cannot end with a hyphen
341 --
ldh_str_let_dig :: Parser LdhStrLetDig
ldh_str_let_dig = do
  -- 'ldh_str' is greedy: it swallows the trailing let-dig along with
  -- everything before it, so the split happens after the fact.
  everything <- ldh_str

  case last_ldh_str everything of
    LetDigHypHyphen _ -> fail "label cannot end with a hyphen"

    -- The final character is a letter/digit. Whatever preceded it
    -- (possibly nothing, when only one character was parsed) becomes
    -- the optional leading string, and the final character is stored
    -- on its own.
    LetDigHypLetDig final ->
      return $ LdhStrLetDig (init_ldh_str everything) final
365
366
367
368 -- | Compute the length of a 'LdhStrLetDig'. It's at least one, since
369 -- the let-dig at the end is always there. And when there's an
370 -- ldh-str too, we add its length to one.
371 --
372 -- ==== _Examples_
373 --
374 -- >>> import Text.Parsec ( parse )
375 --
376 -- >>> let (Right r) = parse ldh_str_let_dig "" "a"
377 -- >>> length_ldh_str_let_dig r
378 -- 1
379 --
380 -- >>> let (Right r) = parse ldh_str_let_dig "" "abc-def"
381 -- >>> length_ldh_str_let_dig r
382 -- 7
383 --
length_ldh_str_let_dig :: LdhStrLetDig -> Int
length_ldh_str_let_dig (LdhStrLetDig prefix _) =
  -- One for the trailing let-dig, plus the leading string (if any).
  1 + maybe 0 length_ldh_str prefix
388
389
390 -- * Labels
391
-- | A label, based on the RFC1035 grammar:
--
--     <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
--
--   The first character is a 'LetDig' rather than a 'Letter' because
--   we accept the slightly more general syntax of RFC1123, Section
--   2.1:
--
--     The syntax of a legal Internet host name was specified in RFC-952
--     [DNS:4]. One aspect of host name syntax is hereby changed: the
--     restriction on the first character is relaxed to allow either a
--     letter or a digit. Host software MUST support this more liberal
--     syntax.
--
data Label = Label LetDig (Maybe LdhStrLetDig)
  deriving (Eq, Show)

-- | Display the first character followed by the rest of the label,
--   if there is any.
instance Pretty Label where
  pretty_show (Label first rest) =
    pretty_show first ++ maybe "" pretty_show rest
410
411 -- | Parse a 'Label'.
412 --
413 -- In addition to the grammar, there's another restriction on
414 -- labels: their length must be 63 characters or less. Quoting
415 -- Section 2.3.1, \"Preferred name syntax\", of RFC1035:
416 --
417 -- The labels must follow the rules for ARPANET host names. They
418 -- must start with a letter, end with a letter or digit, and have
419 -- as interior characters only letters, digits, and hyphen. There
420 -- are also some restrictions on the length. Labels must be 63
421 -- characters or less.
422 --
423 -- We check this only after we have successfully parsed a label.
424 --
425 -- ==== _Examples_
426 --
427 -- >>> import Text.Parsec ( parse, parseTest )
428 --
429 -- Make sure we can parse a single character:
430 --
431 -- >>> parseTest label "a"
432 -- Label (LetDigLetter (Letter 'a')) Nothing
433 --
434 -- And longer strings:
435 --
436 -- >>> pretty_print $ parse label "" "abc-def"
437 -- abc-def
438 --
439 -- But not anything ending in a hyphen:
440 --
441 -- >>> parseTest label "abc-"
442 -- parse error at (line 1, column 5):
443 -- label cannot end with a hyphen
444 --
445 -- Or anything over 63 characters:
446 --
447 -- >>> parseTest label (['a'..'z'] ++ ['a'..'z'] ++ ['a'..'z'])
448 -- parse error at (line 1, column 79):
449 -- labels must be 63 or fewer characters
450 --
451 -- However, /exactly/ 63 characters is acceptable:
452 --
453 -- >>> pretty_print $ parse label "" (replicate 63 'x')
454 -- xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
455 --
456 -- Ensure that a label can begin with a digit:
457 --
458 -- >>> pretty_print $ parse label "" "3com"
459 -- 3com
460 --
label :: Parser Label
label = do
  first <- let_dig                       -- Always present
  rest <- optionMaybe ldh_str_let_dig    -- Possibly absent

  -- The first character accounts for one of the 63 allowed
  -- characters, so the remainder may be at most 62 long. A missing
  -- remainder counts as zero. The error message still quotes the
  -- overall limit of 63.
  let remaining_length = maybe 0 length_ldh_str_let_dig rest
  if remaining_length > 62
    then fail "labels must be 63 or fewer characters"
    else return $ Label first rest
475
476
477
478 -- * Subdomains
479
480
-- | The \"subdomain\" of RFC1035:
--
--     <subdomain> ::= <label> | <subdomain> "." <label>
--
--   Note that in the second alternative we store the label /before/
--   the subdomain, the opposite of the grammar's order. The reason
--   is explained in 'subdomain'.
--
data Subdomain
  = SubdomainSingleLabel Label
  | SubdomainMultipleLabel Label Subdomain
  deriving (Eq, Show)

-- | Display the labels in stored order, separated by dots.
instance Pretty Subdomain where
  pretty_show sub =
    case sub of
      SubdomainSingleLabel l -> pretty_show l
      SubdomainMultipleLabel l rest ->
        pretty_show l ++ "." ++ pretty_show rest
499
500
instance Reversible Subdomain where
  -- | Reverse the labels of the given subdomain.
  --
  --   ==== _Examples_
  --
  --   >>> import Text.Parsec ( parse )
  --
  --   Standard usage:
  --
  --   >>> let (Right r) = parse subdomain "" "com"
  --   >>> pretty_print $ backwards r
  --   com
  --
  --   >>> let (Right r) = parse subdomain "" "example.com"
  --   >>> pretty_print $ backwards r
  --   com.example
  --
  --   >>> let (Right r) = parse subdomain "" "www.example.com"
  --   >>> pretty_print $ backwards r
  --   com.example.www
  --
  --   >>> let (Right r) = parse subdomain "" "new.www.example.com"
  --   >>> pretty_print $ backwards r
  --   com.example.www.new
  --

  -- A single label is its own reverse.
  backwards (SubdomainSingleLabel l) = SubdomainSingleLabel l

  -- For several labels, walk the subdomain once from left to right
  -- while building the result back to front. The first label seeds
  -- the accumulator (it ends up last), and every label met afterwards
  -- is pushed onto the front of what has been built so far. This
  -- takes one step per label and needs no \"impossible\" fallback
  -- case, since both constructors are handled at every step.
  backwards (SubdomainMultipleLabel l s) = go (SubdomainSingleLabel l) s
    where
      -- @go acc remaining@: @acc@ holds the labels seen so far,
      -- already reversed; @remaining@ is the unvisited suffix.
      go :: Subdomain -> Subdomain -> Subdomain
      go acc (SubdomainSingleLabel m) = SubdomainMultipleLabel m acc
      go acc (SubdomainMultipleLabel m rest) =
        go (SubdomainMultipleLabel m acc) rest
559
560
561
562 -- | Parse an RFC1035 \"subdomain\". The given grammar is,
563 --
564 -- <subdomain> ::= <label> | <subdomain> "." <label>
565 --
-- However, we have reversed the order of the subdomain and label to
-- prevent infinite recursion. The second option (subdomain + label)
-- is obviously more specific, so we need to try it first. This
569 -- presents a problem: we're trying to parse a subdomain in terms of
570 -- a subdomain! The given grammar represents subdomains how we like
571 -- to think of them; from right to left. But it's better to parse
572 -- from left to right, so we pick off the leading label and then
573 -- recurse into the definition of subdomain.
574 --
575 -- According to RFC1034, Section 3.1, two neighboring labels in a
576 -- DNS name cannot be equal:
577 --
578 -- Each node has a label, which is zero to 63 octets in length. Brother
579 -- nodes may not have the same label, although the same label can be used
580 -- for nodes which are not brothers. One label is reserved, and that is
581 -- the null (i.e., zero length) label used for the root.
582 --
583 -- We enforce this restriction, but the result is usually that we
584 -- only parse the part of the subdomain leading up to the repeated
585 -- label.
586 --
587 -- ==== _Examples_
588 --
589 -- >>> import Text.Parsec ( parse, parseTest )
590 --
591 -- Make sure we can parse a single character:
592 --
593 -- >>> parseTest subdomain "a"
594 -- SubdomainSingleLabel (Label (LetDigLetter (Letter 'a')) Nothing)
595 --
596 -- >>> pretty_print $ parse subdomain "" "example.com"
597 -- example.com
598 --
599 -- >>> pretty_print $ parse subdomain "" "www.example.com"
600 -- www.example.com
601 --
602 -- We reject a subdomain with equal neighbors, but this leads to
603 -- only the single first label being parsed instead:
604 --
605 -- >>> pretty_print $ parse subdomain "" "www.www.example.com"
606 -- www
607 --
608 -- But not one with a repeated but non-neighboring label:
609 --
610 -- >>> pretty_print $ parse subdomain "" "www.example.www.com"
611 -- www.example.www.com
612 --
subdomain :: Parser Subdomain
subdomain = try label_dot_rest <|> lone_label
  where
    -- A label, a dot, and then (recursively) another subdomain. The
    -- assembled result is rejected if any two neighboring labels in
    -- it are equal.
    label_dot_rest :: Parser Subdomain
    label_dot_rest = do
      first <- label <* char '.'
      rest <- subdomain
      let candidate = SubdomainMultipleLabel first rest
      if subdomain_has_equal_neighbors candidate
        then fail "subdomain cannot have equal neighboring labels"
        else return candidate

    -- Just one label, with nothing after it.
    lone_label :: Parser Subdomain
    lone_label = SubdomainSingleLabel <$> label
628
629
630
631 -- | Retrieve a list of labels contained in a 'Subdomain'.
632 --
633 -- ==== _Examples_
634 --
635 -- >>> import Text.Parsec ( parse )
636 --
637 -- >>> let (Right r) = parse subdomain "" "a"
638 -- >>> pretty_print $ subdomain_labels r
639 -- ["a"]
640 --
641 -- >>> let (Right r) = parse subdomain "" "example.com"
642 -- >>> pretty_print $ subdomain_labels r
643 -- ["example","com"]
644 --
645 -- >>> let (Right r) = parse subdomain "" "www.example.com"
646 -- >>> pretty_print $ subdomain_labels r
647 -- ["www","example","com"]
648 --
subdomain_labels :: Subdomain -> [Label]
subdomain_labels sub =
  case sub of
    SubdomainSingleLabel only -> [only]
    -- Emit the leading label, then the labels of the remainder.
    SubdomainMultipleLabel first rest -> first : subdomain_labels rest
652
653
654 -- | Return a list of pairs of neighboring labels in a subdomain.
655 --
656 -- ==== _Examples_
657 --
658 -- >>> import Text.Parsec ( parse )
659 -- >>> let (Right r) = parse subdomain "" "www.example.com"
660 -- >>> pretty_print $ subdomain_label_neighbors r
661 -- ["(\"www\",\"example\")","(\"example\",\"com\")"]
662 --
subdomain_label_neighbors :: Subdomain -> [(Label,Label)]
subdomain_label_neighbors s =
  -- Pair every label with its successor by zipping the label list
  -- against itself shifted by one position.
  let labels = subdomain_labels s
  in zip labels (drop 1 labels)
668
669
670 -- | Return @True@ if the subdomain has any two equal neighboring
671 -- labels, and @False@ otherwise.
672 --
673 -- ==== _Examples_
674 --
675 -- >>> import Text.Parsec ( parse )
676 --
677 -- >>> let (Right r) = parse subdomain "" "www.example.com"
678 -- >>> subdomain_has_equal_neighbors r
679 -- False
680 --
681 -- >>> let (Right l) = parse label "" "www"
682 -- >>> let (Right s) = parse subdomain "" "www.example.com"
683 -- >>> let bad_subdomain = SubdomainMultipleLabel l s
684 -- >>> subdomain_has_equal_neighbors bad_subdomain
685 -- True
686 --
subdomain_has_equal_neighbors :: Subdomain -> Bool
subdomain_has_equal_neighbors s =
  -- True as soon as some neighboring pair has equal components.
  any (uncurry (==)) (subdomain_label_neighbors s)
690
691
692
693
694 -- * Domains
695
-- | An RFC1035 domain: either a subdomain, or \" \". RFC2181
--   <https://tools.ietf.org/html/rfc2181#section-11> says that the
--   latter means the root:
--
--     The zero length full name is defined as representing the root
--     of the DNS tree, and is typically written and displayed as
--     \".\".
--
--   The 'Domain' type stays true to those RFCs, even though they
--   don't support an absolute domain name of e.g. a single dot.
--
data Domain
  = DomainName Subdomain
  | DomainRoot
  deriving (Eq, Show)

-- | The root is displayed as the empty string; a named domain is
--   displayed the same way as its subdomain.
instance Pretty Domain where
  pretty_show dom =
    case dom of
      DomainRoot -> ""
      DomainName sub -> pretty_show sub
715
716 -- | Parse an RFC1035 \"domain\"
717 --
718 -- ==== _Examples_
719 --
720 -- >>> import Text.Parsec ( parse, parseTest )
721 --
722 -- Make sure we can parse a single character:
723 --
724 -- >>> pretty_print $ parse domain "" "a"
725 -- a
726 --
727 -- And the empty domain:
728 --
729 -- >>> parseTest domain ""
730 -- DomainRoot
731 --
732 -- We will in fact parse the \"empty\" domain off the front of
733 -- pretty much anything:
734 --
735 -- >>> parseTest domain "!8===D"
736 -- DomainRoot
737 --
738 -- Equality of domains is case-insensitive:
739 --
740 -- >>> let (Right r1) = parse domain "" "example.com"
741 -- >>> let (Right r2) = parse domain "" "ExaMPle.coM"
742 -- >>> r1 == r2
743 -- True
744 --
745 -- A single dot IS parsed as the root, but the dot isn't consumed:
746 --
747 -- >>> parseTest domain "."
748 -- DomainRoot
749 --
750 -- Anything over 255 characters is an error, so the root will be
751 -- parsed:
752 --
753 -- >>> let big_l1 = replicate 63 'x'
754 -- >>> let big_l2 = replicate 63 'y' -- Avoid equal neighboring labels!
755 -- >>> let big_labels = big_l1 ++ "." ++ big_l2 ++ "."
756 -- >>> let big_subdomain = concat $ replicate 3 big_labels
757 -- >>> parseTest domain big_subdomain
758 -- DomainRoot
759 --
760 -- But exactly 255 is allowed:
761 --
762 -- >>> import Data.List ( intercalate )
763 -- >>> let l1 = replicate 63 'w'
764 -- >>> let l2 = replicate 63 'x'
765 -- >>> let l3 = replicate 63 'y'
766 -- >>> let l4 = replicate 63 'z'
767 -- >>> let big_subdomain = intercalate "." [l1,l2,l3,l4]
768 -- >>> let (Right r) = parse domain "" big_subdomain
769 -- >>> length (pretty_show r)
770 -- 255
771 --
domain :: Parser Domain
domain = try named <|> root
  where
    -- A subdomain whose displayed form is at most 255 characters
    -- long. Wrapped in 'try' above so that a failure here (including
    -- the length failure) falls through to the root alternative.
    --
    -- NOTE(review): the limit is applied to the dotted text form; the
    -- RFC limit is stated in octets -- confirm these agree.
    named :: Parser Domain
    named = do
      sub <- subdomain
      let rendered_length = length (pretty_show sub)
      if rendered_length > 255
        then fail "subdomains can be at most 255 characters"
        else return $ DomainName sub

    -- The empty name: matches the empty string, consuming nothing.
    root :: Parser Domain
    root = DomainRoot <$ string ""
784
785
instance Reversible Domain where
  -- | Reverse the labels of a 'Domain'.
  --
  --   ==== _Examples_
  --
  --   >>> import Text.Parsec ( parse )
  --
  --   The root reverses to itself:
  --
  --   >>> let (Right r) = parse domain "" ""
  --   >>> backwards r
  --   DomainRoot
  --
  --   >>> let (Right r) = parse domain "" "new.www.example.com"
  --   >>> pretty_print $ backwards r
  --   com.example.www.new
  --
  backwards dom =
    case dom of
      -- Nothing to reverse in the root.
      DomainRoot -> DomainRoot
      -- Otherwise delegate to the subdomain's own reversal.
      DomainName sub -> DomainName (backwards sub)