harbl/src/Network/DNS/RBL/Domain.hs

   1 {-# LANGUAGE DoAndIfThenElse #-}
   2
   3 -- | The 'Domain' data type and its parser. A 'Domain' represents a
   4 --   name in the domain name system (DNS) as described by
   5 --   RFC1035. In particular, we enforce the restrictions from Section
   6 --   2.3.1 \"Preferred name syntax\". See for example,
   7 --
   8 --     <https://tools.ietf.org/html/rfc1035#section-2.3.1>
   9 --
  10 --   We basically work with strings and characters everywhere, even
  11 --   though this isn't really correct. The length specifications in
  12 --   the RFCs are all in terms of octets, so really a ByteString.Char8
  13 --   would be more appropriate. With strings, for example, we could
  14 --   have a unicode mumbo jumbo character that takes up two bytes
  15 --   (octets).
  16 --
  17 module Network.DNS.RBL.Domain (
  18   Domain,
  19   domain )
  20 where
  21
import Data.Char ( isAscii, toLower )
import Text.Parsec (
  (<?>),
  (<|>),
  char,
  lookAhead,
  optionMaybe,
  satisfy,
  string,
  try )
import qualified Text.Parsec as Parsec ( digit, letter)
import Text.Parsec.String ( Parser )

import Network.DNS.RBL.Pretty ( Pretty(..) )
  33
  34 -- * Digits
  35
  36 -- | A wrapper around a digit character.
  37 --
  38 newtype Digit = Digit Char deriving (Eq, Show)
  39 instance Pretty Digit where pretty_show (Digit d) = [d]
  40
  41 -- | Parse a single digit, but wrap it in our 'Digit' type.
  42 --
  43 digit :: Parser Digit
  44 digit = fmap Digit Parsec.digit
  45
  46
  47 -- * Letters
  48
  49 -- | A wrapper around a letter character.
  50 --
  51 newtype Letter = Letter Char deriving (Show)
  52 instance Pretty Letter where pretty_show (Letter l) = [l]
  53
  54
  55 -- | Parse a single letter, but wrap it in our 'Letter' type.
  56 --
  57 letter :: Parser Letter
  58 letter = fmap Letter Parsec.letter
  59
  60 -- | The derived instance of 'Eq' for letters is incorrect. All
  61 --   comparisons should be made case-insensitively. The following
  62 --   is an excerpt from RFC1035:
  63 --
  64 --     2.3.3. Character Case
  65 --
  66 --     For all parts of the DNS that are part of the official
  67 --     protocol, all comparisons between character strings (e.g.,
  68 --     labels, domain names, etc.)  are done in a case-insensitive
  69 --     manner...
  70 --
  71 --   Since each part of DNS name is composed of our custom types, it
  72 --   suffices to munge the equality for 'Letter'. RFC4343
  73 --   <https://tools.ietf.org/html/rfc4343> clarifies the
  74 --   case-insensitivity rules, but the fact that we're treating DNS
  75 --   names as strings makes most of those problems go away (in
  76 --   exchange for new ones).
  77 --
  78 instance Eq Letter where
  79   (Letter l1) == (Letter l2) = (toLower l1) == (toLower l2)
  80
  81 -- * Letters/Digits
  82
  83 -- | A sum type representing either a letter or a digit.
  84 --
  85 data LetDig =
  86   LetDigLetter Letter |
  87   LetDigDigit  Digit
  88   deriving (Eq, Show)
  89
  90 instance Pretty LetDig where
  91   pretty_show (LetDigLetter l) = pretty_show l
  92   pretty_show (LetDigDigit d) = pretty_show d
  93
  94 -- | Parse a letter or a digit and wrap it in our 'LetDig' type.
  95 --
  96 let_dig :: Parser LetDig
  97 let_dig = (fmap LetDigLetter letter) <|> (fmap LetDigDigit digit)
  98
  99
 100 -- * Hyphens
 101
 102 -- | A wrapper around a single hyphen character.
 103 --
 104 newtype Hyphen = Hyphen Char deriving (Eq, Show)
 105 instance Pretty Hyphen where pretty_show (Hyphen h) = [h]
 106
 107 -- | Parse a single hyphen and wrap it in our 'Hyphen' type.
 108 --
 109 hyphen :: Parser Hyphen
 110 hyphen = fmap Hyphen (char '-')
 111
 112
 113 -- * Letter, Digit, or Hyphen.
 114
 115 -- | A sum type representing a letter, digit, or hyphen.
 116 --
 117 data LetDigHyp =
 118   LetDigHypLetDig LetDig |
 119   LetDigHypHyphen Hyphen
 120   deriving (Eq, Show)
 121
 122 instance Pretty LetDigHyp where
 123   pretty_show (LetDigHypLetDig ld) = pretty_show ld
 124   pretty_show (LetDigHypHyphen h) = pretty_show h
 125
 126
 127 -- | The following is the simplest type in the domain grammar that
 128 --   isn't already implemented for us.
 129 --
 130 --     <let-dig> ::= <letter> | <digit>
 131 --
 132 --   ==== _Examples_
 133 --
 134 --   >>> import Text.Parsec ( parseTest )
 135 --
 136 --   Letters, digits, and hyphens are all parsed:
 137 --
 138 --   >>> parseTest let_dig_hyp "a"
 139 --   LetDigHypLetDig (LetDigLetter (Letter 'a'))
 140 --
 141 --   >>> parseTest let_dig_hyp "7"
 142 --   LetDigHypLetDig (LetDigDigit (Digit '7'))
 143 --
 144 --   >>> parseTest let_dig_hyp "-"
 145 --   LetDigHypHyphen (Hyphen '-')
 146 --
 147 --   However, an underscore (for example) is not:
 148 --
 149 --   >>> parseTest let_dig_hyp "_"
 150 --   parse error at (line 1, column 1):
 151 --   unexpected "_"
 152 --   expecting letter, digit or "-"
 153 --
 154 let_dig_hyp :: Parser LetDigHyp
 155 let_dig_hyp =
 156   parse_letdig <|> parse_hyphen
 157   where
 158     parse_letdig :: Parser LetDigHyp
 159     parse_letdig = fmap LetDigHypLetDig let_dig
 160
 161     parse_hyphen :: Parser LetDigHyp
 162     parse_hyphen = fmap LetDigHypHyphen hyphen
 163
 164
 165 -- * Letter/Digit/Hyphen strings
 166
 167 -- | A string of letters, digits, and hyphens from the RFC1035 grammar:
 168 --
 169 --     <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
 170 --
 171 --   These are represented as either a single instance of a
 172 --   'LetDigHyp', or a string of them (recursive).
 173 --
 174 data LdhStr =
 175   LdhStrSingleLdh LetDigHyp |
 176   LdhStrMultipleLdh LetDigHyp LdhStr
 177   deriving (Eq, Show)
 178
 179 instance Pretty LdhStr where
 180   pretty_show (LdhStrSingleLdh ldh) = pretty_show ldh
 181   pretty_show (LdhStrMultipleLdh ldh s) = (pretty_show ldh) ++ (pretty_show s)
 182
 183 -- | Parse a string of letters, digits, and hyphens (an 'LdhStr').
 184 --
 185 --   ==== _Examples_
 186 --
 187 --   >>> import Text.Parsec ( parseTest )
 188 --
 189 --   Single letters, digits, and hyphens are parsed:
 190 --
 191 --   >>> parseTest ldh_str "a"
 192 --   LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a')))
 193 --
 194 --   >>> parseTest ldh_str "0"
 195 --   LdhStrSingleLdh (LetDigHypLetDig (LetDigDigit (Digit '0')))
 196 --
 197 --   >>> parseTest ldh_str "-"
 198 --   LdhStrSingleLdh (LetDigHypHyphen (Hyphen '-'))
 199 --
 200 --   As well as strings of them:
 201 --
 202 --   >>> import Text.Parsec ( parse )
 203 --   >>> pretty_print $ parse ldh_str "" "a0-b"
 204 --   a0-b
 205 --
 206 ldh_str :: Parser LdhStr
 207 ldh_str = try both <|> just_one
 208   where
 209     both :: Parser LdhStr
 210     both = do
 211       ldh1 <- let_dig_hyp
 212       ldh_tail <- ldh_str
 213       return $ LdhStrMultipleLdh ldh1 ldh_tail
 214
 215     just_one :: Parser LdhStr
 216     just_one = fmap LdhStrSingleLdh let_dig_hyp
 217
 218
 219
 220 -- | A version of 'last' that works on a 'LdhStr' rather than a
 221 --   list. That is, it returns the last 'LetDigHyp' in the
 222 --   string. Since 'LdhStr' contains at least one character, there's
 223 --   no \"nil\" case here.
 224 --
 225 --   ==== _Examples_
 226 --
 227 --   >>> import Text.Parsec ( parse )
 228 --
 229 --   >>> let (Right r) = parse ldh_str "" "a"
 230 --   >>> last_ldh_str r
 231 --   LetDigHypLetDig (LetDigLetter (Letter 'a'))
 232 --
 233 --   >>> let (Right r) = parse ldh_str "" "abc-def"
 234 --   >>> last_ldh_str r
 235 --   LetDigHypLetDig (LetDigLetter (Letter 'f'))
 236 --
 237 last_ldh_str :: LdhStr -> LetDigHyp
 238 last_ldh_str (LdhStrSingleLdh x) = x
 239 last_ldh_str (LdhStrMultipleLdh _ x) = last_ldh_str x
 240
 241
 242 -- | A version of 'init' that works on a 'LdhStr' rather than a
 243 --   list. That is, it returns everything /except/ the last character in
 244 --   the string.
 245 --
 246 --   Since an 'LdhStr' must contain at least one character, this might
 247 --   not be opssible (when the input is of length one). So, we return
 248 --   a 'Maybe' value.
 249 --
 250 --   ==== _Examples_
 251 --
 252 --   >>> import Text.Parsec ( parse )
 253 --
 254 --   >>> let (Right r) = parse ldh_str "" "a"
 255 --   >>> init_ldh_str r
 256 --   Nothing
 257 --
 258 --   >>> let (Right r) = parse ldh_str "" "ab"
 259 --   >>> init_ldh_str r
 260 --   Just (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))))
 261 --
 262 --   >>> let (Right r) = parse ldh_str "" "abc-def"
 263 --   >>> init_ldh_str r
 264 --   Just (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'b'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'c'))) (LdhStrMultipleLdh (LetDigHypHyphen (Hyphen '-')) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'd'))) (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'e')))))))))
 265 --
 266 init_ldh_str :: LdhStr -> Maybe LdhStr
 267 init_ldh_str (LdhStrSingleLdh _) = Nothing
 268 init_ldh_str (LdhStrMultipleLdh h t) =
 269   Just $ case (init_ldh_str t) of
 270            -- We just got the second-to-last character, we're done.
 271            Nothing   -> LdhStrSingleLdh h
 272
 273            -- There's still more stuff. Recurse.
 274            Just rest -> LdhStrMultipleLdh h rest
 275
 276
 277 -- | Compute the length of an 'LdhStr'. It will be at least one, since
 278 --   'LdhStr's are non-empty. And if there's something other than the
 279 --   first character present, we simply recurse.
 280 --
 281 --   ==== _Examples_
 282 --
 283 --   >>> import Text.Parsec ( parse )
 284 --
 285 --   >>> let (Right r) = parse ldh_str "" "a"
 286 --   >>> length_ldh_str r
 287 --   1
 288 --
 289 --   >>> let (Right r) = parse ldh_str "" "abc-def"
 290 --   >>> length_ldh_str r
 291 --   7
 292 --
 293 length_ldh_str :: LdhStr -> Int
 294 length_ldh_str (LdhStrSingleLdh _) = 1
 295 length_ldh_str (LdhStrMultipleLdh _ t) = 1 + (length_ldh_str t)
 296
 297 -- * Letter/Digit/Hyphen string followed by a trailing Letter/Digit
 298
 299 -- | This type isn't explicitly part of the grammar, but it's what
 300 --   shows up in the square brackets of,
 301 --
 302 --     <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
 303 --
 304 --   The ldh-str is optional, but if one is present, we must also have
 305 --   a trailing let-dig to prevent the name from ending with a
 306 --   hyphen. This can be represented with a @Maybe LdhStrLetDig@,
 307 --   which is why we're about to define it.
 308 --
 309 data LdhStrLetDig = LdhStrLetDig (Maybe LdhStr) LetDig
 310   deriving (Eq, Show)
 311
 312 instance Pretty LdhStrLetDig where
 313   pretty_show (LdhStrLetDig Nothing ld) = pretty_show ld
 314   pretty_show (LdhStrLetDig (Just s) ld) = (pretty_show s) ++ (pretty_show ld)
 315
 316 -- | Parse an 'LdhStrLetDig'. This isn't in the grammar, but we might
 317 --   as well define the parser for it independently since we gave it
 318 --   its own data type.
 319 --
 320 --   ==== _Examples_
 321 --
 322 --   >>> import Text.Parsec ( parse, parseTest )
 323 --
 324 --   Make sure we can parse a single character:
 325 --
 326 --   >>> parseTest ldh_str_let_dig "a"
 327 --   LdhStrLetDig Nothing (LetDigLetter (Letter 'a'))
 328 --
 329 --   And longer strings:
 330 --
 331 --   >>> pretty_print $ parse ldh_str_let_dig "" "ab"
 332 --   ab
 333 --
 334 --   >>> pretty_print $ parse ldh_str_let_dig "" "-b"
 335 --   -b
 336 --
 337 --   >>> parseTest ldh_str_let_dig "b-"
 338 --   parse error at (line 1, column 3):
 339 --   label cannot end with a hyphen
 340 --
 341 ldh_str_let_dig :: Parser LdhStrLetDig
 342 ldh_str_let_dig = do
 343   -- This will happily eat up the trailing let-dig...
 344   full_ldh <- ldh_str
 345
 346   -- So we have to go back and see what happened.
 347   case (last_ldh_str full_ldh) of
 348     (LetDigHypHyphen _) -> fail "label cannot end with a hyphen"
 349     (LetDigHypLetDig ld) ->
 350       -- Ok, the label didn't end with a hyphen; now we need to split
 351       -- off the last letter/digit so we can pack it into our return
 352       -- type separately.
 353       return $ case (init_ldh_str full_ldh) of
 354                  -- We only parsed one letter/digit. This can happen
 355                  -- if the label contains two characters. For example,
 356                  -- if we try to parse the label "ab", then the "a"
 357                  -- will be eaten by the label parser, and this
 358                  -- function will be left with only "b".
 359                  Nothing -> LdhStrLetDig Nothing ld
 360
 361                  -- Usual case: there's was some leading let-dig-hyp junk,
 362                  -- return it too.
 363                  leading_ldhs -> LdhStrLetDig leading_ldhs ld
 364
 365
 366
 367 -- | Compute the length of a 'LdhStrLetDig'. It's at least one, since
 368 --   the let-dig at the end is always there. And when there's an
 369 --   ldh-str too, we add its length to one.
 370 --
 371 --   ==== _Examples_
 372 --
 373 --   >>> import Text.Parsec ( parse )
 374 --
 375 --   >>> let (Right r) = parse ldh_str_let_dig "" "a"
 376 --   >>> length_ldh_str_let_dig r
 377 --   1
 378 --
 379 --   >>> let (Right r) = parse ldh_str_let_dig "" "abc-def"
 380 --   >>> length_ldh_str_let_dig r
 381 --   7
 382 --
 383 length_ldh_str_let_dig :: LdhStrLetDig -> Int
 384 length_ldh_str_let_dig (LdhStrLetDig Nothing _) = 1
 385 length_ldh_str_let_dig (LdhStrLetDig (Just ldhstring) _) =
 386   1 + (length_ldh_str ldhstring)
 387
 388
 389 -- * Labels
 390
 391 -- | The label type from the RFC1035 grammar:
 392 --
 393 --     <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
 394 --
 395 --   We allow the slightly more general syntax from RFC1123, Section 2.1:
 396 --
 397 --     The syntax of a legal Internet host name was specified in RFC-952
 398 --     [DNS:4].  One aspect of host name syntax is hereby changed: the
 399 --     restriction on the first character is relaxed to allow either a
 400 --     letter or a digit.  Host software MUST support this more liberal
 401 --     syntax.
 402 --
 403 data Label = Label LetDig (Maybe LdhStrLetDig)
 404   deriving (Eq, Show)
 405
 406 instance Pretty Label where
 407   pretty_show (Label l Nothing) = pretty_show l
 408   pretty_show (Label l (Just s)) = (pretty_show l) ++ (pretty_show s)
 409
 410 -- | Parse a 'Label'.
 411 --
 412 --   In addition to the grammar, there's another restriction on
 413 --   labels: their length must be 63 characters or less. Quoting
 414 --   Section 2.3.1, \"Preferred name syntax\", of RFC1035:
 415 --
 416 --     The labels must follow the rules for ARPANET host names.  They
 417 --     must start with a letter, end with a letter or digit, and have
 418 --     as interior characters only letters, digits, and hyphen.  There
 419 --     are also some restrictions on the length.  Labels must be 63
 420 --     characters or less.
 421 --
 422 --   We check this only after we have successfully parsed a label.
 423 --
 424 --   ==== _Examples_
 425 --
 426 --   >>> import Text.Parsec ( parse, parseTest )
 427 --
 428 --   Make sure we can parse a single character:
 429 --
 430 --   >>> parseTest label "a"
 431 --   Label (LetDigLetter (Letter 'a')) Nothing
 432 --
 433 --   And longer strings:
 434 --
 435 --   >>> pretty_print $ parse label "" "abc-def"
 436 --   abc-def
 437 --
 438 --   But not anything ending in a hyphen:
 439 --
 440 --   >>> parseTest label "abc-"
 441 --   parse error at (line 1, column 5):
 442 --   label cannot end with a hyphen
 443 --
 444 --   Or anything over 63 characters:
 445 --
 446 --   >>> parseTest label (['a'..'z'] ++ ['a'..'z'] ++ ['a'..'z'])
 447 --   parse error at (line 1, column 79):
 448 --   labels must be 63 or fewer characters
 449 --
 450 --   However, /exactly/ 63 characters is acceptable:
 451 --
 452 --   >>> pretty_print $ parse label "" (replicate 63 'x')
 453 --   xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 454 --
 455 --   Ensure that a label can begin with a digit:
 456 --
 457 --   >>> pretty_print $ parse label "" "3com"
 458 --   3com
 459 --
 460 label :: Parser Label
 461 label = do
 462   l <- let_dig -- Guaranteed to be there
 463   maybe_s <- optionMaybe ldh_str_let_dig -- Might not be there
 464   case maybe_s of
 465     -- It can only be one character long, from the letter...
 466     Nothing -> return $ Label l maybe_s
 467
 468     -- The letter gives us one character, so we check that the rest is
 469     -- less than 62 characters long. But in the error message we need
 470     -- to report 63.
 471     Just s  -> if (length_ldh_str_let_dig s) <= 62
 472               then return $ Label l maybe_s
 473               else fail "labels must be 63 or fewer characters"
 474
 475
 476
 477 -- * Subdomains
 478
 479
 480 -- | The data type representing a \"subdomain\" from RFC1035:
 481 --
 482 --     <subdomain> ::= <label> | <subdomain> "." <label>
 483 --
 484 --   We have reversed the order of the subdomain and label in the
 485 --   second option, however. This is explained in 'subdomain'.
 486 --
 487 data Subdomain =
 488   SubdomainSingleLabel Label |
 489   SubdomainMultipleLabel Label Subdomain
 490   deriving (Eq, Show)
 491
 492
 493
 494 instance Pretty Subdomain where
 495   pretty_show (SubdomainSingleLabel l) = pretty_show l
 496   pretty_show (SubdomainMultipleLabel l s) =
 497     (pretty_show l) ++ "." ++ (pretty_show s)
 498
 499 -- | Parse an RFC1035 \"subdomain\". The given grammar is,
 500 --
 501 --     <subdomain> ::= <label> | <subdomain> "." <label>
 502 --
 503 --   However, we have reversed the order of the subdomain and label to
 504 --   prevent infinite recursion. The second option (subdomain + label)
 505 --   is obviously more specific, we we need to try it first. This
 506 --   presents a problem: we're trying to parse a subdomain in terms of
 507 --   a subdomain! The given grammar represents subdomains how we like
 508 --   to think of them; from right to left. But it's better to parse
 509 --   from left to right, so we pick off the leading label and then
 510 --   recurse into the definition of subdomain.
 511 --
 512 --   According to RFC1034, Section 3.1, two neighboring labels in a
 513 --   DNS name cannot be equal:
 514 --
 515 --     Each node has a label, which is zero to 63 octets in length.  Brother
 516 --     nodes may not have the same label, although the same label can be used
 517 --     for nodes which are not brothers.  One label is reserved, and that is
 518 --     the null (i.e., zero length) label used for the root.
 519 --
 520 --   We enforce this restriction, but the result is usually that we
 521 --   only parse the part of the subdomain leading up to the repeated
 522 --   label.
 523 --
 524 --   ==== _Examples_
 525 --
 526 --   >>> import Text.Parsec ( parse, parseTest )
 527 --
 528 --   Make sure we can parse a single character:
 529 --
 530 --   >>> parseTest subdomain "a"
 531 --   SubdomainSingleLabel (Label (LetDigLetter (Letter 'a')) Nothing)
 532 --
 533 --   >>> pretty_print $ parse subdomain "" "example.com"
 534 --   example.com
 535 --
 536 --   >>> pretty_print $ parse subdomain "" "www.example.com"
 537 --   www.example.com
 538 --
 539 --   We reject a subdomain with equal neighbors, but this leads to
 540 --   only the single first label being parsed instead:
 541 --
 542 --   >>> pretty_print $ parse subdomain "" "www.www.example.com"
 543 --   www
 544 --
 545 --   But not one with a repeated but non-neighboring label:
 546 --
 547 --   >>> pretty_print $ parse subdomain "" "www.example.www.com"
 548 --   www.example.www.com
 549 --
 550 subdomain :: Parser Subdomain
 551 subdomain = try both <|> just_one
 552   where
 553     both :: Parser Subdomain
 554     both = do
 555       l <- label
 556       _ <- char '.'
 557       s <- subdomain
 558       let result = SubdomainMultipleLabel l s
 559       if (subdomain_has_equal_neighbors result)
 560       then fail "subdomain cannot have equal neighboring labels"
 561       else return result
 562
 563     just_one :: Parser Subdomain
 564     just_one = fmap SubdomainSingleLabel label
 565
 566
 567
 568 -- | Retrieve a list of labels contained in a 'Subdomain'.
 569 --
 570 --   ==== _Examples_
 571 --
 572 --   >>> import Text.Parsec ( parse )
 573 --
 574 --   >>> let (Right r) = parse subdomain "" "a"
 575 --   >>> pretty_print $ subdomain_labels r
 576 --   ["a"]
 577 --
 578 --   >>> let (Right r) = parse subdomain "" "example.com"
 579 --   >>> pretty_print $ subdomain_labels r
 580 --   ["example","com"]
 581 --
 582 --   >>> let (Right r) = parse subdomain "" "www.example.com"
 583 --   >>> pretty_print $ subdomain_labels r
 584 --   ["www","example","com"]
 585 --
 586 subdomain_labels :: Subdomain -> [Label]
 587 subdomain_labels (SubdomainSingleLabel l) = [l]
 588 subdomain_labels (SubdomainMultipleLabel l s) = l : (subdomain_labels s)
 589
 590
 591 -- | Return a list of pairs of neighboring labels in a subdomain.
 592 --
 593 --   ==== _Examples_
 594 --
 595 --   >>> import Text.Parsec ( parse )
 596 --   >>> let (Right r) = parse subdomain "" "www.example.com"
 597 --   >>> pretty_print $ subdomain_label_neighbors r
 598 --   ["(\"www\",\"example\")","(\"example\",\"com\")"]
 599 --
 600 subdomain_label_neighbors :: Subdomain -> [(Label,Label)]
 601 subdomain_label_neighbors s =
 602   zip ls (tail ls)
 603   where
 604     ls = subdomain_labels s
 605
 606
 607 -- | Return @True@ if the subdomain has any two equal neighboring
 608 --   labels, and @False@ otherwise.
 609 --
 610 --   ==== _Examples_
 611 --
 612 --   >>> import Text.Parsec ( parse )
 613 --
 614 --   >>> let (Right r) = parse subdomain "" "www.example.com"
 615 --   >>> subdomain_has_equal_neighbors r
 616 --   False
 617 --
 618 --   >>> let (Right l) = parse label "" "www"
 619 --   >>> let (Right s) = parse subdomain "" "www.example.com"
 620 --   >>> let bad_subdomain = SubdomainMultipleLabel l s
 621 --   >>> subdomain_has_equal_neighbors bad_subdomain
 622 --   True
 623 --
 624 subdomain_has_equal_neighbors :: Subdomain -> Bool
 625 subdomain_has_equal_neighbors s =
 626   or [ x == y | (x,y) <- subdomain_label_neighbors s ]
 627
 628
 629
 630 -- * Domains
 631
 632 -- | An RFC1035 domain. According to RFC1035 a domain can be either a
 633 -- subdomain or \" \", which according to RFC2181
 634 -- <https://tools.ietf.org/html/rfc2181#section-11> means the root:
 635 --
 636 --      The zero length full name is defined as representing the root
 637 --      of the DNS tree, and is typically written and displayed as
 638 --      \".\".
 639 --
 640 --   We let the 'Domain' type remain true to those RFCs, even though
 641 --   they don't support an absolute domain name of e.g. a single dot.
 642 --
 643 data Domain =
 644   DomainName Subdomain |
 645   DomainRoot
 646   deriving (Eq, Show)
 647
 648 instance Pretty Domain where
 649   pretty_show DomainRoot = ""
 650   pretty_show (DomainName s) = pretty_show s
 651
 652 -- | Parse an RFC1035 \"domain\"
 653 --
 654 --   ==== _Examples_
 655 --
 656 --   >>> import Text.Parsec ( parse, parseTest )
 657 --
 658 --   Make sure we can parse a single character:
 659 --
 660 --   >>> pretty_print $ parse domain "" "a"
 661 --   a
 662 --
 663 --   And the empty domain:
 664 --
 665 --   >>> parseTest domain ""
 666 --   DomainRoot
 667 --
 668 --   We will in fact parse the \"empty\" domain off the front of
 669 --   pretty much anything:
 670 --
 671 --   >>> parseTest domain "!8===D"
 672 --   DomainRoot
 673 --
 674 --   Equality of domains is case-insensitive:
 675 --
 676 --   >>> let (Right r1) = parse domain "" "example.com"
 677 --   >>> let (Right r2) = parse domain "" "ExaMPle.coM"
 678 --   >>> r1 == r2
 679 --   True
 680 --
 681 --   A single dot IS parsed as the root, but the dot isn't consumed:
 682 --
 683 --   >>> parseTest domain "."
 684 --   DomainRoot
 685 --
 686 --   Anything over 255 characters is an error, so the root will be
 687 --   parsed:
 688 --
 689 --   >>> let big_l1 = replicate 63 'x'
 690 --   >>> let big_l2 = replicate 63 'y' -- Avoid equal neighboring labels!
 691 --   >>> let big_labels = big_l1 ++ "." ++ big_l2 ++ "."
 692 --   >>> let big_subdomain = concat $ replicate 3 big_labels
 693 --   >>> parseTest domain big_subdomain
 694 --   DomainRoot
 695 --
 696 --   But exactly 255 is allowed:
 697 --
 698 --   >>> import Data.List ( intercalate )
 699 --   >>> let l1 = replicate 63 'w'
 700 --   >>> let l2 = replicate 63 'x'
 701 --   >>> let l3 = replicate 63 'y'
 702 --   >>> let l4 = replicate 63 'z'
 703 --   >>> let big_subdomain = intercalate "." [l1,l2,l3,l4]
 704 --   >>> let (Right r) = parse domain "" big_subdomain
 705 --   >>> length (pretty_show r)
 706 --   255
 707 --
 708 domain :: Parser Domain
 709 domain = try parse_subdomain <|> parse_empty
 710   where
 711     parse_subdomain :: Parser Domain
 712     parse_subdomain = do
 713       s <- subdomain
 714       if length (pretty_show s) <= 255
 715       then return $ DomainName s
 716       else fail "subdomains can be at most 255 characters"
 717
 718     parse_empty :: Parser Domain
 719     parse_empty = string "" >> return DomainRoot