src/Domain.hs

   1 -- | The 'Domain' data type and its parser. A 'Domain' represents a
   2 --   name in the domain name system (DNS) as described by
   3 --   RFC1035. In particular, we enforce the restrictions from Section
   4 --   2.3.1 \"Preferred name syntax\". See for example,
   5 --
   6 --     <https://tools.ietf.org/html/rfc1035#section-2.3.1>
   7 --
   8 --   We basically work with strings and characters everywhere, even
   9 --   though this isn't really correct. The length specifications in
  10 --   the RFCs are all in terms of octets, so really a ByteString.Char8
  11 --   would be more appropriate. With strings, for example, we could
  12 --   have a unicode mumbo jumbo character that takes up two bytes
  13 --   (octets).
  14 --
  15 module Domain (
  16   UserDomain,
  17   user_domain )
  18 where
  19
  20 import Data.Char ( toLower )
  21 import Text.Parsec (
  22   ParseError,
  23   (<|>),
  24   alphaNum,
  25   char,
  26   eof,
  27   many1,
  28   option,
  29   optionMaybe,
  30   parse,
  31   string,
  32   try,
  33   unexpected )
  34 import qualified Text.Parsec as Parsec ( digit, letter)
  35 import Text.Parsec.String ( Parser )
  36
  37 import Pretty ( Pretty(..) )
  38
  39 -- * Digits
  40
  41 -- | A wrapper around a digit character.
  42 --
  43 newtype Digit = Digit Char deriving (Eq, Show)
  44 instance Pretty Digit where pretty_show (Digit d) = [d]
  45
  46 -- | Parse a single digit, but wrap it in our 'Digit' type.
  47 --
  48 digit :: Parser Digit
  49 digit = fmap Digit Parsec.digit
  50
  51
  52 -- * Letters
  53
  54 -- | A wrapper around a letter character.
  55 --
  56 newtype Letter = Letter Char deriving (Show)
  57 instance Pretty Letter where pretty_show (Letter l) = [l]
  58
  59
  60 -- | Parse a single letter, but wrap it in our 'Letter' type.
  61 --
  62 letter :: Parser Letter
  63 letter = fmap Letter Parsec.letter
  64
  65 -- | The derived instance of 'Eq' for letters is incorrect. All
  66 --   comparisons should be made case-insensitively. The following
  67 --   is an excerpt from RFC1035:
  68 --
  69 --     2.3.3. Character Case
  70 --
  71 --     For all parts of the DNS that are part of the official
  72 --     protocol, all comparisons between character strings (e.g.,
  73 --     labels, domain names, etc.)  are done in a case-insensitive
  74 --     manner...
  75 --
  76 --   Since each part of DNS name is composed of our custom types, it
  77 --   suffices to munge the equality for 'Letter'. RFC4343
  78 --   <https://tools.ietf.org/html/rfc4343> clarifies the
  79 --   case-insensitivity rules, but the fact that we're treating DNS
  80 --   names as strings makes most of those problems go away (in
  81 --   exchange for new ones).
  82 --
  83 instance Eq Letter where
  84   (Letter l1) == (Letter l2) = (toLower l1) == (toLower l2)
  85
  86 -- * Letters/Digits
  87
  88 -- | A sum type representing either a letter or a digit.
  89 --
  90 data LetDig =
  91   LetDigLetter Letter |
  92   LetDigDigit  Digit
  93   deriving (Eq, Show)
  94
  95 instance Pretty LetDig where
  96   pretty_show (LetDigLetter l) = pretty_show l
  97   pretty_show (LetDigDigit d) = pretty_show d
  98
  99 -- | Parse a letter or a digit and wrap it in our 'LetDig' type.
 100 --
 101 let_dig :: Parser LetDig
 102 let_dig = (fmap LetDigLetter letter) <|> (fmap LetDigDigit digit)
 103
 104
 105 -- * Hyphens
 106
 107 -- | A wrapper around a single hyphen character.
 108 --
 109 newtype Hyphen = Hyphen Char deriving (Eq, Show)
 110 instance Pretty Hyphen where pretty_show (Hyphen h) = [h]
 111
 112 -- | Parse a single hyphen and wrap it in our 'Hyphen' type.
 113 --
 114 hyphen :: Parser Hyphen
 115 hyphen = fmap Hyphen (char '-')
 116
 117
 118 -- * Letter, Digit, or Hyphen.
 119
 120 -- | A sum type representing a letter, digit, or hyphen.
 121 --
 122 data LetDigHyp =
 123   LetDigHypLetDig LetDig |
 124   LetDigHypHyphen Hyphen
 125   deriving (Eq, Show)
 126
 127 instance Pretty LetDigHyp where
 128   pretty_show (LetDigHypLetDig ld) = pretty_show ld
 129   pretty_show (LetDigHypHyphen h) = pretty_show h
 130
 131
 132 -- | The following is the simplest type in the domain grammar that
 133 --   isn't already implemented for us.
 134 --
 135 --     <let-dig> ::= <letter> | <digit>
 136 --
 137 --   ==== _Examples_
 138 --
 139 --   >>> import Text.Parsec ( parseTest )
 140 --
 141 --   Letters, digits, and hyphens are all parsed:
 142 --
 143 --   >>> parseTest let_dig_hyp "a"
 144 --   LetDigHypLetDig (LetDigLetter (Letter 'a'))
 145 --
 146 --   >>> parseTest let_dig_hyp "7"
 147 --   LetDigHypLetDig (LetDigDigit (Digit '7'))
 148 --
 149 --   >>> parseTest let_dig_hyp "-"
 150 --   LetDigHypHyphen (Hyphen '-')
 151 --
 152 --   However, an underscore (for example) is not:
 153 --
 154 --   >>> parseTest let_dig_hyp "_"
 155 --   parse error at (line 1, column 1):
 156 --   unexpected "_"
 157 --   expecting letter, digit or "-"
 158 --
 159 let_dig_hyp :: Parser LetDigHyp
 160 let_dig_hyp =
 161   parse_letdig <|> parse_hyphen
 162   where
 163     parse_letdig :: Parser LetDigHyp
 164     parse_letdig = fmap LetDigHypLetDig let_dig
 165
 166     parse_hyphen :: Parser LetDigHyp
 167     parse_hyphen = fmap LetDigHypHyphen hyphen
 168
 169
 170 -- * Letter/Digit/Hyphen strings
 171
 172 -- | A string of letters, digits, and hyphens from the RFC1035 grammar:
 173 --
 174 --     <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
 175 --
 176 --   These are represented as either a single instance of a
 177 --   'LetDigHyp', or a string of them (recursive).
 178 --
 179 data LdhStr =
 180   LdhStrSingleLdh LetDigHyp |
 181   LdhStrMultipleLdh LetDigHyp LdhStr
 182   deriving (Eq, Show)
 183
 184 instance Pretty LdhStr where
 185   pretty_show (LdhStrSingleLdh ldh) = pretty_show ldh
 186   pretty_show (LdhStrMultipleLdh ldh s) = (pretty_show ldh) ++ (pretty_show s)
 187
 188 -- | Parse a string of letters, digits, and hyphens (an 'LdhStr').
 189 --
 190 --   ==== _Examples_
 191 --
 192 --   >>> import Text.Parsec ( parseTest )
 193 --
 194 --   Single letters, digits, and hyphens are parsed:
 195 --
 196 --   >>> parseTest ldh_str "a"
 197 --   LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a')))
 198 --
 199 --   >>> parseTest ldh_str "0"
 200 --   LdhStrSingleLdh (LetDigHypLetDig (LetDigDigit (Digit '0')))
 201 --
 202 --   >>> parseTest ldh_str "-"
 203 --   LdhStrSingleLdh (LetDigHypHyphen (Hyphen '-'))
 204 --
 205 --   As well as strings of them:
 206 --
 207 --   >>> pretty_print $ parse ldh_str "" "a0-b"
 208 --   a0-b
 209 --
 210 ldh_str :: Parser LdhStr
 211 ldh_str = try both <|> just_one
 212   where
 213     both :: Parser LdhStr
 214     both = do
 215       ldh1 <- let_dig_hyp
 216       ldh_tail <- ldh_str
 217       return $ LdhStrMultipleLdh ldh1 ldh_tail
 218
 219     just_one :: Parser LdhStr
 220     just_one = fmap LdhStrSingleLdh let_dig_hyp
 221
 222
 223
 224 -- | A version of 'last' that works on a 'LdhStr' rather than a
 225 --   list. That is, it returns the last 'LetDigHyp' in the
 226 --   string. Since 'LdhStr' contains at least one character, there's
 227 --   no \"nil\" case here.
 228 --
 229 --   ==== _Examples_
 230 --
 231 --   >>> let (Right r) = parse ldh_str "" "a"
 232 --   >>> last_ldh_str r
 233 --   LetDigHypLetDig (LetDigLetter (Letter 'a'))
 234 --
 235 --   >>> let (Right r) = parse ldh_str "" "abc-def"
 236 --   >>> last_ldh_str r
 237 --   LetDigHypLetDig (LetDigLetter (Letter 'f'))
 238 --
 239 last_ldh_str :: LdhStr -> LetDigHyp
 240 last_ldh_str (LdhStrSingleLdh x) = x
 241 last_ldh_str (LdhStrMultipleLdh _ x) = last_ldh_str x
 242
 243
 244 -- | A version of 'init' that works on a 'LdhStr' rather than a
 245 --   list. That is, it returns everything /except/ the last character in
 246 --   the string.
 247 --
 248 --   Since an 'LdhStr' must contain at least one character, this might
 249 --   not be opssible (when the input is of length one). So, we return
 250 --   a 'Maybe' value.
 251 --
 252 --   ==== _Examples_
 253 --
 254 --   >>> let (Right r) = parse ldh_str "" "a"
 255 --   >>> init_ldh_str r
 256 --   Nothing
 257 --
 258 --   >>> let (Right r) = parse ldh_str "" "ab"
 259 --   >>> init_ldh_str r
 260 --   Just (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))))
 261 --
 262 --   >>> let (Right r) = parse ldh_str "" "abc-def"
 263 --   >>> init_ldh_str r
 264 --   Just (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'b'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'c'))) (LdhStrMultipleLdh (LetDigHypHyphen (Hyphen '-')) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'd'))) (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'e')))))))))
 265 --
 266 init_ldh_str :: LdhStr -> Maybe LdhStr
 267 init_ldh_str (LdhStrSingleLdh _) = Nothing
 268 init_ldh_str (LdhStrMultipleLdh h t) =
 269   Just $ case (init_ldh_str t) of
 270            -- We just got the second-to-last character, we're done.
 271            Nothing   -> LdhStrSingleLdh h
 272
 273            -- There's still more stuff. Recurse.
 274            Just rest -> LdhStrMultipleLdh h rest
 275
 276
 277 -- | Compute the length of an 'LdhStr'. It will be at least one, since
 278 --   'LdhStr's are non-empty. And if there's something other than the
 279 --   first character present, we simply recurse.
 280 --
 281 --   ==== _Examples_
 282 --
 283 --   >>> let (Right r) = parse ldh_str "" "a"
 284 --   >>> length_ldh_str r
 285 --   1
 286 --
 287 --   >>> let (Right r) = parse ldh_str "" "abc-def"
 288 --   >>> length_ldh_str r
 289 --   7
 290 --
 291 length_ldh_str :: LdhStr -> Int
 292 length_ldh_str (LdhStrSingleLdh _) = 1
 293 length_ldh_str (LdhStrMultipleLdh _ t) = 1 + (length_ldh_str t)
 294
 295 -- * Letter/Digit/Hyphen string followed by a trailing Letter/Digit
 296
 297 -- | This type isn't explicitly part of the grammar, but it's what
 298 --   shows up in the square brackets of,
 299 --
 300 --     <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
 301 --
 302 --   The ldh-str is optional, but if one is present, we must also have
 303 --   a trailing let-dig to prevent the name from ending with a
 304 --   hyphen. This can be represented with a @Maybe LdhStrLetDig@,
 305 --   which is why we're about to define it.
 306 --
 307 data LdhStrLetDig = LdhStrLetDig (Maybe LdhStr) LetDig
 308   deriving (Eq, Show)
 309
 310 instance Pretty LdhStrLetDig where
 311   pretty_show (LdhStrLetDig Nothing ld) = pretty_show ld
 312   pretty_show (LdhStrLetDig (Just s) ld) = (pretty_show s) ++ (pretty_show ld)
 313
 314 -- | Parse an 'LdhStrLetDig'. This isn't in the grammar, but we might
 315 --   as well define the parser for it independently since we gave it
 316 --   its own data type.
 317 --
 318 --   ==== _Examples_
 319 --
 320 --   >>> import Text.Parsec ( parseTest )
 321 --
 322 --   Make sure we can parse a single character:
 323 --
 324 --   >>> parseTest ldh_str_let_dig "a"
 325 --   LdhStrLetDig Nothing (LetDigLetter (Letter 'a'))
 326 --
 327 --   And longer strings:
 328 --
 329 --   >>> pretty_print $ parse ldh_str_let_dig "" "ab"
 330 --   ab
 331 --
 332 --   >>> pretty_print $ parse ldh_str_let_dig "" "-b"
 333 --   -b
 334 --
 335 --   >>> parseTest ldh_str_let_dig "b-"
 336 --   parse error at (line 1, column 3):
 337 --   label cannot end with a hyphen
 338 --
 339 ldh_str_let_dig :: Parser LdhStrLetDig
 340 ldh_str_let_dig = do
 341   -- This will happily eat up the trailing let-dig...
 342   full_ldh <- ldh_str
 343
 344   -- So we have to go back and see what happened.
 345   case (last_ldh_str full_ldh) of
 346     (LetDigHypHyphen _) -> fail "label cannot end with a hyphen"
 347     (LetDigHypLetDig ld) ->
 348       -- Ok, the label didn't end with a hyphen; now we need to split
 349       -- off the last letter/digit so we can pack it into our return
 350       -- type separately.
 351       return $ case (init_ldh_str full_ldh) of
 352                  -- We only parsed one letter/digit. This can happen
 353                  -- if the label contains two characters. For example,
 354                  -- if we try to parse the label "ab", then the "a"
 355                  -- will be eaten by the label parser, and this
 356                  -- function will be left with only "b".
 357                  Nothing -> LdhStrLetDig Nothing ld
 358
 359                  -- Usual case: there's was some leading let-dig-hyp junk,
 360                  -- return it too.
 361                  leading_ldhs -> LdhStrLetDig leading_ldhs ld
 362
 363
 364
 365 -- | Compute the length of a 'LdhStrLetDig'. It's at least one, since
 366 --   the let-dig at the end is always there. And when there's an
 367 --   ldh-str too, we add its length to one.
 368 --
 369 --   ==== _Examples_
 370 --
 371 --   >>> let (Right r) = parse ldh_str_let_dig "" "a"
 372 --   >>> length_ldh_str_let_dig r
 373 --   1
 374 --
 375 --   >>> let (Right r) = parse ldh_str_let_dig "" "abc-def"
 376 --   >>> length_ldh_str_let_dig r
 377 --   7
 378 --
 379 length_ldh_str_let_dig :: LdhStrLetDig -> Int
 380 length_ldh_str_let_dig (LdhStrLetDig Nothing _) = 1
 381 length_ldh_str_let_dig (LdhStrLetDig (Just ldhstring) _) =
 382   1 + (length_ldh_str ldhstring)
 383
 384
 385 -- * Labels
 386
 387 -- | The label type from the RFC1035 grammar:
 388 --
 389 --     <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
 390 --
 391 --   We allow the slightly more general syntax from RFC1123, Section 2.1:
 392 --
 393 --     The syntax of a legal Internet host name was specified in RFC-952
 394 --     [DNS:4].  One aspect of host name syntax is hereby changed: the
 395 --     restriction on the first character is relaxed to allow either a
 396 --     letter or a digit.  Host software MUST support this more liberal
 397 --     syntax.
 398 --
 399 data Label = Label Letter (Maybe LdhStrLetDig)
 400   deriving (Eq, Show)
 401
 402 instance Pretty Label where
 403   pretty_show (Label l Nothing) = pretty_show l
 404   pretty_show (Label l (Just s)) = (pretty_show l) ++ (pretty_show s)
 405
 406 -- | Parse a 'Label'.
 407 --
 408 --   In addition to the grammar, there's another restriction on
 409 --   labels: their length must be 63 characters or less. Quoting
 410 --   Section 2.3.1, \"Preferred name syntax\", of RFC1035:
 411 --
 412 --     The labels must follow the rules for ARPANET host names.  They
 413 --     must start with a letter, end with a letter or digit, and have
 414 --     as interior characters only letters, digits, and hyphen.  There
 415 --     are also some restrictions on the length.  Labels must be 63
 416 --     characters or less.
 417 --
 418 --   We check this only after we have successfully parsed a label.
 419 --
 420 --   ==== _Examples_
 421 --
 422 --   >>> import Text.Parsec ( parseTest )
 423 --
 424 --   Make sure we can parse a single character:
 425 --
 426 --   >>> parseTest label "a"
 427 --   Label (Letter 'a') Nothing
 428 --
 429 --   And longer strings:
 430 --
 431 --   >>> pretty_print $ parse label "" "abc-def"
 432 --   abc-def
 433 --
 434 --   But not anything ending in a hyphen:
 435 --
 436 --   >>> parseTest label "abc-"
 437 --   parse error at (line 1, column 5):
 438 --   label cannot end with a hyphen
 439 --
 440 --   Or anything over 63 characters:
 441 --
 442 --   >>> parseTest label (['a'..'z'] ++ ['a'..'z'] ++ ['a'..'z'])
 443 --   parse error at (line 1, column 79):
 444 --   labels must be 63 or fewer characters
 445 --
 446 --   However, /exactly/ 63 characters is acceptable:
 447 --
 448 --   >>> pretty_print $ parse label "" (replicate 63 'x')
 449 --   xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 450 --
 451 label :: Parser Label
 452 label = do
 453   l <- letter -- Guaranteed to be there
 454   maybe_s <- optionMaybe ldh_str_let_dig -- Might not be there
 455   case maybe_s of
 456     -- It can only be one character long, from the letter...
 457     Nothing -> return $ Label l maybe_s
 458
 459     -- The letter gives us one character, so we check that the rest is
 460     -- less than 62 characters long. But in the error message we need
 461     -- to report 63.
 462     Just s  -> if (length_ldh_str_let_dig s) <= 62
 463               then return $ Label l maybe_s
 464               else fail "labels must be 63 or fewer characters"
 465
 466
 467
 468 -- * Subdomains
 469
 470
 471 -- | The data type representing a \"subdomain\" from RFC1035:
 472 --
 473 --     <subdomain> ::= <label> | <subdomain> "." <label>
 474 --
 475 --   We have reversed the order of the subdomain and label in the
 476 --   second option, however. This is explained in 'subdomain'.
 477 --
 478 data Subdomain =
 479   SubdomainSingleLabel Label |
 480   SubdomainMultipleLabel Label Subdomain
 481   deriving (Eq, Show)
 482
 483
 484
 485 instance Pretty Subdomain where
 486   pretty_show (SubdomainSingleLabel l) = pretty_show l
 487   pretty_show (SubdomainMultipleLabel l s) =
 488     (pretty_show l) ++ "." ++ (pretty_show s)
 489
 490 -- | Parse an RFC1035 \"subdomain\". The given grammar is,
 491 --
 492 --     <subdomain> ::= <label> | <subdomain> "." <label>
 493 --
 494 --   However, we have reversed the order of the subdomain and label to
 495 --   prevent infinite recursion. The second option (subdomain + label)
 496 --   is obviously more specific, we we need to try it first. This
 497 --   presents a problem: we're trying to parse a subdomain in terms of
 498 --   a subdomain! The given grammar represents subdomains how we like
 499 --   to think of them; from right to left. But it's better to parse
 500 --   from left to right, so we pick off the leading label and then
 501 --   recurse into the definition of subdomain.
 502 --
 503 --   According to RFC1034, Section 3.1, two neighboring labels in a
 504 --   DNS name cannot be equal:
 505 --
 506 --     Each node has a label, which is zero to 63 octets in length.  Brother
 507 --     nodes may not have the same label, although the same label can be used
 508 --     for nodes which are not brothers.  One label is reserved, and that is
 509 --     the null (i.e., zero length) label used for the root.
 510 --
 511 --   We enforce this restriction, but the result is usually that we
 512 --   only parse the part of the subdomain leading up to the repeated
 513 --   label.
 514 --
 515 --   ==== _Examples_
 516 --
 517 --   >>> import Text.Parsec ( parseTest )
 518 --
 519 --   Make sure we can parse a single character:
 520 --
 521 --   >>> parseTest subdomain "a"
 522 --   SubdomainSingleLabel (Label (Letter 'a') Nothing)
 523 --
 524 --   >>> pretty_print $ parse subdomain "" "example.com"
 525 --   example.com
 526 --
 527 --   >>> pretty_print $ parse subdomain "" "www.example.com"
 528 --   www.example.com
 529 --
 530 --   We reject a subdomain with equal neighbors, but this leads to
 531 --   only the single first label being parsed instead:
 532 --
 533 --   >>> pretty_print $ parse subdomain "" "www.www.example.com"
 534 --   www
 535 --
 536 --   But not one with a repeated but non-neighboring label:
 537 --
 538 --   >>> pretty_print $ parse subdomain "" "www.example.www.com"
 539 --   www.example.www.com
 540 --
 541 subdomain :: Parser Subdomain
 542 subdomain = try both <|> just_one
 543   where
 544     both :: Parser Subdomain
 545     both = do
 546       l <- label
 547       char '.'
 548       s <- subdomain
 549       let result = SubdomainMultipleLabel l s
 550       if (subdomain_has_equal_neighbors result)
 551       then fail "subdomain cannot have equal neighboring labels"
 552       else return result
 553
 554     just_one :: Parser Subdomain
 555     just_one = fmap SubdomainSingleLabel label
 556
 557
 558
 559 -- | Retrieve a list of labels contained in a 'Subdomain'.
 560 --
 561 --   ==== _Examples_
 562 --
 563 --   >>> let (Right r) = parse subdomain "" "a"
 564 --   >>> pretty_print $ subdomain_labels r
 565 --   ["a"]
 566 --
 567 --   >>> let (Right r) = parse subdomain "" "example.com"
 568 --   >>> pretty_print $ subdomain_labels r
 569 --   ["example","com"]
 570 --
 571 --   >>> let (Right r) = parse subdomain "" "www.example.com"
 572 --   >>> pretty_print $ subdomain_labels r
 573 --   ["www","example","com"]
 574 --
 575 subdomain_labels :: Subdomain -> [Label]
 576 subdomain_labels (SubdomainSingleLabel l) = [l]
 577 subdomain_labels (SubdomainMultipleLabel l s) = l : (subdomain_labels s)
 578
 579
 580 -- | Return a list of pairs of neighboring labels in a subdomain.
 581 --
 582 --   ==== _Examples_
 583 --
 584 --   >>> let (Right r) = parse subdomain "" "www.example.com"
 585 --   >>> pretty_print $ subdomain_label_neighbors r
 586 --   ["(\"www\",\"example\")","(\"example\",\"com\")"]
 587 --
 588 subdomain_label_neighbors :: Subdomain -> [(Label,Label)]
 589 subdomain_label_neighbors s =
 590   zip ls (tail ls)
 591   where
 592     ls = subdomain_labels s
 593
 594
 595 -- | Return @True@ if the subdomain has any two equal neighboring
 596 --   labels, and @False@ otherwise.
 597 --
 598 --   ==== _Examples_
 599 --
 600 --   >>> let (Right r) = parse subdomain "" "www.example.com"
 601 --   >>> subdomain_has_equal_neighbors r
 602 --   False
 603 --
 604 --   >>> let (Right l) = parse label "" "www"
 605 --   >>> let (Right s) = parse subdomain "" "www.example.com"
 606 --   >>> let bad_subdomain = SubdomainMultipleLabel l s
 607 --   >>> subdomain_has_equal_neighbors bad_subdomain
 608 --   True
 609 --
 610 subdomain_has_equal_neighbors :: Subdomain -> Bool
 611 subdomain_has_equal_neighbors s =
 612   or [ x == y | (x,y) <- subdomain_label_neighbors s ]
 613
 614
 615
 616 -- * Domains
 617
 618 -- | An RFC1035 domain. According to RFC1035 a domain can be either a
 619 -- subdomain or \" \", which according to RFC2181
 620 -- <https://tools.ietf.org/html/rfc2181#section-11> means the root:
 621 --
 622 --      The zero length full name is defined as representing the root
 623 --      of the DNS tree, and is typically written and displayed as
 624 --      \".\".
 625 --
 626 --   We let the 'Domain' type remain true to those RFCs, even though
 627 --   they don't support an absolute domain name of e.g. a single dot.
 628 --   We have one more data type 'UserDomain' which handles the possibility
 629 --   of an absolute path.
 630 --
 631 data Domain =
 632   DomainName Subdomain |
 633   DomainRoot
 634   deriving (Eq, Show)
 635
 636 instance Pretty Domain where
 637   pretty_show DomainRoot = ""
 638   pretty_show (DomainName s) = pretty_show s
 639
 640 -- | Parse an RFC1035 \"domain\"
 641 --
 642 --   ==== _Examples_
 643 --
 644 --   >>> import Text.Parsec ( parseTest )
 645 --
 646 --   Make sure we can parse a single character:
 647 --
 648 --   >>> parseTest domain "a"
 649 --   DomainName (SubdomainSingleLabel (Label (Letter 'a') Nothing))
 650 --
 651 --   And the empty domain:
 652 --
 653 --   >>> parseTest domain ""
 654 --   DomainRoot
 655 --
 656 --   We will in fact parse the \"empty\" domain off the front of
 657 --   pretty much anything:
 658 --
 659 --   >>> parseTest domain "8===D"
 660 --   DomainRoot
 661 --
 662 --   Equality of domains is case-insensitive:
 663 --
 664 --   >>> let (Right r1) = parse domain "" "example.com"
 665 --   >>> let (Right r2) = parse domain "" "ExaMPle.coM"
 666 --   >>> r1 == r2
 667 --   True
 668 --
 669 --   A single dot IS parsed as the root, but the dot isn't consumed:
 670 --
 671 --   >>> parseTest domain "."
 672 --   DomainRoot
 673 --
 674 --   Anything over 255 characters is an error, so the root will be
 675 --   parsed:
 676 --
 677 --   >>> let big_l1 = replicate 63 'x'
 678 --   >>> let big_l2 = replicate 63 'y' -- Avoid equal neighboring labels!
 679 --   >>> let big_labels = big_l1 ++ "." ++ big_l2 ++ "."
 680 --   >>> let big_subdomain = concat $ replicate 3 big_labels
 681 --   >>> parseTest domain big_subdomain
 682 --   DomainRoot
 683 --
 684 --   But exactly 255 is allowed:
 685 --
 686 --   >>> import Data.List ( intercalate )
 687 --   >>> let l1 = replicate 63 'w'
 688 --   >>> let l2 = replicate 63 'x'
 689 --   >>> let l3 = replicate 63 'y'
 690 --   >>> let l4 = replicate 63 'z'
 691 --   >>> let big_subdomain = intercalate "." [l1,l2,l3,l4]
 692 --   >>> let (Right r) = parse domain "" big_subdomain
 693 --   >>> length (pretty_show r)
 694 --   255
 695 --
 696 domain :: Parser Domain
 697 domain = try parse_subdomain <|> parse_empty
 698   where
 699     parse_subdomain :: Parser Domain
 700     parse_subdomain = do
 701       s <- subdomain
 702       if (length $ pretty_show s) <= 255
 703       then return $ DomainName s
 704       else fail "subdomains can be at most 255 characters"
 705
 706     parse_empty :: Parser Domain
 707     parse_empty = string "" >> return DomainRoot
 708
 709
 710
 711 -- * User domains
 712
 713 -- | This type helps clarify some murkiness in the DNS \"domain\" name
 714 --   specification. In RFC1034, it is acknowledged that a domain name
 715 --   input with a trailing \".\" will represent an absolute domain
 716 --   name (i.e. with respect to the DNS root). However, the grammar in
 717 --   RFC1035 disallows a trailing dot.
 718 --
 719 --   This makes some sense: within the DNS, everything knows its
 720 --   position in the tree. The relative/absolute distinction only
 721 --   makes sense on the client side, where a user's resolver might
 722 --   decide to append some suffix to a relative
 723 --   request. Unfortunately, that's where we live. So we have to deal
 724 --   with the possibility of having a trailing dot at the end of any
 725 --   domain name.
 726 --
 727 data UserDomain =
 728   UserDomainRelative Domain |
 729   UserDomainAbsolute Domain
 730   deriving (Eq, Show)
 731
 732 instance Pretty UserDomain where
 733   pretty_show (UserDomainRelative d) = pretty_show d
 734   pretty_show (UserDomainAbsolute d) = (pretty_show d) ++ "."
 735
 736
 737 -- | Parse a 'UserDomain'. This is what we'll be using to read user
 738 --   input, since it supports both relative and absolute domain names
 739 --   (unlike the implicitly-absolute 'Domain').
 740 --
 741 --   ==== _Examples_
 742 --
 743 --   >>> import Text.Parsec ( parseTest )
 744 --
 745 --   We can really parse the root now!
 746 --
 747 --   >>> parseTest user_domain "."
 748 --   UserDomainAbsolute DomainRoot
 749 --
 750 --   But multiple dots aren't (only the first):
 751 --
 752 --   >>> pretty_print $ parse user_domain "" ".."
 753 --   .
 754 --
 755 --   We can also optionally have a trailing dot at the end of a
 756 --   non-empty name:
 757 --
 758 --   >>>  pretty_print $ parse user_domain "" "www.example.com"
 759 --   www.example.com
 760 --
 761 --   >>>  pretty_print $ parse user_domain "" "www.example.com."
 762 --   www.example.com.
 763 --
 764 --   A \"relative root\" can also be parsed, letting the user's
 765 --   resolver deal with it:
 766 --
 767 --   >>> parseTest user_domain ""
 768 --   UserDomainRelative DomainRoot
 769 --
 770 user_domain :: Parser UserDomain
 771 user_domain = try absolute <|> relative
 772   where
 773     absolute :: Parser UserDomain
 774     absolute = do
 775       d <- domain
 776       r <- char '.'
 777       return $ UserDomainAbsolute d
 778
 779     relative :: Parser UserDomain
 780     relative = fmap UserDomainRelative domain