--- /dev/null
+{-# LANGUAGE DoAndIfThenElse #-}
+
+-- | The 'Domain' data type and its parser. A 'Domain' represents a
+-- name in the domain name system (DNS) as described by
+-- RFC1035. In particular, we enforce the restrictions from Section
+-- 2.3.1 \"Preferred name syntax\". See for example,
+--
+-- <https://tools.ietf.org/html/rfc1035#section-2.3.1>
+--
+-- We basically work with strings and characters everywhere, even
+-- though this isn't really correct. The length specifications in
+-- the RFCs are all in terms of octets, so really a ByteString.Char8
+-- would be more appropriate. With strings, for example, we could
+-- have a unicode mumbo jumbo character that takes up two bytes
+-- (octets).
+--
+module Network.DNS.RBL.Domain (
+ UserDomain,
+ user_domain )
+where
+
+import Data.Char ( toLower )
+import Text.Parsec (
+ (<|>),
+ char,
+ optionMaybe,
+ string,
+ try )
+import qualified Text.Parsec as Parsec ( digit, letter)
+import Text.Parsec.String ( Parser )
+
+import Network.DNS.RBL.Pretty ( Pretty(..) )
+
+-- * Digits
+
+-- | A wrapper around a digit character.
+--
+newtype Digit = Digit Char deriving (Eq, Show)
+instance Pretty Digit where pretty_show (Digit d) = [d]
+
+-- | Parse a single digit, but wrap it in our 'Digit' type.
+--
+digit :: Parser Digit
+digit = fmap Digit Parsec.digit
+
+
+-- * Letters
+
+-- | A wrapper around a letter character.
+--
+newtype Letter = Letter Char deriving (Show)
+instance Pretty Letter where pretty_show (Letter l) = [l]
+
+
+-- | Parse a single letter, but wrap it in our 'Letter' type.
+--
+letter :: Parser Letter
+letter = fmap Letter Parsec.letter
+
+-- | The derived instance of 'Eq' for letters is incorrect. All
+-- comparisons should be made case-insensitively. The following
+-- is an excerpt from RFC1035:
+--
+-- 2.3.3. Character Case
+--
+-- For all parts of the DNS that are part of the official
+-- protocol, all comparisons between character strings (e.g.,
+-- labels, domain names, etc.) are done in a case-insensitive
+-- manner...
+--
+-- Since each part of DNS name is composed of our custom types, it
+-- suffices to munge the equality for 'Letter'. RFC4343
+-- <https://tools.ietf.org/html/rfc4343> clarifies the
+-- case-insensitivity rules, but the fact that we're treating DNS
+-- names as strings makes most of those problems go away (in
+-- exchange for new ones).
+--
+instance Eq Letter where
+ (Letter l1) == (Letter l2) = (toLower l1) == (toLower l2)
+
+-- * Letters/Digits
+
+-- | A sum type representing either a letter or a digit.
+--
+data LetDig =
+ LetDigLetter Letter |
+ LetDigDigit Digit
+ deriving (Eq, Show)
+
+instance Pretty LetDig where
+ pretty_show (LetDigLetter l) = pretty_show l
+ pretty_show (LetDigDigit d) = pretty_show d
+
+-- | Parse a letter or a digit and wrap it in our 'LetDig' type.
+--
+let_dig :: Parser LetDig
+let_dig = (fmap LetDigLetter letter) <|> (fmap LetDigDigit digit)
+
+
+-- * Hyphens
+
+-- | A wrapper around a single hyphen character.
+--
+newtype Hyphen = Hyphen Char deriving (Eq, Show)
+instance Pretty Hyphen where pretty_show (Hyphen h) = [h]
+
+-- | Parse a single hyphen and wrap it in our 'Hyphen' type.
+--
+hyphen :: Parser Hyphen
+hyphen = fmap Hyphen (char '-')
+
+
+-- * Letter, Digit, or Hyphen.
+
+-- | A sum type representing a letter, digit, or hyphen.
+--
+data LetDigHyp =
+ LetDigHypLetDig LetDig |
+ LetDigHypHyphen Hyphen
+ deriving (Eq, Show)
+
+instance Pretty LetDigHyp where
+ pretty_show (LetDigHypLetDig ld) = pretty_show ld
+ pretty_show (LetDigHypHyphen h) = pretty_show h
+
+
+-- | The following is the simplest type in the domain grammar that
+-- isn't already implemented for us.
+--
+-- <let-dig> ::= <letter> | <digit>
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parseTest )
+--
+-- Letters, digits, and hyphens are all parsed:
+--
+-- >>> parseTest let_dig_hyp "a"
+-- LetDigHypLetDig (LetDigLetter (Letter 'a'))
+--
+-- >>> parseTest let_dig_hyp "7"
+-- LetDigHypLetDig (LetDigDigit (Digit '7'))
+--
+-- >>> parseTest let_dig_hyp "-"
+-- LetDigHypHyphen (Hyphen '-')
+--
+-- However, an underscore (for example) is not:
+--
+-- >>> parseTest let_dig_hyp "_"
+-- parse error at (line 1, column 1):
+-- unexpected "_"
+-- expecting letter, digit or "-"
+--
+let_dig_hyp :: Parser LetDigHyp
+let_dig_hyp =
+ parse_letdig <|> parse_hyphen
+ where
+ parse_letdig :: Parser LetDigHyp
+ parse_letdig = fmap LetDigHypLetDig let_dig
+
+ parse_hyphen :: Parser LetDigHyp
+ parse_hyphen = fmap LetDigHypHyphen hyphen
+
+
+-- * Letter/Digit/Hyphen strings
+
+-- | A string of letters, digits, and hyphens from the RFC1035 grammar:
+--
+-- <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
+--
+-- These are represented as either a single instance of a
+-- 'LetDigHyp', or a string of them (recursive).
+--
+data LdhStr =
+ LdhStrSingleLdh LetDigHyp |
+ LdhStrMultipleLdh LetDigHyp LdhStr
+ deriving (Eq, Show)
+
+instance Pretty LdhStr where
+ pretty_show (LdhStrSingleLdh ldh) = pretty_show ldh
+ pretty_show (LdhStrMultipleLdh ldh s) = (pretty_show ldh) ++ (pretty_show s)
+
+-- | Parse a string of letters, digits, and hyphens (an 'LdhStr').
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parseTest )
+--
+-- Single letters, digits, and hyphens are parsed:
+--
+-- >>> parseTest ldh_str "a"
+-- LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a')))
+--
+-- >>> parseTest ldh_str "0"
+-- LdhStrSingleLdh (LetDigHypLetDig (LetDigDigit (Digit '0')))
+--
+-- >>> parseTest ldh_str "-"
+-- LdhStrSingleLdh (LetDigHypHyphen (Hyphen '-'))
+--
+-- As well as strings of them:
+--
+-- >>> import Text.Parsec ( parse )
+-- >>> pretty_print $ parse ldh_str "" "a0-b"
+-- a0-b
+--
+ldh_str :: Parser LdhStr
+ldh_str = try both <|> just_one
+ where
+ both :: Parser LdhStr
+ both = do
+ ldh1 <- let_dig_hyp
+ ldh_tail <- ldh_str
+ return $ LdhStrMultipleLdh ldh1 ldh_tail
+
+ just_one :: Parser LdhStr
+ just_one = fmap LdhStrSingleLdh let_dig_hyp
+
+
+
+-- | A version of 'last' that works on a 'LdhStr' rather than a
+-- list. That is, it returns the last 'LetDigHyp' in the
+-- string. Since 'LdhStr' contains at least one character, there's
+-- no \"nil\" case here.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+--
+-- >>> let (Right r) = parse ldh_str "" "a"
+-- >>> last_ldh_str r
+-- LetDigHypLetDig (LetDigLetter (Letter 'a'))
+--
+-- >>> let (Right r) = parse ldh_str "" "abc-def"
+-- >>> last_ldh_str r
+-- LetDigHypLetDig (LetDigLetter (Letter 'f'))
+--
+last_ldh_str :: LdhStr -> LetDigHyp
+last_ldh_str (LdhStrSingleLdh x) = x
+last_ldh_str (LdhStrMultipleLdh _ x) = last_ldh_str x
+
+
+-- | A version of 'init' that works on a 'LdhStr' rather than a
+-- list. That is, it returns everything /except/ the last character in
+-- the string.
+--
+-- Since an 'LdhStr' must contain at least one character, this might
+-- not be opssible (when the input is of length one). So, we return
+-- a 'Maybe' value.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+--
+-- >>> let (Right r) = parse ldh_str "" "a"
+-- >>> init_ldh_str r
+-- Nothing
+--
+-- >>> let (Right r) = parse ldh_str "" "ab"
+-- >>> init_ldh_str r
+-- Just (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))))
+--
+-- >>> let (Right r) = parse ldh_str "" "abc-def"
+-- >>> init_ldh_str r
+-- Just (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'b'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'c'))) (LdhStrMultipleLdh (LetDigHypHyphen (Hyphen '-')) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'd'))) (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'e')))))))))
+--
+init_ldh_str :: LdhStr -> Maybe LdhStr
+init_ldh_str (LdhStrSingleLdh _) = Nothing
+init_ldh_str (LdhStrMultipleLdh h t) =
+ Just $ case (init_ldh_str t) of
+ -- We just got the second-to-last character, we're done.
+ Nothing -> LdhStrSingleLdh h
+
+ -- There's still more stuff. Recurse.
+ Just rest -> LdhStrMultipleLdh h rest
+
+
+-- | Compute the length of an 'LdhStr'. It will be at least one, since
+-- 'LdhStr's are non-empty. And if there's something other than the
+-- first character present, we simply recurse.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+--
+-- >>> let (Right r) = parse ldh_str "" "a"
+-- >>> length_ldh_str r
+-- 1
+--
+-- >>> let (Right r) = parse ldh_str "" "abc-def"
+-- >>> length_ldh_str r
+-- 7
+--
+length_ldh_str :: LdhStr -> Int
+length_ldh_str (LdhStrSingleLdh _) = 1
+length_ldh_str (LdhStrMultipleLdh _ t) = 1 + (length_ldh_str t)
+
+-- * Letter/Digit/Hyphen string followed by a trailing Letter/Digit
+
+-- | This type isn't explicitly part of the grammar, but it's what
+-- shows up in the square brackets of,
+--
+-- <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
+--
+-- The ldh-str is optional, but if one is present, we must also have
+-- a trailing let-dig to prevent the name from ending with a
+-- hyphen. This can be represented with a @Maybe LdhStrLetDig@,
+-- which is why we're about to define it.
+--
+data LdhStrLetDig = LdhStrLetDig (Maybe LdhStr) LetDig
+ deriving (Eq, Show)
+
+instance Pretty LdhStrLetDig where
+ pretty_show (LdhStrLetDig Nothing ld) = pretty_show ld
+ pretty_show (LdhStrLetDig (Just s) ld) = (pretty_show s) ++ (pretty_show ld)
+
+-- | Parse an 'LdhStrLetDig'. This isn't in the grammar, but we might
+-- as well define the parser for it independently since we gave it
+-- its own data type.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse, parseTest )
+--
+-- Make sure we can parse a single character:
+--
+-- >>> parseTest ldh_str_let_dig "a"
+-- LdhStrLetDig Nothing (LetDigLetter (Letter 'a'))
+--
+-- And longer strings:
+--
+-- >>> pretty_print $ parse ldh_str_let_dig "" "ab"
+-- ab
+--
+-- >>> pretty_print $ parse ldh_str_let_dig "" "-b"
+-- -b
+--
+-- >>> parseTest ldh_str_let_dig "b-"
+-- parse error at (line 1, column 3):
+-- label cannot end with a hyphen
+--
+ldh_str_let_dig :: Parser LdhStrLetDig
+ldh_str_let_dig = do
+ -- This will happily eat up the trailing let-dig...
+ full_ldh <- ldh_str
+
+ -- So we have to go back and see what happened.
+ case (last_ldh_str full_ldh) of
+ (LetDigHypHyphen _) -> fail "label cannot end with a hyphen"
+ (LetDigHypLetDig ld) ->
+ -- Ok, the label didn't end with a hyphen; now we need to split
+ -- off the last letter/digit so we can pack it into our return
+ -- type separately.
+ return $ case (init_ldh_str full_ldh) of
+ -- We only parsed one letter/digit. This can happen
+ -- if the label contains two characters. For example,
+ -- if we try to parse the label "ab", then the "a"
+ -- will be eaten by the label parser, and this
+ -- function will be left with only "b".
+ Nothing -> LdhStrLetDig Nothing ld
+
+ -- Usual case: there's was some leading let-dig-hyp junk,
+ -- return it too.
+ leading_ldhs -> LdhStrLetDig leading_ldhs ld
+
+
+
+-- | Compute the length of a 'LdhStrLetDig'. It's at least one, since
+-- the let-dig at the end is always there. And when there's an
+-- ldh-str too, we add its length to one.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+--
+-- >>> let (Right r) = parse ldh_str_let_dig "" "a"
+-- >>> length_ldh_str_let_dig r
+-- 1
+--
+-- >>> let (Right r) = parse ldh_str_let_dig "" "abc-def"
+-- >>> length_ldh_str_let_dig r
+-- 7
+--
+length_ldh_str_let_dig :: LdhStrLetDig -> Int
+length_ldh_str_let_dig (LdhStrLetDig Nothing _) = 1
+length_ldh_str_let_dig (LdhStrLetDig (Just ldhstring) _) =
+ 1 + (length_ldh_str ldhstring)
+
+
+-- * Labels
+
+-- | The label type from the RFC1035 grammar:
+--
+-- <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
+--
+-- We allow the slightly more general syntax from RFC1123, Section 2.1:
+--
+-- The syntax of a legal Internet host name was specified in RFC-952
+-- [DNS:4]. One aspect of host name syntax is hereby changed: the
+-- restriction on the first character is relaxed to allow either a
+-- letter or a digit. Host software MUST support this more liberal
+-- syntax.
+--
+data Label = Label Letter (Maybe LdhStrLetDig)
+ deriving (Eq, Show)
+
+instance Pretty Label where
+ pretty_show (Label l Nothing) = pretty_show l
+ pretty_show (Label l (Just s)) = (pretty_show l) ++ (pretty_show s)
+
+-- | Parse a 'Label'.
+--
+-- In addition to the grammar, there's another restriction on
+-- labels: their length must be 63 characters or less. Quoting
+-- Section 2.3.1, \"Preferred name syntax\", of RFC1035:
+--
+-- The labels must follow the rules for ARPANET host names. They
+-- must start with a letter, end with a letter or digit, and have
+-- as interior characters only letters, digits, and hyphen. There
+-- are also some restrictions on the length. Labels must be 63
+-- characters or less.
+--
+-- We check this only after we have successfully parsed a label.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse, parseTest )
+--
+-- Make sure we can parse a single character:
+--
+-- >>> parseTest label "a"
+-- Label (Letter 'a') Nothing
+--
+-- And longer strings:
+--
+-- >>> pretty_print $ parse label "" "abc-def"
+-- abc-def
+--
+-- But not anything ending in a hyphen:
+--
+-- >>> parseTest label "abc-"
+-- parse error at (line 1, column 5):
+-- label cannot end with a hyphen
+--
+-- Or anything over 63 characters:
+--
+-- >>> parseTest label (['a'..'z'] ++ ['a'..'z'] ++ ['a'..'z'])
+-- parse error at (line 1, column 79):
+-- labels must be 63 or fewer characters
+--
+-- However, /exactly/ 63 characters is acceptable:
+--
+-- >>> pretty_print $ parse label "" (replicate 63 'x')
+-- xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+--
+label :: Parser Label
+label = do
+ l <- letter -- Guaranteed to be there
+ maybe_s <- optionMaybe ldh_str_let_dig -- Might not be there
+ case maybe_s of
+ -- It can only be one character long, from the letter...
+ Nothing -> return $ Label l maybe_s
+
+ -- The letter gives us one character, so we check that the rest is
+ -- less than 62 characters long. But in the error message we need
+ -- to report 63.
+ Just s -> if (length_ldh_str_let_dig s) <= 62
+ then return $ Label l maybe_s
+ else fail "labels must be 63 or fewer characters"
+
+
+
+-- * Subdomains
+
+
+-- | The data type representing a \"subdomain\" from RFC1035:
+--
+-- <subdomain> ::= <label> | <subdomain> "." <label>
+--
+-- We have reversed the order of the subdomain and label in the
+-- second option, however. This is explained in 'subdomain'.
+--
+data Subdomain =
+ SubdomainSingleLabel Label |
+ SubdomainMultipleLabel Label Subdomain
+ deriving (Eq, Show)
+
+
+
+instance Pretty Subdomain where
+ pretty_show (SubdomainSingleLabel l) = pretty_show l
+ pretty_show (SubdomainMultipleLabel l s) =
+ (pretty_show l) ++ "." ++ (pretty_show s)
+
+-- | Parse an RFC1035 \"subdomain\". The given grammar is,
+--
+-- <subdomain> ::= <label> | <subdomain> "." <label>
+--
+-- However, we have reversed the order of the subdomain and label to
+-- prevent infinite recursion. The second option (subdomain + label)
+-- is obviously more specific, we we need to try it first. This
+-- presents a problem: we're trying to parse a subdomain in terms of
+-- a subdomain! The given grammar represents subdomains how we like
+-- to think of them; from right to left. But it's better to parse
+-- from left to right, so we pick off the leading label and then
+-- recurse into the definition of subdomain.
+--
+-- According to RFC1034, Section 3.1, two neighboring labels in a
+-- DNS name cannot be equal:
+--
+-- Each node has a label, which is zero to 63 octets in length. Brother
+-- nodes may not have the same label, although the same label can be used
+-- for nodes which are not brothers. One label is reserved, and that is
+-- the null (i.e., zero length) label used for the root.
+--
+-- We enforce this restriction, but the result is usually that we
+-- only parse the part of the subdomain leading up to the repeated
+-- label.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse, parseTest )
+--
+-- Make sure we can parse a single character:
+--
+-- >>> parseTest subdomain "a"
+-- SubdomainSingleLabel (Label (Letter 'a') Nothing)
+--
+-- >>> pretty_print $ parse subdomain "" "example.com"
+-- example.com
+--
+-- >>> pretty_print $ parse subdomain "" "www.example.com"
+-- www.example.com
+--
+-- We reject a subdomain with equal neighbors, but this leads to
+-- only the single first label being parsed instead:
+--
+-- >>> pretty_print $ parse subdomain "" "www.www.example.com"
+-- www
+--
+-- But not one with a repeated but non-neighboring label:
+--
+-- >>> pretty_print $ parse subdomain "" "www.example.www.com"
+-- www.example.www.com
+--
+subdomain :: Parser Subdomain
+subdomain = try both <|> just_one
+ where
+ both :: Parser Subdomain
+ both = do
+ l <- label
+ _ <- char '.'
+ s <- subdomain
+ let result = SubdomainMultipleLabel l s
+ if (subdomain_has_equal_neighbors result)
+ then fail "subdomain cannot have equal neighboring labels"
+ else return result
+
+ just_one :: Parser Subdomain
+ just_one = fmap SubdomainSingleLabel label
+
+
+
+-- | Retrieve a list of labels contained in a 'Subdomain'.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+--
+-- >>> let (Right r) = parse subdomain "" "a"
+-- >>> pretty_print $ subdomain_labels r
+-- ["a"]
+--
+-- >>> let (Right r) = parse subdomain "" "example.com"
+-- >>> pretty_print $ subdomain_labels r
+-- ["example","com"]
+--
+-- >>> let (Right r) = parse subdomain "" "www.example.com"
+-- >>> pretty_print $ subdomain_labels r
+-- ["www","example","com"]
+--
+subdomain_labels :: Subdomain -> [Label]
+subdomain_labels (SubdomainSingleLabel l) = [l]
+subdomain_labels (SubdomainMultipleLabel l s) = l : (subdomain_labels s)
+
+
+-- | Return a list of pairs of neighboring labels in a subdomain.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+-- >>> let (Right r) = parse subdomain "" "www.example.com"
+-- >>> pretty_print $ subdomain_label_neighbors r
+-- ["(\"www\",\"example\")","(\"example\",\"com\")"]
+--
+subdomain_label_neighbors :: Subdomain -> [(Label,Label)]
+subdomain_label_neighbors s =
+ zip ls (tail ls)
+ where
+ ls = subdomain_labels s
+
+
+-- | Return @True@ if the subdomain has any two equal neighboring
+-- labels, and @False@ otherwise.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+--
+-- >>> let (Right r) = parse subdomain "" "www.example.com"
+-- >>> subdomain_has_equal_neighbors r
+-- False
+--
+-- >>> let (Right l) = parse label "" "www"
+-- >>> let (Right s) = parse subdomain "" "www.example.com"
+-- >>> let bad_subdomain = SubdomainMultipleLabel l s
+-- >>> subdomain_has_equal_neighbors bad_subdomain
+-- True
+--
+subdomain_has_equal_neighbors :: Subdomain -> Bool
+subdomain_has_equal_neighbors s =
+ or [ x == y | (x,y) <- subdomain_label_neighbors s ]
+
+
+
+-- * Domains
+
+-- | An RFC1035 domain. According to RFC1035 a domain can be either a
+-- subdomain or \" \", which according to RFC2181
+-- <https://tools.ietf.org/html/rfc2181#section-11> means the root:
+--
+-- The zero length full name is defined as representing the root
+-- of the DNS tree, and is typically written and displayed as
+-- \".\".
+--
+-- We let the 'Domain' type remain true to those RFCs, even though
+-- they don't support an absolute domain name of e.g. a single dot.
+-- We have one more data type 'UserDomain' which handles the possibility
+-- of an absolute path.
+--
+data Domain =
+ DomainName Subdomain |
+ DomainRoot
+ deriving (Eq, Show)
+
+instance Pretty Domain where
+ pretty_show DomainRoot = ""
+ pretty_show (DomainName s) = pretty_show s
+
+-- | Parse an RFC1035 \"domain\"
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse, parseTest )
+--
+-- Make sure we can parse a single character:
+--
+-- >>> parseTest domain "a"
+-- DomainName (SubdomainSingleLabel (Label (Letter 'a') Nothing))
+--
+-- And the empty domain:
+--
+-- >>> parseTest domain ""
+-- DomainRoot
+--
+-- We will in fact parse the \"empty\" domain off the front of
+-- pretty much anything:
+--
+-- >>> parseTest domain "8===D"
+-- DomainRoot
+--
+-- Equality of domains is case-insensitive:
+--
+-- >>> let (Right r1) = parse domain "" "example.com"
+-- >>> let (Right r2) = parse domain "" "ExaMPle.coM"
+-- >>> r1 == r2
+-- True
+--
+-- A single dot IS parsed as the root, but the dot isn't consumed:
+--
+-- >>> parseTest domain "."
+-- DomainRoot
+--
+-- Anything over 255 characters is an error, so the root will be
+-- parsed:
+--
+-- >>> let big_l1 = replicate 63 'x'
+-- >>> let big_l2 = replicate 63 'y' -- Avoid equal neighboring labels!
+-- >>> let big_labels = big_l1 ++ "." ++ big_l2 ++ "."
+-- >>> let big_subdomain = concat $ replicate 3 big_labels
+-- >>> parseTest domain big_subdomain
+-- DomainRoot
+--
+-- But exactly 255 is allowed:
+--
+-- >>> import Data.List ( intercalate )
+-- >>> let l1 = replicate 63 'w'
+-- >>> let l2 = replicate 63 'x'
+-- >>> let l3 = replicate 63 'y'
+-- >>> let l4 = replicate 63 'z'
+-- >>> let big_subdomain = intercalate "." [l1,l2,l3,l4]
+-- >>> let (Right r) = parse domain "" big_subdomain
+-- >>> length (pretty_show r)
+-- 255
+--
+domain :: Parser Domain
+domain = try parse_subdomain <|> parse_empty
+ where
+ parse_subdomain :: Parser Domain
+ parse_subdomain = do
+ s <- subdomain
+ if length (pretty_show s) <= 255
+ then return $ DomainName s
+ else fail "subdomains can be at most 255 characters"
+
+ parse_empty :: Parser Domain
+ parse_empty = string "" >> return DomainRoot
+
+
+
+-- * User domains
+
+-- | This type helps clarify some murkiness in the DNS \"domain\" name
+-- specification. In RFC1034, it is acknowledged that a domain name
+-- input with a trailing \".\" will represent an absolute domain
+-- name (i.e. with respect to the DNS root). However, the grammar in
+-- RFC1035 disallows a trailing dot.
+--
+-- This makes some sense: within the DNS, everything knows its
+-- position in the tree. The relative/absolute distinction only
+-- makes sense on the client side, where a user's resolver might
+-- decide to append some suffix to a relative
+-- request. Unfortunately, that's where we live. So we have to deal
+-- with the possibility of having a trailing dot at the end of any
+-- domain name.
+--
+data UserDomain =
+ UserDomainRelative Domain |
+ UserDomainAbsolute Domain
+ deriving (Eq, Show)
+
+instance Pretty UserDomain where
+ pretty_show (UserDomainRelative d) = pretty_show d
+ pretty_show (UserDomainAbsolute d) = (pretty_show d) ++ "."
+
+
+-- | Parse a 'UserDomain'. This is what we'll be using to read user
+-- input, since it supports both relative and absolute domain names
+-- (unlike the implicitly-absolute 'Domain').
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse, parseTest )
+--
+-- We can really parse the root now!
+--
+-- >>> parseTest user_domain "."
+-- UserDomainAbsolute DomainRoot
+--
+-- But multiple dots aren't (only the first):
+--
+-- >>> pretty_print $ parse user_domain "" ".."
+-- .
+--
+-- We can also optionally have a trailing dot at the end of a
+-- non-empty name:
+--
+-- >>> pretty_print $ parse user_domain "" "www.example.com"
+-- www.example.com
+--
+-- >>> pretty_print $ parse user_domain "" "www.example.com."
+-- www.example.com.
+--
+-- A \"relative root\" can also be parsed, letting the user's
+-- resolver deal with it:
+--
+-- >>> parseTest user_domain ""
+-- UserDomainRelative DomainRoot
+--
+user_domain :: Parser UserDomain
+user_domain = try absolute <|> relative
+ where
+ absolute :: Parser UserDomain
+ absolute = do
+ d <- domain
+ _ <- char '.'
+ return $ UserDomainAbsolute d
+
+ relative :: Parser UserDomain
+ relative = fmap UserDomainRelative domain