+
+-- * Subdomains
+
+
+-- | The data type representing a \"subdomain\" from RFC1035:
+--
+-- <subdomain> ::= <label> | <subdomain> "." <label>
+--
+-- We have reversed the order of the subdomain and label in the
+-- second option, however. This is explained in 'subdomain'.
+--
+data Subdomain =
+ SubdomainSingleLabel Label |
+ SubdomainMultipleLabel Label Subdomain
+ deriving (Eq, Show)
+
+
+
+instance Pretty Subdomain where
+ pretty_show (SubdomainSingleLabel l) = pretty_show l
+ pretty_show (SubdomainMultipleLabel l s) =
+ (pretty_show l) ++ "." ++ (pretty_show s)
+
+-- | Parse an RFC1035 \"subdomain\". The given grammar is,
+--
+-- <subdomain> ::= <label> | <subdomain> "." <label>
+--
+-- However, we have reversed the order of the subdomain and label to
+-- prevent infinite recursion. The second option (subdomain + label)
+-- is obviously more specific, we we need to try it first. This
+-- presents a problem: we're trying to parse a subdomain in terms of
+-- a subdomain! The given grammar represents subdomains how we like
+-- to think of them; from right to left. But it's better to parse
+-- from left to right, so we pick off the leading label and then
+-- recurse into the definition of subdomain.
+--
+-- According to RFC1034, Section 3.1, two neighboring labels in a
+-- DNS name cannot be equal:
+--
+-- Each node has a label, which is zero to 63 octets in length. Brother
+-- nodes may not have the same label, although the same label can be used
+-- for nodes which are not brothers. One label is reserved, and that is
+-- the null (i.e., zero length) label used for the root.
+--
+-- We enforce this restriction, but the result is usually that we
+-- only parse the part of the subdomain leading up to the repeated
+-- label.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse, parseTest )
+--
+-- Make sure we can parse a single character:
+--
+-- >>> parseTest subdomain "a"
+-- SubdomainSingleLabel (Label (Letter 'a') Nothing)
+--
+-- >>> pretty_print $ parse subdomain "" "example.com"
+-- example.com
+--
+-- >>> pretty_print $ parse subdomain "" "www.example.com"
+-- www.example.com
+--
+-- We reject a subdomain with equal neighbors, but this leads to
+-- only the single first label being parsed instead:
+--
+-- >>> pretty_print $ parse subdomain "" "www.www.example.com"
+-- www
+--
+-- But not one with a repeated but non-neighboring label:
+--
+-- >>> pretty_print $ parse subdomain "" "www.example.www.com"
+-- www.example.www.com
+--
+subdomain :: Parser Subdomain
+subdomain = try both <|> just_one
+ where
+ both :: Parser Subdomain
+ both = do
+ l <- label
+ _ <- char '.'
+ s <- subdomain
+ let result = SubdomainMultipleLabel l s
+ if (subdomain_has_equal_neighbors result)
+ then fail "subdomain cannot have equal neighboring labels"
+ else return result
+
+ just_one :: Parser Subdomain
+ just_one = fmap SubdomainSingleLabel label
+
+
+
+-- | Retrieve a list of labels contained in a 'Subdomain'.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+--
+-- >>> let (Right r) = parse subdomain "" "a"
+-- >>> pretty_print $ subdomain_labels r
+-- ["a"]
+--
+-- >>> let (Right r) = parse subdomain "" "example.com"
+-- >>> pretty_print $ subdomain_labels r
+-- ["example","com"]
+--
+-- >>> let (Right r) = parse subdomain "" "www.example.com"
+-- >>> pretty_print $ subdomain_labels r
+-- ["www","example","com"]
+--
+subdomain_labels :: Subdomain -> [Label]
+subdomain_labels (SubdomainSingleLabel l) = [l]
+subdomain_labels (SubdomainMultipleLabel l s) = l : (subdomain_labels s)
+
+
+-- | Return a list of pairs of neighboring labels in a subdomain.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+-- >>> let (Right r) = parse subdomain "" "www.example.com"
+-- >>> pretty_print $ subdomain_label_neighbors r
+-- ["(\"www\",\"example\")","(\"example\",\"com\")"]
+--
+subdomain_label_neighbors :: Subdomain -> [(Label,Label)]
+subdomain_label_neighbors s =
+ zip ls (tail ls)
+ where
+ ls = subdomain_labels s
+
+
+-- | Return @True@ if the subdomain has any two equal neighboring
+-- labels, and @False@ otherwise.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+--
+-- >>> let (Right r) = parse subdomain "" "www.example.com"
+-- >>> subdomain_has_equal_neighbors r
+-- False
+--
+-- >>> let (Right l) = parse label "" "www"
+-- >>> let (Right s) = parse subdomain "" "www.example.com"
+-- >>> let bad_subdomain = SubdomainMultipleLabel l s
+-- >>> subdomain_has_equal_neighbors bad_subdomain
+-- True
+--
+subdomain_has_equal_neighbors :: Subdomain -> Bool
+subdomain_has_equal_neighbors s =
+ or [ x == y | (x,y) <- subdomain_label_neighbors s ]
+
+
+
+-- * Domains
+
+-- | An RFC1035 domain. According to RFC1035 a domain can be either a
+-- subdomain or \" \", which according to RFC2181
+-- <https://tools.ietf.org/html/rfc2181#section-11> means the root:
+--
+-- The zero length full name is defined as representing the root
+-- of the DNS tree, and is typically written and displayed as
+-- \".\".
+--
+-- We let the 'Domain' type remain true to those RFCs, even though
+-- they don't support an absolute domain name of e.g. a single dot.
+-- We have one more data type 'UserDomain' which handles the possibility
+-- of an absolute path.
+--
+data Domain =
+ DomainName Subdomain |
+ DomainRoot
+ deriving (Eq, Show)
+
+instance Pretty Domain where
+ pretty_show DomainRoot = ""
+ pretty_show (DomainName s) = pretty_show s
+
+-- | Parse an RFC1035 \"domain\"
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse, parseTest )
+--
+-- Make sure we can parse a single character:
+--
+-- >>> parseTest domain "a"
+-- DomainName (SubdomainSingleLabel (Label (Letter 'a') Nothing))
+--
+-- And the empty domain:
+--
+-- >>> parseTest domain ""
+-- DomainRoot
+--
+-- We will in fact parse the \"empty\" domain off the front of
+-- pretty much anything:
+--
+-- >>> parseTest domain "8===D"
+-- DomainRoot
+--
+-- Equality of domains is case-insensitive:
+--
+-- >>> let (Right r1) = parse domain "" "example.com"
+-- >>> let (Right r2) = parse domain "" "ExaMPle.coM"
+-- >>> r1 == r2
+-- True
+--
+-- A single dot IS parsed as the root, but the dot isn't consumed:
+--
+-- >>> parseTest domain "."
+-- DomainRoot
+--
+-- Anything over 255 characters is an error, so the root will be
+-- parsed:
+--
+-- >>> let big_l1 = replicate 63 'x'
+-- >>> let big_l2 = replicate 63 'y' -- Avoid equal neighboring labels!
+-- >>> let big_labels = big_l1 ++ "." ++ big_l2 ++ "."
+-- >>> let big_subdomain = concat $ replicate 3 big_labels
+-- >>> parseTest domain big_subdomain
+-- DomainRoot
+--
+-- But exactly 255 is allowed:
+--
+-- >>> import Data.List ( intercalate )
+-- >>> let l1 = replicate 63 'w'
+-- >>> let l2 = replicate 63 'x'
+-- >>> let l3 = replicate 63 'y'
+-- >>> let l4 = replicate 63 'z'
+-- >>> let big_subdomain = intercalate "." [l1,l2,l3,l4]
+-- >>> let (Right r) = parse domain "" big_subdomain
+-- >>> length (pretty_show r)
+-- 255
+--