-- manner...
--
-- Since each part of DNS name is composed of our custom types, it
--- suffices to munge the equality for 'Letter'.
+-- suffices to munge the equality for 'Letter'. RFC4343
+-- <https://tools.ietf.org/html/rfc4343> clarifies the
+-- case-insensitivity rules, but the fact that we're treating DNS
+-- names as strings makes most of those problems go away (in
+-- exchange for new ones).
--
instance Eq Letter where
(Letter l1) == (Letter l2) = (toLower l1) == (toLower l2)
--
-- <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
--
+-- We allow the slightly more general syntax from RFC1123, Section 2.1:
+--
+-- The syntax of a legal Internet host name was specified in RFC-952
+-- [DNS:4]. One aspect of host name syntax is hereby changed: the
+-- restriction on the first character is relaxed to allow either a
+-- letter or a digit. Host software MUST support this more liberal
+-- syntax.
+--
data Label = Label Letter (Maybe LdhStrLetDig)
deriving (Eq, Show)
SubdomainMultipleLabel Label Subdomain
deriving (Eq, Show)
+
+
instance Pretty Subdomain where
pretty_show (SubdomainSingleLabel l) = pretty_show l
pretty_show (SubdomainMultipleLabel l s) =
-- from left to right, so we pick off the leading label and then
-- recurse into the definition of subdomain.
--
+-- According to RFC1034, Section 3.1, two neighboring labels in a
+-- DNS name cannot be equal:
+--
+-- Each node has a label, which is zero to 63 octets in length. Brother
+-- nodes may not have the same label, although the same label can be used
+-- for nodes which are not brothers. One label is reserved, and that is
+-- the null (i.e., zero length) label used for the root.
+--
+-- We enforce this restriction, but the result is usually that we
+-- only parse the part of the subdomain leading up to the repeated
+-- label.
+--
-- ==== _Examples_
--
-- >>> import Text.Parsec ( parseTest )
-- >>> pretty_print $ parse subdomain "" "www.example.com"
-- www.example.com
--
+-- We reject a subdomain with equal neighbors, but this leads to
+-- only the single first label being parsed instead:
+--
+-- >>> pretty_print $ parse subdomain "" "www.www.example.com"
+-- www
+--
+-- But not one with a repeated but non-neighboring label:
+--
+-- >>> pretty_print $ parse subdomain "" "www.example.www.com"
+-- www.example.www.com
+--
subdomain :: Parser Subdomain
subdomain = try both <|> just_one
where
l <- label
char '.'
s <- subdomain
- return (SubdomainMultipleLabel l s)
+ let result = SubdomainMultipleLabel l s
+ if (subdomain_has_equal_neighbors result)
+ then fail "subdomain cannot have equal neighboring labels"
+ else return result
just_one :: Parser Subdomain
just_one = fmap SubdomainSingleLabel label
+-- | Retrieve a list of labels contained in a 'Subdomain'.
+--
+-- ==== _Examples_
+--
+-- >>> let (Right r) = parse subdomain "" "a"
+-- >>> pretty_print $ subdomain_labels r
+-- ["a"]
+--
+-- >>> let (Right r) = parse subdomain "" "example.com"
+-- >>> pretty_print $ subdomain_labels r
+-- ["example","com"]
+--
+-- >>> let (Right r) = parse subdomain "" "www.example.com"
+-- >>> pretty_print $ subdomain_labels r
+-- ["www","example","com"]
+--
+subdomain_labels :: Subdomain -> [Label]
+subdomain_labels (SubdomainSingleLabel l) = [l]
+subdomain_labels (SubdomainMultipleLabel l s) = l : (subdomain_labels s)
+
+
+-- | Return a list of pairs of neighboring labels in a subdomain.
+--
+-- ==== _Examples_
+--
+-- >>> let (Right r) = parse subdomain "" "www.example.com"
+-- >>> pretty_print $ subdomain_label_neighbors r
+-- ["(\"www\",\"example\")","(\"example\",\"com\")"]
+--
+subdomain_label_neighbors :: Subdomain -> [(Label,Label)]
+subdomain_label_neighbors s =
+ zip ls (tail ls)
+ where
+ ls = subdomain_labels s
+
+
+-- | Return @True@ if the subdomain has any two equal neighboring
+-- labels, and @False@ otherwise.
+--
+-- ==== _Examples_
+--
+-- >>> let (Right r) = parse subdomain "" "www.example.com"
+-- >>> subdomain_has_equal_neighbors r
+-- False
+--
+-- >>> let (Right l) = parse label "" "www"
+-- >>> let (Right s) = parse subdomain "" "www.example.com"
+-- >>> let bad_subdomain = SubdomainMultipleLabel l s
+-- >>> subdomain_has_equal_neighbors bad_subdomain
+-- True
+--
+subdomain_has_equal_neighbors :: Subdomain -> Bool
+subdomain_has_equal_neighbors s =
+ or [ x == y | (x,y) <- subdomain_label_neighbors s ]
+
+
+
-- * Domains
-- | An RFC1035 domain. According to RFC1035 a domain can be either a
-- Anything over 255 characters is an error, so the root will be
-- parsed:
--
--- >>> let big_label = replicate 63 'x'
--- >>> let big_subdomain = concat $ replicate 5 (big_label ++ ".")
+-- >>> let big_l1 = replicate 63 'x'
+-- >>> let big_l2 = replicate 63 'y' -- Avoid equal neighboring labels!
+-- >>> let big_labels = big_l1 ++ "." ++ big_l2 ++ "."
+-- >>> let big_subdomain = concat $ replicate 3 big_labels
-- >>> parseTest domain big_subdomain
-- DomainRoot
--
-- But exactly 255 is allowed:
--
-- >>> import Data.List ( intercalate )
--- >>> let big_label = replicate 63 'x'
--- >>> let big_subdomain = intercalate "." (replicate 4 big_label)
+-- >>> let l1 = replicate 63 'w'
+-- >>> let l2 = replicate 63 'x'
+-- >>> let l3 = replicate 63 'y'
+-- >>> let l4 = replicate 63 'z'
+-- >>> let big_subdomain = intercalate "." [l1,l2,l3,l4]
-- >>> let (Right r) = parse domain "" big_subdomain
-- >>> length (pretty_show r)
-- 255