+{-# LANGUAGE DoAndIfThenElse #-}
+
-- | The 'Domain' data type and its parser. A 'Domain' represents a
-- name in the domain name system (DNS) as described by
-- RFC1035. In particular, we enforce the restrictions from Section
--
-- <https://tools.ietf.org/html/rfc1035#section-2.3.1>
--
-module Domain
+-- We basically work with strings and characters everywhere, even
+-- though this isn't really correct. The length specifications in
+-- the RFCs are all in terms of octets, so really a ByteString.Char8
+-- would be more appropriate. With strings, for example, we could
+-- have a unicode mumbo jumbo character that takes up two bytes
+-- (octets).
+--
+module Domain (
+ UserDomain,
+ user_domain )
where
import Data.Char ( toLower )
import Text.Parsec (
- ParseError,
(<|>),
- alphaNum,
char,
- eof,
- many1,
- option,
optionMaybe,
- parse,
string,
- try,
- unexpected )
+ try )
import qualified Text.Parsec as Parsec ( digit, letter)
import Text.Parsec.String ( Parser )
-- manner...
--
-- Since each part of DNS name is composed of our custom types, it
--- suffices to munge the equality for 'Letter'.
+-- suffices to munge the equality for 'Letter'. RFC4343
+-- <https://tools.ietf.org/html/rfc4343> clarifies the
+-- case-insensitivity rules, but the fact that we're treating DNS
+-- names as strings makes most of those problems go away (in
+-- exchange for new ones).
--
instance Eq Letter where
(Letter l1) == (Letter l2) = (toLower l1) == (toLower l2)
--
-- As well as strings of them:
--
+-- >>> import Text.Parsec ( parse )
-- >>> pretty_print $ parse ldh_str "" "a0-b"
-- a0-b
--
--
-- ==== _Examples_
--
+-- >>> import Text.Parsec ( parse )
+--
-- >>> let (Right r) = parse ldh_str "" "a"
-- >>> last_ldh_str r
-- LetDigHypLetDig (LetDigLetter (Letter 'a'))
--
-- ==== _Examples_
--
+-- >>> import Text.Parsec ( parse )
+--
-- >>> let (Right r) = parse ldh_str "" "a"
-- >>> init_ldh_str r
-- Nothing
--
-- ==== _Examples_
--
+-- >>> import Text.Parsec ( parse )
+--
-- >>> let (Right r) = parse ldh_str "" "a"
-- >>> length_ldh_str r
-- 1
--
-- ==== _Examples_
--
--- >>> import Text.Parsec ( parseTest )
+-- >>> import Text.Parsec ( parse, parseTest )
--
-- Make sure we can parse a single character:
--
--
-- ==== _Examples_
--
+-- >>> import Text.Parsec ( parse )
+--
-- >>> let (Right r) = parse ldh_str_let_dig "" "a"
-- >>> length_ldh_str_let_dig r
-- 1
--
-- <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
--
+-- We allow the slightly more general syntax from RFC1123, Section 2.1:
+--
+-- The syntax of a legal Internet host name was specified in RFC-952
+-- [DNS:4]. One aspect of host name syntax is hereby changed: the
+-- restriction on the first character is relaxed to allow either a
+-- letter or a digit. Host software MUST support this more liberal
+-- syntax.
+--
data Label = Label Letter (Maybe LdhStrLetDig)
deriving (Eq, Show)
--
-- ==== _Examples_
--
--- >>> import Text.Parsec ( parseTest )
+-- >>> import Text.Parsec ( parse, parseTest )
--
-- Make sure we can parse a single character:
--
SubdomainMultipleLabel Label Subdomain
deriving (Eq, Show)
+
+
instance Pretty Subdomain where
pretty_show (SubdomainSingleLabel l) = pretty_show l
pretty_show (SubdomainMultipleLabel l s) =
-- from left to right, so we pick off the leading label and then
-- recurse into the definition of subdomain.
--
+-- According to RFC1034, Section 3.1, two neighboring labels in a
+-- DNS name cannot be equal:
+--
+-- Each node has a label, which is zero to 63 octets in length. Brother
+-- nodes may not have the same label, although the same label can be used
+-- for nodes which are not brothers. One label is reserved, and that is
+-- the null (i.e., zero length) label used for the root.
+--
+-- We enforce this restriction, but the result is usually that we
+-- only parse the part of the subdomain leading up to the repeated
+-- label.
+--
-- ==== _Examples_
--
--- >>> import Text.Parsec ( parseTest )
+-- >>> import Text.Parsec ( parse, parseTest )
--
-- Make sure we can parse a single character:
--
-- >>> pretty_print $ parse subdomain "" "www.example.com"
-- www.example.com
--
+-- We reject a subdomain with equal neighbors, but this leads to
+-- only the single first label being parsed instead:
+--
+-- >>> pretty_print $ parse subdomain "" "www.www.example.com"
+-- www
+--
+-- But not one with a repeated but non-neighboring label:
+--
+-- >>> pretty_print $ parse subdomain "" "www.example.www.com"
+-- www.example.www.com
+--
subdomain :: Parser Subdomain
subdomain = try both <|> just_one
where
both :: Parser Subdomain
both = do
l <- label
- char '.'
+ _ <- char '.'
s <- subdomain
- return (SubdomainMultipleLabel l s)
+ let result = SubdomainMultipleLabel l s
+ if (subdomain_has_equal_neighbors result)
+ then fail "subdomain cannot have equal neighboring labels"
+ else return result
just_one :: Parser Subdomain
just_one = fmap SubdomainSingleLabel label
+-- | Retrieve a list of labels contained in a 'Subdomain'.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+--
+-- >>> let (Right r) = parse subdomain "" "a"
+-- >>> pretty_print $ subdomain_labels r
+-- ["a"]
+--
+-- >>> let (Right r) = parse subdomain "" "example.com"
+-- >>> pretty_print $ subdomain_labels r
+-- ["example","com"]
+--
+-- >>> let (Right r) = parse subdomain "" "www.example.com"
+-- >>> pretty_print $ subdomain_labels r
+-- ["www","example","com"]
+--
+subdomain_labels :: Subdomain -> [Label]
+subdomain_labels (SubdomainSingleLabel l) = [l]
+subdomain_labels (SubdomainMultipleLabel l s) = l : (subdomain_labels s)
+
+
+-- | Return a list of pairs of neighboring labels in a subdomain.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+-- >>> let (Right r) = parse subdomain "" "www.example.com"
+-- >>> pretty_print $ subdomain_label_neighbors r
+-- ["(\"www\",\"example\")","(\"example\",\"com\")"]
+--
+subdomain_label_neighbors :: Subdomain -> [(Label,Label)]
+subdomain_label_neighbors s =
+ zip ls (tail ls)
+ where
+ ls = subdomain_labels s
+
+
+-- | Return @True@ if the subdomain has any two equal neighboring
+-- labels, and @False@ otherwise.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+--
+-- >>> let (Right r) = parse subdomain "" "www.example.com"
+-- >>> subdomain_has_equal_neighbors r
+-- False
+--
+-- >>> let (Right l) = parse label "" "www"
+-- >>> let (Right s) = parse subdomain "" "www.example.com"
+-- >>> let bad_subdomain = SubdomainMultipleLabel l s
+-- >>> subdomain_has_equal_neighbors bad_subdomain
+-- True
+--
+subdomain_has_equal_neighbors :: Subdomain -> Bool
+subdomain_has_equal_neighbors s =
+ or [ x == y | (x,y) <- subdomain_label_neighbors s ]
+
+
+
-- * Domains
-- | An RFC1035 domain. According to RFC1035 a domain can be either a
--
-- ==== _Examples_
--
--- >>> import Text.Parsec ( parseTest )
+-- >>> import Text.Parsec ( parse, parseTest )
--
-- Make sure we can parse a single character:
--
-- >>> parseTest domain "."
-- DomainRoot
--
+-- Anything over 255 characters is an error, so the root will be
+-- parsed:
+--
+-- >>> let big_l1 = replicate 63 'x'
+-- >>> let big_l2 = replicate 63 'y' -- Avoid equal neighboring labels!
+-- >>> let big_labels = big_l1 ++ "." ++ big_l2 ++ "."
+-- >>> let big_subdomain = concat $ replicate 3 big_labels
+-- >>> parseTest domain big_subdomain
+-- DomainRoot
+--
+-- But exactly 255 is allowed:
+--
+-- >>> import Data.List ( intercalate )
+-- >>> let l1 = replicate 63 'w'
+-- >>> let l2 = replicate 63 'x'
+-- >>> let l3 = replicate 63 'y'
+-- >>> let l4 = replicate 63 'z'
+-- >>> let big_subdomain = intercalate "." [l1,l2,l3,l4]
+-- >>> let (Right r) = parse domain "" big_subdomain
+-- >>> length (pretty_show r)
+-- 255
+--
domain :: Parser Domain
domain = try parse_subdomain <|> parse_empty
where
- parse_subdomain = fmap DomainName subdomain
+ parse_subdomain :: Parser Domain
+ parse_subdomain = do
+ s <- subdomain
+ if length (pretty_show s) <= 255
+ then return $ DomainName s
+ else fail "subdomains can be at most 255 characters"
+
+ parse_empty :: Parser Domain
parse_empty = string "" >> return DomainRoot
--
-- ==== _Examples_
--
--- >>> import Text.Parsec ( parseTest )
+-- >>> import Text.Parsec ( parse, parseTest )
--
-- We can really parse the root now!
--
absolute :: Parser UserDomain
absolute = do
d <- domain
- r <- char '.'
+ _ <- char '.'
return $ UserDomainAbsolute d
relative :: Parser UserDomain