--- /dev/null
+{-# LANGUAGE DoAndIfThenElse #-}
+
+-- | This module contains the 'Subdomain' type and a Parsec parser to
+-- parse one. We don't export its constructor because then you could
+-- do something dumb like stick a period inside one.
+--
+-- These are defined in RFC1035, Section 2.3.1, \"Preferred name
+-- syntax\" <https://tools.ietf.org/html/rfc1035#section-2.3.1>:
+--
+-- <subdomain> ::= <label> | <subdomain> "." <label>
+--
+module Network.DNS.RBL.Domain.Subdomain (
+ Subdomain,
+ subdomain )
+where
+
+import Text.Parsec ( (<|>), char, try )
+import Text.Parsec.String ( Parser )
+
+import Network.DNS.RBL.Domain.Label ( Label, label )
+import Network.DNS.RBL.Pretty ( Pretty(..) )
+import Network.DNS.RBL.Reversible ( Reversible(..) )
+
+
+-- | The data type representing a \"subdomain\" from RFC1035. We have
+-- reversed the order of the subdomain and label in the second
+-- option (from the grammar), however. This is explained in the
+-- 'subdomain' parser.
+--
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+--
+-- >>> let (Right r) = parse label "" "x"
+-- >>> SubdomainSingleLabel r
+-- SubdomainSingleLabel (Label (LetDigLetter (Letter 'x')) Nothing)
+--
+data Subdomain =
+ SubdomainSingleLabel Label |
+ SubdomainMultipleLabel Label Subdomain
+ deriving (Eq, Show)
+
+
+-- | Pretty-print a 'Subdomain'. We print additional \".\" characters
+-- between our labels when there are more than one of them.
+--
+instance Pretty Subdomain where
+ pretty_show (SubdomainSingleLabel l) = pretty_show l
+ pretty_show (SubdomainMultipleLabel l s) =
+ (pretty_show l) ++ "." ++ (pretty_show s)
+
+
+instance Reversible Subdomain where
+ -- | Reverse the labels of the given subdomain.
+ --
+ -- ==== _Examples_
+ --
+ -- >>> import Text.Parsec ( parse )
+ --
+ -- Standard usage:
+ --
+ -- >>> let (Right r) = parse subdomain "" "com"
+ -- >>> pretty_print $ backwards r
+ -- com
+ --
+ -- >>> let (Right r) = parse subdomain "" "example.com"
+ -- >>> pretty_print $ backwards r
+ -- com.example
+ --
+ -- >>> let (Right r) = parse subdomain "" "www.example.com"
+ -- >>> pretty_print $ backwards r
+ -- com.example.www
+ --
+ -- >>> let (Right r) = parse subdomain "" "new.www.example.com"
+ -- >>> pretty_print $ backwards r
+ -- com.example.www.new
+ --
+
+ -- It's easy to reverse a single label...
+ backwards s@(SubdomainSingleLabel _) = s
+
+ -- For multiple labels we have two cases. The first is where we have
+ -- exactly two labels, and we just need to swap them.
+ backwards (SubdomainMultipleLabel l (SubdomainSingleLabel m)) =
+ SubdomainMultipleLabel m (SubdomainSingleLabel l)
+
+ -- And now the hard case. See the 'LdhStr' implementation for an
+ -- explanation.
+ --
+ backwards (SubdomainMultipleLabel l s) = build (SubdomainSingleLabel l) s
+ where
+ -- Build up the first Subdomain on the left by peeling off the
+ -- leading elements of the second Subdomain.
+ build :: Subdomain -> Subdomain -> Subdomain
+ build dst (SubdomainSingleLabel final) = SubdomainMultipleLabel final dst
+ build dst (SubdomainMultipleLabel leading rest) =
+ build (SubdomainMultipleLabel leading dst) rest
+
+
+
+-- | Parse a 'Subdomain'. Here is the RFC1035 grammar for reference:
+--
+-- <subdomain> ::= <label> | <subdomain> "." <label>
+--
+-- In contrast with the grammar, we have reversed the order of the
+-- subdomain and label to prevent infinite recursion. The second
+-- option (subdomain + label) is obviously more specific, we we need
+-- to try it first. This presents a problem: we're trying to parse a
+-- subdomain in terms of a subdomain! The given grammar represents
+-- subdomains how we like to think of them; from right to left. But
+-- it's better to parse from left to right, so we pick off the
+-- leading label and then recurse into the definition of subdomain.
+--
+-- According to RFC1034 <https://tools.ietf.org/html/rfc1034>
+-- Section 3.1, \"3.1. Name space specifications and terminology\"
+-- two neighboring labels in a DNS name cannot be equal:
+--
+-- Each node has a label, which is zero to 63 octets in length. Brother
+-- nodes may not have the same label, although the same label can be used
+-- for nodes which are not brothers. One label is reserved, and that is
+-- the null (i.e., zero length) label used for the root.
+--
+-- We enforce this restriction, but the result is usually that we
+-- only parse the part of the subdomain leading up to the repeated
+-- label.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse, parseTest )
+--
+-- Make sure we can parse a single character:
+--
+-- >>> parseTest subdomain "a"
+-- SubdomainSingleLabel (Label (LetDigLetter (Letter 'a')) Nothing)
+--
+-- >>> pretty_print $ parse subdomain "" "example.com"
+-- example.com
+--
+-- >>> pretty_print $ parse subdomain "" "www.example.com"
+-- www.example.com
+--
+-- We reject a subdomain with equal neighbors, but this leads to
+-- only the single first label being parsed instead:
+--
+-- >>> pretty_print $ parse subdomain "" "www.www.example.com"
+-- www
+--
+-- But not one with a repeated but non-neighboring label:
+--
+-- >>> pretty_print $ parse subdomain "" "www.example.www.com"
+-- www.example.www.com
+--
+subdomain :: Parser Subdomain
+subdomain = try both <|> just_one
+ where
+ both :: Parser Subdomain
+ both = do
+ l <- label
+ _ <- char '.'
+ s <- subdomain
+ let result = SubdomainMultipleLabel l s
+ if (subdomain_has_equal_neighbors result)
+ then fail "subdomain cannot have equal neighboring labels"
+ else return result
+
+ just_one :: Parser Subdomain
+ just_one = fmap SubdomainSingleLabel label
+
+
+
+-- | Retrieve a list of labels contained in a 'Subdomain'.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+--
+-- >>> let (Right r) = parse subdomain "" "a"
+-- >>> pretty_print $ subdomain_labels r
+-- ["a"]
+--
+-- >>> let (Right r) = parse subdomain "" "example.com"
+-- >>> pretty_print $ subdomain_labels r
+-- ["example","com"]
+--
+-- >>> let (Right r) = parse subdomain "" "www.example.com"
+-- >>> pretty_print $ subdomain_labels r
+-- ["www","example","com"]
+--
+subdomain_labels :: Subdomain -> [Label]
+subdomain_labels (SubdomainSingleLabel l) = [l]
+subdomain_labels (SubdomainMultipleLabel l s) = l : (subdomain_labels s)
+
+
+-- | Return a list of pairs of neighboring labels in a subdomain.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+-- >>> let (Right r) = parse subdomain "" "www.example.com"
+-- >>> pretty_print $ subdomain_label_neighbors r
+-- ["(\"www\",\"example\")","(\"example\",\"com\")"]
+--
+subdomain_label_neighbors :: Subdomain -> [(Label,Label)]
+subdomain_label_neighbors s =
+ zip ls (tail ls)
+ where
+ ls = subdomain_labels s
+
+
+-- | Return @True@ if the subdomain has any two equal neighboring
+-- labels, and @False@ otherwise.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+--
+-- >>> let (Right r) = parse subdomain "" "www.example.com"
+-- >>> subdomain_has_equal_neighbors r
+-- False
+--
+-- >>> let (Right l) = parse label "" "www"
+-- >>> let (Right s) = parse subdomain "" "www.example.com"
+-- >>> let bad_subdomain = SubdomainMultipleLabel l s
+-- >>> subdomain_has_equal_neighbors bad_subdomain
+-- True
+--
+subdomain_has_equal_neighbors :: Subdomain -> Bool
+subdomain_has_equal_neighbors s =
+ or [ x == y | (x,y) <- subdomain_label_neighbors s ]