Network.DNS.RBL.Tests
other-modules:
- Network.DNS.RBL.Domain
Network.DNS.RBL.Domain.Digit
+ Network.DNS.RBL.Domain.Domain
Network.DNS.RBL.Domain.Hyphen
+ Network.DNS.RBL.Domain.Label
Network.DNS.RBL.Domain.LdhStr
+ Network.DNS.RBL.Domain.LdhStrLetDig
Network.DNS.RBL.Domain.LetDig
Network.DNS.RBL.Domain.LetDigHyp
Network.DNS.RBL.Domain.Letter
+ Network.DNS.RBL.Domain.Subdomain
Network.DNS.RBL.Host
Network.DNS.RBL.IPv4Pattern
Network.DNS.RBL.Pretty
+++ /dev/null
-{-# LANGUAGE DoAndIfThenElse #-}
-
--- | The 'Domain' data type and its parser. A 'Domain' represents a
--- name in the domain name system (DNS) as described by
--- RFC1035. In particular, we enforce the restrictions from Section
--- 2.3.1 \"Preferred name syntax\". See for example,
---
--- <https://tools.ietf.org/html/rfc1035#section-2.3.1>
---
--- We basically work with strings and characters everywhere, even
--- though this isn't really correct. The length specifications in
--- the RFCs are all in terms of octets, so really a ByteString.Char8
--- would be more appropriate. With strings, for example, we could
--- have a unicode mumbo jumbo character that takes up two bytes
--- (octets).
---
-module Network.DNS.RBL.Domain (
- Domain(..),
- domain )
-where
-
-import Text.Parsec (
- (<|>),
- char,
- optionMaybe,
- string,
- try )
-import Text.Parsec.String ( Parser )
-
-import Network.DNS.RBL.Domain.LdhStr (
- LdhStr(..),
- ldh_str,
- ldh_str_length )
-import Network.DNS.RBL.Domain.LetDig ( LetDig, let_dig )
-import Network.DNS.RBL.Domain.LetDigHyp ( LetDigHyp(..) )
-import Network.DNS.RBL.Pretty ( Pretty(..) )
-import Network.DNS.RBL.Reversible ( Reversible(..) )
-
-
--- * Letter/Digit/Hyphen string followed by a trailing Letter/Digit
-
--- | This type isn't explicitly part of the grammar, but it's what
--- shows up in the square brackets of,
---
--- <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
---
--- The ldh-str is optional, but if one is present, we must also have
--- a trailing let-dig to prevent the name from ending with a
--- hyphen. This can be represented with a @Maybe LdhStrLetDig@,
--- which is why we're about to define it.
---
-data LdhStrLetDig = LdhStrLetDig (Maybe LdhStr) LetDig
- deriving (Eq, Show)
-
-instance Pretty LdhStrLetDig where
- pretty_show (LdhStrLetDig Nothing ld) = pretty_show ld
- pretty_show (LdhStrLetDig (Just s) ld) = (pretty_show s) ++ (pretty_show ld)
-
-
-
--- | Parse an 'LdhStrLetDig'. This isn't in the grammar, but we might
--- as well define the parser for it independently since we gave it
--- its own data type.
---
--- ==== _Examples_
---
--- >>> import Text.Parsec ( parse, parseTest )
---
--- Make sure we can parse a single character:
---
--- >>> parseTest ldh_str_let_dig "a"
--- LdhStrLetDig Nothing (LetDigLetter (Letter 'a'))
---
--- And longer strings:
---
--- >>> pretty_print $ parse ldh_str_let_dig "" "ab"
--- ab
---
--- >>> pretty_print $ parse ldh_str_let_dig "" "-b"
--- -b
---
--- >>> parseTest ldh_str_let_dig "b-"
--- parse error at (line 1, column 3):
--- label cannot end with a hyphen
---
-ldh_str_let_dig :: Parser LdhStrLetDig
-ldh_str_let_dig = do
- -- This will happily eat up the trailing let-dig...
- full_ldh <- ldh_str
-
- -- So we have to go back and see what happened.
- case (backwards full_ldh) of
-
- -- Fail on a single hyphen.
- (LdhStrSingleLdh (LetDigHypHyphen _)) ->
- fail "label cannot end with a hyphen"
-
- -- Fail for a hyphen followed by other stuff.
- (LdhStrMultipleLdh (LetDigHypHyphen _) _) ->
- fail "label cannot end with a hyphen"
-
- -- Simply return the thing if it's a single non-hyphen.
- (LdhStrSingleLdh (LetDigHypLetDig ld)) -> return $ LdhStrLetDig Nothing ld
-
- -- And peel off the last character for a non-hyphen followed by
- -- other stuff. We wind up reversing things twice, but whatever.
- (LdhStrMultipleLdh (LetDigHypLetDig ld) init_ldh_rev) ->
- let init_ldh = backwards init_ldh_rev
- in return $ LdhStrLetDig (Just init_ldh) ld
-
-
-
--- | Compute the length of a 'LdhStrLetDig'. It's at least one, since
--- the let-dig at the end is always there. And when there's an
--- ldh-str too, we add its length to one.
---
--- ==== _Examples_
---
--- >>> import Text.Parsec ( parse )
---
--- >>> let (Right r) = parse ldh_str_let_dig "" "a"
--- >>> length_ldh_str_let_dig r
--- 1
---
--- >>> let (Right r) = parse ldh_str_let_dig "" "abc-def"
--- >>> length_ldh_str_let_dig r
--- 7
---
-length_ldh_str_let_dig :: LdhStrLetDig -> Int
-length_ldh_str_let_dig (LdhStrLetDig Nothing _) = 1
-length_ldh_str_let_dig (LdhStrLetDig (Just ldhstring) _) =
- 1 + (ldh_str_length ldhstring)
-
-
--- * Labels
-
--- | The label type from the RFC1035 grammar:
---
--- <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
---
--- We allow the slightly more general syntax from RFC1123, Section 2.1:
---
--- The syntax of a legal Internet host name was specified in RFC-952
--- [DNS:4]. One aspect of host name syntax is hereby changed: the
--- restriction on the first character is relaxed to allow either a
--- letter or a digit. Host software MUST support this more liberal
--- syntax.
---
-data Label = Label LetDig (Maybe LdhStrLetDig)
- deriving (Eq, Show)
-
-instance Pretty Label where
- pretty_show (Label l Nothing) = pretty_show l
- pretty_show (Label l (Just s)) = (pretty_show l) ++ (pretty_show s)
-
--- | Parse a 'Label'.
---
--- In addition to the grammar, there's another restriction on
--- labels: their length must be 63 characters or less. Quoting
--- Section 2.3.1, \"Preferred name syntax\", of RFC1035:
---
--- The labels must follow the rules for ARPANET host names. They
--- must start with a letter, end with a letter or digit, and have
--- as interior characters only letters, digits, and hyphen. There
--- are also some restrictions on the length. Labels must be 63
--- characters or less.
---
--- We check this only after we have successfully parsed a label.
---
--- ==== _Examples_
---
--- >>> import Text.Parsec ( parse, parseTest )
---
--- Make sure we can parse a single character:
---
--- >>> parseTest label "a"
--- Label (LetDigLetter (Letter 'a')) Nothing
---
--- And longer strings:
---
--- >>> pretty_print $ parse label "" "abc-def"
--- abc-def
---
--- But not anything ending in a hyphen:
---
--- >>> parseTest label "abc-"
--- parse error at (line 1, column 5):
--- label cannot end with a hyphen
---
--- Or anything over 63 characters:
---
--- >>> parseTest label (['a'..'z'] ++ ['a'..'z'] ++ ['a'..'z'])
--- parse error at (line 1, column 79):
--- labels must be 63 or fewer characters
---
--- However, /exactly/ 63 characters is acceptable:
---
--- >>> pretty_print $ parse label "" (replicate 63 'x')
--- xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
---
--- Ensure that a label can begin with a digit:
---
--- >>> pretty_print $ parse label "" "3com"
--- 3com
---
-label :: Parser Label
-label = do
- l <- let_dig -- Guaranteed to be there
- maybe_s <- optionMaybe ldh_str_let_dig -- Might not be there
- case maybe_s of
- -- It can only be one character long, from the letter...
- Nothing -> return $ Label l maybe_s
-
- -- The letter gives us one character, so we check that the rest is
- -- less than 62 characters long. But in the error message we need
- -- to report 63.
- Just s -> if (length_ldh_str_let_dig s) <= 62
- then return $ Label l maybe_s
- else fail "labels must be 63 or fewer characters"
-
-
-
--- * Subdomains
-
-
--- | The data type representing a \"subdomain\" from RFC1035:
---
--- <subdomain> ::= <label> | <subdomain> "." <label>
---
--- We have reversed the order of the subdomain and label in the
--- second option, however. This is explained in 'subdomain'.
---
-data Subdomain =
- SubdomainSingleLabel Label |
- SubdomainMultipleLabel Label Subdomain
- deriving (Eq, Show)
-
-
-
-instance Pretty Subdomain where
- pretty_show (SubdomainSingleLabel l) = pretty_show l
- pretty_show (SubdomainMultipleLabel l s) =
- (pretty_show l) ++ "." ++ (pretty_show s)
-
-
-instance Reversible Subdomain where
- -- | Reverse the labels of the given subdomain.
- --
- -- ==== _Examples_
- --
- -- >>> import Text.Parsec ( parse )
- --
- -- Standard usage:
- --
- -- >>> let (Right r) = parse subdomain "" "com"
- -- >>> pretty_print $ backwards r
- -- com
- --
- -- >>> let (Right r) = parse subdomain "" "example.com"
- -- >>> pretty_print $ backwards r
- -- com.example
- --
- -- >>> let (Right r) = parse subdomain "" "www.example.com"
- -- >>> pretty_print $ backwards r
- -- com.example.www
- --
- -- >>> let (Right r) = parse subdomain "" "new.www.example.com"
- -- >>> pretty_print $ backwards r
- -- com.example.www.new
- --
-
- -- It's easy to reverse a single label...
- backwards s@(SubdomainSingleLabel _) = s
-
- -- For multiple labels we have two cases. The first is where we have
- -- exactly two labels, and we just need to swap them.
- backwards (SubdomainMultipleLabel l (SubdomainSingleLabel m)) =
- SubdomainMultipleLabel m (SubdomainSingleLabel l)
-
- -- And now the hard case. See the 'LdhStr' implementation for an
- -- explanation.
- --
- backwards (SubdomainMultipleLabel l s) = build (SubdomainSingleLabel l) s
- where
- -- Build up the first Subdomain on the left by peeling off the
- -- leading elements of the second Subdomain.
- build :: Subdomain -> Subdomain -> Subdomain
- build dst (SubdomainSingleLabel final) = SubdomainMultipleLabel final dst
- build dst (SubdomainMultipleLabel leading rest) =
- build (SubdomainMultipleLabel leading dst) rest
-
-
-
--- | Parse an RFC1035 \"subdomain\". The given grammar is,
---
--- <subdomain> ::= <label> | <subdomain> "." <label>
---
--- However, we have reversed the order of the subdomain and label to
--- prevent infinite recursion. The second option (subdomain + label)
--- is obviously more specific, we we need to try it first. This
--- presents a problem: we're trying to parse a subdomain in terms of
--- a subdomain! The given grammar represents subdomains how we like
--- to think of them; from right to left. But it's better to parse
--- from left to right, so we pick off the leading label and then
--- recurse into the definition of subdomain.
---
--- According to RFC1034, Section 3.1, two neighboring labels in a
--- DNS name cannot be equal:
---
--- Each node has a label, which is zero to 63 octets in length. Brother
--- nodes may not have the same label, although the same label can be used
--- for nodes which are not brothers. One label is reserved, and that is
--- the null (i.e., zero length) label used for the root.
---
--- We enforce this restriction, but the result is usually that we
--- only parse the part of the subdomain leading up to the repeated
--- label.
---
--- ==== _Examples_
---
--- >>> import Text.Parsec ( parse, parseTest )
---
--- Make sure we can parse a single character:
---
--- >>> parseTest subdomain "a"
--- SubdomainSingleLabel (Label (LetDigLetter (Letter 'a')) Nothing)
---
--- >>> pretty_print $ parse subdomain "" "example.com"
--- example.com
---
--- >>> pretty_print $ parse subdomain "" "www.example.com"
--- www.example.com
---
--- We reject a subdomain with equal neighbors, but this leads to
--- only the single first label being parsed instead:
---
--- >>> pretty_print $ parse subdomain "" "www.www.example.com"
--- www
---
--- But not one with a repeated but non-neighboring label:
---
--- >>> pretty_print $ parse subdomain "" "www.example.www.com"
--- www.example.www.com
---
-subdomain :: Parser Subdomain
-subdomain = try both <|> just_one
- where
- both :: Parser Subdomain
- both = do
- l <- label
- _ <- char '.'
- s <- subdomain
- let result = SubdomainMultipleLabel l s
- if (subdomain_has_equal_neighbors result)
- then fail "subdomain cannot have equal neighboring labels"
- else return result
-
- just_one :: Parser Subdomain
- just_one = fmap SubdomainSingleLabel label
-
-
-
--- | Retrieve a list of labels contained in a 'Subdomain'.
---
--- ==== _Examples_
---
--- >>> import Text.Parsec ( parse )
---
--- >>> let (Right r) = parse subdomain "" "a"
--- >>> pretty_print $ subdomain_labels r
--- ["a"]
---
--- >>> let (Right r) = parse subdomain "" "example.com"
--- >>> pretty_print $ subdomain_labels r
--- ["example","com"]
---
--- >>> let (Right r) = parse subdomain "" "www.example.com"
--- >>> pretty_print $ subdomain_labels r
--- ["www","example","com"]
---
-subdomain_labels :: Subdomain -> [Label]
-subdomain_labels (SubdomainSingleLabel l) = [l]
-subdomain_labels (SubdomainMultipleLabel l s) = l : (subdomain_labels s)
-
-
--- | Return a list of pairs of neighboring labels in a subdomain.
---
--- ==== _Examples_
---
--- >>> import Text.Parsec ( parse )
--- >>> let (Right r) = parse subdomain "" "www.example.com"
--- >>> pretty_print $ subdomain_label_neighbors r
--- ["(\"www\",\"example\")","(\"example\",\"com\")"]
---
-subdomain_label_neighbors :: Subdomain -> [(Label,Label)]
-subdomain_label_neighbors s =
- zip ls (tail ls)
- where
- ls = subdomain_labels s
-
-
--- | Return @True@ if the subdomain has any two equal neighboring
--- labels, and @False@ otherwise.
---
--- ==== _Examples_
---
--- >>> import Text.Parsec ( parse )
---
--- >>> let (Right r) = parse subdomain "" "www.example.com"
--- >>> subdomain_has_equal_neighbors r
--- False
---
--- >>> let (Right l) = parse label "" "www"
--- >>> let (Right s) = parse subdomain "" "www.example.com"
--- >>> let bad_subdomain = SubdomainMultipleLabel l s
--- >>> subdomain_has_equal_neighbors bad_subdomain
--- True
---
-subdomain_has_equal_neighbors :: Subdomain -> Bool
-subdomain_has_equal_neighbors s =
- or [ x == y | (x,y) <- subdomain_label_neighbors s ]
-
-
-
-
--- * Domains
-
--- | An RFC1035 domain. According to RFC1035 a domain can be either a
--- subdomain or \" \", which according to RFC2181
--- <https://tools.ietf.org/html/rfc2181#section-11> means the root:
---
--- The zero length full name is defined as representing the root
--- of the DNS tree, and is typically written and displayed as
--- \".\".
---
--- We let the 'Domain' type remain true to those RFCs, even though
--- they don't support an absolute domain name of e.g. a single dot.
---
-data Domain =
- DomainName Subdomain |
- DomainRoot
- deriving (Eq, Show)
-
-instance Pretty Domain where
- pretty_show DomainRoot = ""
- pretty_show (DomainName s) = pretty_show s
-
--- | Parse an RFC1035 \"domain\"
---
--- ==== _Examples_
---
--- >>> import Text.Parsec ( parse, parseTest )
---
--- Make sure we can parse a single character:
---
--- >>> pretty_print $ parse domain "" "a"
--- a
---
--- And the empty domain:
---
--- >>> parseTest domain ""
--- DomainRoot
---
--- We will in fact parse the \"empty\" domain off the front of
--- pretty much anything:
---
--- >>> parseTest domain "!8===D"
--- DomainRoot
---
--- Equality of domains is case-insensitive:
---
--- >>> let (Right r1) = parse domain "" "example.com"
--- >>> let (Right r2) = parse domain "" "ExaMPle.coM"
--- >>> r1 == r2
--- True
---
--- A single dot IS parsed as the root, but the dot isn't consumed:
---
--- >>> parseTest domain "."
--- DomainRoot
---
--- Anything over 255 characters is an error, so the root will be
--- parsed:
---
--- >>> let big_l1 = replicate 63 'x'
--- >>> let big_l2 = replicate 63 'y' -- Avoid equal neighboring labels!
--- >>> let big_labels = big_l1 ++ "." ++ big_l2 ++ "."
--- >>> let big_subdomain = concat $ replicate 3 big_labels
--- >>> parseTest domain big_subdomain
--- DomainRoot
---
--- But exactly 255 is allowed:
---
--- >>> import Data.List ( intercalate )
--- >>> let l1 = replicate 63 'w'
--- >>> let l2 = replicate 63 'x'
--- >>> let l3 = replicate 63 'y'
--- >>> let l4 = replicate 63 'z'
--- >>> let big_subdomain = intercalate "." [l1,l2,l3,l4]
--- >>> let (Right r) = parse domain "" big_subdomain
--- >>> length (pretty_show r)
--- 255
---
-domain :: Parser Domain
-domain = try parse_subdomain <|> parse_empty
- where
- parse_subdomain :: Parser Domain
- parse_subdomain = do
- s <- subdomain
- if length (pretty_show s) <= 255
- then return $ DomainName s
- else fail "subdomains can be at most 255 characters"
-
- parse_empty :: Parser Domain
- parse_empty = string "" >> return DomainRoot
-
-
-instance Reversible Domain where
- -- | Reverse the labels of a 'Domain'.
- --
- -- -- ==== _Examples_
- --
- -- >>> import Text.Parsec ( parse )
- --
- -- The root reverses to itself:
- --
- -- >>> let (Right r) = parse domain "" ""
- -- >>> backwards r
- -- DomainRoot
- --
- -- >>> let (Right r) = parse domain "" "new.www.example.com"
- -- >>> pretty_print $ backwards r
- -- com.example.www.new
- --
- backwards DomainRoot = DomainRoot
- backwards (DomainName s) = DomainName $ backwards s
--- /dev/null
+{-# LANGUAGE DoAndIfThenElse #-}
+
+-- | The 'Domain' data type and its parser. A 'Domain' represents a
+-- name in the domain name system (DNS) as described by
+-- RFC1035. In particular, we enforce the restrictions from Section
+-- 2.3.1 \"Preferred name syntax\". See for example,
+--
+-- <https://tools.ietf.org/html/rfc1035#section-2.3.1>
+--
+-- We basically work with strings and characters everywhere, even
+-- though this isn't really correct. The length specifications in
+-- the RFCs are all in terms of octets, so really a ByteString.Char8
+-- would be more appropriate. With strings, for example, we could
+-- have a unicode mumbo jumbo character that takes up two bytes
+-- (octets).
+--
+module Network.DNS.RBL.Domain.Domain (
+ Domain(..),
+ domain )
+where
+
+import Text.Parsec (
+ (<|>),
+ string,
+ try )
+import Text.Parsec.String ( Parser )
+
+import Network.DNS.RBL.Domain.Subdomain ( Subdomain, subdomain )
+import Network.DNS.RBL.Pretty ( Pretty(..) )
+import Network.DNS.RBL.Reversible ( Reversible(..) )
+
+
+
+-- | An RFC1035 domain. According to RFC1035 a domain can be either a
+-- subdomain or \" \", which according to RFC2181
+-- <https://tools.ietf.org/html/rfc2181#section-11> means the root:
+--
+-- The zero length full name is defined as representing the root
+-- of the DNS tree, and is typically written and displayed as
+-- \".\".
+--
+-- We let the 'Domain' type remain true to those RFCs, even though
+-- they don't support an absolute domain name of e.g. a single dot.
+--
+-- ==== _Examples_
+--
+-- >>> DomainRoot
+-- DomainRoot
+--
+-- >>> import Text.Parsec ( parse )
+-- >>> let s = parse subdomain "" "x"
+-- >>> DomainName s
+-- DomainName (SubdomainSingleLabel (Label (LetDigLetter (Letter 'x')) Nothing))
+--
+data Domain =
+ DomainName Subdomain |
+ DomainRoot
+ deriving (Eq, Show)
+
+
+-- | Pretty-print a 'Domain'.
+--
+-- ==== _Examples_
+--
+-- >>> pretty_show $ DomainRoot
+-- ""
+--
+-- >>> import Text.Parsec ( parse )
+-- >>> let s = parse subdomain "" "x"
+-- >>> pretty_print $ DomainName s
+-- x
+--
+instance Pretty Domain where
+ pretty_show DomainRoot = ""
+ pretty_show (DomainName s) = pretty_show s
+
+
+-- | The maximum number of characters (octets, really) allowed in a
+-- label. Quoting Section 3.1, \"Name space definitions\", of
+-- RFC1035:
+--
+-- To simplify implementations, the total length of a domain name
+-- (i.e., label octets and label length octets) is restricted to 255
+-- octets or less.
+--
+domain_max_length :: Int
+domain_max_length = 255
+
+
+-- | Parse an RFC1035 \"domain\"
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse, parseTest )
+--
+-- Make sure we can parse a single character:
+--
+-- >>> pretty_print $ parse domain "" "a"
+-- a
+--
+-- And the empty domain:
+--
+-- >>> parseTest domain ""
+-- DomainRoot
+--
+-- We will in fact parse the \"empty\" domain off the front of
+-- pretty much anything:
+--
+-- >>> parseTest domain "!8===D"
+-- DomainRoot
+--
+-- Equality of domains is case-insensitive:
+--
+-- >>> let (Right r1) = parse domain "" "example.com"
+-- >>> let (Right r2) = parse domain "" "ExaMPle.coM"
+-- >>> r1 == r2
+-- True
+--
+-- A single dot IS parsed as the root, but the dot isn't consumed:
+--
+-- >>> parseTest domain "."
+-- DomainRoot
+--
+-- Anything over domain_max_length characters is an error, so the
+-- root will be parsed:
+--
+-- >>> let big_l1 = replicate 63 'x'
+-- >>> let big_l2 = replicate 63 'y' -- Avoid equal neighboring labels!
+-- >>> let big_labels = big_l1 ++ "." ++ big_l2 ++ "."
+-- >>> let big_subdomain = concat $ replicate 3 big_labels
+-- >>> parseTest domain big_subdomain
+-- DomainRoot
+--
+-- But exactly domain_max_length is allowed:
+--
+-- >>> import Data.List ( intercalate )
+-- >>> let l1 = replicate 63 'w'
+-- >>> let l2 = replicate 63 'x'
+-- >>> let l3 = replicate 63 'y'
+-- >>> let l4 = replicate 63 'z'
+-- >>> let big_subdomain = intercalate "." [l1,l2,l3,l4]
+-- >>> let (Right r) = parse domain "" big_subdomain
+-- >>> length (pretty_show r)
+-- 255
+--
+domain :: Parser Domain
+domain = try parse_subdomain <|> parse_empty
+ where
+ parse_subdomain :: Parser Domain
+ parse_subdomain = do
+ s <- subdomain
+ if length (pretty_show s) <= domain_max_length
+ then return $ DomainName s
+ else fail $ "subdomains can be at most " ++
+ (show domain_max_length) ++
+ " characters"
+
+ parse_empty :: Parser Domain
+ parse_empty = string "" >> return DomainRoot
+
+
+instance Reversible Domain where
+ -- | Reverse the labels of a 'Domain'.
+ --
+ -- -- ==== _Examples_
+ --
+ -- >>> import Text.Parsec ( parse )
+ --
+ -- The root reverses to itself:
+ --
+ -- >>> let (Right r) = parse domain "" ""
+ -- >>> backwards r
+ -- DomainRoot
+ --
+ -- >>> let (Right r) = parse domain "" "new.www.example.com"
+ -- >>> pretty_print $ backwards r
+ -- com.example.www.new
+ --
+ backwards DomainRoot = DomainRoot
+ backwards (DomainName s) = DomainName $ backwards s
--- /dev/null
+-- | This module contains the 'Letter' type and a Parsec parser to
+-- parse one. We don't export its constructor because then you could
+-- do something dumb like stick a period inside one.
+--
+-- These are defined in RFC1035, Section 2.3.1, \"Preferred name
+-- syntax\" <https://tools.ietf.org/html/rfc1035#section-2.3.1>:
+--
+-- <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
+--
+-- However, we allow the slightly more general syntax from RFC1123,
+-- Section 2.1 <https://tools.ietf.org/html/rfc1123#section-2.1>:
+--
+-- The syntax of a legal Internet host name was specified in RFC-952
+-- [DNS:4]. One aspect of host name syntax is hereby changed: the
+-- restriction on the first character is relaxed to allow either a
+-- letter or a digit. Host software MUST support this more liberal
+-- syntax.
+--
+module Network.DNS.RBL.Domain.Label (
+ Label,
+ label )
+where
+
+import Text.Parsec ( optionMaybe )
+import Text.Parsec.String ( Parser )
+
+import Network.DNS.RBL.Domain.LdhStrLetDig (
+ LdhStrLetDig,
+ ldh_str_let_dig,
+ ldh_str_let_dig_length )
+import Network.DNS.RBL.Domain.LetDig ( LetDig, let_dig )
+import Network.DNS.RBL.Pretty ( Pretty(..) )
+
+-- | The label type from the RFC1035 and RFC1123 grammars.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+--
+-- >>> let (Right r) = parse let_dig "" "x"
+-- >>> r
+-- LetDigLetter (Letter 'x')
+-- >>> Label r Nothing
+-- Label (LetDigLetter (Letter 'x')) Nothing
+--
+data Label = Label LetDig (Maybe LdhStrLetDig)
+ deriving (Eq, Show)
+
+-- | Pretty-print a 'Label'. Should give you back a string that can be
+-- parsed as a 'Label'.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+--
+-- >>> let (Right r) = parse label "" "www"
+-- >>> pretty_print r
+-- www
+--
+-- >>> let (Right r) = parse label "" "example"
+-- >>> pretty_print r
+-- example
+--
+-- >>> let (Right r) = parse label "" "com"
+-- >>> pretty_print r
+-- com
+--
+instance Pretty Label where
+ pretty_show (Label l Nothing) = pretty_show l
+ pretty_show (Label l (Just s)) = (pretty_show l) ++ (pretty_show s)
+
+
+-- | The maximum number of characters (octets, really) allowed in a
+-- label. Quoting Section 2.3.1, \"Preferred name syntax\", of
+-- RFC1035:
+--
+-- The labels must follow the rules for ARPANET host names. They
+-- must start with a letter, end with a letter or digit, and have
+-- as interior characters only letters, digits, and hyphen. There
+-- are also some restrictions on the length. Labels must be 63
+-- characters or less.
+--
+label_max_length :: Int
+label_max_length = 63
+
+
+-- | Parse a 'Label'.
+--
+-- In addition to the grammar, there's another restriction on
+-- labels: their length must be 'label_max_length' characters or
+-- less. We check this only after we have successfully parsed a
+-- label.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse, parseTest )
+--
+-- Make sure we can parse a single character:
+--
+-- >>> parseTest label "a"
+-- Label (LetDigLetter (Letter 'a')) Nothing
+--
+-- And longer strings:
+--
+-- >>> pretty_print $ parse label "" "abc-def"
+-- abc-def
+--
+-- But not anything ending in a hyphen:
+--
+-- >>> parseTest label "abc-"
+-- parse error at (line 1, column 5):
+-- label cannot end with a hyphen
+--
+-- Or anything over 'label_max_length' characters:
+--
+-- >>> parseTest label (['a'..'z'] ++ ['a'..'z'] ++ ['a'..'z'])
+-- parse error at (line 1, column 79):
+-- labels must be 63 or fewer characters
+--
+-- However, /exactly/ 'label_max_length' characters is acceptable:
+--
+-- >>> pretty_print $ parse label "" (replicate label_max_length 'x')
+-- xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+--
+-- Ensure that a label can begin with a digit:
+--
+-- >>> pretty_print $ parse label "" "3com"
+-- 3com
+--
+label :: Parser Label
+label = do
+ l <- let_dig -- Guaranteed to be there
+ maybe_s <- optionMaybe ldh_str_let_dig -- Might not be there
+ case maybe_s of
+ -- It can only be one character long, from the letter...
+ Nothing -> return $ Label l maybe_s
+
+ -- The letter gives us one character, so we check that the rest is
+ -- less than (label_max_length - 1) characters long. But in the
+ -- error message we need to report label_max_length.
+ Just s -> if (ldh_str_let_dig_length s) <= (label_max_length - 1)
+ then return $ Label l maybe_s
+ else fail $ "labels must be " ++
+ (show label_max_length) ++
+ " or fewer characters"
--- /dev/null
+-- | This module contains the 'LdhStrLetDig' type a Parsec parser to
+-- parse one. This type isn't part of the RFC grammar, but it's
+-- used implicitly. In RFC1035, Section 2.3.1, \"Preferred name
+-- syntax\" <https://tools.ietf.org/html/rfc1035#section-2.3.1>:
+--
+-- <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
+--
+-- The stuff in square brackets can be thought of as an 'LdhStrLetDig'.
+--
+module Network.DNS.RBL.Domain.LdhStrLetDig (
+ LdhStrLetDig,
+ ldh_str_let_dig,
+ ldh_str_let_dig_length )
+where
+
+import Text.Parsec.String ( Parser )
+
+import Network.DNS.RBL.Domain.LdhStr ( LdhStr(..), ldh_str, ldh_str_length )
+import Network.DNS.RBL.Domain.LetDig ( LetDig )
+import Network.DNS.RBL.Domain.LetDigHyp ( LetDigHyp(..) )
+import Network.DNS.RBL.Pretty ( Pretty(..) )
+import Network.DNS.RBL.Reversible ( Reversible(..) )
+
+
+-- | A type representing a Letter/Digit/Hyphen string followed by a
+-- trailing Letter/Digit. This type isn't explicitly part of the
+-- grammar, but it's what shows up in the square brackets of,
+--
+-- <label> ::= <letter> [ [ <ldh-str> ] <let-dig> ]
+--
+-- The ldh-str is optional, but if one is present, we must also have
+-- a trailing let-dig to prevent the name from ending with a
+-- hyphen. This can be represented with a 'LdhStrLetDig',
+-- which is why we're about to define it.
+--
+data LdhStrLetDig = LdhStrLetDig (Maybe LdhStr) LetDig
+ deriving (Eq, Show)
+
+
+-- | Pretty-printing for strings of letters/digits/hyphens (ending
+-- with a letter or a digit) that we've already parsed.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+-- >>> let (Right r) = parse ldh_str_let_dig "" "xy-z"
+-- >>> pretty_print $ r
+-- xy-z
+--
+instance Pretty LdhStrLetDig where
+ pretty_show (LdhStrLetDig Nothing ld) = pretty_show ld
+ pretty_show (LdhStrLetDig (Just s) ld) = (pretty_show s) ++ (pretty_show ld)
+
+
+
+-- | Parse an 'LdhStrLetDig'. This isn't in the grammar, but we might
+-- as well define the parser for it independently since we gave it
+-- its own data type.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse, parseTest )
+--
+-- Make sure we can parse a single character:
+--
+-- >>> parseTest ldh_str_let_dig "a"
+-- LdhStrLetDig Nothing (LetDigLetter (Letter 'a'))
+--
+-- And longer strings:
+--
+-- >>> pretty_print $ parse ldh_str_let_dig "" "ab"
+-- ab
+--
+-- >>> pretty_print $ parse ldh_str_let_dig "" "-b"
+-- -b
+--
+-- >>> parseTest ldh_str_let_dig "b-"
+-- parse error at (line 1, column 3):
+-- label cannot end with a hyphen
+--
+ldh_str_let_dig :: Parser LdhStrLetDig
+ldh_str_let_dig = do
+ -- This will happily eat up the trailing let-dig...
+ full_ldh <- ldh_str
+
+ -- So we have to go back and see what happened.
+ case (backwards full_ldh) of
+
+ -- Fail on a single hyphen.
+ (LdhStrSingleLdh (LetDigHypHyphen _)) ->
+ fail "label cannot end with a hyphen"
+
+ -- Fail for a hyphen followed by other stuff.
+ (LdhStrMultipleLdh (LetDigHypHyphen _) _) ->
+ fail "label cannot end with a hyphen"
+
+ -- Simply return the thing if it's a single non-hyphen.
+ (LdhStrSingleLdh (LetDigHypLetDig ld)) -> return $ LdhStrLetDig Nothing ld
+
+ -- And peel off the last character for a non-hyphen followed by
+ -- other stuff. We wind up reversing things twice, but whatever.
+ (LdhStrMultipleLdh (LetDigHypLetDig ld) init_ldh_rev) ->
+ let init_ldh = backwards init_ldh_rev
+ in return $ LdhStrLetDig (Just init_ldh) ld
+
+
+
+-- | Compute the length of a 'LdhStrLetDig'. It's at least one, since
+-- the let-dig at the end is always there. And when there's an
+-- ldh-str too, we add its length to one.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+--
+-- >>> let (Right r) = parse ldh_str_let_dig "" "a"
+-- >>> ldh_str_let_dig_length r
+-- 1
+--
+-- >>> let (Right r) = parse ldh_str_let_dig "" "abc-def"
+-- >>> ldh_str_let_dig_length r
+-- 7
+--
+ldh_str_let_dig_length :: LdhStrLetDig -> Int
+ldh_str_let_dig_length (LdhStrLetDig Nothing _) = 1
+ldh_str_let_dig_length (LdhStrLetDig (Just ldhstring) _) =
+ 1 + (ldh_str_length ldhstring)
import Network.DNS.RBL.Pretty ( Pretty(..) )
--- * Letters
-- | A wrapper around a letter character.
--
--- /dev/null
+{-# LANGUAGE DoAndIfThenElse #-}
+
+-- | This module contains the 'Subdomain' type and a Parsec parser to
+-- parse one. We don't export its constructor because then you could
+-- do something dumb like stick a period inside one.
+--
+-- These are defined in RFC1035, Section 2.3.1, \"Preferred name
+-- syntax\" <https://tools.ietf.org/html/rfc1035#section-2.3.1>:
+--
+-- <subdomain> ::= <label> | <subdomain> "." <label>
+--
+module Network.DNS.RBL.Domain.Subdomain (
+ Subdomain,
+ subdomain )
+where
+
+import Text.Parsec ( (<|>), char, try )
+import Text.Parsec.String ( Parser )
+
+import Network.DNS.RBL.Domain.Label ( Label, label )
+import Network.DNS.RBL.Pretty ( Pretty(..) )
+import Network.DNS.RBL.Reversible ( Reversible(..) )
+
+
+-- | The data type representing a \"subdomain\" from RFC1035. We have
+-- reversed the order of the subdomain and label in the second
+-- option (from the grammar), however. This is explained in the
+-- 'subdomain' parser.
+--
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+--
+-- >>> let (Right r) = parse label "" "x"
+-- >>> SubdomainSingleLabel r
+-- SubdomainSingleLabel (Label (LetDigLetter (Letter 'x')) Nothing)
+--
+data Subdomain =
+ SubdomainSingleLabel Label |
+ SubdomainMultipleLabel Label Subdomain
+ deriving (Eq, Show)
+
+
+-- | Pretty-print a 'Subdomain'. We print additional \".\" characters
+-- between our labels when there are more than one of them.
+--
+instance Pretty Subdomain where
+ pretty_show (SubdomainSingleLabel l) = pretty_show l
+ pretty_show (SubdomainMultipleLabel l s) =
+ (pretty_show l) ++ "." ++ (pretty_show s)
+
+
+instance Reversible Subdomain where
+ -- | Reverse the labels of the given subdomain.
+ --
+ -- ==== _Examples_
+ --
+ -- >>> import Text.Parsec ( parse )
+ --
+ -- Standard usage:
+ --
+ -- >>> let (Right r) = parse subdomain "" "com"
+ -- >>> pretty_print $ backwards r
+ -- com
+ --
+ -- >>> let (Right r) = parse subdomain "" "example.com"
+ -- >>> pretty_print $ backwards r
+ -- com.example
+ --
+ -- >>> let (Right r) = parse subdomain "" "www.example.com"
+ -- >>> pretty_print $ backwards r
+ -- com.example.www
+ --
+ -- >>> let (Right r) = parse subdomain "" "new.www.example.com"
+ -- >>> pretty_print $ backwards r
+ -- com.example.www.new
+ --
+
+ -- It's easy to reverse a single label...
+ backwards s@(SubdomainSingleLabel _) = s
+
+ -- For multiple labels we have two cases. The first is where we have
+ -- exactly two labels, and we just need to swap them.
+ backwards (SubdomainMultipleLabel l (SubdomainSingleLabel m)) =
+ SubdomainMultipleLabel m (SubdomainSingleLabel l)
+
+ -- And now the hard case. See the 'LdhStr' implementation for an
+ -- explanation.
+ --
+ backwards (SubdomainMultipleLabel l s) = build (SubdomainSingleLabel l) s
+ where
+ -- Build up the first Subdomain on the left by peeling off the
+ -- leading elements of the second Subdomain.
+ build :: Subdomain -> Subdomain -> Subdomain
+ build dst (SubdomainSingleLabel final) = SubdomainMultipleLabel final dst
+ build dst (SubdomainMultipleLabel leading rest) =
+ build (SubdomainMultipleLabel leading dst) rest
+
+
+
+-- | Parse a 'Subdomain'. Here is the RFC1035 grammar for reference:
+--
+-- <subdomain> ::= <label> | <subdomain> "." <label>
+--
+-- In contrast with the grammar, we have reversed the order of the
+-- subdomain and label to prevent infinite recursion. The second
+-- option (subdomain + label) is obviously more specific, we we need
+-- to try it first. This presents a problem: we're trying to parse a
+-- subdomain in terms of a subdomain! The given grammar represents
+-- subdomains how we like to think of them; from right to left. But
+-- it's better to parse from left to right, so we pick off the
+-- leading label and then recurse into the definition of subdomain.
+--
+-- According to RFC1034 <https://tools.ietf.org/html/rfc1034>
+-- Section 3.1, \"3.1. Name space specifications and terminology\"
+-- two neighboring labels in a DNS name cannot be equal:
+--
+-- Each node has a label, which is zero to 63 octets in length. Brother
+-- nodes may not have the same label, although the same label can be used
+-- for nodes which are not brothers. One label is reserved, and that is
+-- the null (i.e., zero length) label used for the root.
+--
+-- We enforce this restriction, but the result is usually that we
+-- only parse the part of the subdomain leading up to the repeated
+-- label.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse, parseTest )
+--
+-- Make sure we can parse a single character:
+--
+-- >>> parseTest subdomain "a"
+-- SubdomainSingleLabel (Label (LetDigLetter (Letter 'a')) Nothing)
+--
+-- >>> pretty_print $ parse subdomain "" "example.com"
+-- example.com
+--
+-- >>> pretty_print $ parse subdomain "" "www.example.com"
+-- www.example.com
+--
+-- We reject a subdomain with equal neighbors, but this leads to
+-- only the single first label being parsed instead:
+--
+-- >>> pretty_print $ parse subdomain "" "www.www.example.com"
+-- www
+--
+-- But not one with a repeated but non-neighboring label:
+--
+-- >>> pretty_print $ parse subdomain "" "www.example.www.com"
+-- www.example.www.com
+--
+subdomain :: Parser Subdomain
+subdomain = try both <|> just_one
+ where
+ both :: Parser Subdomain
+ both = do
+ l <- label
+ _ <- char '.'
+ s <- subdomain
+ let result = SubdomainMultipleLabel l s
+ if (subdomain_has_equal_neighbors result)
+ then fail "subdomain cannot have equal neighboring labels"
+ else return result
+
+ just_one :: Parser Subdomain
+ just_one = fmap SubdomainSingleLabel label
+
+
+
+-- | Retrieve a list of labels contained in a 'Subdomain'.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+--
+-- >>> let (Right r) = parse subdomain "" "a"
+-- >>> pretty_print $ subdomain_labels r
+-- ["a"]
+--
+-- >>> let (Right r) = parse subdomain "" "example.com"
+-- >>> pretty_print $ subdomain_labels r
+-- ["example","com"]
+--
+-- >>> let (Right r) = parse subdomain "" "www.example.com"
+-- >>> pretty_print $ subdomain_labels r
+-- ["www","example","com"]
+--
+subdomain_labels :: Subdomain -> [Label]
+subdomain_labels (SubdomainSingleLabel l) = [l]
+subdomain_labels (SubdomainMultipleLabel l s) = l : (subdomain_labels s)
+
+
+-- | Return a list of pairs of neighboring labels in a subdomain.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+-- >>> let (Right r) = parse subdomain "" "www.example.com"
+-- >>> pretty_print $ subdomain_label_neighbors r
+-- ["(\"www\",\"example\")","(\"example\",\"com\")"]
+--
+subdomain_label_neighbors :: Subdomain -> [(Label,Label)]
+subdomain_label_neighbors s =
+ zip ls (tail ls)
+ where
+ ls = subdomain_labels s
+
+
+-- | Return @True@ if the subdomain has any two equal neighboring
+-- labels, and @False@ otherwise.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parse )
+--
+-- >>> let (Right r) = parse subdomain "" "www.example.com"
+-- >>> subdomain_has_equal_neighbors r
+-- False
+--
+-- >>> let (Right l) = parse label "" "www"
+-- >>> let (Right s) = parse subdomain "" "www.example.com"
+-- >>> let bad_subdomain = SubdomainMultipleLabel l s
+-- >>> subdomain_has_equal_neighbors bad_subdomain
+-- True
+--
+subdomain_has_equal_neighbors :: Subdomain -> Bool
+subdomain_has_equal_neighbors s =
+ or [ x == y | (x,y) <- subdomain_label_neighbors s ]
try )
import Text.Parsec.String ( Parser )
-import Network.DNS.RBL.Domain (
+import Network.DNS.RBL.Domain.Domain (
Domain(..),
domain )
import Network.DNS.RBL.Pretty ( Pretty(..) )