X-Git-Url: http://gitweb.michael.orlitzky.com/?p=dead%2Fharbl.git;a=blobdiff_plain;f=harbl%2Fsrc%2FNetwork%2FDNS%2FRBL%2FDomain%2FDomain.hs;fp=harbl%2Fsrc%2FNetwork%2FDNS%2FRBL%2FDomain%2FDomain.hs;h=1049e887739c15dcd8481193e3417f2cdf2dd0a5;hp=0000000000000000000000000000000000000000;hb=5c8702f0be60474482c587ba353e01ddf24f79cc;hpb=014dacb6ef0e93d0e67ebb154c397e999431469f diff --git a/harbl/src/Network/DNS/RBL/Domain/Domain.hs b/harbl/src/Network/DNS/RBL/Domain/Domain.hs new file mode 100644 index 0000000..1049e88 --- /dev/null +++ b/harbl/src/Network/DNS/RBL/Domain/Domain.hs @@ -0,0 +1,180 @@ +{-# LANGUAGE DoAndIfThenElse #-} + +-- | The 'Domain' data type and its parser. A 'Domain' represents a +-- name in the domain name system (DNS) as described by +-- RFC1035. In particular, we enforce the restrictions from Section +-- 2.3.1 \"Preferred name syntax\". See for example, +-- +-- +-- +-- We basically work with strings and characters everywhere, even +-- though this isn't really correct. The length specifications in +-- the RFCs are all in terms of octets, so really a ByteString.Char8 +-- would be more appropriate. With strings, for example, we could +-- have a unicode mumbo jumbo character that takes up two bytes +-- (octets). +-- +module Network.DNS.RBL.Domain.Domain ( + Domain(..), + domain ) +where + +import Text.Parsec ( + (<|>), + string, + try ) +import Text.Parsec.String ( Parser ) + +import Network.DNS.RBL.Domain.Subdomain ( Subdomain, subdomain ) +import Network.DNS.RBL.Pretty ( Pretty(..) ) +import Network.DNS.RBL.Reversible ( Reversible(..) ) + + + +-- | An RFC1035 domain. According to RFC1035 a domain can be either a +-- subdomain or \" \", which according to RFC2181 +-- means the root: +-- +-- The zero length full name is defined as representing the root +-- of the DNS tree, and is typically written and displayed as +-- \".\". +-- +-- We let the 'Domain' type remain true to those RFCs, even though +-- they don't support an absolute domain name of e.g. a single dot. +-- +-- ==== _Examples_ +-- +-- >>> DomainRoot +-- DomainRoot +-- +-- >>> import Text.Parsec ( parse ) +-- >>> let s = parse subdomain "" "x" +-- >>> DomainName s +-- DomainName (SubdomainSingleLabel (Label (LetDigLetter (Letter 'x')) Nothing)) +-- +data Domain = + DomainName Subdomain | + DomainRoot + deriving (Eq, Show) + + +-- | Pretty-print a 'Domain'. +-- +-- ==== _Examples_ +-- +-- >>> pretty_show $ DomainRoot +-- "" +-- +-- >>> import Text.Parsec ( parse ) +-- >>> let s = parse subdomain "" "x" +-- >>> pretty_print $ DomainName s +-- x +-- +instance Pretty Domain where + pretty_show DomainRoot = "" + pretty_show (DomainName s) = pretty_show s + + +-- | The maximum number of characters (octets, really) allowed in a +-- label. Quoting Section 3.1, \"Name space definitions\", of +-- RFC1035: +-- +-- To simplify implementations, the total length of a domain name +-- (i.e., label octets and label length octets) is restricted to 255 +-- octets or less. +-- +domain_max_length :: Int +domain_max_length = 255 + + +-- | Parse an RFC1035 \"domain\" +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parse, parseTest ) +-- +-- Make sure we can parse a single character: +-- +-- >>> pretty_print $ parse domain "" "a" +-- a +-- +-- And the empty domain: +-- +-- >>> parseTest domain "" +-- DomainRoot +-- +-- We will in fact parse the \"empty\" domain off the front of +-- pretty much anything: +-- +-- >>> parseTest domain "!8===D" +-- DomainRoot +-- +-- Equality of domains is case-insensitive: +-- +-- >>> let (Right r1) = parse domain "" "example.com" +-- >>> let (Right r2) = parse domain "" "ExaMPle.coM" +-- >>> r1 == r2 +-- True +-- +-- A single dot IS parsed as the root, but the dot isn't consumed: +-- +-- >>> parseTest domain "." +-- DomainRoot +-- +-- Anything over domain_max_length characters is an error, so the +-- root will be parsed: +-- +-- >>> let big_l1 = replicate 63 'x' +-- >>> let big_l2 = replicate 63 'y' -- Avoid equal neighboring labels! +-- >>> let big_labels = big_l1 ++ "." ++ big_l2 ++ "." +-- >>> let big_subdomain = concat $ replicate 3 big_labels +-- >>> parseTest domain big_subdomain +-- DomainRoot +-- +-- But exactly domain_max_length is allowed: +-- +-- >>> import Data.List ( intercalate ) +-- >>> let l1 = replicate 63 'w' +-- >>> let l2 = replicate 63 'x' +-- >>> let l3 = replicate 63 'y' +-- >>> let l4 = replicate 63 'z' +-- >>> let big_subdomain = intercalate "." [l1,l2,l3,l4] +-- >>> let (Right r) = parse domain "" big_subdomain +-- >>> length (pretty_show r) +-- 255 +-- +domain :: Parser Domain +domain = try parse_subdomain <|> parse_empty + where + parse_subdomain :: Parser Domain + parse_subdomain = do + s <- subdomain + if length (pretty_show s) <= domain_max_length + then return $ DomainName s + else fail $ "subdomains can be at most " ++ + (show domain_max_length) ++ + " characters" + + parse_empty :: Parser Domain + parse_empty = string "" >> return DomainRoot + + +instance Reversible Domain where + -- | Reverse the labels of a 'Domain'. + -- + -- -- ==== _Examples_ + -- + -- >>> import Text.Parsec ( parse ) + -- + -- The root reverses to itself: + -- + -- >>> let (Right r) = parse domain "" "" + -- >>> backwards r + -- DomainRoot + -- + -- >>> let (Right r) = parse domain "" "new.www.example.com" + -- >>> pretty_print $ backwards r + -- com.example.www.new + -- + backwards DomainRoot = DomainRoot + backwards (DomainName s) = DomainName $ backwards s