import qualified Text.Parsec as Parsec ( digit, letter)
import Text.Parsec.String ( Parser )
-newtype Domain = Domain String deriving Show
+import Pretty
+-- * Digits
--- | The derived instance of 'Eq' for domain names is incorrect. All
--- comparisons are currently made case-insensitively. The following is
--- an excerpt from RFC1035:
---
--- 2.3.3. Character Case
---
--- For all parts of the DNS that are part of the official
--- protocol, all comparisons between character strings (e.g.,
--- labels, domain names, etc.) are done in a case-insensitive
--- manner...
+-- | A wrapper around a digit character.
--
--- So to compare two DNS names, we compare their lower-case
--- counterparts.
---
-instance Eq Domain where
- (Domain d1) == (Domain d2) =
- (map toLower d1) == (map toLower d2)
-
-
--- * Digits
newtype Digit = Digit Char deriving (Eq, Show)
+instance Pretty Digit where pretty_show (Digit d) = [d]
+-- | Parse a single digit, but wrap it in our 'Digit' type.
+--
digit :: Parser Digit
digit = fmap Digit Parsec.digit
-- * Letters
-newtype Letter = Letter Char deriving (Eq, Show)
+-- | A wrapper around a letter character.
+--
+newtype Letter = Letter Char deriving (Show)
+instance Pretty Letter where pretty_show (Letter l) = [l]
+
+
+-- | Parse a single letter, but wrap it in our 'Letter' type.
+--
letter :: Parser Letter
letter = fmap Letter Parsec.letter
+-- | The derived instance of 'Eq' for letters is incorrect. All
+-- comparisons should be made case-insensitively. The following
+-- is an excerpt from RFC1035:
+--
+-- 2.3.3. Character Case
+--
+-- For all parts of the DNS that are part of the official
+-- protocol, all comparisons between character strings (e.g.,
+-- labels, domain names, etc.) are done in a case-insensitive
+-- manner...
+--
+-- Since each part of DNS name is composed of our custom types, it
+-- suffices to munge the equality for 'Letter'.
+--
+instance Eq Letter where
+ (Letter l1) == (Letter l2) = (toLower l1) == (toLower l2)
-- * Letters/Digits
-data LetDig = LetDigLetter Letter | LetDigDigit Digit deriving (Eq, Show)
+-- | A sum type representing either a letter or a digit.
+--
+data LetDig =
+ LetDigLetter Letter |
+ LetDigDigit Digit
+ deriving (Eq, Show)
+
+instance Pretty LetDig where
+ pretty_show (LetDigLetter l) = pretty_show l
+ pretty_show (LetDigDigit d) = pretty_show d
+
+-- | Parse a letter or a digit and wrap it in our 'LetDig' type.
+--
let_dig :: Parser LetDig
let_dig = (fmap LetDigLetter letter) <|> (fmap LetDigDigit digit)
-- * Hyphens
+
+-- | A wrapper around a single hyphen character.
+--
newtype Hyphen = Hyphen Char deriving (Eq, Show)
+instance Pretty Hyphen where pretty_show (Hyphen h) = [h]
+-- | Parse a single hyphen and wrap it in our 'Hyphen' type.
+--
hyphen :: Parser Hyphen
hyphen = fmap Hyphen (char '-')
-- * Letter, Digit, or Hyphen.
-data LetDigHyp = LetDigHypLetDig LetDig
- | LetDigHypHyphen Hyphen
+-- | A sum type representing a letter, digit, or hyphen.
+--
+data LetDigHyp =
+ LetDigHypLetDig LetDig |
+ LetDigHypHyphen Hyphen
deriving (Eq, Show)
+instance Pretty LetDigHyp where
+ pretty_show (LetDigHypLetDig ld) = pretty_show ld
+ pretty_show (LetDigHypHyphen h) = pretty_show h
+
-- | The following is the simplest type in the domain grammar that
-- isn't already implemented for us.
-- These are represented as either a single instance of a
-- 'LetDigHyp', or a string of them (recursive).
--
+data LdhStr =
+ LdhStrSingleLdh LetDigHyp |
+ LdhStrMultipleLdh LetDigHyp LdhStr
+ deriving (Eq, Show)
+
+instance Pretty LdhStr where
+ pretty_show (LdhStrSingleLdh ldh) = pretty_show ldh
+ pretty_show (LdhStrMultipleLdh ldh s) = (pretty_show ldh) ++ (pretty_show s)
+
+-- | Parse a string of letters, digits, and hyphens (an 'LdhStr').
+--
-- ==== _Examples_
--
-- >>> import Text.Parsec ( parseTest )
--
-- As well as strings of them:
--
--- LdhStr (LetDigHypLetter (Letter 'a')) (LdhStr (LetDigHypDigit (Digit '0')) (LdhStr (LetDigHypHyphen (Hyphen '-')) (LdhStrSingleLdh (LetDigHypLetter (Letter 'b')))))
+-- >>> pretty_print $ parse ldh_str "" "a0-b"
+-- a0-b
--
-data LdhStr =
- LdhStrSingleLdh LetDigHyp | LdhStrMultipleLdh LetDigHyp LdhStr
- deriving (Eq, Show)
-
ldh_str :: Parser LdhStr
ldh_str = try both <|> just_one
where
data LdhStrLetDig = LdhStrLetDig (Maybe LdhStr) LetDig
deriving (Eq, Show)
+instance Pretty LdhStrLetDig where
+ pretty_show (LdhStrLetDig Nothing ld) = pretty_show ld
+ pretty_show (LdhStrLetDig (Just s) ld) = (pretty_show s) ++ (pretty_show ld)
+
-- | Parse an 'LdhStrLetDig'. This isn't in the grammar, but we might
-- as well define the parser for it independently since we gave it
-- its own data type.
--
-- And longer strings:
--
--- >>> parseTest ldh_str_let_dig "ab"
--- LdhStrLetDig (Just (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))))) (LetDigLetter (Letter 'b'))
+-- >>> pretty_print $ parse ldh_str_let_dig "" "ab"
+-- ab
--
--- >>> parseTest ldh_str_let_dig "-b"
--- LdhStrLetDig (Just (LdhStrSingleLdh (LetDigHypHyphen (Hyphen '-')))) (LetDigLetter (Letter 'b'))
+-- >>> pretty_print $ parse ldh_str_let_dig "" "-b"
+-- -b
--
-- >>> parseTest ldh_str_let_dig "b-"
-- parse error at (line 1, column 3):
data Label = Label Letter (Maybe LdhStrLetDig)
deriving (Eq, Show)
+instance Pretty Label where
+ pretty_show (Label l Nothing) = pretty_show l
+ pretty_show (Label l (Just s)) = (pretty_show l) ++ (pretty_show s)
-- | Parse a 'Label'.
--
--
-- And longer strings:
--
--- >>> parseTest label "abc-def"
--- Label (Letter 'a') (Just (LdhStrLetDig (Just (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'b'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'c'))) (LdhStrMultipleLdh (LetDigHypHyphen (Hyphen '-')) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'd'))) (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'e'))))))))) (LetDigLetter (Letter 'f'))))
+-- >>> pretty_print $ parse label "" "abc-def"
+-- abc-def
--
-- But not anything ending in a hyphen:
--
--
-- However, /exactly/ 63 characters is acceptable:
--
--- TODO
+-- >>> pretty_print $ parse label "" (replicate 63 'x')
+-- xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
--
label :: Parser Label
label = do
else fail "labels must be 63 or fewer characters"
+
+-- * Subdomains
+
+
+-- | The data type representing a \"subdomain\" from RFC1035:
+--
+-- <subdomain> ::= <label> | <subdomain> "." <label>
+--
+-- We have reversed the order of the subdomain and label in the
+-- second option, however. This is explained in 'subdomain'.
+--
+data Subdomain =
+ SubdomainSingleLabel Label |
+ SubdomainMultipleLabel Label Subdomain
+ deriving (Eq, Show)
+
+instance Pretty Subdomain where
+ pretty_show (SubdomainSingleLabel l) = pretty_show l
+ pretty_show (SubdomainMultipleLabel l s) =
+ (pretty_show l) ++ "." ++ (pretty_show s)
+
+-- | Parse an RFC1035 \"subdomain\". The given grammar is,
+--
+-- <subdomain> ::= <label> | <subdomain> "." <label>
+--
+-- However, we have reversed the order of the subdomain and label to
+-- prevent infinite recursion. The second option (subdomain + label)
+-- is obviously more specific, we we need to try it first. This
+-- presents a problem: we're trying to parse a subdomain in terms of
+-- a subdomain! The given grammar represents subdomains how we like
+-- to think of them; from right to left. But it's better to parse
+-- from left to right, so we pick off the leading label and then
+-- recurse into the definition of subdomain.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parseTest )
+--
+-- Make sure we can parse a single character:
+--
+-- >>> parseTest subdomain "a"
+-- SubdomainSingleLabel (Label (Letter 'a') Nothing)
+--
+-- >>> pretty_print $ parse subdomain "" "example.com"
+-- example.com
+--
+-- >>> pretty_print $ parse subdomain "" "www.example.com"
+-- www.example.com
+--
+subdomain :: Parser Subdomain
+subdomain = try both <|> just_one
+ where
+ both :: Parser Subdomain
+ both = do
+ l <- label
+ char '.'
+ s <- subdomain
+ return (SubdomainMultipleLabel l s)
+
+ just_one :: Parser Subdomain
+ just_one = fmap SubdomainSingleLabel label
+
+
+
+-- * Domains
+
+-- | An RFC1035 domain. According to RFC1035 a domain can be either a
+-- subdomain or \" \", which according to RFC2181
+-- <https://tools.ietf.org/html/rfc2181#section-11> means the root:
+--
+-- The zero length full name is defined as representing the root
+-- of the DNS tree, and is typically written and displayed as
+-- \".\".
+--
+-- We let the 'Domain' type remain true to those RFCs, even though
+-- they don't support an absolute domain name of e.g. a single dot.
+-- We have one more data type 'UserDomain' which handles the possibility
+-- of an absolute path.
+--
+data Domain =
+ DomainName Subdomain |
+ DomainRoot
+ deriving (Eq, Show)
+
+instance Pretty Domain where
+ pretty_show DomainRoot = ""
+ pretty_show (DomainName s) = pretty_show s
+
+-- | Parse an RFC1035 \"domain\"
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parseTest )
+--
+-- Make sure we can parse a single character:
+--
+-- >>> parseTest domain "a"
+-- DomainName (SubdomainSingleLabel (Label (Letter 'a') Nothing))
+--
+-- And the empty domain:
+--
+-- >>> parseTest domain ""
+-- DomainRoot
+--
+-- We will in fact parse the \"empty\" domain off the front of
+-- pretty much anything:
+--
+-- >>> parseTest domain "8===D"
+-- DomainRoot
+--
+-- Equality of domains is case-insensitive:
+--
+-- >>> let (Right r1) = parse domain "" "example.com"
+-- >>> let (Right r2) = parse domain "" "ExaMPle.coM"
+-- >>> r1 == r2
+-- True
+--
+-- A single dot IS parsed as the root, but the dot isn't consumed:
+--
+-- >>> parseTest domain "."
+-- DomainRoot
+--
domain :: Parser Domain
-domain = undefined
+domain = try parse_subdomain <|> parse_empty
+ where
+ parse_subdomain = fmap DomainName subdomain
+ parse_empty = string "" >> return DomainRoot
+
+
+
+-- * User domains
+
+-- | This type helps clarify some murkiness in the DNS \"domain\" name
+-- specification. In RFC1034, it is acknowledged that a domain name
+-- input with a trailing \".\" will represent an absolute domain
+-- name (i.e. with respect to the DNS root). However, the grammar in
+-- RFC1035 disallows a trailing dot.
+--
+-- This makes some sense: within the DNS, everything knows its
+-- position in the tree. The relative/absolute distinction only
+-- makes sense on the client side, where a user's resolver might
+-- decide to append some suffix to a relative
+-- request. Unfortunately, that's where we live. So we have to deal
+-- with the possibility of having a trailing dot at the end of any
+-- domain name.
+--
+data UserDomain =
+ UserDomainRelative Domain |
+ UserDomainAbsolute Domain
+ deriving (Eq, Show)
+
+instance Pretty UserDomain where
+ pretty_show (UserDomainRelative d) = pretty_show d
+ pretty_show (UserDomainAbsolute d) = (pretty_show d) ++ "."
+
---subdomain :: Parser Subdomain
---subdomain = undefined
+-- | Parse a 'UserDomain'. This is what we'll be using to read user
+-- input, since it supports both relative and absolute domain names
+-- (unlike the implicitly-absolute 'Domain').
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parseTest )
+--
+-- We can really parse the root now!
+--
+-- >>> parseTest user_domain "."
+-- UserDomainAbsolute DomainRoot
+--
+-- But multiple dots aren't (only the first):
+--
+-- >>> pretty_print $ parse user_domain "" ".."
+-- .
+--
+-- We can also optionally have a trailing dot at the end of a
+-- non-empty name:
+--
+-- >>> pretty_print $ parse user_domain "" "www.example.com"
+-- www.example.com
+--
+-- >>> pretty_print $ parse user_domain "" "www.example.com."
+-- www.example.com.
+--
+-- A \"relative root\" can also be parsed, letting the user's
+-- resolver deal with it:
+--
+-- >>> parseTest user_domain ""
+-- UserDomainRelative DomainRoot
+--
+user_domain :: Parser UserDomain
+user_domain = try absolute <|> relative
+ where
+ absolute :: Parser UserDomain
+ absolute = do
+ d <- domain
+ r <- char '.'
+ return $ UserDomainAbsolute d
+
+ relative :: Parser UserDomain
+ relative = fmap UserDomainRelative domain