-- | The 'Domain' data type and its parser. A 'Domain' represents a -- name in the domain name system (DNS) as described by -- RFC1035. In particular, we enforce the restrictions from Section -- 2.3.1 \"Preferred name syntax\". See for example, -- -- -- module Domain where import Data.Char ( toLower ) import Text.Parsec ( ParseError, (<|>), alphaNum, char, eof, many1, option, optionMaybe, parse, string, try, unexpected ) import qualified Text.Parsec as Parsec ( digit, letter) import Text.Parsec.String ( Parser ) import Pretty -- * Digits -- | A wrapper around a digit character. -- newtype Digit = Digit Char deriving (Eq, Show) instance Pretty Digit where pretty_show (Digit d) = [d] -- | Parse a single digit, but wrap it in our 'Digit' type. -- digit :: Parser Digit digit = fmap Digit Parsec.digit -- * Letters -- | A wrapper around a letter character. -- newtype Letter = Letter Char deriving (Show) instance Pretty Letter where pretty_show (Letter l) = [l] -- | Parse a single letter, but wrap it in our 'Letter' type. -- letter :: Parser Letter letter = fmap Letter Parsec.letter -- | The derived instance of 'Eq' for letters is incorrect. All -- comparisons should be made case-insensitively. The following -- is an excerpt from RFC1035: -- -- 2.3.3. Character Case -- -- For all parts of the DNS that are part of the official -- protocol, all comparisons between character strings (e.g., -- labels, domain names, etc.) are done in a case-insensitive -- manner... -- -- Since each part of DNS name is composed of our custom types, it -- suffices to munge the equality for 'Letter'. -- instance Eq Letter where (Letter l1) == (Letter l2) = (toLower l1) == (toLower l2) -- * Letters/Digits -- | A sum type representing either a letter or a digit. -- data LetDig = LetDigLetter Letter | LetDigDigit Digit deriving (Eq, Show) instance Pretty LetDig where pretty_show (LetDigLetter l) = pretty_show l pretty_show (LetDigDigit d) = pretty_show d -- | Parse a letter or a digit and wrap it in our 'LetDig' type. -- let_dig :: Parser LetDig let_dig = (fmap LetDigLetter letter) <|> (fmap LetDigDigit digit) -- * Hyphens -- | A wrapper around a single hyphen character. -- newtype Hyphen = Hyphen Char deriving (Eq, Show) instance Pretty Hyphen where pretty_show (Hyphen h) = [h] -- | Parse a single hyphen and wrap it in our 'Hyphen' type. -- hyphen :: Parser Hyphen hyphen = fmap Hyphen (char '-') -- * Letter, Digit, or Hyphen. -- | A sum type representing a letter, digit, or hyphen. -- data LetDigHyp = LetDigHypLetDig LetDig | LetDigHypHyphen Hyphen deriving (Eq, Show) instance Pretty LetDigHyp where pretty_show (LetDigHypLetDig ld) = pretty_show ld pretty_show (LetDigHypHyphen h) = pretty_show h -- | The following is the simplest type in the domain grammar that -- isn't already implemented for us. -- -- ::= | -- -- ==== _Examples_ -- -- >>> import Text.Parsec ( parseTest ) -- -- Letters, digits, and hyphens are all parsed: -- -- >>> parseTest let_dig_hyp "a" -- LetDigHypLetDig (LetDigLetter (Letter 'a')) -- -- >>> parseTest let_dig_hyp "7" -- LetDigHypLetDig (LetDigDigit (Digit '7')) -- -- >>> parseTest let_dig_hyp "-" -- LetDigHypHyphen (Hyphen '-') -- -- However, an underscore (for example) is not: -- -- >>> parseTest let_dig_hyp "_" -- parse error at (line 1, column 1): -- unexpected "_" -- expecting letter, digit or "-" -- let_dig_hyp :: Parser LetDigHyp let_dig_hyp = parse_letdig <|> parse_hyphen where parse_letdig :: Parser LetDigHyp parse_letdig = fmap LetDigHypLetDig let_dig parse_hyphen :: Parser LetDigHyp parse_hyphen = fmap LetDigHypHyphen hyphen -- * Letter/Digit/Hyphen strings -- | A string of letters, digits, and hyphens from the RFC1035 grammar: -- -- ::= | -- -- These are represented as either a single instance of a -- 'LetDigHyp', or a string of them (recursive). -- data LdhStr = LdhStrSingleLdh LetDigHyp | LdhStrMultipleLdh LetDigHyp LdhStr deriving (Eq, Show) instance Pretty LdhStr where pretty_show (LdhStrSingleLdh ldh) = pretty_show ldh pretty_show (LdhStrMultipleLdh ldh s) = (pretty_show ldh) ++ (pretty_show s) -- | Parse a string of letters, digits, and hyphens (an 'LdhStr'). -- -- ==== _Examples_ -- -- >>> import Text.Parsec ( parseTest ) -- -- Single letters, digits, and hyphens are parsed: -- -- >>> parseTest ldh_str "a" -- LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))) -- -- >>> parseTest ldh_str "0" -- LdhStrSingleLdh (LetDigHypLetDig (LetDigDigit (Digit '0'))) -- -- >>> parseTest ldh_str "-" -- LdhStrSingleLdh (LetDigHypHyphen (Hyphen '-')) -- -- As well as strings of them: -- -- >>> pretty_print $ parse ldh_str "" "a0-b" -- a0-b -- ldh_str :: Parser LdhStr ldh_str = try both <|> just_one where both :: Parser LdhStr both = do ldh1 <- let_dig_hyp ldh_tail <- ldh_str return $ LdhStrMultipleLdh ldh1 ldh_tail just_one :: Parser LdhStr just_one = fmap LdhStrSingleLdh let_dig_hyp -- | A version of 'last' that works on a 'LdhStr' rather than a -- list. That is, it returns the last 'LetDigHyp' in the -- string. Since 'LdhStr' contains at least one character, there's -- no \"nil\" case here. -- -- ==== _Examples_ -- -- >>> let (Right r) = parse ldh_str "" "a" -- >>> last_ldh_str r -- LetDigHypLetDig (LetDigLetter (Letter 'a')) -- -- >>> let (Right r) = parse ldh_str "" "abc-def" -- >>> last_ldh_str r -- LetDigHypLetDig (LetDigLetter (Letter 'f')) -- last_ldh_str :: LdhStr -> LetDigHyp last_ldh_str (LdhStrSingleLdh x) = x last_ldh_str (LdhStrMultipleLdh _ x) = last_ldh_str x -- | A version of 'init' that works on a 'LdhStr' rather than a -- list. That is, it returns everything /except/ the last character in -- the string. -- -- Since an 'LdhStr' must contain at least one character, this might -- not be opssible (when the input is of length one). So, we return -- a 'Maybe' value. -- -- ==== _Examples_ -- -- >>> let (Right r) = parse ldh_str "" "a" -- >>> init_ldh_str r -- Nothing -- -- >>> let (Right r) = parse ldh_str "" "ab" -- >>> init_ldh_str r -- Just (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a')))) -- -- >>> let (Right r) = parse ldh_str "" "abc-def" -- >>> init_ldh_str r -- Just (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'b'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'c'))) (LdhStrMultipleLdh (LetDigHypHyphen (Hyphen '-')) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'd'))) (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'e'))))))))) -- init_ldh_str :: LdhStr -> Maybe LdhStr init_ldh_str (LdhStrSingleLdh _) = Nothing init_ldh_str (LdhStrMultipleLdh h t) = Just $ case (init_ldh_str t) of -- We just got the second-to-last character, we're done. Nothing -> LdhStrSingleLdh h -- There's still more stuff. Recurse. Just rest -> LdhStrMultipleLdh h rest -- | Compute the length of an 'LdhStr'. It will be at least one, since -- 'LdhStr's are non-empty. And if there's something other than the -- first character present, we simply recurse. -- -- ==== _Examples_ -- -- >>> let (Right r) = parse ldh_str "" "a" -- >>> length_ldh_str r -- 1 -- -- >>> let (Right r) = parse ldh_str "" "abc-def" -- >>> length_ldh_str r -- 7 -- length_ldh_str :: LdhStr -> Int length_ldh_str (LdhStrSingleLdh _) = 1 length_ldh_str (LdhStrMultipleLdh _ t) = 1 + (length_ldh_str t) -- * Letter/Digit/Hyphen string followed by a trailing Letter/Digit -- | This type isn't explicitly part of the grammar, but it's what -- shows up in the square brackets of, -- --