X-Git-Url: http://gitweb.michael.orlitzky.com/?p=dead%2Fharbl.git;a=blobdiff_plain;f=src%2FDomain.hs;h=87a63d9757f8bb2c6cfda6bac49651101a3153ab;hp=ce20aae25c078f8ceec150a319cd39a4eb38b52d;hb=80b389fd4d76bc8b2cb5dfad0f066fd7a838bdfb;hpb=e1060ef815f35309c7ca0800a345d7c54ce346bd diff --git a/src/Domain.hs b/src/Domain.hs index ce20aae..87a63d9 100644 --- a/src/Domain.hs +++ b/src/Domain.hs @@ -1,3 +1,5 @@ +{-# LANGUAGE DoAndIfThenElse #-} + -- | The 'Domain' data type and its parser. A 'Domain' represents a -- name in the domain name system (DNS) as described by -- RFC1035. In particular, we enforce the restrictions from Section @@ -5,82 +7,122 @@ -- -- -- -module Domain +-- We basically work with strings and characters everywhere, even +-- though this isn't really correct. The length specifications in +-- the RFCs are all in terms of octets, so really a ByteString.Char8 +-- would be more appropriate. With strings, for example, we could +-- have a unicode mumbo jumbo character that takes up two bytes +-- (octets). +-- +module Domain ( + UserDomain, + user_domain ) where import Data.Char ( toLower ) import Text.Parsec ( - ParseError, (<|>), - alphaNum, char, - eof, - many1, - option, optionMaybe, - parse, string, - try, - unexpected ) + try ) import qualified Text.Parsec as Parsec ( digit, letter) import Text.Parsec.String ( Parser ) -newtype Domain = Domain String deriving Show +import Pretty ( Pretty(..) ) +-- * Digits --- | The derived instance of 'Eq' for domain names is incorrect. All --- comparisons are currently made case-insensitively. The following is --- an excerpt from RFC1035: --- --- 2.3.3. Character Case --- --- For all parts of the DNS that are part of the official --- protocol, all comparisons between character strings (e.g., --- labels, domain names, etc.) are done in a case-insensitive --- manner... --- --- So to compare two DNS names, we compare their lower-case --- counterparts. +-- | A wrapper around a digit character. -- -instance Eq Domain where - (Domain d1) == (Domain d2) = - (map toLower d1) == (map toLower d2) - - --- * Digits newtype Digit = Digit Char deriving (Eq, Show) +instance Pretty Digit where pretty_show (Digit d) = [d] +-- | Parse a single digit, but wrap it in our 'Digit' type. +-- digit :: Parser Digit digit = fmap Digit Parsec.digit -- * Letters -newtype Letter = Letter Char deriving (Eq, Show) +-- | A wrapper around a letter character. +-- +newtype Letter = Letter Char deriving (Show) +instance Pretty Letter where pretty_show (Letter l) = [l] + + +-- | Parse a single letter, but wrap it in our 'Letter' type. +-- letter :: Parser Letter letter = fmap Letter Parsec.letter +-- | The derived instance of 'Eq' for letters is incorrect. All +-- comparisons should be made case-insensitively. The following +-- is an excerpt from RFC1035: +-- +-- 2.3.3. Character Case +-- +-- For all parts of the DNS that are part of the official +-- protocol, all comparisons between character strings (e.g., +-- labels, domain names, etc.) are done in a case-insensitive +-- manner... +-- +-- Since each part of DNS name is composed of our custom types, it +-- suffices to munge the equality for 'Letter'. RFC4343 +-- clarifies the +-- case-insensitivity rules, but the fact that we're treating DNS +-- names as strings makes most of those problems go away (in +-- exchange for new ones). +-- +instance Eq Letter where + (Letter l1) == (Letter l2) = (toLower l1) == (toLower l2) -- * Letters/Digits -data LetDig = LetDigLetter Letter | LetDigDigit Digit deriving (Eq, Show) +-- | A sum type representing either a letter or a digit. +-- +data LetDig = + LetDigLetter Letter | + LetDigDigit Digit + deriving (Eq, Show) + +instance Pretty LetDig where + pretty_show (LetDigLetter l) = pretty_show l + pretty_show (LetDigDigit d) = pretty_show d + +-- | Parse a letter or a digit and wrap it in our 'LetDig' type. +-- let_dig :: Parser LetDig let_dig = (fmap LetDigLetter letter) <|> (fmap LetDigDigit digit) -- * Hyphens + +-- | A wrapper around a single hyphen character. +-- newtype Hyphen = Hyphen Char deriving (Eq, Show) +instance Pretty Hyphen where pretty_show (Hyphen h) = [h] +-- | Parse a single hyphen and wrap it in our 'Hyphen' type. +-- hyphen :: Parser Hyphen hyphen = fmap Hyphen (char '-') -- * Letter, Digit, or Hyphen. -data LetDigHyp = LetDigHypLetDig LetDig - | LetDigHypHyphen Hyphen +-- | A sum type representing a letter, digit, or hyphen. +-- +data LetDigHyp = + LetDigHypLetDig LetDig | + LetDigHypHyphen Hyphen deriving (Eq, Show) +instance Pretty LetDigHyp where + pretty_show (LetDigHypLetDig ld) = pretty_show ld + pretty_show (LetDigHypHyphen h) = pretty_show h + -- | The following is the simplest type in the domain grammar that -- isn't already implemented for us. @@ -129,6 +171,17 @@ let_dig_hyp = -- These are represented as either a single instance of a -- 'LetDigHyp', or a string of them (recursive). -- +data LdhStr = + LdhStrSingleLdh LetDigHyp | + LdhStrMultipleLdh LetDigHyp LdhStr + deriving (Eq, Show) + +instance Pretty LdhStr where + pretty_show (LdhStrSingleLdh ldh) = pretty_show ldh + pretty_show (LdhStrMultipleLdh ldh s) = (pretty_show ldh) ++ (pretty_show s) + +-- | Parse a string of letters, digits, and hyphens (an 'LdhStr'). +-- -- ==== _Examples_ -- -- >>> import Text.Parsec ( parseTest ) @@ -146,12 +199,10 @@ let_dig_hyp = -- -- As well as strings of them: -- --- LdhStr (LetDigHypLetter (Letter 'a')) (LdhStr (LetDigHypDigit (Digit '0')) (LdhStr (LetDigHypHyphen (Hyphen '-')) (LdhStrSingleLdh (LetDigHypLetter (Letter 'b'))))) +-- >>> import Text.Parsec ( parse ) +-- >>> pretty_print $ parse ldh_str "" "a0-b" +-- a0-b -- -data LdhStr = - LdhStrSingleLdh LetDigHyp | LdhStrMultipleLdh LetDigHyp LdhStr - deriving (Eq, Show) - ldh_str :: Parser LdhStr ldh_str = try both <|> just_one where @@ -173,6 +224,8 @@ ldh_str = try both <|> just_one -- -- ==== _Examples_ -- +-- >>> import Text.Parsec ( parse ) +-- -- >>> let (Right r) = parse ldh_str "" "a" -- >>> last_ldh_str r -- LetDigHypLetDig (LetDigLetter (Letter 'a')) @@ -196,6 +249,8 @@ last_ldh_str (LdhStrMultipleLdh _ x) = last_ldh_str x -- -- ==== _Examples_ -- +-- >>> import Text.Parsec ( parse ) +-- -- >>> let (Right r) = parse ldh_str "" "a" -- >>> init_ldh_str r -- Nothing @@ -225,6 +280,8 @@ init_ldh_str (LdhStrMultipleLdh h t) = -- -- ==== _Examples_ -- +-- >>> import Text.Parsec ( parse ) +-- -- >>> let (Right r) = parse ldh_str "" "a" -- >>> length_ldh_str r -- 1 @@ -252,13 +309,17 @@ length_ldh_str (LdhStrMultipleLdh _ t) = 1 + (length_ldh_str t) data LdhStrLetDig = LdhStrLetDig (Maybe LdhStr) LetDig deriving (Eq, Show) +instance Pretty LdhStrLetDig where + pretty_show (LdhStrLetDig Nothing ld) = pretty_show ld + pretty_show (LdhStrLetDig (Just s) ld) = (pretty_show s) ++ (pretty_show ld) + -- | Parse an 'LdhStrLetDig'. This isn't in the grammar, but we might -- as well define the parser for it independently since we gave it -- its own data type. -- -- ==== _Examples_ -- --- >>> import Text.Parsec ( parseTest ) +-- >>> import Text.Parsec ( parse, parseTest ) -- -- Make sure we can parse a single character: -- @@ -267,11 +328,11 @@ data LdhStrLetDig = LdhStrLetDig (Maybe LdhStr) LetDig -- -- And longer strings: -- --- >>> parseTest ldh_str_let_dig "ab" --- LdhStrLetDig (Just (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))))) (LetDigLetter (Letter 'b')) +-- >>> pretty_print $ parse ldh_str_let_dig "" "ab" +-- ab -- --- >>> parseTest ldh_str_let_dig "-b" --- LdhStrLetDig (Just (LdhStrSingleLdh (LetDigHypHyphen (Hyphen '-')))) (LetDigLetter (Letter 'b')) +-- >>> pretty_print $ parse ldh_str_let_dig "" "-b" +-- -b -- -- >>> parseTest ldh_str_let_dig "b-" -- parse error at (line 1, column 3): @@ -309,6 +370,8 @@ ldh_str_let_dig = do -- -- ==== _Examples_ -- +-- >>> import Text.Parsec ( parse ) +-- -- >>> let (Right r) = parse ldh_str_let_dig "" "a" -- >>> length_ldh_str_let_dig r -- 1 @@ -329,9 +392,20 @@ length_ldh_str_let_dig (LdhStrLetDig (Just ldhstring) _) = -- --