X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FNetwork%2FDNS%2FRBL%2FDomain.hs;fp=src%2FNetwork%2FDNS%2FRBL%2FDomain.hs;h=a405ff1bf6f20f2eec3165bb3a82b9bd68b37b93;hb=bc28c407970d9ff1bfeacc88363fd6d23c0af440;hp=0000000000000000000000000000000000000000;hpb=e093d003defb7948f17927091c3e73a250d53e6c;p=dead%2Fharbl.git diff --git a/src/Network/DNS/RBL/Domain.hs b/src/Network/DNS/RBL/Domain.hs new file mode 100644 index 0000000..a405ff1 --- /dev/null +++ b/src/Network/DNS/RBL/Domain.hs @@ -0,0 +1,789 @@ +{-# LANGUAGE DoAndIfThenElse #-} + +-- | The 'Domain' data type and its parser. A 'Domain' represents a +-- name in the domain name system (DNS) as described by +-- RFC1035. In particular, we enforce the restrictions from Section +-- 2.3.1 \"Preferred name syntax\". See for example, +-- +-- +-- +-- We basically work with strings and characters everywhere, even +-- though this isn't really correct. The length specifications in +-- the RFCs are all in terms of octets, so really a ByteString.Char8 +-- would be more appropriate. With strings, for example, we could +-- have a unicode mumbo jumbo character that takes up two bytes +-- (octets). +-- +module Network.DNS.RBL.Domain ( + UserDomain, + user_domain ) +where + +import Data.Char ( toLower ) +import Text.Parsec ( + (<|>), + char, + optionMaybe, + string, + try ) +import qualified Text.Parsec as Parsec ( digit, letter) +import Text.Parsec.String ( Parser ) + +import Network.DNS.RBL.Pretty ( Pretty(..) ) + +-- * Digits + +-- | A wrapper around a digit character. +-- +newtype Digit = Digit Char deriving (Eq, Show) +instance Pretty Digit where pretty_show (Digit d) = [d] + +-- | Parse a single digit, but wrap it in our 'Digit' type. +-- +digit :: Parser Digit +digit = fmap Digit Parsec.digit + + +-- * Letters + +-- | A wrapper around a letter character. +-- +newtype Letter = Letter Char deriving (Show) +instance Pretty Letter where pretty_show (Letter l) = [l] + + +-- | Parse a single letter, but wrap it in our 'Letter' type. +-- +letter :: Parser Letter +letter = fmap Letter Parsec.letter + +-- | The derived instance of 'Eq' for letters is incorrect. All +-- comparisons should be made case-insensitively. The following +-- is an excerpt from RFC1035: +-- +-- 2.3.3. Character Case +-- +-- For all parts of the DNS that are part of the official +-- protocol, all comparisons between character strings (e.g., +-- labels, domain names, etc.) are done in a case-insensitive +-- manner... +-- +-- Since each part of DNS name is composed of our custom types, it +-- suffices to munge the equality for 'Letter'. RFC4343 +-- clarifies the +-- case-insensitivity rules, but the fact that we're treating DNS +-- names as strings makes most of those problems go away (in +-- exchange for new ones). +-- +instance Eq Letter where + (Letter l1) == (Letter l2) = (toLower l1) == (toLower l2) + +-- * Letters/Digits + +-- | A sum type representing either a letter or a digit. +-- +data LetDig = + LetDigLetter Letter | + LetDigDigit Digit + deriving (Eq, Show) + +instance Pretty LetDig where + pretty_show (LetDigLetter l) = pretty_show l + pretty_show (LetDigDigit d) = pretty_show d + +-- | Parse a letter or a digit and wrap it in our 'LetDig' type. +-- +let_dig :: Parser LetDig +let_dig = (fmap LetDigLetter letter) <|> (fmap LetDigDigit digit) + + +-- * Hyphens + +-- | A wrapper around a single hyphen character. +-- +newtype Hyphen = Hyphen Char deriving (Eq, Show) +instance Pretty Hyphen where pretty_show (Hyphen h) = [h] + +-- | Parse a single hyphen and wrap it in our 'Hyphen' type. +-- +hyphen :: Parser Hyphen +hyphen = fmap Hyphen (char '-') + + +-- * Letter, Digit, or Hyphen. + +-- | A sum type representing a letter, digit, or hyphen. +-- +data LetDigHyp = + LetDigHypLetDig LetDig | + LetDigHypHyphen Hyphen + deriving (Eq, Show) + +instance Pretty LetDigHyp where + pretty_show (LetDigHypLetDig ld) = pretty_show ld + pretty_show (LetDigHypHyphen h) = pretty_show h + + +-- | The following is the simplest type in the domain grammar that +-- isn't already implemented for us. +-- +-- ::= | +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parseTest ) +-- +-- Letters, digits, and hyphens are all parsed: +-- +-- >>> parseTest let_dig_hyp "a" +-- LetDigHypLetDig (LetDigLetter (Letter 'a')) +-- +-- >>> parseTest let_dig_hyp "7" +-- LetDigHypLetDig (LetDigDigit (Digit '7')) +-- +-- >>> parseTest let_dig_hyp "-" +-- LetDigHypHyphen (Hyphen '-') +-- +-- However, an underscore (for example) is not: +-- +-- >>> parseTest let_dig_hyp "_" +-- parse error at (line 1, column 1): +-- unexpected "_" +-- expecting letter, digit or "-" +-- +let_dig_hyp :: Parser LetDigHyp +let_dig_hyp = + parse_letdig <|> parse_hyphen + where + parse_letdig :: Parser LetDigHyp + parse_letdig = fmap LetDigHypLetDig let_dig + + parse_hyphen :: Parser LetDigHyp + parse_hyphen = fmap LetDigHypHyphen hyphen + + +-- * Letter/Digit/Hyphen strings + +-- | A string of letters, digits, and hyphens from the RFC1035 grammar: +-- +-- ::= | +-- +-- These are represented as either a single instance of a +-- 'LetDigHyp', or a string of them (recursive). +-- +data LdhStr = + LdhStrSingleLdh LetDigHyp | + LdhStrMultipleLdh LetDigHyp LdhStr + deriving (Eq, Show) + +instance Pretty LdhStr where + pretty_show (LdhStrSingleLdh ldh) = pretty_show ldh + pretty_show (LdhStrMultipleLdh ldh s) = (pretty_show ldh) ++ (pretty_show s) + +-- | Parse a string of letters, digits, and hyphens (an 'LdhStr'). +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parseTest ) +-- +-- Single letters, digits, and hyphens are parsed: +-- +-- >>> parseTest ldh_str "a" +-- LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))) +-- +-- >>> parseTest ldh_str "0" +-- LdhStrSingleLdh (LetDigHypLetDig (LetDigDigit (Digit '0'))) +-- +-- >>> parseTest ldh_str "-" +-- LdhStrSingleLdh (LetDigHypHyphen (Hyphen '-')) +-- +-- As well as strings of them: +-- +-- >>> import Text.Parsec ( parse ) +-- >>> pretty_print $ parse ldh_str "" "a0-b" +-- a0-b +-- +ldh_str :: Parser LdhStr +ldh_str = try both <|> just_one + where + both :: Parser LdhStr + both = do + ldh1 <- let_dig_hyp + ldh_tail <- ldh_str + return $ LdhStrMultipleLdh ldh1 ldh_tail + + just_one :: Parser LdhStr + just_one = fmap LdhStrSingleLdh let_dig_hyp + + + +-- | A version of 'last' that works on a 'LdhStr' rather than a +-- list. That is, it returns the last 'LetDigHyp' in the +-- string. Since 'LdhStr' contains at least one character, there's +-- no \"nil\" case here. +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parse ) +-- +-- >>> let (Right r) = parse ldh_str "" "a" +-- >>> last_ldh_str r +-- LetDigHypLetDig (LetDigLetter (Letter 'a')) +-- +-- >>> let (Right r) = parse ldh_str "" "abc-def" +-- >>> last_ldh_str r +-- LetDigHypLetDig (LetDigLetter (Letter 'f')) +-- +last_ldh_str :: LdhStr -> LetDigHyp +last_ldh_str (LdhStrSingleLdh x) = x +last_ldh_str (LdhStrMultipleLdh _ x) = last_ldh_str x + + +-- | A version of 'init' that works on a 'LdhStr' rather than a +-- list. That is, it returns everything /except/ the last character in +-- the string. +-- +-- Since an 'LdhStr' must contain at least one character, this might +-- not be opssible (when the input is of length one). So, we return +-- a 'Maybe' value. +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parse ) +-- +-- >>> let (Right r) = parse ldh_str "" "a" +-- >>> init_ldh_str r +-- Nothing +-- +-- >>> let (Right r) = parse ldh_str "" "ab" +-- >>> init_ldh_str r +-- Just (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a')))) +-- +-- >>> let (Right r) = parse ldh_str "" "abc-def" +-- >>> init_ldh_str r +-- Just (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'b'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'c'))) (LdhStrMultipleLdh (LetDigHypHyphen (Hyphen '-')) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'd'))) (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'e'))))))))) +-- +init_ldh_str :: LdhStr -> Maybe LdhStr +init_ldh_str (LdhStrSingleLdh _) = Nothing +init_ldh_str (LdhStrMultipleLdh h t) = + Just $ case (init_ldh_str t) of + -- We just got the second-to-last character, we're done. + Nothing -> LdhStrSingleLdh h + + -- There's still more stuff. Recurse. + Just rest -> LdhStrMultipleLdh h rest + + +-- | Compute the length of an 'LdhStr'. It will be at least one, since +-- 'LdhStr's are non-empty. And if there's something other than the +-- first character present, we simply recurse. +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parse ) +-- +-- >>> let (Right r) = parse ldh_str "" "a" +-- >>> length_ldh_str r +-- 1 +-- +-- >>> let (Right r) = parse ldh_str "" "abc-def" +-- >>> length_ldh_str r +-- 7 +-- +length_ldh_str :: LdhStr -> Int +length_ldh_str (LdhStrSingleLdh _) = 1 +length_ldh_str (LdhStrMultipleLdh _ t) = 1 + (length_ldh_str t) + +-- * Letter/Digit/Hyphen string followed by a trailing Letter/Digit + +-- | This type isn't explicitly part of the grammar, but it's what +-- shows up in the square brackets of, +-- +--