From: Michael Orlitzky Date: Sat, 11 Jul 2015 23:21:02 +0000 (-0400) Subject: Begin moving the name parsers to the Network.DNS.RBL.Domain namespace. X-Git-Url: https://gitweb.michael.orlitzky.com/?a=commitdiff_plain;h=4dd314687c806419fac1fc88c96df6541e1dff4b;p=dead%2Fharbl.git Begin moving the name parsers to the Network.DNS.RBL.Domain namespace. --- diff --git a/harbl.cabal b/harbl.cabal index 8a5191b..1753ed3 100644 --- a/harbl.cabal +++ b/harbl.cabal @@ -29,6 +29,12 @@ library other-modules: Network.DNS.RBL.Domain + Network.DNS.RBL.Domain.Digit + Network.DNS.RBL.Domain.Hyphen + Network.DNS.RBL.Domain.LdhStr + Network.DNS.RBL.Domain.LetDig + Network.DNS.RBL.Domain.LetDigHyp + Network.DNS.RBL.Domain.Letter Network.DNS.RBL.Host Network.DNS.RBL.IPv4Pattern Network.DNS.RBL.Pretty diff --git a/harbl/src/Network/DNS/RBL/Domain.hs b/harbl/src/Network/DNS/RBL/Domain.hs index 4dd5d1d..7adc1a9 100644 --- a/harbl/src/Network/DNS/RBL/Domain.hs +++ b/harbl/src/Network/DNS/RBL/Domain.hs @@ -19,281 +19,23 @@ module Network.DNS.RBL.Domain ( domain ) where -import Data.Char ( toLower ) import Text.Parsec ( (<|>), char, optionMaybe, string, try ) -import qualified Text.Parsec as Parsec ( digit, letter) import Text.Parsec.String ( Parser ) +import Network.DNS.RBL.Domain.LdhStr ( + LdhStr(..), + ldh_str, + ldh_str_length ) +import Network.DNS.RBL.Domain.LetDig ( LetDig, let_dig ) +import Network.DNS.RBL.Domain.LetDigHyp ( LetDigHyp(..) ) import Network.DNS.RBL.Pretty ( Pretty(..) ) import Network.DNS.RBL.Reversible ( Reversible(..) ) --- * Digits - --- | A wrapper around a digit character. --- -newtype Digit = Digit Char deriving (Eq, Show) -instance Pretty Digit where pretty_show (Digit d) = [d] - --- | Parse a single digit, but wrap it in our 'Digit' type. --- -digit :: Parser Digit -digit = fmap Digit Parsec.digit - - --- * Letters - --- | A wrapper around a letter character. --- -newtype Letter = Letter Char deriving (Show) -instance Pretty Letter where pretty_show (Letter l) = [l] - - --- | Parse a single letter, but wrap it in our 'Letter' type. --- -letter :: Parser Letter -letter = fmap Letter Parsec.letter - --- | The derived instance of 'Eq' for letters is incorrect. All --- comparisons should be made case-insensitively. The following --- is an excerpt from RFC1035: --- --- 2.3.3. Character Case --- --- For all parts of the DNS that are part of the official --- protocol, all comparisons between character strings (e.g., --- labels, domain names, etc.) are done in a case-insensitive --- manner... --- --- Since each part of DNS name is composed of our custom types, it --- suffices to munge the equality for 'Letter'. RFC4343 --- clarifies the --- case-insensitivity rules, but the fact that we're treating DNS --- names as strings makes most of those problems go away (in --- exchange for new ones). --- -instance Eq Letter where - (Letter l1) == (Letter l2) = (toLower l1) == (toLower l2) - --- * Letters/Digits - --- | A sum type representing either a letter or a digit. --- -data LetDig = - LetDigLetter Letter | - LetDigDigit Digit - deriving (Eq, Show) - -instance Pretty LetDig where - pretty_show (LetDigLetter l) = pretty_show l - pretty_show (LetDigDigit d) = pretty_show d - --- | Parse a letter or a digit and wrap it in our 'LetDig' type. --- -let_dig :: Parser LetDig -let_dig = (fmap LetDigLetter letter) <|> (fmap LetDigDigit digit) - - --- * Hyphens - --- | A wrapper around a single hyphen character. --- -newtype Hyphen = Hyphen Char deriving (Eq, Show) -instance Pretty Hyphen where pretty_show (Hyphen h) = [h] - --- | Parse a single hyphen and wrap it in our 'Hyphen' type. --- -hyphen :: Parser Hyphen -hyphen = fmap Hyphen (char '-') - - --- * Letter, Digit, or Hyphen. - --- | A sum type representing a letter, digit, or hyphen. --- -data LetDigHyp = - LetDigHypLetDig LetDig | - LetDigHypHyphen Hyphen - deriving (Eq, Show) - -instance Pretty LetDigHyp where - pretty_show (LetDigHypLetDig ld) = pretty_show ld - pretty_show (LetDigHypHyphen h) = pretty_show h - - --- | The following is the simplest type in the domain grammar that --- isn't already implemented for us. --- --- ::= | --- --- ==== _Examples_ --- --- >>> import Text.Parsec ( parseTest ) --- --- Letters, digits, and hyphens are all parsed: --- --- >>> parseTest let_dig_hyp "a" --- LetDigHypLetDig (LetDigLetter (Letter 'a')) --- --- >>> parseTest let_dig_hyp "7" --- LetDigHypLetDig (LetDigDigit (Digit '7')) --- --- >>> parseTest let_dig_hyp "-" --- LetDigHypHyphen (Hyphen '-') --- --- However, an underscore (for example) is not: --- --- >>> parseTest let_dig_hyp "_" --- parse error at (line 1, column 1): --- unexpected "_" --- expecting letter, digit or "-" --- -let_dig_hyp :: Parser LetDigHyp -let_dig_hyp = - parse_letdig <|> parse_hyphen - where - parse_letdig :: Parser LetDigHyp - parse_letdig = fmap LetDigHypLetDig let_dig - - parse_hyphen :: Parser LetDigHyp - parse_hyphen = fmap LetDigHypHyphen hyphen - - --- * Letter/Digit/Hyphen strings - --- | A string of letters, digits, and hyphens from the RFC1035 grammar: --- --- ::= | --- --- These are represented as either a single instance of a --- 'LetDigHyp', or a string of them (recursive). --- -data LdhStr = - LdhStrSingleLdh LetDigHyp | - LdhStrMultipleLdh LetDigHyp LdhStr - deriving (Eq, Show) - -instance Pretty LdhStr where - pretty_show (LdhStrSingleLdh ldh) = pretty_show ldh - pretty_show (LdhStrMultipleLdh ldh s) = (pretty_show ldh) ++ (pretty_show s) - --- | Parse a string of letters, digits, and hyphens (an 'LdhStr'). --- --- ==== _Examples_ --- --- >>> import Text.Parsec ( parseTest ) --- --- Single letters, digits, and hyphens are parsed: --- --- >>> parseTest ldh_str "a" --- LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))) --- --- >>> parseTest ldh_str "0" --- LdhStrSingleLdh (LetDigHypLetDig (LetDigDigit (Digit '0'))) --- --- >>> parseTest ldh_str "-" --- LdhStrSingleLdh (LetDigHypHyphen (Hyphen '-')) --- --- As well as strings of them: --- --- >>> import Text.Parsec ( parse ) --- >>> pretty_print $ parse ldh_str "" "a0-b" --- a0-b --- -ldh_str :: Parser LdhStr -ldh_str = try both <|> just_one - where - both :: Parser LdhStr - both = do - ldh1 <- let_dig_hyp - ldh_tail <- ldh_str - return $ LdhStrMultipleLdh ldh1 ldh_tail - - just_one :: Parser LdhStr - just_one = fmap LdhStrSingleLdh let_dig_hyp - - - --- | A version of 'last' that works on a 'LdhStr' rather than a --- list. That is, it returns the last 'LetDigHyp' in the --- string. Since 'LdhStr' contains at least one character, there's --- no \"nil\" case here. --- --- ==== _Examples_ --- --- >>> import Text.Parsec ( parse ) --- --- >>> let (Right r) = parse ldh_str "" "a" --- >>> last_ldh_str r --- LetDigHypLetDig (LetDigLetter (Letter 'a')) --- --- >>> let (Right r) = parse ldh_str "" "abc-def" --- >>> last_ldh_str r --- LetDigHypLetDig (LetDigLetter (Letter 'f')) --- -last_ldh_str :: LdhStr -> LetDigHyp -last_ldh_str (LdhStrSingleLdh x) = x -last_ldh_str (LdhStrMultipleLdh _ x) = last_ldh_str x - - --- | A version of 'init' that works on a 'LdhStr' rather than a --- list. That is, it returns everything /except/ the last character in --- the string. --- --- Since an 'LdhStr' must contain at least one character, this might --- not be opssible (when the input is of length one). So, we return --- a 'Maybe' value. --- --- ==== _Examples_ --- --- >>> import Text.Parsec ( parse ) --- --- >>> let (Right r) = parse ldh_str "" "a" --- >>> init_ldh_str r --- Nothing --- --- >>> let (Right r) = parse ldh_str "" "ab" --- >>> init_ldh_str r --- Just (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a')))) --- --- >>> let (Right r) = parse ldh_str "" "abc-def" --- >>> init_ldh_str r --- Just (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'b'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'c'))) (LdhStrMultipleLdh (LetDigHypHyphen (Hyphen '-')) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'd'))) (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'e'))))))))) --- -init_ldh_str :: LdhStr -> Maybe LdhStr -init_ldh_str (LdhStrSingleLdh _) = Nothing -init_ldh_str (LdhStrMultipleLdh h t) = - Just $ case (init_ldh_str t) of - -- We just got the second-to-last character, we're done. - Nothing -> LdhStrSingleLdh h - - -- There's still more stuff. Recurse. - Just rest -> LdhStrMultipleLdh h rest - - --- | Compute the length of an 'LdhStr'. It will be at least one, since --- 'LdhStr's are non-empty. And if there's something other than the --- first character present, we simply recurse. --- --- ==== _Examples_ --- --- >>> import Text.Parsec ( parse ) --- --- >>> let (Right r) = parse ldh_str "" "a" --- >>> length_ldh_str r --- 1 --- --- >>> let (Right r) = parse ldh_str "" "abc-def" --- >>> length_ldh_str r --- 7 --- -length_ldh_str :: LdhStr -> Int -length_ldh_str (LdhStrSingleLdh _) = 1 -length_ldh_str (LdhStrMultipleLdh _ t) = 1 + (length_ldh_str t) -- * Letter/Digit/Hyphen string followed by a trailing Letter/Digit @@ -314,6 +56,8 @@ instance Pretty LdhStrLetDig where pretty_show (LdhStrLetDig Nothing ld) = pretty_show ld pretty_show (LdhStrLetDig (Just s) ld) = (pretty_show s) ++ (pretty_show ld) + + -- | Parse an 'LdhStrLetDig'. This isn't in the grammar, but we might -- as well define the parser for it independently since we gave it -- its own data type. @@ -345,23 +89,24 @@ ldh_str_let_dig = do full_ldh <- ldh_str -- So we have to go back and see what happened. - case (last_ldh_str full_ldh) of - (LetDigHypHyphen _) -> fail "label cannot end with a hyphen" - (LetDigHypLetDig ld) -> - -- Ok, the label didn't end with a hyphen; now we need to split - -- off the last letter/digit so we can pack it into our return - -- type separately. - return $ case (init_ldh_str full_ldh) of - -- We only parsed one letter/digit. This can happen - -- if the label contains two characters. For example, - -- if we try to parse the label "ab", then the "a" - -- will be eaten by the label parser, and this - -- function will be left with only "b". - Nothing -> LdhStrLetDig Nothing ld - - -- Usual case: there's was some leading let-dig-hyp junk, - -- return it too. - leading_ldhs -> LdhStrLetDig leading_ldhs ld + case (backwards full_ldh) of + + -- Fail on a single hyphen. + (LdhStrSingleLdh (LetDigHypHyphen _)) -> + fail "label cannot end with a hyphen" + + -- Fail for a hyphen followed by other stuff. + (LdhStrMultipleLdh (LetDigHypHyphen _) _) -> + fail "label cannot end with a hyphen" + + -- Simply return the thing if it's a single non-hyphen. + (LdhStrSingleLdh (LetDigHypLetDig ld)) -> return $ LdhStrLetDig Nothing ld + + -- And peel off the last character for a non-hyphen followed by + -- other stuff. We wind up reversing things twice, but whatever. + (LdhStrMultipleLdh (LetDigHypLetDig ld) init_ldh_rev) -> + let init_ldh = backwards init_ldh_rev + in return $ LdhStrLetDig (Just init_ldh) ld @@ -384,7 +129,7 @@ ldh_str_let_dig = do length_ldh_str_let_dig :: LdhStrLetDig -> Int length_ldh_str_let_dig (LdhStrLetDig Nothing _) = 1 length_ldh_str_let_dig (LdhStrLetDig (Just ldhstring) _) = - 1 + (length_ldh_str ldhstring) + 1 + (ldh_str_length ldhstring) -- * Labels @@ -532,30 +277,17 @@ instance Reversible Subdomain where backwards (SubdomainMultipleLabel l (SubdomainSingleLabel m)) = SubdomainMultipleLabel m (SubdomainSingleLabel l) - -- And now the hard case. If we reversed @s@, then the "head" of - -- the result (@last_s@) should be the last label in the entire - -- subdomain. Stick @last_s@ on the front of the result. That makes - -- enough sense. - -- - -- But what to do about the rest? We need to get \"init s\" - -- somehow. Well, we have the reverse of it... why not waste a bunch - -- of time and reverse that, too? With @init s@ in hand, we can - -- prepend @l@ to that, and THEN reverse the entire thing. What we'll - -- wind up with looks like @[last_s, init_s_rev, l]@ which you can - -- pretend you recognize as the subdomain in reverse. + -- And now the hard case. See the 'LdhStr' implementation for an + -- explanation. -- - backwards (SubdomainMultipleLabel l s) = - case (backwards s) of - SubdomainMultipleLabel last_s init_s_rev -> - let init_s = backwards init_s_rev - in - SubdomainMultipleLabel - last_s - (backwards (SubdomainMultipleLabel l init_s)) - - -- Reversing a multiple label thing gives you back a multiple - -- label thing but there's no way to promise that. - impossible -> impossible + backwards (SubdomainMultipleLabel l s) = build (SubdomainSingleLabel l) s + where + -- Build up the first Subdomain on the left by peeling off the + -- leading elements of the second Subdomain. + build :: Subdomain -> Subdomain -> Subdomain + build dst (SubdomainSingleLabel final) = SubdomainMultipleLabel final dst + build dst (SubdomainMultipleLabel leading rest) = + build (SubdomainMultipleLabel leading dst) rest diff --git a/harbl/src/Network/DNS/RBL/Domain/Digit.hs b/harbl/src/Network/DNS/RBL/Domain/Digit.hs new file mode 100644 index 0000000..fd7ce06 --- /dev/null +++ b/harbl/src/Network/DNS/RBL/Domain/Digit.hs @@ -0,0 +1,71 @@ +-- | The simplest module you'll ever see. It contains the 'Digit' type +-- and a Parsec parser to parse one. We don't export its constructor +-- because then you could do something dumb like stick a letter +-- inside one. +-- +-- These are defined in RFC1035, Section 2.3.1, \"Preferred name +-- syntax\" : +-- +-- ::= any one of the ten digits 0 through 9 +-- +module Network.DNS.RBL.Domain.Digit ( + Digit, + digit ) +where + +import qualified Text.Parsec as Parsec ( digit ) +import Text.Parsec.String ( Parser ) + +import Network.DNS.RBL.Pretty ( Pretty(..) ) + + +-- | A wrapper around a single digit character. +-- +-- ==== _Examples_ +-- +-- >>> Digit '1' +-- Digit '1' +-- +-- >>> let d1 = Digit '2' +-- >>> let d2 = Digit '2' +-- >>> let d3 = Digit '3' +-- >>> d1 == d2 +-- True +-- >>> d1 == d3 +-- False +-- +newtype Digit = Digit Char deriving (Eq, Show) + + +-- | Pretty-printing for digits that we've already parsed. Just +-- shows/prints the digit character. +-- +-- ==== _Examples_ +-- +-- >>> let d = Digit '1' +-- >>> pretty_print d +-- 1 +-- +instance Pretty Digit where pretty_show (Digit d) = [d] + + +-- | Parse a single digit, but wrap it in our 'Digit' type. +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parseTest ) +-- +-- Digits are parsed correctly: +-- +-- >>> parseTest digit "3" +-- Digit '3' +-- +-- But letters are not: +-- +-- >>> parseTest digit "x" +-- parse error at (line 1, column 1): +-- unexpected "x" +-- expecting digit +-- +digit :: Parser Digit +digit = fmap Digit Parsec.digit diff --git a/harbl/src/Network/DNS/RBL/Domain/Hyphen.hs b/harbl/src/Network/DNS/RBL/Domain/Hyphen.hs new file mode 100644 index 0000000..65793ff --- /dev/null +++ b/harbl/src/Network/DNS/RBL/Domain/Hyphen.hs @@ -0,0 +1,117 @@ +-- | OK, I lied about "Network.DNS.RBL.Domain.Digit" and +-- "Network.DNS.RBL.Domain.Letter" being the simplest modules you'd +-- ever see. Because this is. It contains the 'Hyphen' type and a +-- Parsec parser to parse one. We don't export its constructor because +-- then you could do something dumb like stick a letter inside one. +-- +module Network.DNS.RBL.Domain.Hyphen ( + Hyphen, + hyphen ) +where + +import Text.Parsec ( char ) +import Text.Parsec.String ( Parser ) + +import Network.DNS.RBL.Pretty ( Pretty(..) ) + + + +-- | A wrapper around a single hyphen character. +-- +-- ==== _Examples_ +-- +-- >>> Hyphen '-' +-- Hyphen '-' +-- +-- >>> let h1 = Hyphen '-' +-- >>> let h2 = Hyphen '-' +-- >>> h1 == h2 +-- True +-- +newtype Hyphen = Hyphen Char + + +-- | Equality is defined semantically (all hyphens are equal). +-- +-- ==== _Examples_ +-- +-- >>> let h1 = Hyphen '-' +-- >>> let h2 = Hyphen '-' +-- >>> h1 == h2 +-- True +-- +-- If you do something stupid, that's your fault: +-- +-- >>> let h1 = Hyphen '-' +-- >>> let h2 = Hyphen 'x' +-- >>> h1 == h2 +-- True +-- +instance Eq Hyphen where _ == _ = True + + +-- | 'Show' is defined semantically; all hyphens display as \'-\'. +-- The implementation is based on what GHC derives, discovered via +-- @ghci -ddump-deriv@. +-- +-- ==== _Examples_ +-- +-- >>> let h = Hyphen '-' +-- >>> h +-- Hyphen '-' +-- +-- If you do something stupid, that's your fault: +-- +-- >>> let h = Hyphen 'x' +-- >>> h +-- Hyphen '-' +-- +instance Show Hyphen where + showsPrec d _ = + showParen (d > application_precedence) (showString "Hyphen '-'") + where + application_precedence = 10 + + +-- | 'Pretty' is defined semantically; all hyphens display as \'-\'. +-- +-- ==== _Examples_ +-- +-- >>> let h = Hyphen '-' +-- >>> pretty_print h +-- - +-- +-- If you do something stupid, that's your fault: +-- +-- >>> let h = Hyphen 'x' +-- >>> pretty_print h +-- - +-- +instance Pretty Hyphen where pretty_show _ = "-" + + +-- | Parse a single hyphen and wrap it in our 'Hyphen' type. +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parseTest ) +-- +-- Hyphens are parsed: +-- +-- >>> parseTest hyphen "-" +-- Hyphen '-' +-- +-- But not letters or digits: +-- +-- >>> parseTest hyphen "1" +-- parse error at (line 1, column 1): +-- unexpected "1" +-- expecting "-" +-- +-- >>> parseTest hyphen "x" +-- parse error at (line 1, column 1): +-- unexpected "x" +-- expecting "-" +-- +hyphen :: Parser Hyphen +hyphen = fmap Hyphen (char '-') diff --git a/harbl/src/Network/DNS/RBL/Domain/LdhStr.hs b/harbl/src/Network/DNS/RBL/Domain/LdhStr.hs new file mode 100644 index 0000000..28deb1f --- /dev/null +++ b/harbl/src/Network/DNS/RBL/Domain/LdhStr.hs @@ -0,0 +1,212 @@ +-- | The 'LdhStr' type and a Parsec parser to parse one. We don't +-- export its constructor because then you could do something dumb +-- like stick a semicolon inside one. +-- +-- These are defined in RFC1035, Section 2.3.1, \"Preferred name +-- syntax\" : +-- +-- ::= | +-- +-- We export our constructors so that we can pattern match to find +-- out whether or not we have a hyphen at the end of a label. +-- +module Network.DNS.RBL.Domain.LdhStr ( + LdhStr(..), + ldh_str, + ldh_str_length ) +where + +import Text.Parsec ( (<|>), try ) +import Text.Parsec.String ( Parser ) + +import Network.DNS.RBL.Domain.LetDigHyp ( LetDigHyp, let_dig_hyp ) +import Network.DNS.RBL.Pretty ( Pretty(..) ) +import Network.DNS.RBL.Reversible ( Reversible(..) ) + + +-- | A string of letters, digits, and hyphens from the RFC1035 grammar: +-- +-- ::= | +-- +-- These are represented as either a single instance of a +-- 'LetDigHyp', or a string of them (recursive). +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parse ) +-- +-- We can create an 'LdhStrSingleLdh' from a single (let-dig-hyp) +-- character: +-- +-- >>> let (Right r) = parse let_dig_hyp "" "x" +-- >>> LdhStrSingleLdh r +-- LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'x'))) +-- +-- >>> let (Right r) = parse let_dig_hyp "" "1" +-- >>> LdhStrSingleLdh r +-- LdhStrSingleLdh (LetDigHypLetDig (LetDigDigit (Digit '1'))) +-- +-- >>> let (Right r) = parse let_dig_hyp "" "-" +-- >>> LdhStrSingleLdh r +-- LdhStrSingleLdh (LetDigHypHyphen (Hyphen '-')) +-- +-- We can create an 'LdhStrMultipleLdh' from multiple (let-dig-hyp) +-- characters: +-- +-- >>> let (Right r) = parse let_dig_hyp "" "x" +-- >>> let (Right r2) = parse let_dig_hyp "" "-" +-- >>> let (Right r3) = parse let_dig_hyp "" "1" +-- >>> let rs = LdhStrMultipleLdh r2 (LdhStrSingleLdh r3) +-- >>> pretty_print $ LdhStrMultipleLdh r rs +-- x-1 +-- +data LdhStr = + LdhStrSingleLdh LetDigHyp | + LdhStrMultipleLdh LetDigHyp LdhStr + deriving (Eq, Show) + + +-- | Pretty-printing for strings of letters, digits, and hyphens that +-- we've already parsed. Just shows/prints the underlying characters +-- (structural) recursively. +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parse ) +-- +-- >>> let (Right r) = parse let_dig_hyp "" "x" +-- >>> pretty_print $ LdhStrSingleLdh r +-- x +-- +-- >>> let (Right r) = parse let_dig_hyp "" "1" +-- >>> pretty_print $ LdhStrSingleLdh r +-- 1 +-- +-- >>> let (Right r) = parse let_dig_hyp "" "-" +-- >>> pretty_print $ LdhStrSingleLdh r +-- - +-- +-- >>> let (Right r) = parse ldh_str "" "123" +-- >>> pretty_print $ r +-- 123 +-- +instance Pretty LdhStr where + pretty_show (LdhStrSingleLdh ldh) = pretty_show ldh + pretty_show (LdhStrMultipleLdh ldh s) = (pretty_show ldh) ++ (pretty_show s) + + +instance Reversible LdhStr where + -- | Reverse the characters of the given 'LdhStr'. We are bordering + -- on redundancy here, since the implementation of this is exactly + -- the same as for 'Subdomain'. However, if we were to generalize + -- 'LdhStr' to e.g. @Str LetDigHyp@ and 'Subdomain' to @Str + -- Label@, then we would get some semantic weirdness, like the + -- fact that the length of a subdomain includes the \".\" + -- characters. So pretty much only 'backwards' would be + -- generalizable. + -- + -- ==== _Examples_ + -- + -- >>> import Text.Parsec ( parse ) + -- + -- Standard usage: + -- + -- >>> let (Right r) = parse ldh_str "" "x" + -- >>> pretty_print $ backwards r + -- x + -- + -- >>> let (Right r) = parse ldh_str "" "com" + -- >>> pretty_print $ backwards r + -- moc + -- + -- >>> let (Right r) = parse ldh_str "" "example-com" + -- >>> pretty_print $ backwards r + -- moc-elpmaxe + -- + -- >>> let (Right r) = parse ldh_str "" "www-example-com" + -- >>> pretty_print $ backwards r + -- moc-elpmaxe-www + -- + + -- The easy case, reversing a one-character string. + backwards s@(LdhStrSingleLdh _) = s + + -- For multiple-character strings, we have two cases. The first is + -- where we have exactly two characters, and we just need to swap them. + backwards (LdhStrMultipleLdh l (LdhStrSingleLdh m)) = + LdhStrMultipleLdh m (LdhStrSingleLdh l) + + -- And now the hard case. We do this in terms of another function, + -- 'build'. The 'build' function works on two strings at a time: the + -- first one, @dst@, is the one we're building. We start with @l@ as + -- our @dst@, and then append characters to it on the left from + -- another string. What's that other string? Just @s@! If we peel + -- things off the left of @s@ and stick them to the left of @l@ and + -- do that until we can't anymore, we will have reversed the string. + backwards (LdhStrMultipleLdh l s) = build (LdhStrSingleLdh l) s + where + -- Build up the first LdhStr on the left by peeling off elements + -- of the second from the left. + build :: LdhStr -> LdhStr -> LdhStr + build dst (LdhStrSingleLdh final) = LdhStrMultipleLdh final dst + build dst (LdhStrMultipleLdh leading rest) = + build (LdhStrMultipleLdh leading dst) rest + + +-- | Parse a string of letters, digits, and hyphens (an 'LdhStr'). +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parseTest ) +-- +-- Single letters, digits, and hyphens are parsed: +-- +-- >>> parseTest ldh_str "a" +-- LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))) +-- +-- >>> parseTest ldh_str "0" +-- LdhStrSingleLdh (LetDigHypLetDig (LetDigDigit (Digit '0'))) +-- +-- >>> parseTest ldh_str "-" +-- LdhStrSingleLdh (LetDigHypHyphen (Hyphen '-')) +-- +-- As well as strings of them: +-- +-- >>> import Text.Parsec ( parse ) +-- >>> pretty_print $ parse ldh_str "" "a0-b" +-- a0-b +-- +ldh_str :: Parser LdhStr +ldh_str = try both <|> just_one + where + both :: Parser LdhStr + both = do + ldh1 <- let_dig_hyp + ldh_tail <- ldh_str + return $ LdhStrMultipleLdh ldh1 ldh_tail + + just_one :: Parser LdhStr + just_one = fmap LdhStrSingleLdh let_dig_hyp + + + + +-- | Compute the length of an 'LdhStr'. It will be at least one, since +-- 'LdhStr's are non-empty. And if there's something other than the +-- first character present, we simply recurse. +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parse ) +-- +-- >>> let (Right r) = parse ldh_str "" "a" +-- >>> ldh_str_length r +-- 1 +-- +-- >>> let (Right r) = parse ldh_str "" "abc-def" +-- >>> ldh_str_length r +-- 7 +-- +ldh_str_length :: LdhStr -> Int +ldh_str_length (LdhStrSingleLdh _) = 1 +ldh_str_length (LdhStrMultipleLdh _ t) = 1 + (ldh_str_length t) diff --git a/harbl/src/Network/DNS/RBL/Domain/LetDig.hs b/harbl/src/Network/DNS/RBL/Domain/LetDig.hs new file mode 100644 index 0000000..6ffcde5 --- /dev/null +++ b/harbl/src/Network/DNS/RBL/Domain/LetDig.hs @@ -0,0 +1,95 @@ +-- | The second-simplest module you'll ever see. It contains the +-- 'LetDig' type and a Parsec parser to parse one. We don't export +-- its constructor because then you could do something dumb like +-- stick a hyphen inside one. +-- +-- These are defined in RFC1035, Section 2.3.1, \"Preferred name +-- syntax\" : +-- +-- ::= | +-- +module Network.DNS.RBL.Domain.LetDig ( + LetDig, + let_dig ) +where + +import Text.Parsec ( (<|>) ) +import Text.Parsec.String ( Parser ) + +import Network.DNS.RBL.Domain.Digit ( Digit, digit ) +import Network.DNS.RBL.Domain.Letter ( Letter, letter ) +import Network.DNS.RBL.Pretty ( Pretty(..) ) + + +-- | A sum type representing either a letter or a digit. +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parse ) +-- +-- >>> let (Right r) = parse letter "" "x" +-- >>> LetDigLetter r +-- LetDigLetter (Letter 'x') +-- +-- >>> let (Right r) = parse digit "" "1" +-- >>> LetDigDigit r +-- LetDigDigit (Digit '1') +-- +-- Case-insensitive equality is derived from that of 'Letter': +-- +-- >>> let (Right r1) = parse letter "" "x" +-- >>> let (Right r2) = parse letter "" "X" +-- >>> LetDigLetter r1 == LetDigLetter r2 +-- True +-- +data LetDig = + LetDigLetter Letter | + LetDigDigit Digit + deriving (Eq, Show) + + +-- | Pretty-printing for letters that we've already parsed. Just +-- shows/prints the letter or digit character. +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parse ) +-- +-- >>> let (Right r) = parse letter "" "x" +-- >>> pretty_print $ LetDigLetter r +-- x +-- +-- >>> let (Right r) = parse digit "" "1" +-- >>> pretty_print $ LetDigDigit r +-- 1 +-- +instance Pretty LetDig where + pretty_show (LetDigLetter l) = pretty_show l + pretty_show (LetDigDigit d) = pretty_show d + + +-- | Parse a letter or a digit and wrap it in our 'LetDig' type. +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parseTest ) +-- +-- Letters are parsed correctly: +-- +-- >>> parseTest let_dig "x" +-- LetDigLetter (Letter 'x') +-- +-- Digits are too: +-- +-- >>> parseTest let_dig "1" +-- LetDigDigit (Digit '1') +-- +-- But not, for example, hyphens: +-- +-- >>> parseTest let_dig "-" +-- parse error at (line 1, column 1): +-- unexpected "-" +-- expecting letter or digit +-- +let_dig :: Parser LetDig +let_dig = (fmap LetDigLetter letter) <|> (fmap LetDigDigit digit) diff --git a/harbl/src/Network/DNS/RBL/Domain/LetDigHyp.hs b/harbl/src/Network/DNS/RBL/Domain/LetDigHyp.hs new file mode 100644 index 0000000..06e420a --- /dev/null +++ b/harbl/src/Network/DNS/RBL/Domain/LetDigHyp.hs @@ -0,0 +1,110 @@ +-- | This module contains the 'LetDigHyp' type and a Parsec parser to +-- parse one. We don't export its constructor because then you could +-- do something dumb like stick a semicolon inside one. +-- +-- These are defined in RFC1035, Section 2.3.1, \"Preferred name +-- syntax\" : +-- +-- ::= | "-" +-- +-- We export the constructors of 'LetDigHyp' so that we can pattern +-- match against them when checking to see if a label ends with a +-- hyphen. +-- +module Network.DNS.RBL.Domain.LetDigHyp ( + LetDigHyp(..), + let_dig_hyp ) +where + +import Text.Parsec ( (<|>) ) +import Text.Parsec.String ( Parser ) + +import Network.DNS.RBL.Domain.Hyphen ( Hyphen, hyphen ) +import Network.DNS.RBL.Domain.LetDig ( LetDig, let_dig ) +import Network.DNS.RBL.Pretty ( Pretty(..) ) + + +-- | A sum type representing a letter, digit, or hyphen. +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parse ) +-- +-- We can create a 'LetDigHyp' from any appropriate value: +-- +-- >>> let (Right r) = parse let_dig "" "1" +-- >>> LetDigHypLetDig r +-- LetDigHypLetDig (LetDigDigit (Digit '1')) +-- +-- >>> let (Right r) = parse let_dig "" "x" +-- >>> LetDigHypLetDig r +-- LetDigHypLetDig (LetDigLetter (Letter 'x')) +-- +-- >>> let (Right r) = parse hyphen "" "-" +-- >>> LetDigHypHyphen r +-- LetDigHypHyphen (Hyphen '-') +-- +data LetDigHyp = + LetDigHypLetDig LetDig | + LetDigHypHyphen Hyphen + deriving (Eq, Show) + + +-- | Pretty-printing for letters, digits, or hyphens that we've +-- already parsed. Just shows/prints the underlying character. +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parse ) +-- +-- >>> let (Right r) = parse let_dig "" "1" +-- >>> pretty_print $ LetDigHypLetDig r +-- 1 +-- +-- >>> let (Right r) = parse let_dig "" "x" +-- >>> pretty_print $ LetDigHypLetDig r +-- x +-- +-- >>> let (Right r) = parse hyphen "" "-" +-- >>> pretty_print $ LetDigHypHyphen r +-- - +-- +instance Pretty LetDigHyp where + pretty_show (LetDigHypLetDig ld) = pretty_show ld + pretty_show (LetDigHypHyphen h) = pretty_show h + + +-- | A parser that will parse either a 'LetDig', or a 'Hyphen'. The +-- result is packed in a 'LetDigHyp'. +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parseTest ) +-- +-- Letters, digits, and hyphens are all parsed: +-- +-- >>> parseTest let_dig_hyp "a" +-- LetDigHypLetDig (LetDigLetter (Letter 'a')) +-- +-- >>> parseTest let_dig_hyp "7" +-- LetDigHypLetDig (LetDigDigit (Digit '7')) +-- +-- >>> parseTest let_dig_hyp "-" +-- LetDigHypHyphen (Hyphen '-') +-- +-- However, an underscore (for example) is not: +-- +-- >>> parseTest let_dig_hyp "_" +-- parse error at (line 1, column 1): +-- unexpected "_" +-- expecting letter, digit or "-" +-- +let_dig_hyp :: Parser LetDigHyp +let_dig_hyp = + parse_letdig <|> parse_hyphen + where + parse_letdig :: Parser LetDigHyp + parse_letdig = fmap LetDigHypLetDig let_dig + + parse_hyphen :: Parser LetDigHyp + parse_hyphen = fmap LetDigHypHyphen hyphen diff --git a/harbl/src/Network/DNS/RBL/Domain/Letter.hs b/harbl/src/Network/DNS/RBL/Domain/Letter.hs new file mode 100644 index 0000000..bd26786 --- /dev/null +++ b/harbl/src/Network/DNS/RBL/Domain/Letter.hs @@ -0,0 +1,104 @@ +-- | The... also... the simplest module you'll ever see. It contains +-- the 'Letter' type and a Parsec parser to parse one. We don't +-- export its constructor because then you could do something dumb +-- like stick a digit inside one. +-- +-- These are defined in RFC1035, Section 2.3.1, \"Preferred name +-- syntax\" : +-- +-- ::= any one of the 52 alphabetic characters A through +-- Z in upper case and a through z in lower case +-- +module Network.DNS.RBL.Domain.Letter ( + Letter, + letter ) +where + +import Data.Char ( toLower ) +import qualified Text.Parsec as Parsec ( letter ) +import Text.Parsec.String ( Parser ) + +import Network.DNS.RBL.Pretty ( Pretty(..) ) + +-- * Letters + +-- | A wrapper around a letter character. +-- +-- ==== _Examples_ +-- +-- >>> Letter 'x' +-- Letter 'x' +-- +newtype Letter = Letter Char deriving (Show) + + +-- | Pretty-printing for letters that we've already parsed. Just +-- shows/prints the letter character. +-- +-- ==== _Examples_ +-- +-- >>> let l = Letter 'x' +-- >>> pretty_print l +-- x +-- +instance Pretty Letter where pretty_show (Letter l) = [l] + + +-- | Parse a single letter, but wrap it in our 'Letter' type. +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parseTest ) +-- +-- Letters are parsed correctly: +-- +-- >>> parseTest letter "x" +-- Letter 'x' +-- +-- But digits are not: +-- +-- >>> parseTest letter "1" +-- parse error at (line 1, column 1): +-- unexpected "1" +-- expecting letter +-- +letter :: Parser Letter +letter = fmap Letter Parsec.letter + + + +-- | The derived instance of 'Eq' for letters is incorrect. All +-- comparisons should be made case-insensitively. The following +-- is an excerpt from RFC1035: +-- +-- 2.3.3. Character Case +-- +-- For all parts of the DNS that are part of the official +-- protocol, all comparisons between character strings (e.g., +-- labels, domain names, etc.) are done in a case-insensitive +-- manner... +-- +-- Since each part of DNS name is composed of our custom types, it +-- suffices to munge the equality for 'Letter'. RFC4343 +-- clarifies the +-- case-insensitivity rules, but the fact that we're treating DNS +-- names as strings makes most of those problems go away (in +-- exchange for new ones). +-- +-- ==== _Examples_ +-- +-- >>> let l1 = Letter 'x' +-- >>> let l2 = Letter 'x' +-- >>> let l3 = Letter 'X' +-- >>> let l4 = Letter 'X' +-- >>> l1 == l2 +-- True +-- >>> l1 == l3 +-- True +-- >>> l1 == l4 +-- True +-- >>> l3 == l4 +-- True +-- +instance Eq Letter where + (Letter l1) == (Letter l2) = (toLower l1) == (toLower l2)