Remove underlying Char from Hyphen type.

[dead/harbl.git] / harbl / src / Network / DNS / RBL / Domain.hs
diff --git a/harbl/src/Network/DNS/RBL/Domain.hs b/harbl/src/Network/DNS/RBL/Domain.hs

index 75170a7675bf09ab3c509d41b1ada58b16fbac05..7adc1a99f6d67422deabb4f99441484d26820939 100644 (file)
--- a/harbl/src/Network/DNS/RBL/Domain.hs
+++ b/harbl/src/Network/DNS/RBL/Domain.hs
@@ -15,284 +15,27 @@
  --   (octets).
  --
  module Network.DNS.RBL.Domain (
-  Domain,
+  Domain(..),
    domain )
  where
  
-import Data.Char ( toLower )
  import Text.Parsec (
    (<|>),
    char,
    optionMaybe,
    string,
    try )
-import qualified Text.Parsec as Parsec ( digit, letter)
  import Text.Parsec.String ( Parser )
  
+import Network.DNS.RBL.Domain.LdhStr (
+  LdhStr(..),
+  ldh_str,
+  ldh_str_length )
+import Network.DNS.RBL.Domain.LetDig ( LetDig, let_dig )
+import Network.DNS.RBL.Domain.LetDigHyp ( LetDigHyp(..) )
  import Network.DNS.RBL.Pretty ( Pretty(..) )
+import Network.DNS.RBL.Reversible ( Reversible(..) )
  
--- * Digits
-
--- | A wrapper around a digit character.
---
-newtype Digit = Digit Char deriving (Eq, Show)
-instance Pretty Digit where pretty_show (Digit d) = [d]
-
--- | Parse a single digit, but wrap it in our 'Digit' type.
---
-digit :: Parser Digit
-digit = fmap Digit Parsec.digit
-
-
--- * Letters
-
--- | A wrapper around a letter character.
---
-newtype Letter = Letter Char deriving (Show)
-instance Pretty Letter where pretty_show (Letter l) = [l]
-
-
--- | Parse a single letter, but wrap it in our 'Letter' type.
---
-letter :: Parser Letter
-letter = fmap Letter Parsec.letter
-
--- | The derived instance of 'Eq' for letters is incorrect. All
---   comparisons should be made case-insensitively. The following
---   is an excerpt from RFC1035:
---
---     2.3.3. Character Case
---
---     For all parts of the DNS that are part of the official
---     protocol, all comparisons between character strings (e.g.,
---     labels, domain names, etc.)  are done in a case-insensitive
---     manner...
---
---   Since each part of DNS name is composed of our custom types, it
---   suffices to munge the equality for 'Letter'. RFC4343
---   <https://tools.ietf.org/html/rfc4343> clarifies the
---   case-insensitivity rules, but the fact that we're treating DNS
---   names as strings makes most of those problems go away (in
---   exchange for new ones).
---
-instance Eq Letter where
-  (Letter l1) == (Letter l2) = (toLower l1) == (toLower l2)
-
--- * Letters/Digits
-
--- | A sum type representing either a letter or a digit.
---
-data LetDig =
-  LetDigLetter Letter |
-  LetDigDigit  Digit
-  deriving (Eq, Show)
-
-instance Pretty LetDig where
-  pretty_show (LetDigLetter l) = pretty_show l
-  pretty_show (LetDigDigit d) = pretty_show d
-
--- | Parse a letter or a digit and wrap it in our 'LetDig' type.
---
-let_dig :: Parser LetDig
-let_dig = (fmap LetDigLetter letter) <|> (fmap LetDigDigit digit)
-
-
--- * Hyphens
-
--- | A wrapper around a single hyphen character.
---
-newtype Hyphen = Hyphen Char deriving (Eq, Show)
-instance Pretty Hyphen where pretty_show (Hyphen h) = [h]
-
--- | Parse a single hyphen and wrap it in our 'Hyphen' type.
---
-hyphen :: Parser Hyphen
-hyphen = fmap Hyphen (char '-')
-
-
--- * Letter, Digit, or Hyphen.
-
--- | A sum type representing a letter, digit, or hyphen.
---
-data LetDigHyp =
-  LetDigHypLetDig LetDig |
-  LetDigHypHyphen Hyphen
-  deriving (Eq, Show)
-
-instance Pretty LetDigHyp where
-  pretty_show (LetDigHypLetDig ld) = pretty_show ld
-  pretty_show (LetDigHypHyphen h) = pretty_show h
-
-
--- | The following is the simplest type in the domain grammar that
---   isn't already implemented for us.
---
---     <let-dig> ::= <letter> | <digit>
---
---   ==== _Examples_
---
---   >>> import Text.Parsec ( parseTest )
---
---   Letters, digits, and hyphens are all parsed:
---
---   >>> parseTest let_dig_hyp "a"
---   LetDigHypLetDig (LetDigLetter (Letter 'a'))
---
---   >>> parseTest let_dig_hyp "7"
---   LetDigHypLetDig (LetDigDigit (Digit '7'))
---
---   >>> parseTest let_dig_hyp "-"
---   LetDigHypHyphen (Hyphen '-')
---
---   However, an underscore (for example) is not:
---
---   >>> parseTest let_dig_hyp "_"
---   parse error at (line 1, column 1):
---   unexpected "_"
---   expecting letter, digit or "-"
---
-let_dig_hyp :: Parser LetDigHyp
-let_dig_hyp =
-  parse_letdig <|> parse_hyphen
-  where
-    parse_letdig :: Parser LetDigHyp
-    parse_letdig = fmap LetDigHypLetDig let_dig
-
-    parse_hyphen :: Parser LetDigHyp
-    parse_hyphen = fmap LetDigHypHyphen hyphen
-
-
--- * Letter/Digit/Hyphen strings
-
--- | A string of letters, digits, and hyphens from the RFC1035 grammar:
---
---     <ldh-str> ::= <let-dig-hyp> | <let-dig-hyp> <ldh-str>
---
---   These are represented as either a single instance of a
---   'LetDigHyp', or a string of them (recursive).
---
-data LdhStr =
-  LdhStrSingleLdh LetDigHyp |
-  LdhStrMultipleLdh LetDigHyp LdhStr
-  deriving (Eq, Show)
-
-instance Pretty LdhStr where
-  pretty_show (LdhStrSingleLdh ldh) = pretty_show ldh
-  pretty_show (LdhStrMultipleLdh ldh s) = (pretty_show ldh) ++ (pretty_show s)
-
--- | Parse a string of letters, digits, and hyphens (an 'LdhStr').
---
---   ==== _Examples_
---
---   >>> import Text.Parsec ( parseTest )
---
---   Single letters, digits, and hyphens are parsed:
---
---   >>> parseTest ldh_str "a"
---   LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a')))
---
---   >>> parseTest ldh_str "0"
---   LdhStrSingleLdh (LetDigHypLetDig (LetDigDigit (Digit '0')))
---
---   >>> parseTest ldh_str "-"
---   LdhStrSingleLdh (LetDigHypHyphen (Hyphen '-'))
---
---   As well as strings of them:
---
---   >>> import Text.Parsec ( parse )
---   >>> pretty_print $ parse ldh_str "" "a0-b"
---   a0-b
---
-ldh_str :: Parser LdhStr
-ldh_str = try both <|> just_one
-  where
-    both :: Parser LdhStr
-    both = do
-      ldh1 <- let_dig_hyp
-      ldh_tail <- ldh_str
-      return $ LdhStrMultipleLdh ldh1 ldh_tail
-
-    just_one :: Parser LdhStr
-    just_one = fmap LdhStrSingleLdh let_dig_hyp
-
-
-
--- | A version of 'last' that works on a 'LdhStr' rather than a
---   list. That is, it returns the last 'LetDigHyp' in the
---   string. Since 'LdhStr' contains at least one character, there's
---   no \"nil\" case here.
---
---   ==== _Examples_
---
---   >>> import Text.Parsec ( parse )
---
---   >>> let (Right r) = parse ldh_str "" "a"
---   >>> last_ldh_str r
---   LetDigHypLetDig (LetDigLetter (Letter 'a'))
---
---   >>> let (Right r) = parse ldh_str "" "abc-def"
---   >>> last_ldh_str r
---   LetDigHypLetDig (LetDigLetter (Letter 'f'))
---
-last_ldh_str :: LdhStr -> LetDigHyp
-last_ldh_str (LdhStrSingleLdh x) = x
-last_ldh_str (LdhStrMultipleLdh _ x) = last_ldh_str x
-
-
--- | A version of 'init' that works on a 'LdhStr' rather than a
---   list. That is, it returns everything /except/ the last character in
---   the string.
---
---   Since an 'LdhStr' must contain at least one character, this might
---   not be opssible (when the input is of length one). So, we return
---   a 'Maybe' value.
---
---   ==== _Examples_
---
---   >>> import Text.Parsec ( parse )
---
---   >>> let (Right r) = parse ldh_str "" "a"
---   >>> init_ldh_str r
---   Nothing
---
---   >>> let (Right r) = parse ldh_str "" "ab"
---   >>> init_ldh_str r
---   Just (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))))
---
---   >>> let (Right r) = parse ldh_str "" "abc-def"
---   >>> init_ldh_str r
---   Just (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'a'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'b'))) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'c'))) (LdhStrMultipleLdh (LetDigHypHyphen (Hyphen '-')) (LdhStrMultipleLdh (LetDigHypLetDig (LetDigLetter (Letter 'd'))) (LdhStrSingleLdh (LetDigHypLetDig (LetDigLetter (Letter 'e')))))))))
---
-init_ldh_str :: LdhStr -> Maybe LdhStr
-init_ldh_str (LdhStrSingleLdh _) = Nothing
-init_ldh_str (LdhStrMultipleLdh h t) =
-  Just $ case (init_ldh_str t) of
-           -- We just got the second-to-last character, we're done.
-           Nothing   -> LdhStrSingleLdh h
-
-           -- There's still more stuff. Recurse.
-           Just rest -> LdhStrMultipleLdh h rest
-
-
--- | Compute the length of an 'LdhStr'. It will be at least one, since
---   'LdhStr's are non-empty. And if there's something other than the
---   first character present, we simply recurse.
---
---   ==== _Examples_
---
---   >>> import Text.Parsec ( parse )
---
---   >>> let (Right r) = parse ldh_str "" "a"
---   >>> length_ldh_str r
---   1
---
---   >>> let (Right r) = parse ldh_str "" "abc-def"
---   >>> length_ldh_str r
---   7
---
-length_ldh_str :: LdhStr -> Int
-length_ldh_str (LdhStrSingleLdh _) = 1
-length_ldh_str (LdhStrMultipleLdh _ t) = 1 + (length_ldh_str t)
  
  -- * Letter/Digit/Hyphen string followed by a trailing Letter/Digit
  
@@ -313,6 +56,8 @@ instance Pretty LdhStrLetDig where
    pretty_show (LdhStrLetDig Nothing ld) = pretty_show ld
    pretty_show (LdhStrLetDig (Just s) ld) = (pretty_show s) ++ (pretty_show ld)
  
+
+
  -- | Parse an 'LdhStrLetDig'. This isn't in the grammar, but we might
  --   as well define the parser for it independently since we gave it
  --   its own data type.
@@ -344,23 +89,24 @@ ldh_str_let_dig = do
    full_ldh <- ldh_str
  
    -- So we have to go back and see what happened.
-  case (last_ldh_str full_ldh) of
-    (LetDigHypHyphen _) -> fail "label cannot end with a hyphen"
-    (LetDigHypLetDig ld) ->
-      -- Ok, the label didn't end with a hyphen; now we need to split
-      -- off the last letter/digit so we can pack it into our return
-      -- type separately.
-      return $ case (init_ldh_str full_ldh) of
-                 -- We only parsed one letter/digit. This can happen
-                 -- if the label contains two characters. For example,
-                 -- if we try to parse the label "ab", then the "a"
-                 -- will be eaten by the label parser, and this
-                 -- function will be left with only "b".
-                 Nothing -> LdhStrLetDig Nothing ld
-
-                 -- Usual case: there's was some leading let-dig-hyp junk,
-                 -- return it too.
-                 leading_ldhs -> LdhStrLetDig leading_ldhs ld
+  case (backwards full_ldh) of
+
+    -- Fail on a single hyphen.
+    (LdhStrSingleLdh (LetDigHypHyphen _)) ->
+      fail "label cannot end with a hyphen"
+
+    -- Fail for a hyphen followed by other stuff.
+    (LdhStrMultipleLdh (LetDigHypHyphen _) _) ->
+      fail "label cannot end with a hyphen"
+
+    -- Simply return the thing if it's a single non-hyphen.
+    (LdhStrSingleLdh (LetDigHypLetDig ld)) -> return $ LdhStrLetDig Nothing ld
+
+    -- And peel off the last character for a non-hyphen followed by
+    -- other stuff. We wind up reversing things twice, but whatever.
+    (LdhStrMultipleLdh (LetDigHypLetDig ld) init_ldh_rev) ->
+      let init_ldh = backwards init_ldh_rev
+      in return $ LdhStrLetDig (Just init_ldh) ld
  
  
  
@@ -383,7 +129,7 @@ ldh_str_let_dig = do
  length_ldh_str_let_dig :: LdhStrLetDig -> Int
  length_ldh_str_let_dig (LdhStrLetDig Nothing _) = 1
  length_ldh_str_let_dig (LdhStrLetDig (Just ldhstring) _) =
-  1 + (length_ldh_str ldhstring)
+  1 + (ldh_str_length ldhstring)
  
  
  -- * Labels
@@ -496,6 +242,55 @@ instance Pretty Subdomain where
    pretty_show (SubdomainMultipleLabel l s) =
      (pretty_show l) ++ "." ++ (pretty_show s)
  
+
+instance Reversible Subdomain where
+  -- | Reverse the labels of the given subdomain.
+  --
+  --   ==== _Examples_
+  --
+  --   >>> import Text.Parsec ( parse )
+  --
+  --   Standard usage:
+  --
+  --   >>> let (Right r) = parse subdomain "" "com"
+  --   >>> pretty_print $ backwards r
+  --   com
+  --
+  --   >>> let (Right r) = parse subdomain "" "example.com"
+  --   >>> pretty_print $ backwards r
+  --   com.example
+  --
+  --   >>> let (Right r) = parse subdomain "" "www.example.com"
+  --   >>> pretty_print $ backwards r
+  --   com.example.www
+  --
+  --   >>> let (Right r) = parse subdomain "" "new.www.example.com"
+  --   >>> pretty_print $ backwards r
+  --   com.example.www.new
+  --
+
+  -- It's easy to reverse a single label...
+  backwards s@(SubdomainSingleLabel _) = s
+
+  -- For multiple labels we have two cases. The first is where we have
+  -- exactly two labels, and we just need to swap them.
+  backwards (SubdomainMultipleLabel l (SubdomainSingleLabel m)) =
+    SubdomainMultipleLabel m (SubdomainSingleLabel l)
+
+  -- And now the hard case. See the 'LdhStr' implementation for an
+  -- explanation.
+  --
+  backwards (SubdomainMultipleLabel l s) = build (SubdomainSingleLabel l) s
+    where
+      -- Build up the first Subdomain on the left by peeling off the
+      -- leading elements of the second Subdomain.
+      build :: Subdomain -> Subdomain -> Subdomain
+      build dst (SubdomainSingleLabel final) = SubdomainMultipleLabel final dst
+      build dst (SubdomainMultipleLabel leading rest) =
+        build (SubdomainMultipleLabel leading dst) rest
+
+
+
  -- | Parse an RFC1035 \"subdomain\". The given grammar is,
  --
  --     <subdomain> ::= <label> | <subdomain> "." <label>
@@ -627,6 +422,7 @@ subdomain_has_equal_neighbors s =
  
  
  
+
  -- * Domains
  
  -- | An RFC1035 domain. According to RFC1035 a domain can be either a
@@ -717,3 +513,24 @@ domain = try parse_subdomain <|> parse_empty
  
      parse_empty :: Parser Domain
      parse_empty = string "" >> return DomainRoot
+
+
+instance Reversible Domain where
+  -- | Reverse the labels of a 'Domain'.
+  --
+  --   --   ==== _Examples_
+  --
+  --   >>> import Text.Parsec ( parse )
+  --
+  --   The root reverses to itself:
+  --
+  --   >>> let (Right r) = parse domain "" ""
+  --   >>> backwards r
+  --   DomainRoot
+  --
+  --   >>> let (Right r) = parse domain "" "new.www.example.com"
+  --   >>> pretty_print $ backwards r
+  --   com.example.www.new
+  --
+  backwards DomainRoot = DomainRoot
+  backwards (DomainName s) = DomainName $ backwards s