src/Network/DNS/RBL/Site.hs

   1 -- | This module contains the 'Site' data type representing one
   2 --   blacklist with its associated return codes and weight. For
   3 --   example, in Postfix's main.cf you might have,
   4 --
   5 --     postscreen_dnsbl_sites = bl.mailspike.net=127.0.0.[2;10;11]*2, ...
   6 --
   7 --   Here, the blacklist (a 'UserDomain') is \"bl.mailspike.net\", the
   8 --   return code pattern is \"127.0.0.[2;10;11]\", and the weight is
   9 --   \"2".
  10 --
  11 module Network.DNS.RBL.Site (
  12   Site(..),
  13   Weight(..),
  14   site_tests,
  15   sites )
  16 where
  17
  18 import Data.List ( intercalate )
  19 import Test.Tasty ( TestTree, testGroup )
  20 import Test.Tasty.HUnit ( (@?=), testCase )
  21 import Text.Parsec (
  22   (<|>),
  23   char,
  24   choice,
  25   digit,
  26   many1,
  27   option,
  28   optionMaybe,
  29   parse,
  30   sepBy1,
  31   space,
  32   try,
  33   unexpected )
  34 import Text.Parsec.String ( Parser )
  35 import Text.Read ( readMaybe )
  36
  37 import Network.DNS.RBL.Domain ( UserDomain, user_domain )
  38 import Network.DNS.RBL.IPv4Pattern ( IPv4Pattern, v4pattern )
  39 import Network.DNS.RBL.Pretty ( Pretty(..) )
  40
  41
  42 newtype Weight = Weight Int deriving (Eq, Show)
  43
  44 instance Pretty Weight where
  45   pretty_show (Weight w) = show w
  46
  47
  48 -- | Parse the weight multiplier at the end of a site.
  49 --
  50 --   ==== _Examples_
  51 --
  52 --   >>> import Text.Parsec ( parseTest )
  53 --
  54 --   Negative, zero, and positive integers are all supported:
  55 --
  56 --   >>> parseTest weight "*-5"
  57 --   Weight (-5)
  58 --
  59 --   >>> parseTest weight "*0"
  60 --   Weight 0
  61 --
  62 --   >>> parseTest weight "*17"
  63 --   Weight 17
  64 --
  65 --   If the weight is empty, it defaults to @1@:
  66 --
  67 --   >>> parseTest weight ""
  68 --   Weight 1
  69 --
  70 --   The default is used whenever parsing fails:
  71 --
  72 --   >>> parseTest weight "*hello"
  73 --   Weight 1
  74 --
  75 --   The 'Pretty' instance works as intended:
  76 --
  77 --   >>> import Text.Parsec ( parse )
  78 --   >>> pretty_print $ parse weight "" "*3"
  79 --   3
  80 --
  81 weight :: Parser Weight
  82 weight = try parse_weight <|> return (Weight 1)
  83   where
  84     parse_weight = do
  85       _ <- char '*'
  86       sign <- (char '-') <|> (option '+' (char '+'))
  87       w <- many1 digit
  88       case ( readMaybe w :: Maybe Int ) of
  89         -- If "many1 digit" gives us a list of digits, we should be able
  90         -- to convert that to an Int! It will overflow rather than fail
  91         -- if the input is too big/small, so it should really always
  92         -- succeed.
  93         Nothing -> unexpected "weight: readMaybe failed on a sequence of digits!"
  94         Just k  -> return $ Weight (if sign == '-' then negate k else k)
  95
  96
  97
  98 -- | A DNSBL as it would be input into postfix. It has a blacklist
  99 --   (DNS) name, a pattern of addresses to use for a \"hit\", and a
 100 --   weight multiplier.
 101 --
 102 data Site = Site UserDomain (Maybe IPv4Pattern) Weight
 103
 104
 105 -- | Pretty print DNSBL sites. This is straightforward except for the
 106 --   weight. We default to a weight of @1@, but this leaves us with a
 107 --   choice. If the user leaves off the weight, do we want to
 108 --   pretty-print it as @1@? How about if we explicitly writes the
 109 --   \"*1\" multiplier?
 110 --
 111 --   The pretty-printing isn't user-facing, really, so it makes sense
 112 --   to just choose one of these behaviors rather than pass around a
 113 --   @Maybe Weight@. We always print the multiplier, even when it's @1@.
 114 --
 115 instance Pretty Site where
 116   pretty_show (Site d p w) =
 117     (pretty_show d) ++ pattern_string ++ "*" ++ (pretty_show w)
 118     where
 119       pattern_string = case p of
 120                          Nothing -> ""
 121                          Just pat -> "=" ++ pretty_show pat
 122
 123
 124 -- | Parse a single 'Site'.
 125 --
 126 --   ==== _Examples_
 127 --
 128 --   >>> import Text.Parsec ( parse )
 129 --
 130 --   >>> let spamhaus = "zen.spamhaus.org*3"
 131 --   >>> pretty_print $ parse site "" spamhaus
 132 --   zen.spamhaus.org*3
 133 --
 134 --   >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
 135 --   >>> pretty_print $ parse site "" mailspike
 136 --   bl.mailspike.net=127.0.0.[2;10;11]*2
 137 --
 138 --   If the weight is left unspecified, it defaults to \"1\" which is
 139 --   then printed:
 140 --
 141 --   >>> let hostkarma = "hostkarma.junkemailfilter.com=127.0.0.2"
 142 --   >>> pretty_print $ parse site "" hostkarma
 143 --   hostkarma.junkemailfilter.com=127.0.0.2*1
 144 --
 145 --   >>> let ubl = "ubl.unsubscore.com"
 146 --   >>> pretty_print $ parse site "" ubl
 147 --   ubl.unsubscore.com*1
 148 --
 149 site :: Parser Site
 150 site = do
 151   d <- user_domain
 152   return_codes <- optionMaybe $ char '=' >> v4pattern
 153   w <- weight
 154   return $ Site d return_codes w
 155
 156
 157 -- | Parse more than one 'Site', separated by commas and/or
 158 --   whitespace.
 159 --
 160 --   ==== _Examples_
 161 --
 162 --   >>> import Text.Parsec ( parse )
 163 --
 164 --   Any combination of comma/spaces can be used as a separator:
 165 --
 166 --   >>> let spamhaus = "zen.spamhaus.org*3"
 167 --   >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
 168 --   >>> let bl_list = spamhaus ++ "," ++ mailspike
 169 --   >>> pretty_print $ parse sites "" bl_list
 170 --   ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
 171 --   >>> let bl_list = spamhaus ++ " ,   " ++ mailspike
 172 --   >>> pretty_print $ parse sites "" bl_list
 173 --   ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
 174 --   >>> let bl_list = spamhaus ++ " " ++ mailspike
 175 --   >>> pretty_print $ parse sites "" bl_list
 176 --   ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
 177 --
 178 --   Any whitespace, in fact, should work:
 179 --
 180 --   >>> let spamhaus = "zen.spamhaus.org*3"
 181 --   >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
 182 --   >>> let bl_list = spamhaus ++ "\n,\t   \t\r" ++ mailspike
 183 --   >>> pretty_print $ parse sites "" bl_list
 184 --   ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
 185 --
 186 sites :: Parser [Site]
 187 sites = site `sepBy1` many1 (choice [char ',', space])
 188
 189
 190
 191 -- * Tests
 192
 193 site_tests :: TestTree
 194 site_tests =
 195   testGroup
 196     "Site tests"
 197     [ test_full_maincf_sites_parsed ]
 198
 199
 200 -- | This is a sample \"postscreen_dnsbl_sites\" from a real main.cf.
 201 --   We should be able to parse it as a list of 'Site's.
 202 --
 203 test_full_maincf_sites_parsed :: TestTree
 204 test_full_maincf_sites_parsed =
 205   testCase "a full main.cf list of postscreen_dnsbl_sites is parsed" $ do
 206     -- Whatever, it's a test.
 207     let actual = pretty_show $ parse sites "" input
 208     actual @?= expected
 209     where
 210       input = intercalate ",\n\t" [
 211                "zen.spamhaus.org*3",
 212                "b.barracudacentral.org*3",
 213                "sip.invaluement.invalid*3",
 214                "jerks.viabit.com*3",
 215                "bl.mailspike.net=127.0.0.[2;10;11]*2",
 216                "bl.spamcop.net*2",
 217                "psbl.surriel.com*2",
 218                "bl.mailspike.net=127.0.0.12*2",
 219                "bl.spameatingmonkey.net*2",
 220                "db.wpbl.info*2",
 221                "dnsbl.sorbs.net",
 222                "dnsbl-1.uceprotect.net",
 223                "hostkarma.junkemailfilter.com=127.0.0.2",
 224                "ubl.unsubscore.com",
 225                "dnsbl.zapbl.net" ]
 226
 227       -- We expect the "one" multipliers to have been added, and the
 228       -- quotation marks to be added...
 229       expected = "[\"" ++
 230                  intercalate "\",\"" [
 231                   "zen.spamhaus.org*3",
 232                   "b.barracudacentral.org*3",
 233                   "sip.invaluement.invalid*3",
 234                   "jerks.viabit.com*3",
 235                   "bl.mailspike.net=127.0.0.[2;10;11]*2",
 236                   "bl.spamcop.net*2",
 237                   "psbl.surriel.com*2",
 238                   "bl.mailspike.net=127.0.0.12*2",
 239                   "bl.spameatingmonkey.net*2",
 240                   "db.wpbl.info*2",
 241                   "dnsbl.sorbs.net*1",
 242                   "dnsbl-1.uceprotect.net*1",
 243                   "hostkarma.junkemailfilter.com=127.0.0.2*1",
 244                   "ubl.unsubscore.com*1",
 245                   "dnsbl.zapbl.net*1" ]
 246                  ++ "\"]"