src/Network/DNS/RBL/Site.hs

   1 -- | This module contains the 'Site' data type representing one
   2 --   blacklist with its associated return codes and weight. For
   3 --   example, in Postfix's main.cf you might have,
   4 --
   5 --     postscreen_dnsbl_sites = bl.mailspike.net=127.0.0.[2;10;11]*2, ...
   6 --
   7 --   Here, the blacklist (a 'Host') is \"bl.mailspike.net\", the
   8 --   return code pattern is \"127.0.0.[2;10;11]\", and the weight is
   9 --   \"2\".
  10 --
  11 module Network.DNS.RBL.Site (
  12   Site(..),
  13   site_tests,
  14   sites )
  15 where
  16
  17 import Data.List ( intercalate )
  18 import Test.Tasty ( TestTree, testGroup )
  19 import Test.Tasty.HUnit ( (@?=), testCase )
  20 import Text.Parsec (
  21   char,
  22   choice,
  23   many1,
  24   optionMaybe,
  25   parse,
  26   sepBy1,
  27   space )
  28 import Text.Parsec.String ( Parser )
  29
  30 import Network.DNS.RBL.Host ( Host, host )
  31 import Network.DNS.RBL.IPv4Pattern ( IPv4Pattern, v4pattern )
  32 import Network.DNS.RBL.Weight ( Weight, weight )
  33 import Network.DNS.RBL.Pretty ( Pretty(..) )
  34
  35
  36 -- | A DNSBL as it would be input into postfix. It has a blacklist
  37 --   (DNS) name, a pattern of addresses to use for a \"hit\", and a
  38 --   weight multiplier.
  39 --
  40 data Site = Site Host (Maybe IPv4Pattern) Weight
  41
  42
  43 -- | Pretty print DNSBL sites. This is straightforward except for the
  44 --   weight. We default to a weight of @1@, but this leaves us with a
  45 --   choice. If the user leaves off the weight, do we want to
  46 --   pretty-print it as @1@? How about if we explicitly writes the
  47 --   \"*1\" multiplier?
  48 --
  49 --   The pretty-printing isn't user-facing, really, so it makes sense
  50 --   to just choose one of these behaviors rather than pass around a
  51 --   @Maybe Weight@. We always print the multiplier, even when it's @1@.
  52 --
  53 instance Pretty Site where
  54   pretty_show (Site d p w) =
  55     (pretty_show d) ++ pattern_string ++ "*" ++ (pretty_show w)
  56     where
  57       pattern_string = case p of
  58                          Nothing -> ""
  59                          Just pat -> "=" ++ pretty_show pat
  60
  61
  62 -- | Parse a single 'Site'.
  63 --
  64 --   ==== _Examples_
  65 --
  66 --   >>> import Text.Parsec ( parse )
  67 --
  68 --   >>> let spamhaus = "zen.spamhaus.org*3"
  69 --   >>> pretty_print $ parse site "" spamhaus
  70 --   zen.spamhaus.org*3
  71 --
  72 --   >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
  73 --   >>> pretty_print $ parse site "" mailspike
  74 --   bl.mailspike.net=127.0.0.[2;10;11]*2
  75 --
  76 --   If the weight is left unspecified, it defaults to \"1\" which is
  77 --   then printed:
  78 --
  79 --   >>> let hostkarma = "hostkarma.junkemailfilter.com=127.0.0.2"
  80 --   >>> pretty_print $ parse site "" hostkarma
  81 --   hostkarma.junkemailfilter.com=127.0.0.2*1
  82 --
  83 --   >>> let ubl = "ubl.unsubscore.com"
  84 --   >>> pretty_print $ parse site "" ubl
  85 --   ubl.unsubscore.com*1
  86 --
  87 site :: Parser Site
  88 site = do
  89   d <- host
  90   return_codes <- optionMaybe $ char '=' >> v4pattern
  91   w <- weight
  92   return $ Site d return_codes w
  93
  94
  95 -- | Parse more than one 'Site', separated by commas and/or
  96 --   whitespace.
  97 --
  98 --   ==== _Examples_
  99 --
 100 --   >>> import Text.Parsec ( parse )
 101 --
 102 --   Any combination of comma/spaces can be used as a separator:
 103 --
 104 --   >>> let spamhaus = "zen.spamhaus.org*3"
 105 --   >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
 106 --   >>> let bl_list = spamhaus ++ "," ++ mailspike
 107 --   >>> pretty_print $ parse sites "" bl_list
 108 --   ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
 109 --   >>> let bl_list = spamhaus ++ " ,   " ++ mailspike
 110 --   >>> pretty_print $ parse sites "" bl_list
 111 --   ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
 112 --   >>> let bl_list = spamhaus ++ " " ++ mailspike
 113 --   >>> pretty_print $ parse sites "" bl_list
 114 --   ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
 115 --
 116 --   Any whitespace, in fact, should work:
 117 --
 118 --   >>> let spamhaus = "zen.spamhaus.org*3"
 119 --   >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
 120 --   >>> let bl_list = spamhaus ++ "\n,\t   \t\r" ++ mailspike
 121 --   >>> pretty_print $ parse sites "" bl_list
 122 --   ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
 123 --
 124 sites :: Parser [Site]
 125 sites = site `sepBy1` many1 (choice [char ',', space])
 126
 127
 128
 129 -- * Tests
 130
 131 site_tests :: TestTree
 132 site_tests =
 133   testGroup
 134     "Site tests"
 135     [ test_full_maincf_sites_parsed ]
 136
 137
 138 -- | This is a sample \"postscreen_dnsbl_sites\" from a real main.cf.
 139 --   We should be able to parse it as a list of 'Site's.
 140 --
 141 test_full_maincf_sites_parsed :: TestTree
 142 test_full_maincf_sites_parsed =
 143   testCase "a full main.cf list of postscreen_dnsbl_sites is parsed" $ do
 144     -- Whatever, it's a test.
 145     let actual = pretty_show $ parse sites "" input
 146     actual @?= expected
 147     where
 148       input = intercalate ",\n\t" [
 149                "zen.spamhaus.org*3",
 150                "b.barracudacentral.org*3",
 151                "sip.invaluement.invalid*3",
 152                "jerks.viabit.com*3",
 153                "bl.mailspike.net=127.0.0.[2;10;11]*2",
 154                "bl.spamcop.net*2",
 155                "psbl.surriel.com*2",
 156                "bl.mailspike.net=127.0.0.12*2",
 157                "bl.spameatingmonkey.net*2",
 158                "db.wpbl.info*2",
 159                "dnsbl.sorbs.net",
 160                "dnsbl-1.uceprotect.net",
 161                "hostkarma.junkemailfilter.com=127.0.0.2",
 162                "ubl.unsubscore.com",
 163                "dnsbl.zapbl.net" ]
 164
 165       -- We expect the "one" multipliers to have been added, and the
 166       -- quotation marks to be added...
 167       expected = "[\"" ++
 168                  intercalate "\",\"" [
 169                   "zen.spamhaus.org*3",
 170                   "b.barracudacentral.org*3",
 171                   "sip.invaluement.invalid*3",
 172                   "jerks.viabit.com*3",
 173                   "bl.mailspike.net=127.0.0.[2;10;11]*2",
 174                   "bl.spamcop.net*2",
 175                   "psbl.surriel.com*2",
 176                   "bl.mailspike.net=127.0.0.12*2",
 177                   "bl.spameatingmonkey.net*2",
 178                   "db.wpbl.info*2",
 179                   "dnsbl.sorbs.net*1",
 180                   "dnsbl-1.uceprotect.net*1",
 181                   "hostkarma.junkemailfilter.com=127.0.0.2*1",
 182                   "ubl.unsubscore.com*1",
 183                   "dnsbl.zapbl.net*1" ]
 184                  ++ "\"]"