-- | This module contains the 'Site' data type representing one
--   blacklist with its associated return codes and weight. For
--   example, in Postfix's main.cf you might have,
--
--     postscreen_dnsbl_sites = bl.mailspike.net=127.0.0.[2;10;11]*2, ...
--
--   Here, the blacklist (a 'UserDomain') is \"bl.mailspike.net\", the
--   return code pattern is \"127.0.0.[2;10;11]\", and the weight is
--   \"2\".
--
module Network.DNS.RBL.Site
  ( Site(..)
  , Weight(..)
  , site_tests
  , sites
  )
where

import Data.List ( intercalate )
import Test.Tasty ( TestTree, testGroup )
import Test.Tasty.HUnit ( (@?=), testCase )
import Text.Parsec (
  (<|>),
  char,
  choice,
  digit,
  many1,
  option,
  optionMaybe,
  parse,
  sepBy1,
  space,
  try,
  unexpected )
import Text.Parsec.String ( Parser )
import Text.Read ( readMaybe )

import Network.DNS.RBL.Domain ( UserDomain, user_domain )
import Network.DNS.RBL.IPv4Pattern ( IPv4Pattern, v4pattern )
import Network.DNS.RBL.Pretty ( Pretty(..) )


-- | The integer weight multiplier attached to a 'Site'.
--
newtype Weight = Weight Int
  deriving (Eq, Show)


-- | A 'Weight' is pretty-printed as the bare integer that it wraps.
--
instance Pretty Weight where
  pretty_show (Weight n) = show n


-- | Parse the weight multiplier at the end of a site.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parseTest )
--
--   Negative, zero, and positive integers are all supported:
--
--   >>> parseTest weight "*-5"
--   Weight (-5)
--
--   >>> parseTest weight "*0"
--   Weight 0
--
--   >>> parseTest weight "*17"
--   Weight 17
--
--   If the weight is empty, it defaults to @1@:
--
--   >>> parseTest weight ""
--   Weight 1
--
--   The default is used whenever parsing fails:
--
--   >>> parseTest weight "*hello"
--   Weight 1
--
--   The 'Pretty' instance works as intended:
--
--   >>> import Text.Parsec ( parse )
--   >>> pretty_print $ parse weight "" "*3"
--   3
--
weight :: Parser Weight
weight = try explicit_weight <|> return (Weight 1)
  where
    -- An asterisk, an optional sign, and then at least one digit.
    -- The whole thing is wrapped in "try" above so that a malformed
    -- multiplier consumes no input and we fall back to the default.
    explicit_weight = do
      _ <- char '*'
      sign <- char '-' <|> option '+' (char '+')
      digits <- many1 digit
      let signed = if sign == '-' then negate else id
      -- If "many1 digit" gives us a list of digits, we should be able
      -- to convert that to an Int! It will overflow rather than fail
      -- if the input is too big/small, so the failure branch should
      -- really never be taken.
      maybe
        (unexpected "weight: readMaybe failed on a sequence of digits!")
        (return . Weight . signed)
        (readMaybe digits :: Maybe Int)


-- | A DNSBL as it would be input into postfix. It has a blacklist
--   (DNS) name, a pattern of addresses to use for a \"hit\", and a
--   weight multiplier.
--
data Site = Site UserDomain (Maybe IPv4Pattern) Weight


-- | Pretty print DNSBL sites. This is straightforward except for the
--   weight. We default to a weight of @1@, but this leaves us with a
--   choice. If the user leaves off the weight, do we want to
--   pretty-print it as @1@? How about if they explicitly write the
--   \"*1\" multiplier?
--
--   The pretty-printing isn't user-facing, really, so it makes sense
--   to just choose one of these behaviors rather than pass around a
--   @Maybe Weight@. We always print the multiplier, even when it's @1@.
--
instance Pretty Site where
  pretty_show (Site domain codes w) =
    concat [ pretty_show domain, code_suffix, "*", pretty_show w ]
    where
      -- The "=pattern" part is only printed when a return code
      -- pattern was actually given.
      code_suffix = maybe "" (\pat -> "=" ++ pretty_show pat) codes


-- | Parse a single 'Site'.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parse )
--
--   >>> let spamhaus = "zen.spamhaus.org*3"
--   >>> pretty_print $ parse site "" spamhaus
--   zen.spamhaus.org*3
--
--   >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
--   >>> pretty_print $ parse site "" mailspike
--   bl.mailspike.net=127.0.0.[2;10;11]*2
--
--   If the weight is left unspecified, it defaults to \"1\" which is
--   then printed:
--
--   >>> let hostkarma = "hostkarma.junkemailfilter.com=127.0.0.2"
--   >>> pretty_print $ parse site "" hostkarma
--   hostkarma.junkemailfilter.com=127.0.0.2*1
--
--   >>> let ubl = "ubl.unsubscore.com"
--   >>> pretty_print $ parse site "" ubl
--   ubl.unsubscore.com*1
--
site :: Parser Site
site = Site <$> user_domain <*> return_codes <*> weight
  where
    -- The return code pattern is optional, and is introduced by an
    -- equals sign when present.
    return_codes = optionMaybe (char '=' >> v4pattern)


-- | Parse more than one 'Site', separated by commas and/or
--   whitespace.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parse )
--
--   Any combination of comma/spaces can be used as a separator:
--
--   >>> let spamhaus = "zen.spamhaus.org*3"
--   >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
--   >>> let bl_list = spamhaus ++ "," ++ mailspike
--   >>> pretty_print $ parse sites "" bl_list
--   ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
--   >>> let bl_list = spamhaus ++ " , " ++ mailspike
--   >>> pretty_print $ parse sites "" bl_list
--   ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
--   >>> let bl_list = spamhaus ++ " " ++ mailspike
--   >>> pretty_print $ parse sites "" bl_list
--   ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
--
--   Any whitespace, in fact, should work:
--
--   >>> let spamhaus = "zen.spamhaus.org*3"
--   >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
--   >>> let bl_list = spamhaus ++ "\n,\t \t\r" ++ mailspike
--   >>> pretty_print $ parse sites "" bl_list
--   ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
--
sites :: Parser [Site]
sites = sepBy1 site (many1 separator)
  where
    -- One separator character: a comma or a single whitespace
    -- character. One or more of these in a row separate two sites.
    separator = choice [char ',', space]


-- * Tests

site_tests :: TestTree
site_tests = testGroup "Site tests" [ test_full_maincf_sites_parsed ]


-- | This is a sample \"postscreen_dnsbl_sites\" from a real main.cf.
--   We should be able to parse it as a list of 'Site's.
--
test_full_maincf_sites_parsed :: TestTree
test_full_maincf_sites_parsed =
  testCase "a full main.cf list of postscreen_dnsbl_sites is parsed" $
    -- We compare the pretty-printed parse result rather than the
    -- parsed value itself. Whatever, it's a test.
    pretty_show (parse sites "" input) @?= expected
  where
    -- Entries that already carry an explicit weight; these should
    -- come back out exactly as they went in.
    weighted =
      [ "zen.spamhaus.org*3"
      , "b.barracudacentral.org*3"
      , "sip.invaluement.invalid*3"
      , "jerks.viabit.com*3"
      , "bl.mailspike.net=127.0.0.[2;10;11]*2"
      , "bl.spamcop.net*2"
      , "psbl.surriel.com*2"
      , "bl.mailspike.net=127.0.0.12*2"
      , "bl.spameatingmonkey.net*2"
      , "db.wpbl.info*2"
      ]

    -- Entries with no weight given.
    unweighted =
      [ "dnsbl.sorbs.net"
      , "dnsbl-1.uceprotect.net"
      , "hostkarma.junkemailfilter.com=127.0.0.2"
      , "ubl.unsubscore.com"
      , "dnsbl.zapbl.net"
      ]

    input = intercalate ",\n\t" (weighted ++ unweighted)

    -- We expect the "one" multipliers to have been added, and the
    -- quotation marks to be added...
    expected =
      "[\""
        ++ intercalate "\",\"" (weighted ++ map (++ "*1") unweighted)
        ++ "\"]"