From: Michael Orlitzky Date: Thu, 9 Jul 2015 06:34:17 +0000 (-0400) Subject: Implement DnsblSite parsing and add a test suite for it. X-Git-Url: http://gitweb.michael.orlitzky.com/?p=dead%2Fharbl.git;a=commitdiff_plain;h=20e2618366a1965f5cd91b1daf1d5a37d459deaa;ds=sidebyside Implement DnsblSite parsing and add a test suite for it. --- diff --git a/src/DnsblSite.hs b/src/DnsblSite.hs index bf87a89..46a872b 100644 --- a/src/DnsblSite.hs +++ b/src/DnsblSite.hs @@ -8,29 +8,41 @@ -- return code pattern is \"127.0.0.[2;10;11]\", and the weight is -- \"2". -- -module DnsblSite () +module DnsblSite ( + dnsbl_site_tests, + dnsbl_sites ) where +import Data.List ( intercalate ) +import Test.Tasty ( TestTree, testGroup ) +import Test.Tasty.HUnit ( (@?=), testCase ) import Text.Parsec ( - ParseError, (<|>), char, + choice, digit, - eof, many1, option, + optionMaybe, parse, - string, + sepBy1, + space, try, unexpected ) import Text.Parsec.String ( Parser ) import Text.Read ( readMaybe ) -import Domain ( UserDomain ) -import IPv4Pattern ( IPv4Pattern ) +import Domain ( UserDomain, user_domain ) +import IPv4Pattern ( IPv4Pattern, v4pattern ) +import Pretty ( Pretty(..) ) + newtype Weight = Weight Int deriving (Eq, Show) +instance Pretty Weight where + pretty_show (Weight w) = show w + + -- | Parse the weight multiplier at the end of a dnsbl_site. -- -- ==== _Examples_ @@ -48,28 +60,24 @@ newtype Weight = Weight Int deriving (Eq, Show) -- >>> parseTest weight "*17" -- Weight 17 -- --- A bare asterisk doesn't work: --- --- >>> parseTest weight "*" --- parse error at (line 1, column 2): --- unexpected end of input --- expecting "-", "+" or digit --- -- If the weight is empty, it defaults to @1@: -- -- >>> parseTest weight "" -- Weight 1 -- --- But the default is only used if the weight is really empty, --- not if parsing simply fails: +-- The default is used whenever parsing fails: -- -- >>> parseTest weight "*hello" --- parse error at (line 1, column 2): --- unexpected "h" --- expecting "-", "+" or digit +-- Weight 1 +-- +-- The 'Pretty' instance works as intended: +-- +-- >>> import Text.Parsec ( parse ) +-- >>> pretty_print $ parse weight "" "*3" +-- 3 -- weight :: Parser Weight -weight = try parse_weight <|> (eof >> return (Weight 1)) +weight = try parse_weight <|> return (Weight 1) where parse_weight = do _ <- char '*' @@ -84,4 +92,153 @@ weight = try parse_weight <|> (eof >> return (Weight 1)) Just k -> return $ Weight (if sign == '-' then negate k else k) -data DnsblSite = DnsblSite UserDomain IPv4Pattern Weight + +-- | A DNSBL as it would be input into postfix. It has a blacklist +-- (DNS) name, a pattern of addresses to use for a \"hit\", and a +-- weight multiplier. +-- +data DnsblSite = DnsblSite UserDomain (Maybe IPv4Pattern) Weight + + +-- | Pretty print DNSBL sites. This is straightforward except for the +-- weight. We default to a weight of @1@, but this leaves us with a +-- choice. If the user leaves off the weight, do we want to +-- pretty-print it as @1@? How about if we explicitly writes the +-- \"*1\" multiplier? +-- +-- The pretty-printing isn't user-facing, really, so it makes sense +-- to just choose one of these behaviors rather than pass around a +-- @Maybe Weight@. We always print the multiplier, even when it's @1@. +-- +instance Pretty DnsblSite where + pretty_show (DnsblSite d p w) = + (pretty_show d) ++ pattern_string ++ "*" ++ (pretty_show w) + where + pattern_string = case p of + Nothing -> "" + Just pat -> "=" ++ pretty_show pat + + +-- | Parse a single 'DnsblSite'. +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parse ) +-- +-- >>> let spamhaus = "zen.spamhaus.org*3" +-- >>> pretty_print $ parse dnsbl_site "" spamhaus +-- zen.spamhaus.org*3 +-- +-- >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2" +-- >>> pretty_print $ parse dnsbl_site "" mailspike +-- bl.mailspike.net=127.0.0.[2;10;11]*2 +-- +-- If the weight is left unspecified, it defaults to \"1\" which is +-- then printed: +-- +-- >>> let hostkarma = "hostkarma.junkemailfilter.com=127.0.0.2" +-- >>> pretty_print $ parse dnsbl_site "" hostkarma +-- hostkarma.junkemailfilter.com=127.0.0.2*1 +-- +-- >>> let ubl = "ubl.unsubscore.com" +-- >>> pretty_print $ parse dnsbl_site "" ubl +-- ubl.unsubscore.com*1 +-- +dnsbl_site :: Parser DnsblSite +dnsbl_site = do + d <- user_domain + return_codes <- optionMaybe $ char '=' >> v4pattern + w <- weight + return $ DnsblSite d return_codes w + + +-- | Parse more than one 'DnsblSite', separated by commas and/or +-- whitespace. +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parse ) +-- +-- Any combination of comma/spaces can be used as a separator: +-- +-- >>> let spamhaus = "zen.spamhaus.org*3" +-- >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2" +-- >>> let bl_list = spamhaus ++ "," ++ mailspike +-- >>> pretty_print $ parse dnsbl_sites "" bl_list +-- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"] +-- >>> let bl_list = spamhaus ++ " , " ++ mailspike +-- >>> pretty_print $ parse dnsbl_sites "" bl_list +-- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"] +-- >>> let bl_list = spamhaus ++ " " ++ mailspike +-- >>> pretty_print $ parse dnsbl_sites "" bl_list +-- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"] +-- +-- Any whitespace, in fact, should work: +-- +-- >>> let spamhaus = "zen.spamhaus.org*3" +-- >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2" +-- >>> let bl_list = spamhaus ++ "\n,\t \t\r" ++ mailspike +-- >>> pretty_print $ parse dnsbl_sites "" bl_list +-- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"] +-- +dnsbl_sites :: Parser [DnsblSite] +dnsbl_sites = dnsbl_site `sepBy1` many1 (choice [char ',', space]) + + + +-- * Tests + +dnsbl_site_tests :: TestTree +dnsbl_site_tests = + testGroup + "DnsblSite tests" + [ test_full_maincf_sites_parsed ] + + +-- | This is a sample \"postscreen_dnsbl_sites\" from a real main.cf. +-- We should be able to parse it as a list of 'DnsblSite's. +-- +test_full_maincf_sites_parsed :: TestTree +test_full_maincf_sites_parsed = + testCase "a full main.cf list of postscreen_dnsbl_sites is parsed" $ do + -- Whatever, it's a test. + let actual = pretty_show $ parse dnsbl_sites "" input + actual @?= expected + where + input = intercalate ",\n\t" [ + "zen.spamhaus.org*3", + "b.barracudacentral.org*3", + "sip.invaluement.invalid*3", + "jerks.viabit.com*3", + "bl.mailspike.net=127.0.0.[2;10;11]*2", + "bl.spamcop.net*2", + "psbl.surriel.com*2", + "bl.mailspike.net=127.0.0.12*2", + "bl.spameatingmonkey.net*2", + "db.wpbl.info*2", + "dnsbl.sorbs.net", + "dnsbl-1.uceprotect.net", + "hostkarma.junkemailfilter.com=127.0.0.2", + "ubl.unsubscore.com", + "dnsbl.zapbl.net" ] + + -- We expect the "one" multipliers to have been added, and the + -- quotation marks to be added... + expected = "[\"" ++ + intercalate "\",\"" [ + "zen.spamhaus.org*3", + "b.barracudacentral.org*3", + "sip.invaluement.invalid*3", + "jerks.viabit.com*3", + "bl.mailspike.net=127.0.0.[2;10;11]*2", + "bl.spamcop.net*2", + "psbl.surriel.com*2", + "bl.mailspike.net=127.0.0.12*2", + "bl.spameatingmonkey.net*2", + "db.wpbl.info*2", + "dnsbl.sorbs.net*1", + "dnsbl-1.uceprotect.net*1", + "hostkarma.junkemailfilter.com=127.0.0.2*1", + "ubl.unsubscore.com*1", + "dnsbl.zapbl.net*1" ] + ++ "\"]" diff --git a/test/TestSuite.hs b/test/TestSuite.hs index c7d9613..cb79fd0 100644 --- a/test/TestSuite.hs +++ b/test/TestSuite.hs @@ -1,8 +1,9 @@ import Test.Tasty ( TestTree, defaultMain, testGroup ) +import DnsblSite ( dnsbl_site_tests ) import IPv4Pattern ( ipv4pattern_tests ) tests :: TestTree -tests = testGroup "All Tests" [ ipv4pattern_tests ] +tests = testGroup "All Tests" [ dnsbl_site_tests, ipv4pattern_tests ] main :: IO () main = defaultMain tests