X-Git-Url: http://gitweb.michael.orlitzky.com/?p=dead%2Fharbl.git;a=blobdiff_plain;f=src%2FNetwork%2FDNS%2FRBL%2FSite.hs;fp=src%2FNetwork%2FDNS%2FRBL%2FSite.hs;h=de3cae460637fe701ac477b760b394135b50527d;hp=0000000000000000000000000000000000000000;hb=2f174a6245027b8d3a4e568a82c240a3595e6bbb;hpb=bc28c407970d9ff1bfeacc88363fd6d23c0af440 diff --git a/src/Network/DNS/RBL/Site.hs b/src/Network/DNS/RBL/Site.hs new file mode 100644 index 0000000..de3cae4 --- /dev/null +++ b/src/Network/DNS/RBL/Site.hs @@ -0,0 +1,244 @@ +-- | This module contains the 'Site' data type representing one +-- blacklist with its associated return codes and weight. For +-- example, in Postfix's main.cf you might have, +-- +-- postscreen_dnsbl_sites = bl.mailspike.net=127.0.0.[2;10;11]*2, ... +-- +-- Here, the blacklist (a 'UserDomain') is \"bl.mailspike.net\", the +-- return code pattern is \"127.0.0.[2;10;11]\", and the weight is +-- \"2". +-- +module Network.DNS.RBL.Site ( + site_tests, + sites ) +where + +import Data.List ( intercalate ) +import Test.Tasty ( TestTree, testGroup ) +import Test.Tasty.HUnit ( (@?=), testCase ) +import Text.Parsec ( + (<|>), + char, + choice, + digit, + many1, + option, + optionMaybe, + parse, + sepBy1, + space, + try, + unexpected ) +import Text.Parsec.String ( Parser ) +import Text.Read ( readMaybe ) + +import Network.DNS.RBL.Domain ( UserDomain, user_domain ) +import Network.DNS.RBL.IPv4Pattern ( IPv4Pattern, v4pattern ) +import Network.DNS.RBL.Pretty ( Pretty(..) ) + + +newtype Weight = Weight Int deriving (Eq, Show) + +instance Pretty Weight where + pretty_show (Weight w) = show w + + +-- | Parse the weight multiplier at the end of a site. +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parseTest ) +-- +-- Negative, zero, and positive integers are all supported: +-- +-- >>> parseTest weight "*-5" +-- Weight (-5) +-- +-- >>> parseTest weight "*0" +-- Weight 0 +-- +-- >>> parseTest weight "*17" +-- Weight 17 +-- +-- If the weight is empty, it defaults to @1@: +-- +-- >>> parseTest weight "" +-- Weight 1 +-- +-- The default is used whenever parsing fails: +-- +-- >>> parseTest weight "*hello" +-- Weight 1 +-- +-- The 'Pretty' instance works as intended: +-- +-- >>> import Text.Parsec ( parse ) +-- >>> pretty_print $ parse weight "" "*3" +-- 3 +-- +weight :: Parser Weight +weight = try parse_weight <|> return (Weight 1) + where + parse_weight = do + _ <- char '*' + sign <- (char '-') <|> (option '+' (char '+')) + w <- many1 digit + case ( readMaybe w :: Maybe Int ) of + -- If "many1 digit" gives us a list of digits, we should be able + -- to convert that to an Int! It will overflow rather than fail + -- if the input is too big/small, so it should really always + -- succeed. + Nothing -> unexpected "weight: readMaybe failed on a sequence of digits!" + Just k -> return $ Weight (if sign == '-' then negate k else k) + + + +-- | A DNSBL as it would be input into postfix. It has a blacklist +-- (DNS) name, a pattern of addresses to use for a \"hit\", and a +-- weight multiplier. +-- +data Site = Site UserDomain (Maybe IPv4Pattern) Weight + + +-- | Pretty print DNSBL sites. This is straightforward except for the +-- weight. We default to a weight of @1@, but this leaves us with a +-- choice. If the user leaves off the weight, do we want to +-- pretty-print it as @1@? How about if we explicitly writes the +-- \"*1\" multiplier? +-- +-- The pretty-printing isn't user-facing, really, so it makes sense +-- to just choose one of these behaviors rather than pass around a +-- @Maybe Weight@. We always print the multiplier, even when it's @1@. +-- +instance Pretty Site where + pretty_show (Site d p w) = + (pretty_show d) ++ pattern_string ++ "*" ++ (pretty_show w) + where + pattern_string = case p of + Nothing -> "" + Just pat -> "=" ++ pretty_show pat + + +-- | Parse a single 'Site'. +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parse ) +-- +-- >>> let spamhaus = "zen.spamhaus.org*3" +-- >>> pretty_print $ parse site "" spamhaus +-- zen.spamhaus.org*3 +-- +-- >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2" +-- >>> pretty_print $ parse site "" mailspike +-- bl.mailspike.net=127.0.0.[2;10;11]*2 +-- +-- If the weight is left unspecified, it defaults to \"1\" which is +-- then printed: +-- +-- >>> let hostkarma = "hostkarma.junkemailfilter.com=127.0.0.2" +-- >>> pretty_print $ parse site "" hostkarma +-- hostkarma.junkemailfilter.com=127.0.0.2*1 +-- +-- >>> let ubl = "ubl.unsubscore.com" +-- >>> pretty_print $ parse site "" ubl +-- ubl.unsubscore.com*1 +-- +site :: Parser Site +site = do + d <- user_domain + return_codes <- optionMaybe $ char '=' >> v4pattern + w <- weight + return $ Site d return_codes w + + +-- | Parse more than one 'Site', separated by commas and/or +-- whitespace. +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parse ) +-- +-- Any combination of comma/spaces can be used as a separator: +-- +-- >>> let spamhaus = "zen.spamhaus.org*3" +-- >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2" +-- >>> let bl_list = spamhaus ++ "," ++ mailspike +-- >>> pretty_print $ parse sites "" bl_list +-- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"] +-- >>> let bl_list = spamhaus ++ " , " ++ mailspike +-- >>> pretty_print $ parse sites "" bl_list +-- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"] +-- >>> let bl_list = spamhaus ++ " " ++ mailspike +-- >>> pretty_print $ parse sites "" bl_list +-- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"] +-- +-- Any whitespace, in fact, should work: +-- +-- >>> let spamhaus = "zen.spamhaus.org*3" +-- >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2" +-- >>> let bl_list = spamhaus ++ "\n,\t \t\r" ++ mailspike +-- >>> pretty_print $ parse sites "" bl_list +-- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"] +-- +sites :: Parser [Site] +sites = site `sepBy1` many1 (choice [char ',', space]) + + + +-- * Tests + +site_tests :: TestTree +site_tests = + testGroup + "Site tests" + [ test_full_maincf_sites_parsed ] + + +-- | This is a sample \"postscreen_dnsbl_sites\" from a real main.cf. +-- We should be able to parse it as a list of 'Site's. +-- +test_full_maincf_sites_parsed :: TestTree +test_full_maincf_sites_parsed = + testCase "a full main.cf list of postscreen_dnsbl_sites is parsed" $ do + -- Whatever, it's a test. + let actual = pretty_show $ parse sites "" input + actual @?= expected + where + input = intercalate ",\n\t" [ + "zen.spamhaus.org*3", + "b.barracudacentral.org*3", + "sip.invaluement.invalid*3", + "jerks.viabit.com*3", + "bl.mailspike.net=127.0.0.[2;10;11]*2", + "bl.spamcop.net*2", + "psbl.surriel.com*2", + "bl.mailspike.net=127.0.0.12*2", + "bl.spameatingmonkey.net*2", + "db.wpbl.info*2", + "dnsbl.sorbs.net", + "dnsbl-1.uceprotect.net", + "hostkarma.junkemailfilter.com=127.0.0.2", + "ubl.unsubscore.com", + "dnsbl.zapbl.net" ] + + -- We expect the "one" multipliers to have been added, and the + -- quotation marks to be added... + expected = "[\"" ++ + intercalate "\",\"" [ + "zen.spamhaus.org*3", + "b.barracudacentral.org*3", + "sip.invaluement.invalid*3", + "jerks.viabit.com*3", + "bl.mailspike.net=127.0.0.[2;10;11]*2", + "bl.spamcop.net*2", + "psbl.surriel.com*2", + "bl.mailspike.net=127.0.0.12*2", + "bl.spameatingmonkey.net*2", + "db.wpbl.info*2", + "dnsbl.sorbs.net*1", + "dnsbl-1.uceprotect.net*1", + "hostkarma.junkemailfilter.com=127.0.0.2*1", + "ubl.unsubscore.com*1", + "dnsbl.zapbl.net*1" ] + ++ "\"]"