-- | This module contains the 'Site' data type representing one
--   blacklist with its associated return codes and weight. For
--   example, in Postfix's main.cf you might have,
--
--     postscreen_dnsbl_sites = bl.mailspike.net=127.0.0.[2;10;11]*2, ...
--
--   Here, the blacklist (a 'UserDomain') is \"bl.mailspike.net\", the
--   return code pattern is \"127.0.0.[2;10;11]\", and the weight is
--   \"2".
--
module Network.DNS.RBL.Site (
  Site(..),
  Weight(..),
  site_tests,
  sites )
where

import Data.List ( intercalate )
import Test.Tasty ( TestTree, testGroup )
import Test.Tasty.HUnit ( (@?=), testCase )
import Text.Parsec (
  (<|>),
  char,
  choice,
  digit,
  many1,
  option,
  optionMaybe,
  parse,
  sepBy1,
  space,
  try,
  unexpected )
import Text.Parsec.String ( Parser )
import Text.Read ( readMaybe )

import Network.DNS.RBL.Domain ( UserDomain, user_domain )
import Network.DNS.RBL.IPv4Pattern ( IPv4Pattern, v4pattern )
import Network.DNS.RBL.Pretty ( Pretty(..) )


newtype Weight = Weight Int deriving (Eq, Show)

instance Pretty Weight where
  pretty_show (Weight w) = show w


-- | Parse the weight multiplier at the end of a site.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parseTest )
--
--   Negative, zero, and positive integers are all supported:
--
--   >>> parseTest weight "*-5"
--   Weight (-5)
--
--   >>> parseTest weight "*0"
--   Weight 0
--
--   >>> parseTest weight "*17"
--   Weight 17
--
--   If the weight is empty, it defaults to @1@:
--
--   >>> parseTest weight ""
--   Weight 1
--
--   The default is used whenever parsing fails:
--
--   >>> parseTest weight "*hello"
--   Weight 1
--
--   The 'Pretty' instance works as intended:
--
--   >>> import Text.Parsec ( parse )
--   >>> pretty_print $ parse weight "" "*3"
--   3
--
weight :: Parser Weight
weight = try parse_weight <|> return (Weight 1)
  where
    parse_weight = do
      _ <- char '*'
      sign <- (char '-') <|> (option '+' (char '+'))
      w <- many1 digit
      case ( readMaybe w :: Maybe Int ) of
        -- If "many1 digit" gives us a list of digits, we should be able
        -- to convert that to an Int! It will overflow rather than fail
        -- if the input is too big/small, so it should really always
        -- succeed.
        Nothing -> unexpected "weight: readMaybe failed on a sequence of digits!"
        Just k  -> return $ Weight (if sign == '-' then negate k else k)


-- | A DNSBL as it would be input into postfix. It has a blacklist
--   (DNS) name, a pattern of addresses to use for a \"hit\", and a
--   weight multiplier.
--
data Site = Site UserDomain (Maybe IPv4Pattern) Weight


-- | Pretty print DNSBL sites. This is straightforward except for the
--   weight. We default to a weight of @1@, but this leaves us with a
--   choice. If the user leaves off the weight, do we want to
--   pretty-print it as @1@? How about if we explicitly writes the
--   \"*1\" multiplier?
--
--   The pretty-printing isn't user-facing, really, so it makes sense
--   to just choose one of these behaviors rather than pass around a
--   @Maybe Weight@. We always print the multiplier, even when it's @1@.
--
instance Pretty Site where
  pretty_show (Site d p w) =
    (pretty_show d) ++ pattern_string ++ "*" ++ (pretty_show w)
    where
      pattern_string = case p of
                         Nothing -> ""
                         Just pat -> "=" ++ pretty_show pat


-- | Parse a single 'Site'.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parse )
--
--   >>> let spamhaus = "zen.spamhaus.org*3"
--   >>> pretty_print $ parse site "" spamhaus
--   zen.spamhaus.org*3
--
--   >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
--   >>> pretty_print $ parse site "" mailspike
--   bl.mailspike.net=127.0.0.[2;10;11]*2
--
--   If the weight is left unspecified, it defaults to \"1\" which is
--   then printed:
--
--   >>> let hostkarma = "hostkarma.junkemailfilter.com=127.0.0.2"
--   >>> pretty_print $ parse site "" hostkarma
--   hostkarma.junkemailfilter.com=127.0.0.2*1
--
--   >>> let ubl = "ubl.unsubscore.com"
--   >>> pretty_print $ parse site "" ubl
--   ubl.unsubscore.com*1
--
site :: Parser Site
site = do
  d <- user_domain
  return_codes <- optionMaybe $ char '=' >> v4pattern
  w <- weight
  return $ Site d return_codes w


-- | Parse more than one 'Site', separated by commas and/or
--   whitespace.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parse )
--
--   Any combination of comma/spaces can be used as a separator:
--
--   >>> let spamhaus = "zen.spamhaus.org*3"
--   >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
--   >>> let bl_list = spamhaus ++ "," ++ mailspike
--   >>> pretty_print $ parse sites "" bl_list
--   ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
--   >>> let bl_list = spamhaus ++ " ,   " ++ mailspike
--   >>> pretty_print $ parse sites "" bl_list
--   ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
--   >>> let bl_list = spamhaus ++ " " ++ mailspike
--   >>> pretty_print $ parse sites "" bl_list
--   ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
--
--   Any whitespace, in fact, should work:
--
--   >>> let spamhaus = "zen.spamhaus.org*3"
--   >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
--   >>> let bl_list = spamhaus ++ "\n,\t   \t\r" ++ mailspike
--   >>> pretty_print $ parse sites "" bl_list
--   ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
--
sites :: Parser [Site]
sites = site `sepBy1` many1 (choice [char ',', space])


-- * Tests

site_tests :: TestTree
site_tests =
  testGroup
    "Site tests"
    [ test_full_maincf_sites_parsed ]


-- | This is a sample \"postscreen_dnsbl_sites\" from a real main.cf.
--   We should be able to parse it as a list of 'Site's.
--
test_full_maincf_sites_parsed :: TestTree
test_full_maincf_sites_parsed =
  testCase "a full main.cf list of postscreen_dnsbl_sites is parsed" $ do
    -- Whatever, it's a test.
    let actual = pretty_show $ parse sites "" input
    actual @?= expected
    where
      input = intercalate ",\n\t" [
               "zen.spamhaus.org*3",
               "b.barracudacentral.org*3",
               "sip.invaluement.invalid*3",
               "jerks.viabit.com*3",
               "bl.mailspike.net=127.0.0.[2;10;11]*2",
               "bl.spamcop.net*2",
               "psbl.surriel.com*2",
               "bl.mailspike.net=127.0.0.12*2",
               "bl.spameatingmonkey.net*2",
               "db.wpbl.info*2",
               "dnsbl.sorbs.net",
               "dnsbl-1.uceprotect.net",
               "hostkarma.junkemailfilter.com=127.0.0.2",
               "ubl.unsubscore.com",
               "dnsbl.zapbl.net" ]

      -- We expect the "one" multipliers to have been added, and the
      -- quotation marks to be added...
      expected = "[\"" ++
                 intercalate "\",\"" [
                  "zen.spamhaus.org*3",
                  "b.barracudacentral.org*3",
                  "sip.invaluement.invalid*3",
                  "jerks.viabit.com*3",
                  "bl.mailspike.net=127.0.0.[2;10;11]*2",
                  "bl.spamcop.net*2",
                  "psbl.surriel.com*2",
                  "bl.mailspike.net=127.0.0.12*2",
                  "bl.spameatingmonkey.net*2",
                  "db.wpbl.info*2",
                  "dnsbl.sorbs.net*1",
                  "dnsbl-1.uceprotect.net*1",
                  "hostkarma.junkemailfilter.com=127.0.0.2*1",
                  "ubl.unsubscore.com*1",
                  "dnsbl.zapbl.net*1" ]
                 ++ "\"]"