]> gitweb.michael.orlitzky.com - dead/harbl.git/blob - src/DnsblSite.hs
46a872ba4e545695cca2ee7a4558ffa76a38a4ac
[dead/harbl.git] / src / DnsblSite.hs
1 -- | This module contains the 'DnsblSite' data type representing one
2 -- blacklist with its associated return codes and weight. For example,
3 -- in Postfix's main.cf you might have,
4 --
5 -- postscreen_dnsbl_sites = bl.mailspike.net=127.0.0.[2;10;11]*2, ...
6 --
7 -- Here, the blacklist (a 'UserDomain') is \"bl.mailspike.net\", the
8 -- return code pattern is \"127.0.0.[2;10;11]\", and the weight is
9 -- \"2".
10 --
11 module DnsblSite (
12 dnsbl_site_tests,
13 dnsbl_sites )
14 where
15
16 import Data.List ( intercalate )
17 import Test.Tasty ( TestTree, testGroup )
18 import Test.Tasty.HUnit ( (@?=), testCase )
19 import Text.Parsec (
20 (<|>),
21 char,
22 choice,
23 digit,
24 many1,
25 option,
26 optionMaybe,
27 parse,
28 sepBy1,
29 space,
30 try,
31 unexpected )
32 import Text.Parsec.String ( Parser )
33 import Text.Read ( readMaybe )
34
35 import Domain ( UserDomain, user_domain )
36 import IPv4Pattern ( IPv4Pattern, v4pattern )
37 import Pretty ( Pretty(..) )
38
39
40 newtype Weight = Weight Int deriving (Eq, Show)
41
42 instance Pretty Weight where
43 pretty_show (Weight w) = show w
44
45
46 -- | Parse the weight multiplier at the end of a dnsbl_site.
47 --
48 -- ==== _Examples_
49 --
50 -- >>> import Text.Parsec ( parseTest )
51 --
52 -- Negative, zero, and positive integers are all supported:
53 --
54 -- >>> parseTest weight "*-5"
55 -- Weight (-5)
56 --
57 -- >>> parseTest weight "*0"
58 -- Weight 0
59 --
60 -- >>> parseTest weight "*17"
61 -- Weight 17
62 --
63 -- If the weight is empty, it defaults to @1@:
64 --
65 -- >>> parseTest weight ""
66 -- Weight 1
67 --
68 -- The default is used whenever parsing fails:
69 --
70 -- >>> parseTest weight "*hello"
71 -- Weight 1
72 --
73 -- The 'Pretty' instance works as intended:
74 --
75 -- >>> import Text.Parsec ( parse )
76 -- >>> pretty_print $ parse weight "" "*3"
77 -- 3
78 --
79 weight :: Parser Weight
80 weight = try parse_weight <|> return (Weight 1)
81 where
82 parse_weight = do
83 _ <- char '*'
84 sign <- (char '-') <|> (option '+' (char '+'))
85 w <- many1 digit
86 case ( readMaybe w :: Maybe Int ) of
87 -- If "many1 digit" gives us a list of digits, we should be able
88 -- to convert that to an Int! It will overflow rather than fail
89 -- if the input is too big/small, so it should really always
90 -- succeed.
91 Nothing -> unexpected "weight: readMaybe failed on a sequence of digits!"
92 Just k -> return $ Weight (if sign == '-' then negate k else k)
93
94
95
96 -- | A DNSBL as it would be input into postfix. It has a blacklist
97 -- (DNS) name, a pattern of addresses to use for a \"hit\", and a
98 -- weight multiplier.
99 --
100 data DnsblSite = DnsblSite UserDomain (Maybe IPv4Pattern) Weight
101
102
103 -- | Pretty print DNSBL sites. This is straightforward except for the
104 -- weight. We default to a weight of @1@, but this leaves us with a
105 -- choice. If the user leaves off the weight, do we want to
106 -- pretty-print it as @1@? How about if we explicitly writes the
107 -- \"*1\" multiplier?
108 --
109 -- The pretty-printing isn't user-facing, really, so it makes sense
110 -- to just choose one of these behaviors rather than pass around a
111 -- @Maybe Weight@. We always print the multiplier, even when it's @1@.
112 --
113 instance Pretty DnsblSite where
114 pretty_show (DnsblSite d p w) =
115 (pretty_show d) ++ pattern_string ++ "*" ++ (pretty_show w)
116 where
117 pattern_string = case p of
118 Nothing -> ""
119 Just pat -> "=" ++ pretty_show pat
120
121
122 -- | Parse a single 'DnsblSite'.
123 --
124 -- ==== _Examples_
125 --
126 -- >>> import Text.Parsec ( parse )
127 --
128 -- >>> let spamhaus = "zen.spamhaus.org*3"
129 -- >>> pretty_print $ parse dnsbl_site "" spamhaus
130 -- zen.spamhaus.org*3
131 --
132 -- >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
133 -- >>> pretty_print $ parse dnsbl_site "" mailspike
134 -- bl.mailspike.net=127.0.0.[2;10;11]*2
135 --
136 -- If the weight is left unspecified, it defaults to \"1\" which is
137 -- then printed:
138 --
139 -- >>> let hostkarma = "hostkarma.junkemailfilter.com=127.0.0.2"
140 -- >>> pretty_print $ parse dnsbl_site "" hostkarma
141 -- hostkarma.junkemailfilter.com=127.0.0.2*1
142 --
143 -- >>> let ubl = "ubl.unsubscore.com"
144 -- >>> pretty_print $ parse dnsbl_site "" ubl
145 -- ubl.unsubscore.com*1
146 --
147 dnsbl_site :: Parser DnsblSite
148 dnsbl_site = do
149 d <- user_domain
150 return_codes <- optionMaybe $ char '=' >> v4pattern
151 w <- weight
152 return $ DnsblSite d return_codes w
153
154
155 -- | Parse more than one 'DnsblSite', separated by commas and/or
156 -- whitespace.
157 --
158 -- ==== _Examples_
159 --
160 -- >>> import Text.Parsec ( parse )
161 --
162 -- Any combination of comma/spaces can be used as a separator:
163 --
164 -- >>> let spamhaus = "zen.spamhaus.org*3"
165 -- >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
166 -- >>> let bl_list = spamhaus ++ "," ++ mailspike
167 -- >>> pretty_print $ parse dnsbl_sites "" bl_list
168 -- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
169 -- >>> let bl_list = spamhaus ++ " , " ++ mailspike
170 -- >>> pretty_print $ parse dnsbl_sites "" bl_list
171 -- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
172 -- >>> let bl_list = spamhaus ++ " " ++ mailspike
173 -- >>> pretty_print $ parse dnsbl_sites "" bl_list
174 -- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
175 --
176 -- Any whitespace, in fact, should work:
177 --
178 -- >>> let spamhaus = "zen.spamhaus.org*3"
179 -- >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
180 -- >>> let bl_list = spamhaus ++ "\n,\t \t\r" ++ mailspike
181 -- >>> pretty_print $ parse dnsbl_sites "" bl_list
182 -- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
183 --
184 dnsbl_sites :: Parser [DnsblSite]
185 dnsbl_sites = dnsbl_site `sepBy1` many1 (choice [char ',', space])
186
187
188
189 -- * Tests
190
191 dnsbl_site_tests :: TestTree
192 dnsbl_site_tests =
193 testGroup
194 "DnsblSite tests"
195 [ test_full_maincf_sites_parsed ]
196
197
198 -- | This is a sample \"postscreen_dnsbl_sites\" from a real main.cf.
199 -- We should be able to parse it as a list of 'DnsblSite's.
200 --
201 test_full_maincf_sites_parsed :: TestTree
202 test_full_maincf_sites_parsed =
203 testCase "a full main.cf list of postscreen_dnsbl_sites is parsed" $ do
204 -- Whatever, it's a test.
205 let actual = pretty_show $ parse dnsbl_sites "" input
206 actual @?= expected
207 where
208 input = intercalate ",\n\t" [
209 "zen.spamhaus.org*3",
210 "b.barracudacentral.org*3",
211 "sip.invaluement.invalid*3",
212 "jerks.viabit.com*3",
213 "bl.mailspike.net=127.0.0.[2;10;11]*2",
214 "bl.spamcop.net*2",
215 "psbl.surriel.com*2",
216 "bl.mailspike.net=127.0.0.12*2",
217 "bl.spameatingmonkey.net*2",
218 "db.wpbl.info*2",
219 "dnsbl.sorbs.net",
220 "dnsbl-1.uceprotect.net",
221 "hostkarma.junkemailfilter.com=127.0.0.2",
222 "ubl.unsubscore.com",
223 "dnsbl.zapbl.net" ]
224
225 -- We expect the "one" multipliers to have been added, and the
226 -- quotation marks to be added...
227 expected = "[\"" ++
228 intercalate "\",\"" [
229 "zen.spamhaus.org*3",
230 "b.barracudacentral.org*3",
231 "sip.invaluement.invalid*3",
232 "jerks.viabit.com*3",
233 "bl.mailspike.net=127.0.0.[2;10;11]*2",
234 "bl.spamcop.net*2",
235 "psbl.surriel.com*2",
236 "bl.mailspike.net=127.0.0.12*2",
237 "bl.spameatingmonkey.net*2",
238 "db.wpbl.info*2",
239 "dnsbl.sorbs.net*1",
240 "dnsbl-1.uceprotect.net*1",
241 "hostkarma.junkemailfilter.com=127.0.0.2*1",
242 "ubl.unsubscore.com*1",
243 "dnsbl.zapbl.net*1" ]
244 ++ "\"]"