ef0df315d678a58782f343df2f9d9abe8a58bcef
[dead/harbl.git] / harbl / src / Network / DNS / RBL / Site.hs
1 -- | This module contains the 'Site' data type representing one
2 -- blacklist with its associated return codes and weight. For
3 -- example, in Postfix's main.cf you might have,
4 --
5 -- postscreen_dnsbl_sites = bl.mailspike.net=127.0.0.[2;10;11]*2, ...
6 --
7 -- Here, the blacklist (a 'Host') is \"bl.mailspike.net\", the
8 -- return code pattern is \"127.0.0.[2;10;11]\", and the weight is
9 -- \"2".
10 --
11 module Network.DNS.RBL.Site (
12 Site(..),
13 site_tests,
14 sites )
15 where
16
17 import Data.List ( intercalate )
18 import Test.Tasty ( TestTree, testGroup )
19 import Test.Tasty.HUnit ( (@?=), testCase )
20 import Text.Parsec (
21 (<|>),
22 char,
23 choice,
24 digit,
25 many1,
26 option,
27 optionMaybe,
28 parse,
29 sepBy1,
30 space,
31 try,
32 unexpected )
33 import Text.Parsec.String ( Parser )
34 import Text.Read ( readMaybe )
35
36 import Network.DNS.RBL.Host ( Host, host )
37 import Network.DNS.RBL.IPv4Pattern ( IPv4Pattern, v4pattern )
38 import Network.DNS.RBL.Pretty ( Pretty(..) )
39
40
41 newtype Weight = Weight Int deriving (Eq, Show)
42
43 instance Pretty Weight where
44 pretty_show (Weight w) = show w
45
46
47 -- | Parse the weight multiplier at the end of a site.
48 --
49 -- ==== _Examples_
50 --
51 -- >>> import Text.Parsec ( parseTest )
52 --
53 -- Negative, zero, and positive integers are all supported:
54 --
55 -- >>> parseTest weight "*-5"
56 -- Weight (-5)
57 --
58 -- >>> parseTest weight "*0"
59 -- Weight 0
60 --
61 -- >>> parseTest weight "*17"
62 -- Weight 17
63 --
64 -- If the weight is empty, it defaults to @1@:
65 --
66 -- >>> parseTest weight ""
67 -- Weight 1
68 --
69 -- The default is used whenever parsing fails:
70 --
71 -- >>> parseTest weight "*hello"
72 -- Weight 1
73 --
74 -- The 'Pretty' instance works as intended:
75 --
76 -- >>> import Text.Parsec ( parse )
77 -- >>> pretty_print $ parse weight "" "*3"
78 -- 3
79 --
80 weight :: Parser Weight
81 weight = try parse_weight <|> return (Weight 1)
82 where
83 parse_weight = do
84 _ <- char '*'
85 sign <- (char '-') <|> (option '+' (char '+'))
86 w <- many1 digit
87 case ( readMaybe w :: Maybe Int ) of
88 -- If "many1 digit" gives us a list of digits, we should be able
89 -- to convert that to an Int! It will overflow rather than fail
90 -- if the input is too big/small, so it should really always
91 -- succeed.
92 Nothing -> unexpected "weight: readMaybe failed on a sequence of digits!"
93 Just k -> return $ Weight (if sign == '-' then negate k else k)
94
95
96
97 -- | A DNSBL as it would be input into postfix. It has a blacklist
98 -- (DNS) name, a pattern of addresses to use for a \"hit\", and a
99 -- weight multiplier.
100 --
101 data Site = Site Host (Maybe IPv4Pattern) Weight
102
103
104 -- | Pretty print DNSBL sites. This is straightforward except for the
105 -- weight. We default to a weight of @1@, but this leaves us with a
106 -- choice. If the user leaves off the weight, do we want to
107 -- pretty-print it as @1@? How about if we explicitly writes the
108 -- \"*1\" multiplier?
109 --
110 -- The pretty-printing isn't user-facing, really, so it makes sense
111 -- to just choose one of these behaviors rather than pass around a
112 -- @Maybe Weight@. We always print the multiplier, even when it's @1@.
113 --
114 instance Pretty Site where
115 pretty_show (Site d p w) =
116 (pretty_show d) ++ pattern_string ++ "*" ++ (pretty_show w)
117 where
118 pattern_string = case p of
119 Nothing -> ""
120 Just pat -> "=" ++ pretty_show pat
121
122
123 -- | Parse a single 'Site'.
124 --
125 -- ==== _Examples_
126 --
127 -- >>> import Text.Parsec ( parse )
128 --
129 -- >>> let spamhaus = "zen.spamhaus.org*3"
130 -- >>> pretty_print $ parse site "" spamhaus
131 -- zen.spamhaus.org*3
132 --
133 -- >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
134 -- >>> pretty_print $ parse site "" mailspike
135 -- bl.mailspike.net=127.0.0.[2;10;11]*2
136 --
137 -- If the weight is left unspecified, it defaults to \"1\" which is
138 -- then printed:
139 --
140 -- >>> let hostkarma = "hostkarma.junkemailfilter.com=127.0.0.2"
141 -- >>> pretty_print $ parse site "" hostkarma
142 -- hostkarma.junkemailfilter.com=127.0.0.2*1
143 --
144 -- >>> let ubl = "ubl.unsubscore.com"
145 -- >>> pretty_print $ parse site "" ubl
146 -- ubl.unsubscore.com*1
147 --
148 site :: Parser Site
149 site = do
150 d <- host
151 return_codes <- optionMaybe $ char '=' >> v4pattern
152 w <- weight
153 return $ Site d return_codes w
154
155
156 -- | Parse more than one 'Site', separated by commas and/or
157 -- whitespace.
158 --
159 -- ==== _Examples_
160 --
161 -- >>> import Text.Parsec ( parse )
162 --
163 -- Any combination of comma/spaces can be used as a separator:
164 --
165 -- >>> let spamhaus = "zen.spamhaus.org*3"
166 -- >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
167 -- >>> let bl_list = spamhaus ++ "," ++ mailspike
168 -- >>> pretty_print $ parse sites "" bl_list
169 -- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
170 -- >>> let bl_list = spamhaus ++ " , " ++ mailspike
171 -- >>> pretty_print $ parse sites "" bl_list
172 -- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
173 -- >>> let bl_list = spamhaus ++ " " ++ mailspike
174 -- >>> pretty_print $ parse sites "" bl_list
175 -- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
176 --
177 -- Any whitespace, in fact, should work:
178 --
179 -- >>> let spamhaus = "zen.spamhaus.org*3"
180 -- >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
181 -- >>> let bl_list = spamhaus ++ "\n,\t \t\r" ++ mailspike
182 -- >>> pretty_print $ parse sites "" bl_list
183 -- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
184 --
185 sites :: Parser [Site]
186 sites = site `sepBy1` many1 (choice [char ',', space])
187
188
189
190 -- * Tests
191
192 site_tests :: TestTree
193 site_tests =
194 testGroup
195 "Site tests"
196 [ test_full_maincf_sites_parsed ]
197
198
199 -- | This is a sample \"postscreen_dnsbl_sites\" from a real main.cf.
200 -- We should be able to parse it as a list of 'Site's.
201 --
202 test_full_maincf_sites_parsed :: TestTree
203 test_full_maincf_sites_parsed =
204 testCase "a full main.cf list of postscreen_dnsbl_sites is parsed" $ do
205 -- Whatever, it's a test.
206 let actual = pretty_show $ parse sites "" input
207 actual @?= expected
208 where
209 input = intercalate ",\n\t" [
210 "zen.spamhaus.org*3",
211 "b.barracudacentral.org*3",
212 "sip.invaluement.invalid*3",
213 "jerks.viabit.com*3",
214 "bl.mailspike.net=127.0.0.[2;10;11]*2",
215 "bl.spamcop.net*2",
216 "psbl.surriel.com*2",
217 "bl.mailspike.net=127.0.0.12*2",
218 "bl.spameatingmonkey.net*2",
219 "db.wpbl.info*2",
220 "dnsbl.sorbs.net",
221 "dnsbl-1.uceprotect.net",
222 "hostkarma.junkemailfilter.com=127.0.0.2",
223 "ubl.unsubscore.com",
224 "dnsbl.zapbl.net" ]
225
226 -- We expect the "one" multipliers to have been added, and the
227 -- quotation marks to be added...
228 expected = "[\"" ++
229 intercalate "\",\"" [
230 "zen.spamhaus.org*3",
231 "b.barracudacentral.org*3",
232 "sip.invaluement.invalid*3",
233 "jerks.viabit.com*3",
234 "bl.mailspike.net=127.0.0.[2;10;11]*2",
235 "bl.spamcop.net*2",
236 "psbl.surriel.com*2",
237 "bl.mailspike.net=127.0.0.12*2",
238 "bl.spameatingmonkey.net*2",
239 "db.wpbl.info*2",
240 "dnsbl.sorbs.net*1",
241 "dnsbl-1.uceprotect.net*1",
242 "hostkarma.junkemailfilter.com=127.0.0.2*1",
243 "ubl.unsubscore.com*1",
244 "dnsbl.zapbl.net*1" ]
245 ++ "\"]"