Begin to add some lookup functions (just experimenting) to Network.DNS.RBL.
[dead/harbl.git] / src / Network / DNS / RBL / Site.hs
1 -- | This module contains the 'Site' data type representing one
2 -- blacklist with its associated return codes and weight. For
3 -- example, in Postfix's main.cf you might have,
4 --
5 -- postscreen_dnsbl_sites = bl.mailspike.net=127.0.0.[2;10;11]*2, ...
6 --
7 -- Here, the blacklist (a 'UserDomain') is \"bl.mailspike.net\", the
8 -- return code pattern is \"127.0.0.[2;10;11]\", and the weight is
9 -- \"2\".
10 --
11 module Network.DNS.RBL.Site (
12 Site(..),
13 Weight(..),
14 site_tests,
15 sites )
16 where
17
18 import Data.List ( intercalate )
19 import Test.Tasty ( TestTree, testGroup )
20 import Test.Tasty.HUnit ( (@?=), testCase )
21 import Text.Parsec (
22 (<|>),
23 char,
24 choice,
25 digit,
26 many1,
27 option,
28 optionMaybe,
29 parse,
30 sepBy1,
31 space,
32 try,
33 unexpected )
34 import Text.Parsec.String ( Parser )
35 import Text.Read ( readMaybe )
36
37 import Network.DNS.RBL.Domain ( UserDomain, user_domain )
38 import Network.DNS.RBL.IPv4Pattern ( IPv4Pattern, v4pattern )
39 import Network.DNS.RBL.Pretty ( Pretty(..) )
40
41
-- | The integer multiplier attached to a blacklist, for example the
-- \"2\" in \"bl.mailspike.net*2\". This is a thin wrapper around an
-- 'Int'.
--
newtype Weight =
  Weight Int
  deriving (Eq, Show)
43
-- | A 'Weight' is displayed as its bare integer: neither the
-- constructor name nor a leading asterisk is included.
--
instance Pretty Weight where
  pretty_show (Weight multiplier) = show multiplier
46
47
-- | Parser for the optional weight multiplier that may follow a site:
-- an asterisk, an optional sign, and one or more digits.
--
-- ==== _Examples_
--
-- >>> import Text.Parsec ( parseTest )
--
-- Negative, zero, and positive integers are all accepted:
--
-- >>> parseTest weight "*-5"
-- Weight (-5)
--
-- >>> parseTest weight "*0"
-- Weight 0
--
-- >>> parseTest weight "*17"
-- Weight 17
--
-- An empty weight defaults to @1@:
--
-- >>> parseTest weight ""
-- Weight 1
--
-- The same default is produced whenever the explicit form does not
-- parse; in that case no input is consumed:
--
-- >>> parseTest weight "*hello"
-- Weight 1
--
-- The 'Pretty' instance works as intended:
--
-- >>> import Text.Parsec ( parse )
-- >>> pretty_print $ parse weight "" "*3"
-- 3
--
weight :: Parser Weight
weight = option (Weight 1) (try explicit_weight)
  where
    -- The explicit form. It is wrapped in "try" above so that a
    -- partial match (say, a lone asterisk) is rewound before we fall
    -- back to the default.
    explicit_weight = do
      _ <- char '*'
      sign_char <- option '+' (char '-' <|> char '+')
      digits <- many1 digit
      let apply_sign = if sign_char == '-' then negate else id
      -- "many1 digit" hands us a non-empty string of digits, so the
      -- conversion to an Int should never fail. Oversized input
      -- overflows instead of failing, so the first branch below is
      -- not expected to be reached.
      maybe (unexpected "weight: readMaybe failed on a sequence of digits!")
            (return . Weight . apply_sign)
            (readMaybe digits)
95
96
97
-- | A single DNSBL in the form it would be given to postfix. The
-- three fields are, in order:
--
-- * the (DNS) name of the blacklist;
--
-- * the pattern of addresses that count as a \"hit\", if one was
-- supplied;
--
-- * the weight multiplier.
--
data Site =
  Site
    UserDomain
    (Maybe IPv4Pattern)
    Weight
103
104
-- | Pretty-print a DNSBL site as its domain, then \"=\" and the
-- return-code pattern when there is one, then \"*\" and the weight.
--
-- The only subtle part is the weight. A missing weight defaults to
-- @1@ when parsing, so a 'Site' does not record whether the user
-- left the weight off or explicitly wrote the \"*1\" multiplier.
--
-- This output isn't really user-facing, so rather than pass around a
-- @Maybe Weight@ we simply pick one behavior: the multiplier is
-- always printed, even when it is @1@.
--
instance Pretty Site where
  pretty_show (Site domain codes multiplier) =
    concat [ pretty_show domain, shown_codes, "*", pretty_show multiplier ]
    where
      -- Empty when no return-code pattern was given.
      shown_codes = maybe "" (("=" ++) . pretty_show) codes
122
123
-- | Parser for one 'Site': a domain, optionally followed by \"=\" and
-- a return-code pattern, followed by a 'weight'.
--
-- ==== _Examples_
--
-- >>> import Text.Parsec ( parse )
--
-- >>> let spamhaus = "zen.spamhaus.org*3"
-- >>> pretty_print $ parse site "" spamhaus
-- zen.spamhaus.org*3
--
-- >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
-- >>> pretty_print $ parse site "" mailspike
-- bl.mailspike.net=127.0.0.[2;10;11]*2
--
-- When no weight is given it defaults to \"1\", and that default
-- shows up in the pretty-printed result:
--
-- >>> let hostkarma = "hostkarma.junkemailfilter.com=127.0.0.2"
-- >>> pretty_print $ parse site "" hostkarma
-- hostkarma.junkemailfilter.com=127.0.0.2*1
--
-- >>> let ubl = "ubl.unsubscore.com"
-- >>> pretty_print $ parse site "" ubl
-- ubl.unsubscore.com*1
--
site :: Parser Site
site = do
  blacklist <- user_domain
  codes <- return_codes
  multiplier <- weight
  return (Site blacklist codes multiplier)
  where
    -- The return codes are optional; when present they are
    -- introduced by an equals sign.
    return_codes = optionMaybe (char '=' >> v4pattern)
155
156
-- | Parser for one or more 'Site's. Consecutive sites are separated
-- by a non-empty run of commas and/or whitespace characters.
--
-- ==== _Examples_
--
-- >>> import Text.Parsec ( parse )
--
-- Any combination of comma/spaces can be used as a separator:
--
-- >>> let spamhaus = "zen.spamhaus.org*3"
-- >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
-- >>> let bl_list = spamhaus ++ "," ++ mailspike
-- >>> pretty_print $ parse sites "" bl_list
-- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
-- >>> let bl_list = spamhaus ++ " , " ++ mailspike
-- >>> pretty_print $ parse sites "" bl_list
-- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
-- >>> let bl_list = spamhaus ++ " " ++ mailspike
-- >>> pretty_print $ parse sites "" bl_list
-- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
--
-- Any whitespace, in fact, should work:
--
-- >>> let spamhaus = "zen.spamhaus.org*3"
-- >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
-- >>> let bl_list = spamhaus ++ "\n,\t \t\r" ++ mailspike
-- >>> pretty_print $ parse sites "" bl_list
-- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
--
sites :: Parser [Site]
sites = sepBy1 site separator
  where
    -- One or more characters, each either a comma or whitespace.
    separator = many1 (char ',' <|> space)
188
189
190
191 -- * Tests
192
-- | The tasty test group collecting every test defined in this
-- module.
--
site_tests :: TestTree
site_tests = testGroup "Site tests" [ test_full_maincf_sites_parsed ]
198
199
-- | Parse a sample \"postscreen_dnsbl_sites\" value taken from a real
-- main.cf as a list of 'Site's, and compare the pretty-printed
-- result against the string we expect.
--
test_full_maincf_sites_parsed :: TestTree
test_full_maincf_sites_parsed =
  testCase "a full main.cf list of postscreen_dnsbl_sites is parsed" $
    -- Comparing the pretty-printed parse result (success or failure)
    -- is crude, but good enough for a test.
    pretty_show (parse sites "" input) @?= expected
  where
    -- The entries are joined the way they might be laid out in
    -- main.cf: a comma, then a newline and a tab.
    input = intercalate ",\n\t" [
      "zen.spamhaus.org*3",
      "b.barracudacentral.org*3",
      "sip.invaluement.invalid*3",
      "jerks.viabit.com*3",
      "bl.mailspike.net=127.0.0.[2;10;11]*2",
      "bl.spamcop.net*2",
      "psbl.surriel.com*2",
      "bl.mailspike.net=127.0.0.12*2",
      "bl.spameatingmonkey.net*2",
      "db.wpbl.info*2",
      "dnsbl.sorbs.net",
      "dnsbl-1.uceprotect.net",
      "hostkarma.junkemailfilter.com=127.0.0.2",
      "ubl.unsubscore.com",
      "dnsbl.zapbl.net" ]

    -- The "one" multipliers should have been filled in. Calling
    -- "show" on the list supplies the brackets, the quotation marks,
    -- and the commas that we expect around the entries.
    expected = show [
      "zen.spamhaus.org*3",
      "b.barracudacentral.org*3",
      "sip.invaluement.invalid*3",
      "jerks.viabit.com*3",
      "bl.mailspike.net=127.0.0.[2;10;11]*2",
      "bl.spamcop.net*2",
      "psbl.surriel.com*2",
      "bl.mailspike.net=127.0.0.12*2",
      "bl.spameatingmonkey.net*2",
      "db.wpbl.info*2",
      "dnsbl.sorbs.net*1",
      "dnsbl-1.uceprotect.net*1",
      "hostkarma.junkemailfilter.com=127.0.0.2*1",
      "ubl.unsubscore.com*1",
      "dnsbl.zapbl.net*1" ]