Separate the Network.DNS.RBL.Weight module and fix the doctests.
[dead/harbl.git] / harbl / src / Network / DNS / RBL / Site.hs
1 -- | This module contains the 'Site' data type representing one
2 -- blacklist with its associated return codes and weight. For
3 -- example, in Postfix's main.cf you might have,
4 --
5 -- postscreen_dnsbl_sites = bl.mailspike.net=127.0.0.[2;10;11]*2, ...
6 --
7 -- Here, the blacklist (a 'Host') is \"bl.mailspike.net\", the
8 -- return code pattern is \"127.0.0.[2;10;11]\", and the weight is
9 -- \"2".
10 --
11 module Network.DNS.RBL.Site (
12 Site(..),
13 site_tests,
14 sites )
15 where
16
17 import Data.List ( intercalate )
18 import Test.Tasty ( TestTree, testGroup )
19 import Test.Tasty.HUnit ( (@?=), testCase )
20 import Text.Parsec (
21 char,
22 choice,
23 many1,
24 optionMaybe,
25 parse,
26 sepBy1,
27 space )
28 import Text.Parsec.String ( Parser )
29
30 import Network.DNS.RBL.Host ( Host, host )
31 import Network.DNS.RBL.IPv4Pattern ( IPv4Pattern, v4pattern )
32 import Network.DNS.RBL.Weight ( Weight, weight )
33 import Network.DNS.RBL.Pretty ( Pretty(..) )
34
35
36 -- | A DNSBL as it would be input into postfix. It has a blacklist
37 -- (DNS) name, a pattern of addresses to use for a \"hit\", and a
38 -- weight multiplier.
39 --
40 data Site = Site Host (Maybe IPv4Pattern) Weight
41
42
43 -- | Pretty print DNSBL sites. This is straightforward except for the
44 -- weight. We default to a weight of @1@, but this leaves us with a
45 -- choice. If the user leaves off the weight, do we want to
46 -- pretty-print it as @1@? How about if we explicitly writes the
47 -- \"*1\" multiplier?
48 --
49 -- The pretty-printing isn't user-facing, really, so it makes sense
50 -- to just choose one of these behaviors rather than pass around a
51 -- @Maybe Weight@. We always print the multiplier, even when it's @1@.
52 --
53 instance Pretty Site where
54 pretty_show (Site d p w) =
55 (pretty_show d) ++ pattern_string ++ "*" ++ (pretty_show w)
56 where
57 pattern_string = case p of
58 Nothing -> ""
59 Just pat -> "=" ++ pretty_show pat
60
61
62 -- | Parse a single 'Site'.
63 --
64 -- ==== _Examples_
65 --
66 -- >>> import Text.Parsec ( parse )
67 --
68 -- >>> let spamhaus = "zen.spamhaus.org*3"
69 -- >>> pretty_print $ parse site "" spamhaus
70 -- zen.spamhaus.org*3
71 --
72 -- >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
73 -- >>> pretty_print $ parse site "" mailspike
74 -- bl.mailspike.net=127.0.0.[2;10;11]*2
75 --
76 -- If the weight is left unspecified, it defaults to \"1\" which is
77 -- then printed:
78 --
79 -- >>> let hostkarma = "hostkarma.junkemailfilter.com=127.0.0.2"
80 -- >>> pretty_print $ parse site "" hostkarma
81 -- hostkarma.junkemailfilter.com=127.0.0.2*1
82 --
83 -- >>> let ubl = "ubl.unsubscore.com"
84 -- >>> pretty_print $ parse site "" ubl
85 -- ubl.unsubscore.com*1
86 --
87 site :: Parser Site
88 site = do
89 d <- host
90 return_codes <- optionMaybe $ char '=' >> v4pattern
91 w <- weight
92 return $ Site d return_codes w
93
94
95 -- | Parse more than one 'Site', separated by commas and/or
96 -- whitespace.
97 --
98 -- ==== _Examples_
99 --
100 -- >>> import Text.Parsec ( parse )
101 --
102 -- Any combination of comma/spaces can be used as a separator:
103 --
104 -- >>> let spamhaus = "zen.spamhaus.org*3"
105 -- >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
106 -- >>> let bl_list = spamhaus ++ "," ++ mailspike
107 -- >>> pretty_print $ parse sites "" bl_list
108 -- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
109 -- >>> let bl_list = spamhaus ++ " , " ++ mailspike
110 -- >>> pretty_print $ parse sites "" bl_list
111 -- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
112 -- >>> let bl_list = spamhaus ++ " " ++ mailspike
113 -- >>> pretty_print $ parse sites "" bl_list
114 -- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
115 --
116 -- Any whitespace, in fact, should work:
117 --
118 -- >>> let spamhaus = "zen.spamhaus.org*3"
119 -- >>> let mailspike = "bl.mailspike.net=127.0.0.[2;10;11]*2"
120 -- >>> let bl_list = spamhaus ++ "\n,\t \t\r" ++ mailspike
121 -- >>> pretty_print $ parse sites "" bl_list
122 -- ["zen.spamhaus.org*3","bl.mailspike.net=127.0.0.[2;10;11]*2"]
123 --
124 sites :: Parser [Site]
125 sites = site `sepBy1` many1 (choice [char ',', space])
126
127
128
129 -- * Tests
130
131 site_tests :: TestTree
132 site_tests =
133 testGroup
134 "Site tests"
135 [ test_full_maincf_sites_parsed ]
136
137
138 -- | This is a sample \"postscreen_dnsbl_sites\" from a real main.cf.
139 -- We should be able to parse it as a list of 'Site's.
140 --
141 test_full_maincf_sites_parsed :: TestTree
142 test_full_maincf_sites_parsed =
143 testCase "a full main.cf list of postscreen_dnsbl_sites is parsed" $ do
144 -- Whatever, it's a test.
145 let actual = pretty_show $ parse sites "" input
146 actual @?= expected
147 where
148 input = intercalate ",\n\t" [
149 "zen.spamhaus.org*3",
150 "b.barracudacentral.org*3",
151 "sip.invaluement.invalid*3",
152 "jerks.viabit.com*3",
153 "bl.mailspike.net=127.0.0.[2;10;11]*2",
154 "bl.spamcop.net*2",
155 "psbl.surriel.com*2",
156 "bl.mailspike.net=127.0.0.12*2",
157 "bl.spameatingmonkey.net*2",
158 "db.wpbl.info*2",
159 "dnsbl.sorbs.net",
160 "dnsbl-1.uceprotect.net",
161 "hostkarma.junkemailfilter.com=127.0.0.2",
162 "ubl.unsubscore.com",
163 "dnsbl.zapbl.net" ]
164
165 -- We expect the "one" multipliers to have been added, and the
166 -- quotation marks to be added...
167 expected = "[\"" ++
168 intercalate "\",\"" [
169 "zen.spamhaus.org*3",
170 "b.barracudacentral.org*3",
171 "sip.invaluement.invalid*3",
172 "jerks.viabit.com*3",
173 "bl.mailspike.net=127.0.0.[2;10;11]*2",
174 "bl.spamcop.net*2",
175 "psbl.surriel.com*2",
176 "bl.mailspike.net=127.0.0.12*2",
177 "bl.spameatingmonkey.net*2",
178 "db.wpbl.info*2",
179 "dnsbl.sorbs.net*1",
180 "dnsbl-1.uceprotect.net*1",
181 "hostkarma.junkemailfilter.com=127.0.0.2*1",
182 "ubl.unsubscore.com*1",
183 "dnsbl.zapbl.net*1" ]
184 ++ "\"]"