Remove a TODO that turned out to be more trouble than it's worth.
[dead/harbl.git] / src / Network / DNS / RBL / IPv4Pattern.hs
1 -- | An IPv4 address pattern has four fields separated by ".". Each
2 -- field is either a decimal number, or a sequence inside "[]" that
3 -- contains one or more ";"-separated decimal numbers or
4 -- number..number ranges.
5 --
6 -- Thus, any pattern field can be a sequence inside "[]", but a "[]"
7 -- sequence cannot span multiple address fields, and a pattern field
8 -- cannot contain both a number and a "[]" sequence at the same
9 -- time.
10 --
11 -- This means that the pattern 1.2.[3.4] is not valid (the sequence
12 -- [3.4] cannot span two address fields) and the pattern
13 -- 1.2.3.3[6..9] is also not valid (the last field cannot be both
14 -- number 3 and sequence [6..9] at the same time).
15 --
16 -- The syntax for IPv4 patterns is as follows:
17 --
18 -- v4pattern = v4field "." v4field "." v4field "." v4field
19 -- v4field = v4octet | "[" v4sequence "]"
20 -- v4octet = any decimal number in the range 0 through 255
21 -- v4sequence = v4seq_member | v4sequence ";" v4seq_member
22 -- v4seq_member = v4octet | v4octet ".." v4octet
23 --
24 module Network.DNS.RBL.IPv4Pattern (
25 IPv4Pattern,
26 addresses,
27 ipv4pattern_tests,
28 v4pattern)
29 where
30
31
32 import Test.Tasty ( TestTree, testGroup )
33 import Test.Tasty.HUnit ( (@?=), testCase )
34 import Text.Parsec (
35 (<|>),
36 char,
37 digit,
38 many1,
39 parse,
40 string,
41 try,
42 unexpected )
43 import Text.Parsec.String ( Parser )
44 import Text.Read ( readMaybe )
45
46 import Network.DNS.RBL.Pretty ( Pretty(..) )
47
48
49 -- * Octets
50
-- | A single IPv4 octet, i.e. an integer between @0@ and @255@
--   inclusive. The constructor itself does not enforce the range;
--   the 'v4octet' parser is what rejects out-of-range values. This
--   type corresponds to \"v4octet\" in the postscreen parser.
--
newtype IPv4Octet
  = IPv4Octet Int
  deriving (Eq, Show)


-- | An octet is pretty-printed as its plain decimal value.
instance Pretty IPv4Octet where
  pretty_show (IPv4Octet value) = show value
61
62
-- | Parse an IPv4 octet: one or more decimal digits whose value lies
--   between @0@ and @255@ inclusive. Fails if the parsed number is
--   outside of that range.
--
--   The digits are read as an (unbounded) 'Integer' rather than an
--   'Int'. Reading into an 'Int' silently wraps around on overflow,
--   which would let an absurdly long digit string wrap back into the
--   range @0..255@ and be accepted as a valid octet.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parseTest )
--
--   Standard octets are parsed correctly:
--
--   >>> parseTest v4octet "0"
--   IPv4Octet 0
--
--   >>> parseTest v4octet "127"
--   IPv4Octet 127
--
--   >>> parseTest v4octet "255"
--   IPv4Octet 255
--
--   Non-digit input throws an error:
--
--   >>> parseTest v4octet "Hello, World!"
--   parse error at (line 1, column 1):
--   unexpected "H"
--   expecting digit
--
--   If we're given an integer outside the range @0..255@ (i.e. not a
--   valid octet), we fail:
--
--   >>> parseTest v4octet "9000"
--   parse error at (line 1, column 5):
--   unexpected end of input
--   expecting digit
--   Octet "9000" must be between 0 and 255.
--
--   That includes numbers too large to fit in a machine 'Int', which
--   are reported with their true value rather than a wrapped one:
--
--   >>> parseTest v4octet "18446744073709551616"
--   parse error at (line 1, column 21):
--   unexpected end of input
--   expecting digit
--   Octet "18446744073709551616" must be between 0 and 255.
--
v4octet :: Parser IPv4Octet
v4octet = do
  s <- many1 digit
  case ( readMaybe s :: Maybe Integer ) of
    -- "many1 digit" hands us a non-empty string of digits, and any
    -- such string is a valid Integer literal, so this branch should
    -- never be taken. It is kept so that the parser stays total.
    Nothing -> unexpected "v4octet: readMaybe failed on a sequence of digits!"

    -- We got a number; make sure it's actually a representation of
    -- an octet before narrowing it down to an Int.
    Just k
      | 0 <= k && k <= 255 -> return (IPv4Octet (fromInteger k))
      | otherwise -> fail ("Octet \"" ++ (show k)
                             ++ "\" must be between 0 and 255.")
114
115
116
117
118 -- * Sequence members
119
120
-- | One member of an ipv4 \"sequence\": either a lone octet, or a
--   range given by a starting and an ending octet. This type
--   corresponds to \"v4seq_member\" in the postscreen parser.
--
data IPv4SequenceMember
  = IPv4SequenceMemberOctet IPv4Octet
  | IPv4SequenceMemberOctetRange IPv4Octet IPv4Octet
  deriving (Eq, Show)


-- | A lone octet is shown as-is; a range is shown as its two
--   endpoints joined by \"..\".
instance Pretty IPv4SequenceMember where
  pretty_show member =
    case member of
      IPv4SequenceMemberOctet single -> pretty_show single
      IPv4SequenceMemberOctetRange lower upper ->
        concat [ pretty_show lower, "..", pretty_show upper ]
136
137
-- | Parse an IPv4 \"sequence member\": either a start..end range of
--   octets, or a single octet. The range form is attempted first
--   (under 'try', so that it can backtrack), and the single-octet
--   form is the fallback.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parseTest )
--
--   >>> parseTest v4seq_member "127"
--   IPv4SequenceMemberOctet (IPv4Octet 127)
--
--   >>> parseTest v4seq_member "1..5"
--   IPv4SequenceMemberOctetRange (IPv4Octet 1) (IPv4Octet 5)
--
v4seq_member :: Parser IPv4SequenceMember
v4seq_member = try octet_range <|> single_octet
  where
    -- Two octets with a literal ".." between them.
    octet_range = do
      lower <- v4octet
      upper <- string ".." >> v4octet
      return (IPv4SequenceMemberOctetRange lower upper)

    -- A lone octet.
    single_octet = IPv4SequenceMemberOctet `fmap` v4octet
161
162
163
164 -- * Sequences
165
-- | An ipv4 \"sequence\": a non-empty chain of sequence members (see
--   'IPv4SequenceMember'). It is either one final member, or a
--   member followed by the rest of the sequence, which makes the
--   definition recursive. This type corresponds to \"v4sequence\" in
--   the postscreen parser.
--
data IPv4Sequence
  = IPv4SequenceSingleMember IPv4SequenceMember
  | IPv4SequenceOptions IPv4SequenceMember IPv4Sequence
  deriving (Eq, Show)


-- | The members of a sequence are shown separated by semicolons.
instance Pretty IPv4Sequence where
  pretty_show s =
    case s of
      IPv4SequenceSingleMember final -> pretty_show final
      IPv4SequenceOptions leading rest ->
        concat [ pretty_show leading, ";", pretty_show rest ]
182
183
-- | Parse an IPv4 \"sequence\", which is whatever is allowed between
--   square brackets. That can be any of:
--
--     * A single octet (number).
--     * A range of octets in start..end format.
--     * Several of the above, separated by semicolons.
--
--   The \"member, semicolon, rest of the sequence\" form is attempted
--   first under 'try'; if it fails we backtrack and parse a single
--   member instead.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parseTest )
--   >>> parseTest v4sequence "1"
--   IPv4SequenceSingleMember (IPv4SequenceMemberOctet (IPv4Octet 1))
--
--   >>> pretty_print $ parse v4sequence "" "1..2"
--   1..2
--
--   >>> pretty_print $ parse v4sequence "" "1..2;8"
--   1..2;8
--
v4sequence :: Parser IPv4Sequence
v4sequence = try member_then_rest <|> lone_member
  where
    -- A member, a semicolon, and then (recursively) another sequence.
    member_then_rest = do
      leading <- v4seq_member
      rest <- char ';' >> v4sequence
      return (IPv4SequenceOptions leading rest)

    -- The base case: exactly one member.
    lone_member = IPv4SequenceSingleMember `fmap` v4seq_member
214
215
216
217 -- * Fields
218
-- | One of the four dot-separated fields of an address pattern:
--   either a plain octet, or a bracketed sequence. This is the
--   \"v4field\" of the grammar given in the module introduction.
--
data IPv4Field
  = IPv4FieldOctet IPv4Octet
  | IPv4FieldSequence IPv4Sequence
  deriving (Eq, Show)


-- | A plain octet is shown as-is; a sequence is shown wrapped in
--   square brackets.
instance Pretty IPv4Field where
  pretty_show field =
    case field of
      IPv4FieldOctet octet -> pretty_show octet
      IPv4FieldSequence inner -> concat [ "[", pretty_show inner, "]" ]
226
227
-- | Parse an IPv4 \"field\": either a bare octet, or a 'v4sequence'
--   enclosed in square brackets.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parseTest )
--   >>> parseTest v4field "127"
--   IPv4FieldOctet (IPv4Octet 127)
--
--   >>> pretty_print $ parse v4field "" "[127]"
--   [127]
--
v4field :: Parser IPv4Field
v4field = plain_octet <|> bracketed_sequence
  where
    -- A field that is just a number.
    plain_octet = IPv4FieldOctet `fmap` v4octet

    -- An opening bracket, a sequence, and a closing bracket.
    bracketed_sequence = do
      inner <- char '[' >> v4sequence
      char ']' >> return (IPv4FieldSequence inner)
250
251
252
253 -- * Patterns
254
-- | A complete IPv4 address pattern, made up of exactly four fields
--   (see 'IPv4Field'). This is the \"v4pattern\" of the grammar given
--   in the module introduction.
--
data IPv4Pattern
  = IPv4Pattern IPv4Field IPv4Field IPv4Field IPv4Field
  deriving (Eq, Show)


-- | A pattern is shown as its four fields joined by periods.
instance Pretty IPv4Pattern where
  pretty_show (IPv4Pattern a b c d) =
    concat [ pretty_show a, "."
           , pretty_show b, "."
           , pretty_show c, "."
           , pretty_show d ]
268
269
-- | Parse an ipv4 address pattern: four fields with a period between
--   each pair of neighbors, where every field is either a simple
--   octet or a bracketed sequence.
--
--   See also: 'v4field', 'v4sequence'.
--
--   ==== _Examples_
--
--   >>> pretty_print $ parse v4pattern "" "127.0.0.1"
--   127.0.0.1
--
--   >>> pretty_print $ parse v4pattern "" "127.0.[1..3].1"
--   127.0.[1..3].1
--
--   >>> pretty_print $ parse v4pattern "" "127.0.[1..3;8].1"
--   127.0.[1..3;8].1
--
--   In the module intro, it is mentioned that this is invalid:
--
--   >>> import Text.Parsec ( parseTest )
--   >>> parseTest v4pattern "1.2.[3.4]"
--   parse error at (line 1, column 7):
--   unexpected "."
--   expecting digit or "]"
--
--   This one is /also/ invalid; however, this parser does not insist
--   on reaching the end of its input, so the valid part is parsed off
--   the front of it:
--
--   >>> pretty_print $ parse v4pattern "" "1.2.3.3[6..9]"
--   1.2.3.3
--
v4pattern :: Parser IPv4Pattern
v4pattern = do
  first <- v4field
  second <- dotted_field
  third <- dotted_field
  fourth <- dotted_field
  return (IPv4Pattern first second third fourth)
  where
    -- Every field after the first is preceded by a period.
    dotted_field = char '.' >> v4field
311
312
313
314 -- * Enumeration
315
-- | List the values denoted by an 'IPv4SequenceMember'. A lone octet
--   yields just itself; an octet range yields every value from its
--   start up to its end, in increasing order.
--
--   The result is a list of strings rather than integers, because
--   ultimately these get assembled into dotted-quad addresses, which
--   are themselves represented as strings.
--
--   ==== _Examples_
--
--   >>> let (Right r) = parse v4seq_member "" "127"
--   >>> sequence_members r
--   ["127"]
--
--   >>> let (Right r) = parse v4seq_member "" "127..135"
--   >>> sequence_members r
--   ["127","128","129","130","131","132","133","134","135"]
--
sequence_members :: IPv4SequenceMember -> [String]
sequence_members member =
  case member of
    IPv4SequenceMemberOctet (IPv4Octet n) -> [show n]
    IPv4SequenceMemberOctetRange (IPv4Octet lo) (IPv4Octet hi) ->
      map show [lo .. hi]
339
340
-- | List the values denoted by an ipv4 sequence. A sequence holding a
--   single member is handed off to 'sequence_members'; otherwise the
--   values of the leading member are followed by those of the
--   remainder of the sequence, computed recursively.
--
--   ==== _Examples_
--
--   >>> let (Right r) = parse v4sequence "" "1"
--   >>> sequences r
--   ["1"]
--
--   >>> let (Right r) = parse v4sequence "" "1..2"
--   >>> sequences r
--   ["1","2"]
--
--   >>> let (Right r) = parse v4sequence "" "1..3;4;5..9"
--   >>> sequences r
--   ["1","2","3","4","5","6","7","8","9"]
--
sequences :: IPv4Sequence -> [String]
sequences s =
  case s of
    IPv4SequenceSingleMember final -> sequence_members final
    IPv4SequenceOptions leading rest ->
      sequence_members leading ++ sequences rest
365
366
-- | List the values denoted by an 'IPv4Field'. A field holding a
--   plain 'IPv4Octet' yields that one number (via 'show'); a field
--   holding a sequence is enumerated with 'sequences'.
--
--   ==== _Examples_
--
--   >>> let (Right r) = parse v4field "" "1"
--   >>> fields r
--   ["1"]
--
--   >>> let (Right r) = parse v4field "" "[127..135]"
--   >>> fields r
--   ["127","128","129","130","131","132","133","134","135"]
--
fields :: IPv4Field -> [String]
fields field =
  case field of
    IPv4FieldOctet (IPv4Octet n) -> [show n]
    IPv4FieldSequence inner -> sequences inner
385
386
-- | Enumerate the addresses represented by a given 'IPv4Pattern'.
--
--   A pattern contains four fields, separated by period characters.
--   We list every combination in which the first octet is drawn from
--   the first field, the second octet from the second field, and so
--   on. Since 'fields' returns a list of 'String's, a list
--   comprehension with one generator per field does exactly that;
--   the last field varies fastest.
--
--   ==== _Examples_
--
--   A single address:
--
--   >>> let (Right r) = parse v4pattern "" "127.0.0.1"
--   >>> addresses r
--   ["127.0.0.1"]
--
--   Anything between 127.0.0.2 and 127.0.0.4, and either 127.0.0.10
--   or 127.0.0.11:
--
--   >>> let (Right r) = parse v4pattern "" "127.0.0.[2..4;10;11]"
--   >>> addresses r
--   ["127.0.0.2","127.0.0.3","127.0.0.4","127.0.0.10","127.0.0.11"]
--
addresses :: IPv4Pattern -> [String]
addresses (IPv4Pattern field1 field2 field3 field4) =
  [ a ++ "." ++ b ++ "." ++ c ++ "." ++ d
  | a <- fields field1
  , b <- fields field2
  , c <- fields field3
  , d <- fields field4 ]
418
419
420
421 -- * Tests
422
-- | The test tree exported by this module. It groups together all
--   of the test groups defined below.
ipv4pattern_tests :: TestTree
ipv4pattern_tests = testGroup group_name subtrees
  where
    group_name = "IPv4Pattern Tests"
    subtrees = [ v4octet_tests ]
426
427
-- | The group of unit tests for the 'v4octet' parser.
v4octet_tests :: TestTree
v4octet_tests = testGroup group_name cases
  where
    group_name = "v4octet tests"
    cases = [ test_v4octet_single_digit_parsed ]
433
-- | Check that the one-character input \"1\" is parsed by 'v4octet'
--   into the octet @1@.
--
--   Both outcomes of the parse are handled explicitly. If the parse
--   fails, the test fails with the parser's own error message rather
--   than with an uninformative pattern-match failure.
test_v4octet_single_digit_parsed :: TestTree
test_v4octet_single_digit_parsed =
  testCase "a single digit is parsed as a v4octet" $
    case parse v4octet "" "1" of
      -- 'fail' in IO throws, which the test framework reports as a
      -- failure of this test case.
      Left err -> fail ("v4octet failed to parse \"1\": " ++ show err)
      Right actual -> actual @?= expected
  where
    expected = IPv4Octet 1