]> gitweb.michael.orlitzky.com - dead/harbl.git/blob - src/IPv4Pattern.hs
Switch license to AGPL-3.
[dead/harbl.git] / src / IPv4Pattern.hs
1 {-# LANGUAGE FlexibleInstances #-}
2
3 -- | An IPv4 address pattern has four fields separated by ".". Each
4 -- field is either a decimal number, or a sequence inside "[]" that
5 -- contains one or more ";"-separated decimal numbers or
6 -- number..number ranges.
7 --
8 -- Thus, any pattern field can be a sequence inside "[]", but a "[]"
9 -- sequence cannot span multiple address fields, and a pattern field
10 -- cannot contain both a number and a "[]" sequence at the same
11 -- time.
12 --
13 -- This means that the pattern 1.2.[3.4] is not valid (the sequence
14 -- [3.4] cannot span two address fields) and the pattern
15 -- 1.2.3.3[6..9] is also not valid (the last field cannot be both
16 -- number 3 and sequence [6..9] at the same time).
17 --
18 -- The syntax for IPv4 patterns is as follows:
19 --
20 -- v4pattern = v4field "." v4field "." v4field "." v4field
21 -- v4field = v4octet | "[" v4sequence "]"
22 -- v4octet = any decimal number in the range 0 through 255
23 -- v4sequence = v4seq_member | v4sequence ";" v4seq_member
24 -- v4seq_member = v4octet | v4octet ".." v4octet
25 --
26 module IPv4Pattern
27 where
28
29
30 import Test.Tasty ( TestTree, testGroup )
31 import Test.Tasty.HUnit ( (@?=), testCase )
32 import Text.Parsec (
33 ParseError,
34 (<|>),
35 char,
36 digit,
37 many1,
38 parse,
39 string,
40 try,
41 unexpected )
42 import Text.Parsec.String ( Parser )
43 import Text.Read ( readMaybe )
44
45
-- | Types whose values can be rendered as a human-readable 'String'.
--
class Pretty a where
  -- | Render the given value as a readable 'String'.
  prettyshow :: a -> String

  -- | Print the 'prettyshow' rendering of the given value on its own
  --   line. Instances get this for free; only 'prettyshow' has to be
  --   defined.
  pp :: a -> IO ()
  pp x = putStrLn (prettyshow x)
53
54
-- | A 'Pretty' instance for the result of 'parse', so that the
--   outcome of a parse attempt can be pretty-printed without first
--   checking whether it succeeded. A failed parse is rendered with
--   the 'show' of its 'ParseError'; a successful one is rendered
--   with the 'prettyshow' of the parsed value.
--
instance Pretty a => Pretty (Either ParseError a) where
  prettyshow = either show prettyshow
64
65
66 -- * Octets
67
-- | An ipv4 octet, meant to hold an integer between @0@ and @255@
--   inclusive. The type itself does not enforce that range; the
--   'v4octet' parser does. This is the data type corresponding to a
--   \"v4octet\" in the postscreen parser.
--
newtype IPv4Octet = IPv4Octet Int deriving (Eq, Show)
74
75
-- | An octet is rendered as its bare decimal value.
instance Pretty IPv4Octet where
  prettyshow (IPv4Octet n) = show n
78
79
-- | Parse an IPv4 octet: a run of one or more decimal digits whose
--   numeric value lies between @0@ and @255@ inclusive. The parser
--   fails if the value is outside that range.
--
--   The digits are converted to an unbounded 'Integer' before the
--   range check. Converting them directly to an 'Int' would let an
--   overly long digit string wrap around and land back inside
--   @0..255@, where it would be accepted as a valid octet.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parseTest )
--
--   Standard octets are parsed correctly:
--
--   >>> parseTest v4octet "0"
--   IPv4Octet 0
--
--   >>> parseTest v4octet "127"
--   IPv4Octet 127
--
--   >>> parseTest v4octet "255"
--   IPv4Octet 255
--
--   Non-digit input throws an error:
--
--   >>> parseTest v4octet "Hello, World!"
--   parse error at (line 1, column 1):
--   unexpected "H"
--   expecting digit
--
--   If we're given an integer outside the range @0..255@ (i.e. not a
--   valid octet), we fail:
--
--   >>> parseTest v4octet "9000"
--   parse error at (line 1, column 5):
--   unexpected end of input
--   expecting digit
--   Octet "9000" must be between 0 and 255.
--
--   That includes numbers too large to fit in an 'Int':
--
--   >>> parseTest v4octet "18446744073709551616"
--   parse error at (line 1, column 21):
--   unexpected end of input
--   expecting digit
--   Octet "18446744073709551616" must be between 0 and 255.
--
v4octet :: Parser IPv4Octet
v4octet = do
  s <- many1 digit
  case ( readMaybe s :: Maybe Integer ) of
    -- "many1 digit" hands us a non-empty string of digits, and any
    -- such string is a valid Integer literal, so this branch should
    -- never be taken. It is kept so that the case stays total.
    Nothing -> unexpected "v4octet: readMaybe failed on a sequence of digits!"

    -- Check the range on the Integer, and only then narrow it to the
    -- Int stored in an IPv4Octet; the narrowing cannot overflow for
    -- a value already known to lie in 0..255.
    Just k
      | 0 <= k && k <= 255 -> return (IPv4Octet (fromInteger k))
      | otherwise -> fail ("Octet \"" ++ (show k)
                           ++ "\" must be between 0 and 255.")
131
132
133
134
135 -- * Sequence members
136
137
-- | An ipv4 \"sequence member\": one entry of a bracketed sequence.
--   It is either a lone octet or a range given by two octets. This
--   data type corresponds to \"v4seq_member\" in the postscreen
--   parser.
--
data IPv4SequenceMember
  -- | A single octet.
  = IPv4SequenceMemberOctet IPv4Octet
  -- | A range, written @start..end@, given by its two endpoints.
  | IPv4SequenceMemberOctetRange IPv4Octet IPv4Octet
  deriving (Eq, Show)
147
148
-- | A lone octet is rendered as itself; a range is rendered as its
--   two endpoints joined by \"..\".
instance Pretty IPv4SequenceMember where
  prettyshow member =
    case member of
      IPv4SequenceMemberOctet single -> prettyshow single
      IPv4SequenceMemberOctetRange lo hi ->
        concat [ prettyshow lo, "..", prettyshow hi ]
153
154
-- | Parse an IPv4 \"sequence member\": either a single octet, or a
--   @start..end@ range of octets (written like an enumeration in
--   Haskell).
--
--   The range form is attempted first, under 'try', so that when no
--   \"..\" follows the first octet we backtrack and parse that octet
--   on its own.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parseTest )
--
--   >>> parseTest v4seq_member "127"
--   IPv4SequenceMemberOctet (IPv4Octet 127)
--
--   >>> parseTest v4seq_member "1..5"
--   IPv4SequenceMemberOctetRange (IPv4Octet 1) (IPv4Octet 5)
--
v4seq_member :: Parser IPv4SequenceMember
v4seq_member = try range <|> single
  where
    -- Two octets with a literal ".." between them.
    range = IPv4SequenceMemberOctetRange <$> v4octet <* string ".." <*> v4octet

    -- A lone octet.
    single = IPv4SequenceMemberOctet <$> v4octet
178
179
180
181 -- * Sequences
182
-- | An ipv4 \"sequence\": a non-empty chain of sequence members (see
--   'IPv4SequenceMember'). It is either one member on its own, or a
--   member followed by another sequence, which makes the definition
--   recursive. This type corresponds to \"v4sequence\" in the
--   postscreen parser.
--
data IPv4Sequence
  -- | The last (or only) member of a sequence.
  = IPv4SequenceSingleMember IPv4SequenceMember
  -- | A member followed by the rest of the sequence.
  | IPv4SequenceOptions IPv4SequenceMember IPv4Sequence
  deriving (Eq, Show)
193
194
-- | The members of a sequence are rendered in order, joined by \";\".
instance Pretty IPv4Sequence where
  prettyshow s =
    case s of
      IPv4SequenceSingleMember member -> prettyshow member
      IPv4SequenceOptions member rest ->
        concat [ prettyshow member, ";", prettyshow rest ]
199
200
-- | Parse an IPv4 \"sequence\", which is whatever is allowed within
--   square brackets. It can be one of three things:
--
--     * An octet (number).
--     * A range of addresses in start..end format.
--     * An alternative, separated by a semicolon, where each side
--       contains one of the previous two options.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parseTest )
--   >>> parseTest v4sequence "1"
--   IPv4SequenceSingleMember (IPv4SequenceMemberOctet (IPv4Octet 1))
--
--   >>> pp $ parse v4sequence "" "1..2"
--   1..2
--
--   >>> pp $ parse v4sequence "" "1..2;8"
--   1..2;8
--
v4sequence :: Parser IPv4Sequence
v4sequence = try options <|> single -- Maybe sepBy is appropriate here?
  where
    -- A member, a semicolon, and then (recursively) another sequence.
    options = IPv4SequenceOptions <$> v4seq_member <* char ';' <*> v4sequence

    -- A member with nothing chained after it.
    single = IPv4SequenceSingleMember <$> v4seq_member
231
232
233
234 -- * Fields
235
-- | One of the four dot-separated fields of an IPv4 pattern. This
--   corresponds to \"v4field\" in the grammar at the top of this
--   module.
--
data IPv4Field
  -- | A plain octet.
  = IPv4FieldOctet IPv4Octet
  -- | A sequence, written inside square brackets.
  | IPv4FieldSequence IPv4Sequence
  deriving (Eq, Show)
238
239
-- | An octet field is rendered as the octet; a sequence field is
--   rendered as the sequence wrapped in square brackets.
instance Pretty IPv4Field where
  prettyshow field =
    case field of
      IPv4FieldOctet single -> prettyshow single
      IPv4FieldSequence s -> concat [ "[", prettyshow s, "]" ]
243
244
-- | Parse an IPv4 \"field\": either a plain octet, or a 'v4sequence'
--   enclosed in square brackets.
--
--   ==== _Examples_
--
--   >>> import Text.Parsec ( parseTest )
--   >>> parseTest v4field "127"
--   IPv4FieldOctet (IPv4Octet 127)
--
--   >>> pp $ parse v4field "" "[127]"
--   [127]
--
v4field :: Parser IPv4Field
v4field = plain <|> bracketed
  where
    -- A bare octet.
    plain = IPv4FieldOctet <$> v4octet

    -- "[", a sequence, and then "]"; only the sequence is kept.
    bracketed = IPv4FieldSequence <$> (char '[' *> v4sequence <* char ']')
267
268
269
270 -- * Patterns
271
-- | A complete IPv4 address pattern: its four fields, in the order
--   in which they are written. This corresponds to \"v4pattern\" in
--   the grammar at the top of this module.
--
data IPv4Pattern
  = IPv4Pattern IPv4Field IPv4Field IPv4Field IPv4Field
  deriving (Eq, Show)
275
276
-- | A pattern is rendered as its four fields, in order, joined by
--   periods.
instance Pretty IPv4Pattern where
  prettyshow (IPv4Pattern a b c d) =
    concat [ prettyshow a, ".", prettyshow b, ".", prettyshow c, ".", prettyshow d ]
285
286
-- | Parse an ipv4 address pattern: four fields separated by periods,
--   where each field is either a simple octet or a bracketed
--   sequence.
--
--   The parser does not insist that the input ends after the fourth
--   field, so trailing garbage is left unconsumed (see the last
--   example).
--
--   See also: 'v4field', 'v4sequence'.
--
--   ==== _Examples_
--
--   >>> pp $ parse v4pattern "" "127.0.0.1"
--   127.0.0.1
--
--   >>> pp $ parse v4pattern "" "127.0.[1..3].1"
--   127.0.[1..3].1
--
--   >>> pp $ parse v4pattern "" "127.0.[1..3;8].1"
--   127.0.[1..3;8].1
--
--   In the module intro, it is mentioned that this is invalid:
--
--   >>> import Text.Parsec ( parseTest )
--   >>> parseTest v4pattern "1.2.[3.4]"
--   parse error at (line 1, column 7):
--   unexpected "."
--   expecting digit or "]"
--
--   This one is /also/ invalid; however, we'll parse the valid part off
--   the front of it:
--
--   >>> pp $ parse v4pattern "" "1.2.3.3[6..9]"
--   1.2.3.3
--
v4pattern :: Parser IPv4Pattern
v4pattern =
  IPv4Pattern <$> v4field <* dot
              <*> v4field <* dot
              <*> v4field <* dot
              <*> v4field
  where
    -- The separator between two adjacent fields.
    dot = char '.'
328
329
330
331 -- * Enumeration
332
-- | Enumerate the members of an 'IPv4SequenceMember'. A lone octet
--   enumerates to just itself. An octet range enumerates to every
--   value from its start up to its end, in increasing order; a range
--   whose start is greater than its end enumerates to nothing.
--
--   The results are strings rather than integers because they are
--   ultimately pieced together into ipv4 addresses, which are
--   represented as strings (dotted quad format).
--
--   ==== _Examples_
--
--   >>> let (Right r) = parse v4seq_member "" "127"
--   >>> sequence_members r
--   ["127"]
--
--   >>> let (Right r) = parse v4seq_member "" "127..135"
--   >>> sequence_members r
--   ["127","128","129","130","131","132","133","134","135"]
--
sequence_members :: IPv4SequenceMember -> [String]
sequence_members member =
  case member of
    IPv4SequenceMemberOctet (IPv4Octet n) -> [show n]
    IPv4SequenceMemberOctetRange (IPv4Octet lo) (IPv4Octet hi) ->
      map show [lo .. hi]
356
357
-- | Enumerate the members of an ipv4 sequence. A sequence holding a
--   single member is handed to 'sequence_members'. Otherwise the
--   first member is enumerated with 'sequence_members' and the rest
--   of the sequence is enumerated recursively, with the two results
--   concatenated in that order.
--
--   ==== _Examples_
--
--   >>> let (Right r) = parse v4sequence "" "1"
--   >>> sequences r
--   ["1"]
--
--   >>> let (Right r) = parse v4sequence "" "1..2"
--   >>> sequences r
--   ["1","2"]
--
--   >>> let (Right r) = parse v4sequence "" "1..3;4;5..9"
--   >>> sequences r
--   ["1","2","3","4","5","6","7","8","9"]
--
sequences :: IPv4Sequence -> [String]
sequences s =
  case s of
    IPv4SequenceSingleMember member -> sequence_members member
    IPv4SequenceOptions member rest -> sequence_members member ++ sequences rest
382
383
-- | Enumerate the members of an 'IPv4Field'. A field made of a
--   single 'IPv4Octet' enumerates to the 'show' of that octet's
--   value. A field made of an 'IPv4FieldSequence' is enumerated with
--   'sequences'.
--
--   ==== _Examples_
--
--   >>> let (Right r) = parse v4field "" "1"
--   >>> fields r
--   ["1"]
--
--   >>> let (Right r) = parse v4field "" "[127..135]"
--   >>> fields r
--   ["127","128","129","130","131","132","133","134","135"]
--
fields :: IPv4Field -> [String]
fields field =
  case field of
    IPv4FieldOctet (IPv4Octet n) -> [show n]
    IPv4FieldSequence s -> sequences s
402
403
-- | Enumerate the addresses represented by a given 'IPv4Pattern'.
--
--   A pattern contains four fields, separated by period characters.
--   We list every combination in which the first octet comes from
--   the first field, the second octet from the second field, and so
--   on. Since 'fields' returns a list of 'String's for each field, a
--   list comprehension over the four lists yields exactly those
--   combinations, with the last field varying fastest.
--
--   ==== _Examples_
--
--   A single address:
--
--   >>> let (Right r) = parse v4pattern "" "127.0.0.1"
--   >>> addresses r
--   ["127.0.0.1"]
--
--   Anything between 127.0.0.2 and 127.0.0.4, and either 127.0.0.10
--   or 127.0.0.11:
--
--   >>> let (Right r) = parse v4pattern "" "127.0.0.[2..4;10;11]"
--   >>> addresses r
--   ["127.0.0.2","127.0.0.3","127.0.0.4","127.0.0.10","127.0.0.11"]
--
addresses :: IPv4Pattern -> [String]
addresses (IPv4Pattern field1 field2 field3 field4) =
  [ a ++ "." ++ b ++ "." ++ c ++ "." ++ d
  | a <- fields field1
  , b <- fields field2
  , c <- fields field3
  , d <- fields field4 ]
435
436
437
438 -- * Tests
439
-- | The group of unit tests for the 'v4octet' parser.
v4octet_tests :: TestTree
v4octet_tests = testGroup "v4octet tests" cases
  where
    cases = [ test_v4octet_single_digit_parsed ]
445
-- | A lone digit should be parsed as the octet having that value.
test_v4octet_single_digit_parsed :: TestTree
test_v4octet_single_digit_parsed =
  testCase "a single digit is parsed as a v4octet" $
    -- Handle a failed parse explicitly instead of binding the result
    -- with a partial @let (Right actual) = ...@ pattern. That way a
    -- parse failure is reported with the parser's own error message
    -- rather than as an irrefutable-pattern exception.
    either (fail . show) (@?= expected) (parse v4octet "" "1")
  where
    expected = IPv4Octet 1