Only parse valid octets (0 - 255).
[dead/harbl.git] / src / IPv4Pattern.hs
1 -- | An IPv4 address pattern has four fields separated by ".". Each
2 -- field is either a decimal number, or a sequence inside "[]" that
3 -- contains one or more ";"-separated decimal numbers or
4 -- number..number ranges.
5 --
6 -- Thus, any pattern field can be a sequence inside "[]", but a "[]"
7 -- sequence cannot span multiple address fields, and a pattern field
8 -- cannot contain both a number and a "[]" sequence at the same
9 -- time.
10 --
11 -- This means that the pattern 1.2.[3.4] is not valid (the sequence
12 -- [3.4] cannot span two address fields) and the pattern
13 -- 1.2.3.3[6..9] is also not valid (the last field cannot be both
14 -- number 3 and sequence [6..9] at the same time).
15 --
16 -- The syntax for IPv4 patterns is as follows:
17 --
18 -- v4pattern = v4field "." v4field "." v4field "." v4field
19 -- v4field = v4octet | "[" v4sequence "]"
20 -- v4octet = any decimal number in the range 0 through 255
21 -- v4sequence = v4seq_member | v4sequence ";" v4seq_member
22 -- v4seq_member = v4octet | v4octet ".." v4octet
23 --
24 module IPv4Pattern
25 where
26
27 import Test.Tasty ( TestTree, testGroup )
28 import Test.Tasty.HUnit ( (@?=), testCase )
29 import Text.Parsec
30 import Text.Parsec.String ( Parser )
31 import Text.Read ( readMaybe )
32
33
-- | A single decimal field of an IPv4 address, stored as a plain
--   'Int'. The constructor itself performs no range check; the
--   'v4octet' parser only ever builds values between @0@ and @255@.
newtype IPv4Octet
  = IPv4Octet Int
  deriving (Eq, Show)
36
-- | One entry of a bracketed sequence: either a lone octet, or a
--   @start..end@ range given by its two endpoint octets.
data IPv4SequenceMember
  = IPv4SequenceMemberOctet IPv4Octet
  | IPv4SequenceMemberOctetRange IPv4Octet IPv4Octet
  deriving (Eq, Show)
41
-- | The contents of a @[...]@ field: a non-empty, singly-linked chain
--   of sequence members. The last link is an
--   'IPv4SequenceSingleMember'; every earlier link is an
--   'IPv4SequenceOptions' holding one member and the remainder of the
--   chain.
data IPv4Sequence
  = IPv4SequenceSingleMember IPv4SequenceMember
  | IPv4SequenceOptions IPv4SequenceMember IPv4Sequence
  deriving (Eq, Show)
46
47
-- | One of the four dot-separated fields of a pattern: either a bare
--   octet, or a bracketed sequence of alternatives.
data IPv4Field
  = IPv4FieldOctet IPv4Octet
  | IPv4FieldSequence IPv4Sequence
  deriving (Eq, Show)
50
-- | A complete IPv4 address pattern: exactly four fields, listed in
--   the order in which they appear in the dotted notation.
data IPv4Pattern
  = IPv4Pattern IPv4Field IPv4Field IPv4Field IPv4Field
  deriving (Eq, Show)
54
55
-- | Parse an IPv4 \"sequence member\": either a single octet, or a
--   @start..end@ range of octets (written like a Haskell
--   enumeration).
--
--   ==== _Examples_
--
--   >>> parseTest v4seq_member "127"
--   IPv4SequenceMemberOctet (IPv4Octet 127)
--
--   >>> parseTest v4seq_member "1..5"
--   IPv4SequenceMemberOctetRange (IPv4Octet 1) (IPv4Octet 5)
--
v4seq_member :: Parser IPv4SequenceMember
v4seq_member = try octet_range <|> single_octet
  where
    -- The range form is attempted first, under 'try', because both
    -- forms begin with an octet: if no ".." follows, we must rewind
    -- and re-read that octet as a stand-alone member.
    octet_range =
      IPv4SequenceMemberOctetRange <$> v4octet <* string ".." <*> v4octet

    single_octet = IPv4SequenceMemberOctet <$> v4octet
77
78
-- | Parse an IPv4 \"sequence\", i.e. whatever is allowed within
--   square brackets. It can be one of three things:
--
--   * An octet (number).
--   * A range of octets in start..end format.
--   * A semicolon-separated list whose entries are each one of the
--     previous two options.
--
--   ==== _Examples_
--
--   >>> parseTest v4sequence "1"
--   IPv4SequenceSingleMember (IPv4SequenceMemberOctet (IPv4Octet 1))
--
--   >>> parseTest v4sequence "1..2"
--   IPv4SequenceSingleMember (IPv4SequenceMemberOctetRange (IPv4Octet 1) (IPv4Octet 2))
--
--   >>> parseTest v4sequence "1..2;8"
--   IPv4SequenceOptions (IPv4SequenceMemberOctetRange (IPv4Octet 1) (IPv4Octet 2)) (IPv4SequenceSingleMember (IPv4SequenceMemberOctet (IPv4Octet 8)))
--
v4sequence :: Parser IPv4Sequence
v4sequence = try member_then_rest <|> final_member
  where
    -- A member, a semicolon, and then (recursively) the rest of the
    -- sequence. Wrapped in 'try' above so that when no ';' (or no
    -- valid remainder) follows, we rewind and parse the member on
    -- its own instead.
    --
    -- NOTE(review): 'sepBy1' looks like a natural fit here, but it
    -- would presumably treat a dangling ';' as a hard error rather
    -- than rewinding as this parser does -- confirm before switching.
    member_then_rest =
      IPv4SequenceOptions <$> v4seq_member <* char ';' <*> v4sequence

    final_member = IPv4SequenceSingleMember <$> v4seq_member
108
109
-- | Parse an IPv4 \"field\": either a plain octet, or a 'v4sequence'
--   enclosed in square brackets.
--
--   ==== _Examples_
--
--   >>> parseTest v4field "127"
--   IPv4FieldOctet (IPv4Octet 127)
--
--   >>> parseTest v4field "[127]"
--   IPv4FieldSequence (IPv4SequenceSingleMember (IPv4SequenceMemberOctet (IPv4Octet 127)))
--
v4field :: Parser IPv4Field
v4field = bare_octet <|> bracketed
  where
    bare_octet = IPv4FieldOctet <$> v4octet

    -- No 'try' is needed around 'bare_octet': an octet starts with a
    -- digit and a bracketed sequence starts with '[', so the first
    -- alternative fails without consuming input whenever the second
    -- one applies.
    bracketed =
      IPv4FieldSequence <$> between (char '[') (char ']') v4sequence
131
132
-- | Parse an IPv4 octet, which should contain a string of digits.
--   Fails if the parsed integer does not lie between @0@ and @255@
--   inclusive.
--
--   The digits are read as an unbounded 'Integer' rather than as an
--   'Int'. Reading an over-long digit string as an 'Int' wraps around
--   instead of failing, which would allow a number such as
--   @18446744073709551616@ (2^64) to be accepted as the octet @0@.
--
--   ==== _Examples_
--
--   Standard octets are parsed correctly:
--
--   >>> parseTest v4octet "0"
--   IPv4Octet 0
--
--   >>> parseTest v4octet "127"
--   IPv4Octet 127
--
--   >>> parseTest v4octet "255"
--   IPv4Octet 255
--
--   Non-digit input throws an error:
--
--   >>> parseTest v4octet "Hello, World!"
--   parse error at (line 1, column 1):
--   unexpected "H"
--   expecting digit
--
--   If we're given an integer outside the range @0..255@ (i.e. not a
--   valid octet), we fail:
--
--   >>> parseTest v4octet "9000"
--   parse error at (line 1, column 5):
--   unexpected end of input
--   expecting digit
--   Octet "9000" must be between 0 and 255.
--
--   This includes numbers too large to fit in a machine 'Int':
--
--   >>> parseTest v4octet "18446744073709551616"
--   parse error at (line 1, column 21):
--   unexpected end of input
--   expecting digit
--   Octet "18446744073709551616" must be between 0 and 255.
--
v4octet :: Parser IPv4Octet
v4octet = do
  s <- many1 digit
  case (readMaybe s :: Maybe Integer) of
    -- "many1 digit" hands us a non-empty string of digits, and any
    -- such string is a valid 'Integer' literal, so this branch should
    -- never be taken. It is kept so that the match is total.
    Nothing -> unexpected "readMaybe failed on a sequence of digits!"

    -- We got a number; make sure it's actually a representation of an
    -- octet. The range check happens on the 'Integer', *before* the
    -- narrowing conversion, so nothing can wrap around into range.
    Just k
      | 0 <= k && k <= 255 -> return (IPv4Octet (fromInteger k))
      | otherwise ->
          fail ("Octet \"" ++ show k ++ "\" must be between 0 and 255.")
182
183
-- | Parse an IPv4 address pattern: four fields separated by periods,
--   where each field is either a simple octet or a bracketed
--   sequence.
--
--   This parser does not itself demand end-of-input after the fourth
--   field; anything that follows is left unconsumed.
--
--   See also: 'v4field', 'v4sequence'.
--
--   ==== _Examples_
--
--   >>> parseTest v4pattern "127.0.0.1"
--   IPv4Pattern (IPv4FieldOctet (IPv4Octet 127)) (IPv4FieldOctet (IPv4Octet 0)) (IPv4FieldOctet (IPv4Octet 0)) (IPv4FieldOctet (IPv4Octet 1))
--
v4pattern :: Parser IPv4Pattern
v4pattern =
  IPv4Pattern
    <$> v4field <* dot
    <*> v4field <* dot
    <*> v4field <* dot
    <*> v4field
  where
    -- The literal separator between two adjacent fields.
    dot = char '.'
205
206
-- | Enumerate the members of an 'IPv4SequenceMember'. A lone octet
--   yields just its own decimal representation; an octet range
--   yields every value from its start to its end, in ascending
--   order. A range whose start is greater than its end yields the
--   empty list.
--
--   We produce strings rather than integers because the end goal is
--   to list out IPv4 addresses, and those are represented as strings
--   (dotted quad format).
--
--   ==== _Examples_
--
--   >>> let (Right r) = parse v4seq_member "" "127"
--   >>> sequence_members r
--   ["127"]
--
--   >>> let (Right r) = parse v4seq_member "" "127..135"
--   >>> sequence_members r
--   ["127","128","129","130","131","132","133","134","135"]
--
sequence_members :: IPv4SequenceMember -> [String]
sequence_members member =
  case member of
    IPv4SequenceMemberOctet (IPv4Octet i) ->
      [show i]
    IPv4SequenceMemberOctetRange (IPv4Octet lo) (IPv4Octet hi) ->
      map show [lo .. hi]
230
231
-- | Enumerate the members of an IPv4 sequence. A single-member
--   sequence is handed straight to 'sequence_members'; an
--   \"options\" node contributes the expansion of its own member
--   followed by the (recursive) expansion of the rest of the
--   sequence.
--
--   ==== _Examples_
--
--   >>> let (Right r) = parse v4sequence "" "1"
--   >>> sequences r
--   ["1"]
--
--   >>> let (Right r) = parse v4sequence "" "1..2"
--   >>> sequences r
--   ["1","2"]
--
--   >>> let (Right r) = parse v4sequence "" "1..3;4;5..9"
--   >>> sequences r
--   ["1","2","3","4","5","6","7","8","9"]
--
sequences :: IPv4Sequence -> [String]
sequences sq =
  case sq of
    IPv4SequenceSingleMember member ->
      sequence_members member
    IPv4SequenceOptions member rest ->
      sequence_members member ++ sequences rest
256
257
-- | Enumerate every value that a single pattern field can take, as
--   decimal strings. A plain octet yields exactly one string; a
--   bracketed sequence yields whatever 'sequences' produces for it.
fields :: IPv4Field -> [String]
fields field =
  case field of
    IPv4FieldOctet (IPv4Octet i) -> [show i]
    IPv4FieldSequence sq -> sequences sq
261
262
-- | Enumerate every address matched by a pattern, in dotted-quad
--   string form. The result contains one entry for each combination
--   of the four fields' values; the last field varies fastest and
--   the first field slowest.
addresses :: IPv4Pattern -> [String]
addresses (IPv4Pattern field1 field2 field3 field4) =
  [ a ++ "." ++ b ++ "." ++ c ++ "." ++ d
  | a <- fields field1
  , b <- fields field2
  , c <- fields field3
  , d <- fields field4
  ]
270
271
272 -- Tests
273
-- | All of the unit tests for the 'v4octet' parser, collected into a
--   single named group.
v4octet_tests :: TestTree
v4octet_tests = testGroup "v4octet tests" cases
  where
    cases = [test_v4octet_single_digit_parsed]
279
-- | Check that the one-character input @\"1\"@ is parsed by 'v4octet'
--   as the octet @1@.
test_v4octet_single_digit_parsed :: TestTree
test_v4octet_single_digit_parsed =
  testCase "a single digit is parsed as a v4octet" $
    case parse v4octet "" "1" of
      -- Report the actual parse error rather than dying on a failed
      -- (partial) pattern match, so that a regression in the parser
      -- is easy to diagnose from the test output.
      Left err -> fail ("v4octet failed to parse \"1\": " ++ show err)
      Right actual -> actual @?= IPv4Octet 1