+-- | An IPv4 address pattern has four fields separated by ".". Each
+-- field is either a decimal number, or a sequence inside "[]" that
+-- contains one or more ";"-separated decimal numbers or
+-- number..number ranges.
+--
+-- Thus, any pattern field can be a sequence inside "[]", but a "[]"
+-- sequence cannot span multiple address fields, and a pattern field
+-- cannot contain both a number and a "[]" sequence at the same
+-- time.
+--
+-- This means that the pattern 1.2.[3.4] is not valid (the sequence
+-- [3.4] cannot span two address fields) and the pattern
+-- 1.2.3.3[6..9] is also not valid (the last field cannot be both
+-- number 3 and sequence [6..9] at the same time).
+--
+-- The syntax for IPv4 patterns is as follows:
+--
+-- v4pattern = v4field "." v4field "." v4field "." v4field
+-- v4field = v4octet | "[" v4sequence "]"
+-- v4octet = any decimal number in the range 0 through 255
+-- v4sequence = v4seq_member | v4sequence ";" v4seq_member
+-- v4seq_member = v4octet | v4octet ".." v4octet
+--
+module Network.DNS.RBL.IPv4Pattern (
+ IPv4Pattern,
+ addresses,
+ ipv4pattern_tests,
+ v4pattern)
+where
+
+
+import Test.Tasty ( TestTree, testGroup )
+import Test.Tasty.HUnit ( (@?=), testCase )
+import Text.Parsec (
+ (<|>),
+ char,
+ digit,
+ many1,
+ parse,
+ string,
+ try,
+ unexpected )
+import Text.Parsec.String ( Parser )
+import Text.Read ( readMaybe )
+
+import Network.DNS.RBL.Pretty ( Pretty(..) )
+
+
+-- * Octets
+
+-- | An ipv4 octet; that is, an integer between @0@ and @255@
+-- inclusive. This is the data type corresponding to a \"v4octet\"
+-- in the postscreen parser.
+--
+newtype IPv4Octet = IPv4Octet Int
+ deriving (Eq, Show)
+
+
+instance Pretty IPv4Octet where
+ pretty_show (IPv4Octet x) = show x
+
+
+-- | Parse an IPv4 octet, which should contain a string of digits.
+-- Should fail if the parsed integer does not lie between @0@ and
+-- @255@ inclusive.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parseTest )
+--
+-- Standard octets are parsed correctly:
+--
+-- >>> parseTest v4octet "0"
+-- IPv4Octet 0
+--
+-- >>> parseTest v4octet "127"
+-- IPv4Octet 127
+--
+-- >>> parseTest v4octet "255"
+-- IPv4Octet 255
+--
+-- Non-digit input throws an error:
+--
+-- >>> parseTest v4octet "Hello, World!"
+-- parse error at (line 1, column 1):
+-- unexpected "H"
+-- expecting digit
+--
+-- If we're given an integer outside the range @0..255@ (i.e. not a
+-- valid octet), we fail:
+--
+-- >>> parseTest v4octet "9000"
+-- parse error at (line 1, column 5):
+-- unexpected end of input
+-- expecting digit
+-- Octet "9000" must be between 0 and 255.
+--
+v4octet :: Parser IPv4Octet
+v4octet = do
+ s <- many1 digit
+ case ( readMaybe s :: Maybe Int ) of
+ -- If "many1 digit" gives us a list of digits, we should be able
+ -- to convert that to an Int! It will overflow rather than fail
+ -- if the input is too big/small, so it should really always
+ -- succeed.
+ Nothing -> unexpected "v4octet: readMaybe failed on a sequence of digits!"
+
+ -- If we got an Int, make sure it's actually a representation of
+ -- an octet.
+ Just k -> if 0 <= k && k <= 255
+ then return (IPv4Octet k)
+ else fail ("Octet \"" ++ (show k)
+ ++ "\" must be between 0 and 255.")
+
+
+
+
+-- * Sequence members
+
+
+-- | An ipv4 \"sequence member\". A sequence member is either an
+-- integer (an octet) or a range of integers (contained in an
+-- octet). This data type corresponds to \"v4seq_member\" in the
+-- postscreen parser.
+--
+data IPv4SequenceMember =
+ IPv4SequenceMemberOctet IPv4Octet
+ | IPv4SequenceMemberOctetRange IPv4Octet IPv4Octet
+ deriving (Eq, Show)
+
+
+instance Pretty IPv4SequenceMember where
+ pretty_show (IPv4SequenceMemberOctet octet) = pretty_show octet
+ pretty_show (IPv4SequenceMemberOctetRange octet1 octet2) =
+ (pretty_show octet1) ++ ".." ++ (pretty_show octet2)
+
+
+-- | Parse an IPv4 \"sequence member\". A sequence member is either an
+-- octet, or a start..end sequence (like an enumeration, in Haskell).
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parseTest )
+--
+-- >>> parseTest v4seq_member "127"
+-- IPv4SequenceMemberOctet (IPv4Octet 127)
+--
+-- >>> parseTest v4seq_member "1..5"
+-- IPv4SequenceMemberOctetRange (IPv4Octet 1) (IPv4Octet 5)
+--
+v4seq_member :: Parser IPv4SequenceMember
+v4seq_member = try both <|> just_one
+ where
+ both = do
+ oct1 <- v4octet
+ _ <- string ".."
+ oct2 <- v4octet
+ return $ IPv4SequenceMemberOctetRange oct1 oct2
+
+ just_one = fmap IPv4SequenceMemberOctet v4octet
+
+
+
+-- * Sequences
+
+-- | An ipv4 \"sequence\". A sequence contains either a single
+-- \"sequence member\" (see 'IPv4SequenceMember'), or a sequence
+-- member along with another sequence. So, this is a potentially
+-- recursive definition. This type corresponds to \"v4sequence\" in
+-- the postscreen parser.
+--
+data IPv4Sequence =
+ IPv4SequenceSingleMember IPv4SequenceMember
+ | IPv4SequenceOptions IPv4SequenceMember IPv4Sequence
+ deriving (Eq, Show)
+
+
+instance Pretty IPv4Sequence where
+ pretty_show (IPv4SequenceSingleMember member) = pretty_show member
+ pretty_show (IPv4SequenceOptions member subsequence) =
+ (pretty_show member) ++ ";" ++ (pretty_show subsequence)
+
+
+-- | Parse an IPv4 \"sequence\". A sequence is whatever is allowed
+-- within square brackets. Basically it can be three things:
+--
+-- * An octet (number).
+-- * A range of addresses in start..end format.
+-- * An alternative, separated by a semicolon, where each side
+-- contains one of the previous two options.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parseTest )
+-- >>> parseTest v4sequence "1"
+-- IPv4SequenceSingleMember (IPv4SequenceMemberOctet (IPv4Octet 1))
+--
+-- >>> pretty_print $ parse v4sequence "" "1..2"
+-- 1..2
+--
+-- >>> pretty_print $ parse v4sequence "" "1..2;8"
+-- 1..2;8
+--
+v4sequence :: Parser IPv4Sequence
+v4sequence = try both <|> just_one
+ where
+ both = do
+ sm <- v4seq_member
+ _ <- char ';'
+ s <- v4sequence
+ return $ IPv4SequenceOptions sm s
+
+ just_one = fmap IPv4SequenceSingleMember v4seq_member
+
+
+
+-- * Fields
+
+data IPv4Field = IPv4FieldOctet IPv4Octet | IPv4FieldSequence IPv4Sequence
+ deriving (Eq, Show)
+
+
+instance Pretty IPv4Field where
+ pretty_show (IPv4FieldOctet octet) = pretty_show octet
+ pretty_show (IPv4FieldSequence s) = "[" ++ (pretty_show s) ++ "]"
+
+
+-- | Parse an IPv4 \"field\", which is either a boring old octet, or a
+-- 'v4sequence' within square brackets.
+--
+-- ==== _Examples_
+--
+-- >>> import Text.Parsec ( parseTest )
+-- >>> parseTest v4field "127"
+-- IPv4FieldOctet (IPv4Octet 127)
+--
+-- >>> pretty_print $ parse v4field "" "[127]"
+-- [127]
+--
+v4field :: Parser IPv4Field
+v4field = just_octet <|> brackets
+ where
+ just_octet = fmap IPv4FieldOctet v4octet
+
+ brackets = do
+ _ <- char '['
+ s <- v4sequence
+ _ <- char ']'
+ return $ IPv4FieldSequence s
+
+
+
+-- * Patterns
+
+data IPv4Pattern =
+ IPv4Pattern IPv4Field IPv4Field IPv4Field IPv4Field
+ deriving (Eq, Show)
+
+
+instance Pretty IPv4Pattern where
+ pretty_show (IPv4Pattern f1 f2 f3 f4) =
+ (pretty_show f1) ++ "."
+ ++ (pretty_show f2)
+ ++ "."
+ ++ (pretty_show f3)
+ ++ "."
+ ++ (pretty_show f4)
+
+
+-- | Parse an ipv4 address pattern. This consists of four fields,
+-- separated by periods, where a field is either a simple octet or a
+-- sequence.
+--
+-- See also: 'v4field', 'v4sequence'.
+--
+-- ==== _Examples_
+--
+-- >>> pretty_print $ parse v4pattern "" "127.0.0.1"
+-- 127.0.0.1
+--
+-- >>> pretty_print $ parse v4pattern "" "127.0.[1..3].1"
+-- 127.0.[1..3].1
+--
+-- >>> pretty_print $ parse v4pattern "" "127.0.[1..3;8].1"
+-- 127.0.[1..3;8].1
+--
+-- In the module intro, it is mentioned that this is invalid:
+--
+-- >>> import Text.Parsec ( parseTest )
+-- >>> parseTest v4pattern "1.2.[3.4]"
+-- parse error at (line 1, column 7):
+-- unexpected "."
+-- expecting digit or "]"
+--
+-- This one is /also/ invalid; however, we'll parse the valid part off
+-- the front of it:
+--
+-- >>> pretty_print $ parse v4pattern "" "1.2.3.3[6..9]"
+-- 1.2.3.3
+--
+v4pattern :: Parser IPv4Pattern
+v4pattern = do
+ field1 <- v4field
+ _ <- char '.'
+ field2 <- v4field
+ _ <- char '.'
+ field3 <- v4field
+ _ <- char '.'
+ field4 <- v4field
+ return $ IPv4Pattern field1 field2 field3 field4
+
+
+
+-- * Enumeration
+
+-- | Enumerate the members of an 'IPv4SequenceMember'. A sequence
+-- member is either an octet, which is easy to enumerate -- we just
+-- print it -- or an octet range whose members can be enumerated
+-- from least to greatest.
+--
+-- We enumerate strings instead of integers because the big picture
+-- is that we will be listing out patterns of ipv4 addresses, and
+-- those are represented as strings (dotted quad format).
+--
+-- ==== _Examples_
+--
+-- >>> let (Right r) = parse v4seq_member "" "127"
+-- >>> sequence_members r
+-- ["127"]
+--
+-- >>> let (Right r) = parse v4seq_member "" "127..135"
+-- >>> sequence_members r
+-- ["127","128","129","130","131","132","133","134","135"]
+--
+sequence_members :: IPv4SequenceMember -> [String]
+sequence_members (IPv4SequenceMemberOctet (IPv4Octet i)) = [show i]
+sequence_members (IPv4SequenceMemberOctetRange (IPv4Octet s) (IPv4Octet t)) =
+ [show x | x <- [s .. t]]
+
+
+-- | Enumerate the members of an ipv4 sequence. These consist of
+-- either a single sequence member (in which case we delegate to
+-- 'sequence_members'), or an \"option\" which is enumerated
+-- recursively.
+--
+-- ==== _Examples_
+--
+-- >>> let (Right r) = parse v4sequence "" "1"
+-- >>> sequences r
+-- ["1"]
+--
+-- >>> let (Right r) = parse v4sequence "" "1..2"
+-- >>> sequences r
+-- ["1","2"]
+--
+-- >>> let (Right r) = parse v4sequence "" "1..3;4;5..9"
+-- >>> sequences r
+-- ["1","2","3","4","5","6","7","8","9"]
+--
+sequences :: IPv4Sequence -> [String]
+sequences (IPv4SequenceSingleMember sm) =
+ sequence_members sm
+sequences (IPv4SequenceOptions sm s) =
+ (sequence_members sm) ++ (sequences s)
+
+
+-- | Enumerate the members of an 'IPv4Field'. If the field contains a
+-- single 'IPv4Octet', we simply 'show' it. Otherwise it contains an
+-- 'IPv4FieldSequence', and we enumerate that recursively using
+-- 'sequences'.
+--
+-- ==== _Examples_
+--
+-- >>> let (Right r) = parse v4field "" "1"
+-- >>> fields r
+-- ["1"]
+--
+-- >>> let (Right r) = parse v4field "" "[127..135]"
+-- >>> fields r
+-- ["127","128","129","130","131","132","133","134","135"]
+--
+fields :: IPv4Field -> [String]
+fields (IPv4FieldOctet (IPv4Octet i)) = [show i]
+fields (IPv4FieldSequence s) = sequences s
+
+
+-- | Enumerate the addresses represented by a given 'IPv4Pattern'.
+--
+-- A pattern contains four fields, sepearated by period
+-- characters. We want to list all possible combinations of
+-- addresses where the first octet comes from the first field, the
+-- second octet comes from the second field... and so on. To do
+-- this, we take advantage of the List monad and the fact that
+-- 'fields' returns a list of 'String's.
+--
+-- ==== _Examples_
+--
+-- A single address:
+--
+-- >>> let (Right r) = parse v4pattern "" "127.0.0.1"
+-- >>> addresses r
+-- ["127.0.0.1"]
+--
+-- Anything between 127.0.0.2 and 127.0.0.4, and either 127.0.0.10
+-- or 127.0.0.11:
+--
+-- >>> let (Right r) = parse v4pattern "" "127.0.0.[2..4;10;11]"
+-- >>> addresses r
+-- ["127.0.0.2","127.0.0.3","127.0.0.4","127.0.0.10","127.0.0.11"]
+--
+addresses :: IPv4Pattern -> [String]
+addresses (IPv4Pattern field1 field2 field3 field4) = do
+ f1 <- fields field1
+ f2 <- fields field2
+ f3 <- fields field3
+ f4 <- fields field4
+ return $ f1 ++ "." ++ f2 ++ "." ++ f3 ++ "." ++ f4
+
+
+
+-- * Tests
+
+ipv4pattern_tests :: TestTree
+ipv4pattern_tests =
+ testGroup "IPv4Pattern Tests" [ v4octet_tests ]
+
+
+v4octet_tests :: TestTree
+v4octet_tests =
+ testGroup
+ "v4octet tests"
+ [ test_v4octet_single_digit_parsed ]
+
+test_v4octet_single_digit_parsed :: TestTree
+test_v4octet_single_digit_parsed =
+ testCase "a single digit is parsed as a v4octet" $ do
+ -- Whatever, it's a test.
+ let (Right actual) = parse v4octet "" "1"
+ let expected = IPv4Octet 1
+ actual @?= expected