X-Git-Url: http://gitweb.michael.orlitzky.com/?p=dead%2Fharbl.git;a=blobdiff_plain;f=harbl%2Fsrc%2FNetwork%2FDNS%2FRBL%2FIPv4Pattern.hs;fp=harbl%2Fsrc%2FNetwork%2FDNS%2FRBL%2FIPv4Pattern.hs;h=6e407192c03417b36d7f8d044fa0f484c9418f27;hp=0000000000000000000000000000000000000000;hb=b55e5db2a68be5d69b970bbe4b5ad447881abd3d;hpb=c4d41b93ec02ff4dc762163441ebefb0324e6f07 diff --git a/harbl/src/Network/DNS/RBL/IPv4Pattern.hs b/harbl/src/Network/DNS/RBL/IPv4Pattern.hs new file mode 100644 index 0000000..6e40719 --- /dev/null +++ b/harbl/src/Network/DNS/RBL/IPv4Pattern.hs @@ -0,0 +1,440 @@ +-- | An IPv4 address pattern has four fields separated by ".". Each +-- field is either a decimal number, or a sequence inside "[]" that +-- contains one or more ";"-separated decimal numbers or +-- number..number ranges. +-- +-- Thus, any pattern field can be a sequence inside "[]", but a "[]" +-- sequence cannot span multiple address fields, and a pattern field +-- cannot contain both a number and a "[]" sequence at the same +-- time. +-- +-- This means that the pattern 1.2.[3.4] is not valid (the sequence +-- [3.4] cannot span two address fields) and the pattern +-- 1.2.3.3[6..9] is also not valid (the last field cannot be both +-- number 3 and sequence [6..9] at the same time). +-- +-- The syntax for IPv4 patterns is as follows: +-- +-- v4pattern = v4field "." v4field "." v4field "." v4field +-- v4field = v4octet | "[" v4sequence "]" +-- v4octet = any decimal number in the range 0 through 255 +-- v4sequence = v4seq_member | v4sequence ";" v4seq_member +-- v4seq_member = v4octet | v4octet ".." v4octet +-- +module Network.DNS.RBL.IPv4Pattern ( + IPv4Pattern, + addresses, + ipv4pattern_tests, + v4pattern) +where + + +import Test.Tasty ( TestTree, testGroup ) +import Test.Tasty.HUnit ( (@?=), testCase ) +import Text.Parsec ( + (<|>), + char, + digit, + many1, + parse, + string, + try, + unexpected ) +import Text.Parsec.String ( Parser ) +import Text.Read ( readMaybe ) + +import Network.DNS.RBL.Pretty ( Pretty(..) ) + + +-- * Octets + +-- | An ipv4 octet; that is, an integer between @0@ and @255@ +-- inclusive. This is the data type corresponding to a \"v4octet\" +-- in the postscreen parser. +-- +newtype IPv4Octet = IPv4Octet Int + deriving (Eq, Show) + + +instance Pretty IPv4Octet where + pretty_show (IPv4Octet x) = show x + + +-- | Parse an IPv4 octet, which should contain a string of digits. +-- Should fail if the parsed integer does not lie between @0@ and +-- @255@ inclusive. +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parseTest ) +-- +-- Standard octets are parsed correctly: +-- +-- >>> parseTest v4octet "0" +-- IPv4Octet 0 +-- +-- >>> parseTest v4octet "127" +-- IPv4Octet 127 +-- +-- >>> parseTest v4octet "255" +-- IPv4Octet 255 +-- +-- Non-digit input throws an error: +-- +-- >>> parseTest v4octet "Hello, World!" +-- parse error at (line 1, column 1): +-- unexpected "H" +-- expecting digit +-- +-- If we're given an integer outside the range @0..255@ (i.e. not a +-- valid octet), we fail: +-- +-- >>> parseTest v4octet "9000" +-- parse error at (line 1, column 5): +-- unexpected end of input +-- expecting digit +-- Octet "9000" must be between 0 and 255. +-- +v4octet :: Parser IPv4Octet +v4octet = do + s <- many1 digit + case ( readMaybe s :: Maybe Int ) of + -- If "many1 digit" gives us a list of digits, we should be able + -- to convert that to an Int! It will overflow rather than fail + -- if the input is too big/small, so it should really always + -- succeed. + Nothing -> unexpected "v4octet: readMaybe failed on a sequence of digits!" + + -- If we got an Int, make sure it's actually a representation of + -- an octet. + Just k -> if 0 <= k && k <= 255 + then return (IPv4Octet k) + else fail ("Octet \"" ++ (show k) + ++ "\" must be between 0 and 255.") + + + + +-- * Sequence members + + +-- | An ipv4 \"sequence member\". A sequence member is either an +-- integer (an octet) or a range of integers (contained in an +-- octet). This data type corresponds to \"v4seq_member\" in the +-- postscreen parser. +-- +data IPv4SequenceMember = + IPv4SequenceMemberOctet IPv4Octet + | IPv4SequenceMemberOctetRange IPv4Octet IPv4Octet + deriving (Eq, Show) + + +instance Pretty IPv4SequenceMember where + pretty_show (IPv4SequenceMemberOctet octet) = pretty_show octet + pretty_show (IPv4SequenceMemberOctetRange octet1 octet2) = + (pretty_show octet1) ++ ".." ++ (pretty_show octet2) + + +-- | Parse an IPv4 \"sequence member\". A sequence member is either an +-- octet, or a start..end sequence (like an enumeration, in Haskell). +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parseTest ) +-- +-- >>> parseTest v4seq_member "127" +-- IPv4SequenceMemberOctet (IPv4Octet 127) +-- +-- >>> parseTest v4seq_member "1..5" +-- IPv4SequenceMemberOctetRange (IPv4Octet 1) (IPv4Octet 5) +-- +v4seq_member :: Parser IPv4SequenceMember +v4seq_member = try both <|> just_one + where + both = do + oct1 <- v4octet + _ <- string ".." + oct2 <- v4octet + return $ IPv4SequenceMemberOctetRange oct1 oct2 + + just_one = fmap IPv4SequenceMemberOctet v4octet + + + +-- * Sequences + +-- | An ipv4 \"sequence\". A sequence contains either a single +-- \"sequence member\" (see 'IPv4SequenceMember'), or a sequence +-- member along with another sequence. So, this is a potentially +-- recursive definition. This type corresponds to \"v4sequence\" in +-- the postscreen parser. +-- +data IPv4Sequence = + IPv4SequenceSingleMember IPv4SequenceMember + | IPv4SequenceOptions IPv4SequenceMember IPv4Sequence + deriving (Eq, Show) + + +instance Pretty IPv4Sequence where + pretty_show (IPv4SequenceSingleMember member) = pretty_show member + pretty_show (IPv4SequenceOptions member subsequence) = + (pretty_show member) ++ ";" ++ (pretty_show subsequence) + + +-- | Parse an IPv4 \"sequence\". A sequence is whatever is allowed +-- within square brackets. Basically it can be three things: +-- +-- * An octet (number). +-- * A range of addresses in start..end format. +-- * An alternative, separated by a semicolon, where each side +-- contains one of the previous two options. +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parseTest ) +-- >>> parseTest v4sequence "1" +-- IPv4SequenceSingleMember (IPv4SequenceMemberOctet (IPv4Octet 1)) +-- +-- >>> pretty_print $ parse v4sequence "" "1..2" +-- 1..2 +-- +-- >>> pretty_print $ parse v4sequence "" "1..2;8" +-- 1..2;8 +-- +v4sequence :: Parser IPv4Sequence +v4sequence = try both <|> just_one + where + both = do + sm <- v4seq_member + _ <- char ';' + s <- v4sequence + return $ IPv4SequenceOptions sm s + + just_one = fmap IPv4SequenceSingleMember v4seq_member + + + +-- * Fields + +data IPv4Field = IPv4FieldOctet IPv4Octet | IPv4FieldSequence IPv4Sequence + deriving (Eq, Show) + + +instance Pretty IPv4Field where + pretty_show (IPv4FieldOctet octet) = pretty_show octet + pretty_show (IPv4FieldSequence s) = "[" ++ (pretty_show s) ++ "]" + + +-- | Parse an IPv4 \"field\", which is either a boring old octet, or a +-- 'v4sequence' within square brackets. +-- +-- ==== _Examples_ +-- +-- >>> import Text.Parsec ( parseTest ) +-- >>> parseTest v4field "127" +-- IPv4FieldOctet (IPv4Octet 127) +-- +-- >>> pretty_print $ parse v4field "" "[127]" +-- [127] +-- +v4field :: Parser IPv4Field +v4field = just_octet <|> brackets + where + just_octet = fmap IPv4FieldOctet v4octet + + brackets = do + _ <- char '[' + s <- v4sequence + _ <- char ']' + return $ IPv4FieldSequence s + + + +-- * Patterns + +data IPv4Pattern = + IPv4Pattern IPv4Field IPv4Field IPv4Field IPv4Field + deriving (Eq, Show) + + +instance Pretty IPv4Pattern where + pretty_show (IPv4Pattern f1 f2 f3 f4) = + (pretty_show f1) ++ "." + ++ (pretty_show f2) + ++ "." + ++ (pretty_show f3) + ++ "." + ++ (pretty_show f4) + + +-- | Parse an ipv4 address pattern. This consists of four fields, +-- separated by periods, where a field is either a simple octet or a +-- sequence. +-- +-- See also: 'v4field', 'v4sequence'. +-- +-- ==== _Examples_ +-- +-- >>> pretty_print $ parse v4pattern "" "127.0.0.1" +-- 127.0.0.1 +-- +-- >>> pretty_print $ parse v4pattern "" "127.0.[1..3].1" +-- 127.0.[1..3].1 +-- +-- >>> pretty_print $ parse v4pattern "" "127.0.[1..3;8].1" +-- 127.0.[1..3;8].1 +-- +-- In the module intro, it is mentioned that this is invalid: +-- +-- >>> import Text.Parsec ( parseTest ) +-- >>> parseTest v4pattern "1.2.[3.4]" +-- parse error at (line 1, column 7): +-- unexpected "." +-- expecting digit or "]" +-- +-- This one is /also/ invalid; however, we'll parse the valid part off +-- the front of it: +-- +-- >>> pretty_print $ parse v4pattern "" "1.2.3.3[6..9]" +-- 1.2.3.3 +-- +v4pattern :: Parser IPv4Pattern +v4pattern = do + field1 <- v4field + _ <- char '.' + field2 <- v4field + _ <- char '.' + field3 <- v4field + _ <- char '.' + field4 <- v4field + return $ IPv4Pattern field1 field2 field3 field4 + + + +-- * Enumeration + +-- | Enumerate the members of an 'IPv4SequenceMember'. A sequence +-- member is either an octet, which is easy to enumerate -- we just +-- print it -- or an octet range whose members can be enumerated +-- from least to greatest. +-- +-- We enumerate strings instead of integers because the big picture +-- is that we will be listing out patterns of ipv4 addresses, and +-- those are represented as strings (dotted quad format). +-- +-- ==== _Examples_ +-- +-- >>> let (Right r) = parse v4seq_member "" "127" +-- >>> sequence_members r +-- ["127"] +-- +-- >>> let (Right r) = parse v4seq_member "" "127..135" +-- >>> sequence_members r +-- ["127","128","129","130","131","132","133","134","135"] +-- +sequence_members :: IPv4SequenceMember -> [String] +sequence_members (IPv4SequenceMemberOctet (IPv4Octet i)) = [show i] +sequence_members (IPv4SequenceMemberOctetRange (IPv4Octet s) (IPv4Octet t)) = + [show x | x <- [s .. t]] + + +-- | Enumerate the members of an ipv4 sequence. These consist of +-- either a single sequence member (in which case we delegate to +-- 'sequence_members'), or an \"option\" which is enumerated +-- recursively. +-- +-- ==== _Examples_ +-- +-- >>> let (Right r) = parse v4sequence "" "1" +-- >>> sequences r +-- ["1"] +-- +-- >>> let (Right r) = parse v4sequence "" "1..2" +-- >>> sequences r +-- ["1","2"] +-- +-- >>> let (Right r) = parse v4sequence "" "1..3;4;5..9" +-- >>> sequences r +-- ["1","2","3","4","5","6","7","8","9"] +-- +sequences :: IPv4Sequence -> [String] +sequences (IPv4SequenceSingleMember sm) = + sequence_members sm +sequences (IPv4SequenceOptions sm s) = + (sequence_members sm) ++ (sequences s) + + +-- | Enumerate the members of an 'IPv4Field'. If the field contains a +-- single 'IPv4Octet', we simply 'show' it. Otherwise it contains an +-- 'IPv4FieldSequence', and we enumerate that recursively using +-- 'sequences'. +-- +-- ==== _Examples_ +-- +-- >>> let (Right r) = parse v4field "" "1" +-- >>> fields r +-- ["1"] +-- +-- >>> let (Right r) = parse v4field "" "[127..135]" +-- >>> fields r +-- ["127","128","129","130","131","132","133","134","135"] +-- +fields :: IPv4Field -> [String] +fields (IPv4FieldOctet (IPv4Octet i)) = [show i] +fields (IPv4FieldSequence s) = sequences s + + +-- | Enumerate the addresses represented by a given 'IPv4Pattern'. +-- +-- A pattern contains four fields, sepearated by period +-- characters. We want to list all possible combinations of +-- addresses where the first octet comes from the first field, the +-- second octet comes from the second field... and so on. To do +-- this, we take advantage of the List monad and the fact that +-- 'fields' returns a list of 'String's. +-- +-- ==== _Examples_ +-- +-- A single address: +-- +-- >>> let (Right r) = parse v4pattern "" "127.0.0.1" +-- >>> addresses r +-- ["127.0.0.1"] +-- +-- Anything between 127.0.0.2 and 127.0.0.4, and either 127.0.0.10 +-- or 127.0.0.11: +-- +-- >>> let (Right r) = parse v4pattern "" "127.0.0.[2..4;10;11]" +-- >>> addresses r +-- ["127.0.0.2","127.0.0.3","127.0.0.4","127.0.0.10","127.0.0.11"] +-- +addresses :: IPv4Pattern -> [String] +addresses (IPv4Pattern field1 field2 field3 field4) = do + f1 <- fields field1 + f2 <- fields field2 + f3 <- fields field3 + f4 <- fields field4 + return $ f1 ++ "." ++ f2 ++ "." ++ f3 ++ "." ++ f4 + + + +-- * Tests + +ipv4pattern_tests :: TestTree +ipv4pattern_tests = + testGroup "IPv4Pattern Tests" [ v4octet_tests ] + + +v4octet_tests :: TestTree +v4octet_tests = + testGroup + "v4octet tests" + [ test_v4octet_single_digit_parsed ] + +test_v4octet_single_digit_parsed :: TestTree +test_v4octet_single_digit_parsed = + testCase "a single digit is parsed as a v4octet" $ do + -- Whatever, it's a test. + let (Right actual) = parse v4octet "" "1" + let expected = IPv4Octet 1 + actual @?= expected