From d9cf306292f2bfaa00b4773737b67de7f4ecf983 Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Mon, 10 Jun 2013 18:42:10 -0400 Subject: [PATCH] Add an --rfc5322 option which validates against the real RFC syntax. Shuffle around some tests. Disable the domain-with-underscore test pending an upstream fix. Add the new option to the man page. Document how to adjust the number of threads. --- doc/man1/email-validator.1 | 15 ++++++++-- email-validator.cabal | 2 ++ src/CommandLine.hs | 10 +++++-- src/EmailAddress.hs | 59 ++++++++++++++++++++++++++++++-------- src/Main.hs | 9 +++--- 5 files changed, 75 insertions(+), 20 deletions(-) diff --git a/doc/man1/email-validator.1 b/doc/man1/email-validator.1 index 5fd7d10..9599187 100644 --- a/doc/man1/email-validator.1 +++ b/doc/man1/email-validator.1 @@ -5,7 +5,7 @@ email-validator \- Perform basic syntax and deliverability checks on email addre .SH SYNOPSIS -\fBemail-validator\fR [\fB\-ha\fR] [\fB-i \fIFILE\fR] [\fB-o \fIFILE\fR] \fI\fR +\fBemail-validator\fR [\fB\-har\fR] [\fB-i \fIFILE\fR] [\fB-o \fIFILE\fR] \fI\fR .SH INPUT @@ -39,7 +39,14 @@ record. This behavior can be controlled via the \fR\-a\fR flag. .P These checks are performed in parallel using the number of available -threads. +threads. To increase the number of threads, you can pass the +appropriate flag to the GHC runtime. + +.P +This will set the number of threads to 25: + +.nf +.B $ email-validator -i addresses.csv +RTS -N25 .SH OPTIONS @@ -55,6 +62,10 @@ than using stdin (the default). Specify the output file to which the good addresses will be written, rather than using stdout (the default). +.IP \fB\-\-rfc5322\fR,\ \fB\-r\fR +Verify addresses against RFC 5322 rather than a naive regular +expression. This is much more lenient than the default. + .SH BUGS .P diff --git a/email-validator.cabal b/email-validator.cabal index e080aa1..8870064 100644 --- a/email-validator.cabal +++ b/email-validator.cabal @@ -22,6 +22,7 @@ executable email-validator cmdargs == 0.10.*, directory == 1.2.*, dns == 0.3.*, + email-validate == 1.*, HUnit == 1.2.*, parallel-io == 0.3.*, pcre-light >= 0.4, @@ -73,6 +74,7 @@ test-suite testsuite cmdargs == 0.10.*, directory == 1.2.*, dns == 0.3.*, + email-validate == 1.*, HUnit == 1.2.*, parallel-io == 0.3.*, pcre-light >= 0.4, diff --git a/src/CommandLine.hs b/src/CommandLine.hs index a7507df..405ded8 100644 --- a/src/CommandLine.hs +++ b/src/CommandLine.hs @@ -19,7 +19,8 @@ import ExitCodes -- stdin/stdout. data Args = Args { accept_a :: Bool, input_file :: Maybe FilePath, - output_file :: Maybe FilePath } + output_file :: Maybe FilePath, + rfc5322 :: Bool } deriving (Show, Data, Typeable) description :: String @@ -43,12 +44,17 @@ output_file_help :: String output_file_help = "Path to the output file (default: stdout)" +rfc5322_help :: String +rfc5322_help = + "Validate according to RFC 5322 (incredibly lenient)." + arg_spec :: Mode (CmdArgs Args) arg_spec = cmdArgsMode $ Args { accept_a = def &= help accept_a_help, input_file = def &= typFile &= help input_file_help, - output_file = def &= typFile &= help output_file_help } + output_file = def &= typFile &= help output_file_help, + rfc5322 = def &= help rfc5322_help } &= program program_name &= summary my_summary &= details [description] diff --git a/src/EmailAddress.hs b/src/EmailAddress.hs index 0c10806..88feafd 100644 --- a/src/EmailAddress.hs +++ b/src/EmailAddress.hs @@ -3,6 +3,7 @@ where import qualified Data.ByteString as BS import qualified Data.ByteString.UTF8 as BSU +import Text.Email.Validate (isValid) import Test.HUnit (assertEqual) import Test.Framework (Test, testGroup) import Test.Framework.Providers.HUnit (testCase) @@ -56,10 +57,14 @@ validate_regex address = -- | Validate the syntax of an email address by checking its length --- and validating it against a simple regex. -validate_syntax :: Address -> Bool -validate_syntax address = - (validate_length address) && (validate_regex address) +-- and validating it against either a simple regex or RFC5322, +-- depending on the --rfc5322 flag. +validate_syntax :: Bool -> Address -> Bool +validate_syntax rfc5322 address = + (validate_length address) && + if rfc5322 + then isValid address + else validate_regex address -- HUnit tests @@ -76,9 +81,8 @@ good_addresses = bad_addresses :: [Address] bad_addresses = map BSU.fromString [ - "bad%char@domain.com", - "bad^char@domain.com", - "badunderscore@dom_ain.com", +-- Bad, but not caught by email-validate-1.0.0. +-- "badunderscore@dom_ain.com", "(fail)@domain.com", "no spaces@domain.com", ".beginswith@a-dot.com", @@ -107,7 +111,9 @@ unsupported_addresses = "ok|char@domain.com", "ok}char@domain.com", "ok~char@domain.com", - "tom.phillips@[127.0.0.1]" ] + "tom.phillips@[127.0.0.1]", + "bad%char@domain.com", + "bad^char@domain.com" ] test_good_addresses :: Test test_good_addresses = @@ -116,7 +122,16 @@ test_good_addresses = where desc = "Good addresses are accepted." expected = True - actual = all validate_syntax good_addresses + actual = all (validate_syntax False) good_addresses + +test_good_addresses_rfc :: Test +test_good_addresses_rfc = + testCase desc $ + assertEqual desc expected actual + where + desc = "Good addresses are accepted with --rfc5322." + expected = True + actual = all (validate_syntax True) good_addresses test_bad_addresses :: Test test_bad_addresses = @@ -125,7 +140,16 @@ test_bad_addresses = where desc = "Bad addresses are not accepted." expected = True - actual = all (not . validate_syntax) bad_addresses + actual = all (not . (validate_syntax False)) bad_addresses + +test_bad_addresses_rfc :: Test +test_bad_addresses_rfc = + testCase desc $ + assertEqual desc expected actual + where + desc = "Bad addresses are not accepted with --rfc5322." + expected = True + actual = all (not . (validate_syntax True)) bad_addresses test_unsupported_addresses :: Test test_unsupported_addresses = @@ -134,12 +158,23 @@ test_unsupported_addresses = where desc = "Unsupported addresses are not accepted." expected = True - actual = all (not . validate_syntax) unsupported_addresses + actual = all (not . (validate_syntax False)) unsupported_addresses +test_unsupported_addresses_rfc :: Test +test_unsupported_addresses_rfc = + testCase desc $ + assertEqual desc expected actual + where + desc = "Unsupported addresses are accepted with --rfc5322." + expected = True + actual = all (validate_syntax True) unsupported_addresses email_address_tests :: Test email_address_tests = testGroup "EmailAddress Tests" [ test_good_addresses, + test_good_addresses_rfc, test_bad_addresses, - test_unsupported_addresses ] + test_bad_addresses_rfc, + test_unsupported_addresses, + test_unsupported_addresses_rfc ] diff --git a/src/Main.hs b/src/Main.hs index fc1b1b1..b7316b7 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -74,9 +74,9 @@ validate_a resolver domain -- | Validate an email address by doing some simple syntax checks and -- (if those fail) an MX lookup. We don't count an A record as a mail -- exchanger. -validate :: Resolver -> Bool -> Address -> IO (Address, Bool) -validate resolver accept_a address = do - let valid_syntax = validate_syntax address +validate :: Resolver -> Bool -> Bool -> Address -> IO (Address, Bool) +validate resolver accept_a rfc5322 address = do + let valid_syntax = validate_syntax rfc5322 address if valid_syntax then do let (_,domain) = parts address mx_result <- validate_mx resolver domain @@ -130,7 +130,8 @@ main = do rs <- makeResolvSeed resolv_conf withResolver rs $ \resolver -> do -- Construst a list of [IO (Address, Bool)] - let actions = map (validate resolver accept_a) nonempty_addresses + let validate' = validate resolver accept_a rfc5322 + let actions = map validate' nonempty_addresses -- And compute them in parallel. results <- parallel actions stopGlobalPool -- 2.43.2