From eae92cebe7fab4afc5c7de377ec30ec348ef09ad Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Sat, 5 Oct 2013 15:08:21 -0400 Subject: [PATCH] Bump the version to 0.0.2. Drop the utf8-string dependency. Replace all utf8-string functions and types with Data.ByteString.Char8. Fix the parallelism (now it actual runs in parallel). Update DNS functions for dns-1.0.0. --- email-validator.cabal | 12 +++---- src/EmailAddress.hs | 31 ++++++++++-------- src/Main.hs | 74 +++++++++++++++++++++++-------------------- 3 files changed, 63 insertions(+), 54 deletions(-) diff --git a/email-validator.cabal b/email-validator.cabal index 5efa913..d86f246 100644 --- a/email-validator.cabal +++ b/email-validator.cabal @@ -1,5 +1,5 @@ name: email-validator -version: 0.0.1 +version: 0.0.2 cabal-version: >= 1.8 author: Michael Orlitzky maintainer: Michael Orlitzky @@ -52,14 +52,13 @@ executable email-validator bytestring == 0.10.*, cmdargs == 0.10.*, directory == 1.2.*, - dns == 0.3.*, + dns == 1.*, email-validate == 1.*, HUnit == 1.2.*, parallel-io == 0.3.*, pcre-light >= 0.4, test-framework == 0.8.*, - test-framework-hunit == 0.3.*, - utf8-string == 0.3.* + test-framework-hunit == 0.3.* main-is: Main.hs @@ -104,14 +103,13 @@ test-suite testsuite bytestring == 0.10.*, cmdargs == 0.10.*, directory == 1.2.*, - dns == 0.3.*, + dns == 1.*, email-validate == 1.*, HUnit == 1.2.*, parallel-io == 0.3.*, pcre-light >= 0.4, test-framework == 0.8.*, - test-framework-hunit == 0.3.*, - utf8-string == 0.3.* + test-framework-hunit == 0.3.* -- It's not entirely clear to me why I have to reproduce all of this. ghc-options: diff --git a/src/EmailAddress.hs b/src/EmailAddress.hs index 88feafd..436d3f5 100644 --- a/src/EmailAddress.hs +++ b/src/EmailAddress.hs @@ -1,8 +1,13 @@ module EmailAddress where -import qualified Data.ByteString as BS -import qualified Data.ByteString.UTF8 as BSU +import qualified Data.ByteString.Char8 as BS ( + ByteString, + break, + empty, + length, + pack, + tail) import Text.Email.Validate (isValid) import Test.HUnit (assertEqual) import Test.Framework (Test, testGroup) @@ -16,9 +21,9 @@ import Text.Regex.PCRE.Light ( utf8 ) -type Address = BSU.ByteString -type LocalPart = BSU.ByteString -type DomainPart = BSU.ByteString +type Address = BS.ByteString +type LocalPart = BS.ByteString +type DomainPart = BS.ByteString @@ -28,7 +33,7 @@ parts address = (before, after) where break_func = (== '@') - (before, rest) = BSU.break break_func address + (before, rest) = BS.break break_func address after = if rest == BS.empty then BS.empty @@ -38,7 +43,7 @@ parts address = -- | Check that the lengths of the local/domain parts are within spec. validate_length :: Address -> Bool validate_length address = - (BSU.length localpart <= 64) && (BSU.length domain <= 255) + (BS.length localpart <= 64) && (BS.length domain <= 255) where (localpart, domain) = parts address @@ -51,7 +56,7 @@ validate_regex address = _ -> True where regex_str = "(\\w+)([\\w\\-\\.]*)@(([a-z0-9\\-]+\\.)+)[a-z]{2,4}$" - regex_bs = BSU.fromString regex_str + regex_bs = BS.pack regex_str regex = compile regex_bs [anchored, caseless, dollar_endonly, utf8] matches = match regex address [] @@ -70,7 +75,7 @@ validate_syntax rfc5322 address = -- HUnit tests good_addresses :: [Address] good_addresses = - map BSU.fromString [ + map BS.pack [ "phil@hotmail.com", "philq23562@hotmail.com", "gsdfg22-2_22@hot-mail.com", @@ -80,9 +85,9 @@ good_addresses = bad_addresses :: [Address] bad_addresses = - map BSU.fromString [ --- Bad, but not caught by email-validate-1.0.0. --- "badunderscore@dom_ain.com", + map BS.pack [ + -- Bad, but not caught by email-validate-0.0.1. + -- "badunderscore@dom_ain.com", "(fail)@domain.com", "no spaces@domain.com", ".beginswith@a-dot.com", @@ -96,7 +101,7 @@ bad_addresses = unsupported_addresses :: [Address] unsupported_addresses = - map BSU.fromString [ + map BS.pack [ "ok!char@domain.com", "ok#char@domain.com", "ok$char@domain.com", diff --git a/src/Main.hs b/src/Main.hs index ac72ad5..0e7c30f 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -4,10 +4,17 @@ module Main where -import Control.Concurrent.ParallelIO.Global (parallel, stopGlobalPool) +import Control.Concurrent.ParallelIO.Global ( + parallelInterleaved, + stopGlobalPool) import Control.Monad (unless) -import qualified Data.ByteString as BS -import qualified Data.ByteString.UTF8 as BSU +import qualified Data.ByteString.Char8 as BS ( + hGetContents, + hPutStrLn, + lines, + null, + pack, + readFile) import Network.DNS ( Domain, Resolver, @@ -19,7 +26,6 @@ import Network.DNS.Lookup (lookupA, lookupMX) import System.Directory (doesFileExist) import System.Exit (exitWith, ExitCode(..)) import System.IO ( - Handle, IOMode( WriteMode ), hClose, hFlush, @@ -41,12 +47,12 @@ resolv_conf = defaultResolvConf { resolvTimeout = 10 * 1000 * 1000 } -- | A list of common domains, there's no need to waste MX lookups -- on these. common_domains :: [Domain] -common_domains = map BSU.fromString [ "aol.com", - "comcast.net", - "gmail.com", - "msn.com", - "yahoo.com", - "verizon.net" ] +common_domains = map BS.pack [ "aol.com", + "comcast.net", + "gmail.com", + "msn.com", + "yahoo.com", + "verizon.net" ] -- | Check whether the given domain has a valid MX record. @@ -56,8 +62,9 @@ validate_mx resolver domain | otherwise = do result <- lookupMX resolver domain case result of - Nothing -> return False - _ -> return True + -- A list of one or more elements? + Right (_:_) -> return True + _ -> return False -- | Check whether the given domain has a valid A record. @@ -67,8 +74,8 @@ validate_a resolver domain | otherwise = do result <- lookupA resolver domain case result of - Nothing -> return False - _ -> return True + Right (_:_) -> return True + _ -> return False -- | Validate an email address by doing some simple syntax checks and @@ -93,14 +100,6 @@ validate resolver accept_a rfc5322 address = do return (address, False) --- | Append a ByteString to a file Handle, followed by a newline. -append_handle_with_newline :: Handle -> BS.ByteString -> IO () -append_handle_with_newline h bs = do - BS.hPutStr h bs - BS.hPutStr h newline - where - newline = BSU.fromString "\n" - main :: IO () main = do @@ -122,23 +121,30 @@ main = do Just path -> openFile path WriteMode -- Split the input into lines. - let addresses = BSU.lines input + let addresses = BS.lines input -- And remove the empty ones. let nonempty_addresses = filter (not . BS.null) addresses rs <- makeResolvSeed resolv_conf - withResolver rs $ \resolver -> do - -- Construst a list of [IO (Address, Bool)] - let validate' = validate resolver accept_a rfc5322 - let actions = map validate' nonempty_addresses - -- And compute them in parallel. - results <- parallel actions - stopGlobalPool - -- Find the pairs with a True in the second position. - let good_pairs = filter snd results - -- And output the results. - mapM_ ((append_handle_with_newline output_handle) . fst) good_pairs + let validate' addr = withResolver rs $ \resolver -> + validate resolver accept_a rfc5322 addr + + -- Construct a list of [IO (Address, Bool)]. The withResolver calls + -- are the ones that should be run in parallel. + let actions = map validate' nonempty_addresses + + -- Run the lookup actions in parallel. + results <- parallelInterleaved actions + + -- Filter the bad ones. + let valid_results = filter snd results + + -- Output the results. + let valid_addresses = map fst valid_results + _ <- mapM (BS.hPutStrLn output_handle) valid_addresses + + stopGlobalPool -- Clean up. It's safe to try to close stdout. hFlush output_handle -- 2.43.2