module Main
where
-import Control.Concurrent.ParallelIO.Global (parallel, stopGlobalPool)
-import Control.Monad (when)
-import qualified Data.ByteString as BS
-import qualified Data.ByteString.UTF8 as BSU
+import Control.Concurrent.ParallelIO.Global (
+ parallelInterleaved,
+ stopGlobalPool )
+import Control.Monad ( unless )
+import qualified Data.ByteString.Char8 as BS (
+ hGetContents,
+ hPutStrLn,
+ lines,
+ null,
+ pack,
+ readFile )
import Network.DNS (
Domain,
Resolver,
ResolvConf(..),
defaultResolvConf,
makeResolvSeed,
- withResolver)
-import Network.DNS.Lookup (lookupMX)
-import System.Directory (doesFileExist)
-import System.Exit (exitWith, ExitCode(..))
+ withResolver )
+import Network.DNS.Lookup ( lookupA, lookupMX )
+import System.Directory ( doesFileExist )
+import System.Exit ( exitWith, ExitCode(..) )
import System.IO (
- Handle,
IOMode( WriteMode ),
hClose,
hFlush,
openFile,
stdin,
- stdout)
-import Text.Regex.PCRE.Light (compile, match, utf8)
+ stdout )
-import CommandLine (Args(..), apply_args)
-import ExitCodes (exit_input_file_doesnt_exist)
-type Address = BSU.ByteString
+import CommandLine ( Args(..), get_args )
+import EmailAddress(
+ Address,
+ parts,
+ validate_syntax )
+import ExitCodes ( exit_input_file_doesnt_exist )
--- | Resolver parameters. We increase the default timeout from 3 to 5
+
+-- | Resolver parameters. We increase the default timeout from 3 to 10
-- seconds.
resolv_conf :: ResolvConf
-resolv_conf = defaultResolvConf { resolvTimeout = 5 * 1000 * 1000 }
+resolv_conf = defaultResolvConf { resolvTimeout = 10 * 1000 * 1000 }
-- | A list of common domains, there's no need to waste MX lookups
-- on these.
common_domains :: [Domain]
-common_domains = map BSU.fromString [ "aol.com",
- "comcast.net",
- "gmail.com",
- "msn.com",
- "yahoo.com",
- "verizon.net" ]
+common_domains = map BS.pack [ "aol.com",
+ "comcast.net",
+ "gmail.com",
+ "msn.com",
+ "yahoo.com",
+ "verizon.net" ]
+
+-- | Check whether the given domain has a valid MX record.
validate_mx :: Resolver -> Domain -> IO Bool
validate_mx resolver domain
| domain `elem` common_domains = return True
| otherwise = do
result <- lookupMX resolver domain
case result of
- Nothing -> return False
- _ -> return True
-
--- | Split an address into local/domain parts.
-parts :: Address -> (BSU.ByteString, BSU.ByteString)
-parts address = bytestring_split address '@'
-
--- | Check that the lengths of the local/domain parts are within spec.
-validate_length :: Address -> Bool
-validate_length address =
- (BSU.length localpart <= 64) && (BSU.length domain <= 255)
- where
- (localpart, domain) = parts address
-
--- | Validate an email address against a simple regex. This should
--- catch common addresses but disallows a lot of (legal) weird stuff.
-validate_regex :: Address -> Bool
-validate_regex address =
- case matches of
- Nothing -> False
- _ -> True
- where
- regex_str = "(\\w+)([\\w\\-\\.]*)@(([0-9a-zA-Z\\-]+\\.)+)[a-zA-Z]{2,4}"
- regex_bs = BSU.fromString regex_str
- regex = compile regex_bs [utf8]
- matches = match regex address []
-
--- | Validate the syntax of an email address by checking its length
--- and validating it against a simple regex.
-validate_syntax :: Address -> Bool
-validate_syntax address =
- (validate_length address) && (validate_regex address)
-
--- | Split a 'ByteString' @s@ at the first occurrence of @character@.
-bytestring_split :: BSU.ByteString -> Char -> (BSU.ByteString, BSU.ByteString)
-bytestring_split s character =
- (before, after)
- where
- break_func = (== character)
- (before, rest) = BSU.break break_func s
- after = BS.tail rest
+ -- A list of one or more elements?
+ Right (_:_) -> return True
+ _ -> return False
+
+
+-- | Check whether the given domain has a valid A record.
+validate_a :: Resolver -> Domain -> IO Bool
+validate_a resolver domain
+ | domain `elem` common_domains = return True
+ | otherwise = do
+ result <- lookupA resolver domain
+ case result of
+ Right (_:_) -> return True
+ _ -> return False
-- | Validate an email address by doing some simple syntax checks and
-- (if those fail) an MX lookup. We don't count an A record as a mail
-- exchanger.
-validate :: Resolver -> Address -> IO (Address, Bool)
-validate resolver address = do
- let valid_syntax = validate_syntax address
+validate :: Resolver -> Bool -> Bool -> Address -> IO (Address, Bool)
+validate resolver accept_a rfc5322 address = do
+ let valid_syntax = validate_syntax rfc5322 address
if valid_syntax then do
let (_,domain) = parts address
mx_result <- validate_mx resolver domain
- return (address, mx_result)
- else do
+ if mx_result
+ then return (address, True)
+ else
+ if accept_a
+ then do
+ a_result <- validate_a resolver domain
+ return (address, a_result)
+ else
+ return (address, False)
+ else
return (address, False)
-append_handle_with_newline :: Handle -> BS.ByteString -> IO ()
-append_handle_with_newline h bs = do
- BS.hPutStr h bs
- BS.hPutStr h newline
- where
- newline = BSU.fromString "\n"
-
main :: IO ()
main = do
- Args{..} <- apply_args
+ Args{..} <- get_args
+ -- Get the input from either stdin, or the file given on the command
+ -- line.
input <- case input_file of
Nothing -> BS.hGetContents stdin
Just path -> do
is_file <- doesFileExist path
- when (not is_file) $ do
+ unless is_file $
exitWith (ExitFailure exit_input_file_doesnt_exist)
BS.readFile path
+ -- Do the same for the output handle and stdout.
output_handle <- case output_file of
Nothing -> return stdout
Just path -> openFile path WriteMode
- let addresses = BSU.lines input
+ -- Split the input into lines.
+ let addresses = BS.lines input
+
+ -- And remove the empty ones.
let nonempty_addresses = filter (not . BS.null) addresses
rs <- makeResolvSeed resolv_conf
- withResolver rs $ \resolver -> do
- let actions = map (validate resolver) nonempty_addresses
- results <- parallel actions
- stopGlobalPool
- let good_pairs = filter snd results
- mapM_ ((append_handle_with_newline output_handle) . fst) good_pairs
- hFlush output_handle
- hClose output_handle
+ let validate' addr = withResolver rs $ \resolver ->
+ validate resolver accept_a rfc5322 addr
+
+ -- Construct a list of [IO (Address, Bool)]. The withResolver calls
+ -- are the ones that should be run in parallel.
+ let actions = map validate' nonempty_addresses
+
+ -- Run the lookup actions in parallel.
+ results <- parallelInterleaved actions
+
+ -- Filter the bad ones.
+ let valid_results = filter snd results
+
+ -- Output the results.
+ let valid_addresses = map fst valid_results
+ _ <- mapM (BS.hPutStrLn output_handle) valid_addresses
+
+ stopGlobalPool
+
+ -- Clean up. It's safe to try to close stdout.
+ hFlush output_handle
+ hClose output_handle