where
import Control.Concurrent.ParallelIO.Global (parallel, stopGlobalPool)
-import Control.Monad (when)
+import Control.Monad (unless)
import qualified Data.ByteString as BS
import qualified Data.ByteString.UTF8 as BSU
import Network.DNS (
openFile,
stdin,
stdout)
-import Text.Regex.PCRE.Light (compile, match, utf8)
+
import CommandLine (Args(..), apply_args)
+import EmailAddress
import ExitCodes (exit_input_file_doesnt_exist)
-type Address = BSU.ByteString
-- | Resolver parameters. We increase the default timeout from 3 to 5
-- seconds.
"yahoo.com",
"verizon.net" ]
+
+-- | Check whether the given domain has a valid MX record.
validate_mx :: Resolver -> Domain -> IO Bool
validate_mx resolver domain
| domain `elem` common_domains = return True
Nothing -> return False
_ -> return True
--- | Split an address into local/domain parts.
-parts :: Address -> (BSU.ByteString, BSU.ByteString)
-parts address = bytestring_split address '@'
-
--- | Check that the lengths of the local/domain parts are within spec.
-validate_length :: Address -> Bool
-validate_length address =
- (BSU.length localpart <= 64) && (BSU.length domain <= 255)
- where
- (localpart, domain) = parts address
-
--- | Validate an email address against a simple regex. This should
--- catch common addresses but disallows a lot of (legal) weird stuff.
-validate_regex :: Address -> Bool
-validate_regex address =
- case matches of
- Nothing -> False
- _ -> True
- where
- regex_str = "(\\w+)([\\w\\-\\.]*)@(([0-9a-zA-Z\\-]+\\.)+)[a-zA-Z]{2,4}"
- regex_bs = BSU.fromString regex_str
- regex = compile regex_bs [utf8]
- matches = match regex address []
-- | Validate the syntax of an email address by checking its length
-- and validating it against a simple regex.
validate_syntax address =
(validate_length address) && (validate_regex address)
--- | Split a 'ByteString' @s@ at the first occurrence of @character@.
-bytestring_split :: BSU.ByteString -> Char -> (BSU.ByteString, BSU.ByteString)
-bytestring_split s character =
- (before, after)
- where
- break_func = (== character)
- (before, rest) = BSU.break break_func s
- after = BS.tail rest
-
-- | Validate an email address by doing some simple syntax checks and
-- (if those fail) an MX lookup. We don't count an A record as a mail
let (_,domain) = parts address
mx_result <- validate_mx resolver domain
return (address, mx_result)
- else do
+ else
return (address, False)
+-- | Append a ByteString to a file Handle, followed by a newline.
append_handle_with_newline :: Handle -> BS.ByteString -> IO ()
append_handle_with_newline h bs = do
BS.hPutStr h bs
main = do
Args{..} <- apply_args
+ -- Get the input from either stdin, or the file given on the command
+ -- line.
input <- case input_file of
Nothing -> BS.hGetContents stdin
Just path -> do
is_file <- doesFileExist path
- when (not is_file) $ do
+ unless is_file $
exitWith (ExitFailure exit_input_file_doesnt_exist)
BS.readFile path
+ -- Do the same for the output handle and stdout.
output_handle <- case output_file of
Nothing -> return stdout
Just path -> openFile path WriteMode
+ -- Split the input into lines.
let addresses = BSU.lines input
+
+ -- And remove the empty ones.
let nonempty_addresses = filter (not . BS.null) addresses
rs <- makeResolvSeed resolv_conf
withResolver rs $ \resolver -> do
+ -- Construst a list of [IO (Address, Bool)]
let actions = map (validate resolver) nonempty_addresses
+ -- And compute them in parallel.
results <- parallel actions
stopGlobalPool
+ -- Find the pairs with a True in the second position.
let good_pairs = filter snd results
+ -- And output the results.
mapM_ ((append_handle_with_newline output_handle) . fst) good_pairs
- hFlush output_handle
- hClose output_handle
+
+ -- Clean up. It's safe to try to close stdout.
+ hFlush output_handle
+ hClose output_handle