{-# LANGUAGE DoAndIfThenElse #-} {-# LANGUAGE RecordWildCards #-} module Main where import Control.Concurrent.ParallelIO.Global (parallel, stopGlobalPool) import Control.Monad (when) import qualified Data.ByteString as BS import qualified Data.ByteString.UTF8 as BSU import Network.DNS ( Domain, Resolver, ResolvConf(..), defaultResolvConf, makeResolvSeed, withResolver) import Network.DNS.Lookup (lookupMX) import System.Directory (doesFileExist) import System.Exit (exitWith, ExitCode(..)) import System.IO ( Handle, IOMode( WriteMode ), hClose, hFlush, openFile, stdin, stdout) import Text.Regex.PCRE.Light (compile, match, utf8) import CommandLine (Args(..), apply_args) import ExitCodes (exit_input_file_doesnt_exist) type Address = BSU.ByteString -- | Resolver parameters. We increase the default timeout from 3 to 5 -- seconds. resolv_conf :: ResolvConf resolv_conf = defaultResolvConf { resolvTimeout = 5 * 1000 * 1000 } -- | A list of common domains, there's no need to waste MX lookups -- on these. common_domains :: [Domain] common_domains = map BSU.fromString [ "aol.com", "comcast.net", "gmail.com", "msn.com", "yahoo.com", "verizon.net" ] -- | Check whether the given domain has a valid MX record. validate_mx :: Resolver -> Domain -> IO Bool validate_mx resolver domain | domain `elem` common_domains = return True | otherwise = do result <- lookupMX resolver domain case result of Nothing -> return False _ -> return True -- | Split an address into local/domain parts. parts :: Address -> (BSU.ByteString, BSU.ByteString) parts address = bytestring_split address '@' -- | Check that the lengths of the local/domain parts are within spec. validate_length :: Address -> Bool validate_length address = (BSU.length localpart <= 64) && (BSU.length domain <= 255) where (localpart, domain) = parts address -- | Validate an email address against a simple regex. This should -- catch common addresses but disallows a lot of (legal) weird stuff. validate_regex :: Address -> Bool validate_regex address = case matches of Nothing -> False _ -> True where regex_str = "(\\w+)([\\w\\-\\.]*)@(([0-9a-zA-Z\\-]+\\.)+)[a-zA-Z]{2,4}" regex_bs = BSU.fromString regex_str regex = compile regex_bs [utf8] matches = match regex address [] -- | Validate the syntax of an email address by checking its length -- and validating it against a simple regex. validate_syntax :: Address -> Bool validate_syntax address = (validate_length address) && (validate_regex address) -- | Split a 'ByteString' @s@ at the first occurrence of @character@. bytestring_split :: BSU.ByteString -> Char -> (BSU.ByteString, BSU.ByteString) bytestring_split s character = (before, after) where break_func = (== character) (before, rest) = BSU.break break_func s after = BS.tail rest -- | Validate an email address by doing some simple syntax checks and -- (if those fail) an MX lookup. We don't count an A record as a mail -- exchanger. validate :: Resolver -> Address -> IO (Address, Bool) validate resolver address = do let valid_syntax = validate_syntax address if valid_syntax then do let (_,domain) = parts address mx_result <- validate_mx resolver domain return (address, mx_result) else do return (address, False) -- | Append a ByteString to a file Handle, followed by a newline. append_handle_with_newline :: Handle -> BS.ByteString -> IO () append_handle_with_newline h bs = do BS.hPutStr h bs BS.hPutStr h newline where newline = BSU.fromString "\n" main :: IO () main = do Args{..} <- apply_args -- Get the input from either stdin, or the file given on the command -- line. input <- case input_file of Nothing -> BS.hGetContents stdin Just path -> do is_file <- doesFileExist path when (not is_file) $ do exitWith (ExitFailure exit_input_file_doesnt_exist) BS.readFile path -- Do the same for the output handle and stdout. output_handle <- case output_file of Nothing -> return stdout Just path -> openFile path WriteMode -- Split the input into lines. let addresses = BSU.lines input -- And remove the empty ones. let nonempty_addresses = filter (not . BS.null) addresses rs <- makeResolvSeed resolv_conf withResolver rs $ \resolver -> do -- Construst a list of [IO (Address, Bool)] let actions = map (validate resolver) nonempty_addresses -- And compute them in parallel. results <- parallel actions stopGlobalPool -- Find the pairs with a True in the second position. let good_pairs = filter snd results -- And output the results. mapM_ ((append_handle_with_newline output_handle) . fst) good_pairs -- Clean up. It's safe to try to close stdout. hFlush output_handle hClose output_handle