{-# LANGUAGE DoAndIfThenElse #-} module Main where import Control.Concurrent.ParallelIO.Global (parallel, stopGlobalPool) import qualified Data.ByteString as BS import qualified Data.ByteString.UTF8 as BSU import Network.DNS ( Domain, Resolver, ResolvConf(..), defaultResolvConf, makeResolvSeed, withResolver) import Network.DNS.Lookup (lookupMX) import System.IO (hGetContents, hFlush, hPutStrLn, stdin, stdout) import Text.Regex.PCRE.Light (compile, match, utf8) type Address = BSU.ByteString -- | Resolver parameters. We increase the default timeout from 3 to 5 -- seconds. resolv_conf :: ResolvConf resolv_conf = defaultResolvConf { resolvTimeout = 5 * 1000 * 1000 } -- | A list of common domains, there's no need to waste MX lookups -- on these. common_domains :: [Domain] common_domains = map BSU.fromString [ "aol.com", "comcast.net", "gmail.com", "msn.com", "yahoo.com", "verizon.net" ] validate_mx :: Resolver -> Domain -> IO Bool validate_mx resolver domain | domain `elem` common_domains = return True | otherwise = do result <- lookupMX resolver domain case result of Nothing -> return False _ -> return True -- | Split an address into local/domain parts. parts :: Address -> (BSU.ByteString, BSU.ByteString) parts address = bytestring_split address '@' -- | Check that the lengths of the local/domain parts are within spec. validate_length :: Address -> Bool validate_length address = (BSU.length localpart <= 64) && (BSU.length domain <= 255) where (localpart, domain) = parts address -- | Validate an email address against a simple regex. This should -- catch common addresses but disallows a lot of (legal) weird stuff. validate_regex :: Address -> Bool validate_regex address = case matches of Nothing -> False _ -> True where regex_str = "(\\w+)([\\w\\-\\.]*)@(([0-9a-zA-Z\\-]+\\.)+)[a-zA-Z]{2,4}" regex_bs = BSU.fromString regex_str regex = compile regex_bs [utf8] matches = match regex address [] -- | Validate the syntax of an email address by checking its length -- and validating it against a simple regex. validate_syntax :: Address -> Bool validate_syntax address = (validate_length address) && (validate_regex address) -- | Split a 'ByteString' @s@ at the first occurrence of @character@. bytestring_split :: BSU.ByteString -> Char -> (BSU.ByteString, BSU.ByteString) bytestring_split s character = (before, after) where break_func = (== character) (before, rest) = BSU.break break_func s after = BS.tail rest -- | Validate an email address by doing some simple syntax checks and -- (if those fail) an MX lookup. We don't count an A record as a mail -- exchanger. validate :: Resolver -> Address -> IO (Address, Bool) validate resolver address = do let valid_syntax = validate_syntax address if valid_syntax then do let (_,domain) = parts address mx_result <- validate_mx resolver domain return (address, mx_result) else do return (address, False) main :: IO () main = do input <- hGetContents stdin let addresses = BSU.lines $ BSU.fromString input let nonempty_addresses = filter (not . BS.null) addresses rs <- makeResolvSeed resolv_conf withResolver rs $ \resolver -> do let actions = map (validate resolver) nonempty_addresses results <- parallel actions let good_pairs = filter snd results mapM_ ((hPutStrLn stdout) . BSU.toString . fst) good_pairs stopGlobalPool hFlush stdout