module Main
where
-import Control.Monad (filterM)
-import qualified Data.ByteString.UTF8 as BS
+import Control.Concurrent.ParallelIO.Global (parallel, stopGlobalPool)
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.UTF8 as BSU
import Network.DNS (
Domain,
Resolver,
withResolver)
import Network.DNS.Lookup (lookupMX)
import System.IO (hGetContents, hFlush, hPutStrLn, stdin, stdout)
+import Text.Regex.PCRE.Light (compile, match, utf8)
+type Address = BSU.ByteString
-type Address = BS.ByteString
-
+-- | Resolver parameters. We increase the default timeout from 3 to 5
+-- seconds.
resolv_conf :: ResolvConf
resolv_conf = defaultResolvConf { resolvTimeout = 5 * 1000 * 1000 }
+-- | A list of common domains, there's no need to waste MX lookups
+-- on these.
+common_domains :: [Domain]
+common_domains = map BSU.fromString [ "aol.com",
+ "comcast.net",
+ "gmail.com",
+ "msn.com",
+ "yahoo.com",
+ "verizon.net" ]
+
validate_mx :: Resolver -> Domain -> IO Bool
-validate_mx resolver domain = do
- result <- lookupMX resolver domain
- case result of
- Nothing -> return False
- _ -> return True
+validate_mx resolver domain
+ | domain `elem` common_domains = return True
+ | otherwise = do
+ result <- lookupMX resolver domain
+ case result of
+ Nothing -> return False
+ _ -> return True
+
+-- | Split an address into local/domain parts.
+parts :: Address -> (BSU.ByteString, BSU.ByteString)
+parts address = bytestring_split address '@'
+
+-- | Check that the lengths of the local/domain parts are within spec.
+validate_length :: Address -> Bool
+validate_length address =
+ (BSU.length localpart <= 64) && (BSU.length domain <= 255)
+ where
+ (localpart, domain) = parts address
-validate_syntax :: Address -> IO Bool
-validate_syntax address = do
- return True
+-- | Validate an email address against a simple regex. This should
+-- catch common addresses but disallows a lot of (legal) weird stuff.
+validate_regex :: Address -> Bool
+validate_regex address =
+ case matches of
+ Nothing -> False
+ _ -> True
+ where
+ regex_str = "(\\w+)([\\w\\-\\.]*)@(([0-9a-zA-Z\\-]+\\.)+)[a-zA-Z]{2,4}"
+ regex_bs = BSU.fromString regex_str
+ regex = compile regex_bs [utf8]
+ matches = match regex address []
-utf8_split = undefined
+-- | Validate the syntax of an email address by checking its length
+-- and validating it against a simple regex.
+validate_syntax :: Address -> Bool
+validate_syntax address =
+ (validate_length address) && (validate_regex address)
-validate :: Resolver -> Address -> IO Bool
+-- | Split a 'ByteString' @s@ at the first occurrence of @character@.
+bytestring_split :: BSU.ByteString -> Char -> (BSU.ByteString, BSU.ByteString)
+bytestring_split s character =
+ (before, after)
+ where
+ break_func = (== character)
+ (before, rest) = BSU.break break_func s
+ after = BS.tail rest
+
+
+-- | Validate an email address by doing some simple syntax checks and
+-- (if those fail) an MX lookup. We don't count an A record as a mail
+-- exchanger.
+validate :: Resolver -> Address -> IO (Address, Bool)
validate resolver address = do
- valid_syntax <- validate_syntax address
+ let valid_syntax = validate_syntax address
if valid_syntax then do
- let domain = utf8_split address (BS.fromString "@")
- validate_mx resolver domain
+ let (_,domain) = parts address
+ mx_result <- validate_mx resolver domain
+ return (address, mx_result)
else do
- return False
-
-empty_string :: BS.ByteString
-empty_string = BS.fromString ""
+ return (address, False)
main :: IO ()
main = do
input <- hGetContents stdin
- let addresses = BS.lines $ BS.fromString input
- let nonempty_addresses = filter (/= empty_string) addresses
+ let addresses = BSU.lines $ BSU.fromString input
+ let nonempty_addresses = filter (not . BS.null) addresses
rs <- makeResolvSeed resolv_conf
withResolver rs $ \resolver -> do
- good_addresses <- filterM (validate resolver) nonempty_addresses
- let good_address_strings = map BS.toString good_addresses
- mapM_ (hPutStrLn stdout) good_address_strings
+ let actions = map (validate resolver) nonempty_addresses
+ results <- parallel actions
+ let good_pairs = filter snd results
+ mapM_ ((hPutStrLn stdout) . BSU.toString . fst) good_pairs
+ stopGlobalPool
hFlush stdout