X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;ds=sidebyside;f=src%2FMain.hs;h=1cc4c73aa47d963e56f30a7188f05781f010b76c;hb=HEAD;hp=3e99d0d006ccdcf8ee8e7e105ae4a74636187a95;hpb=9c30c25bb499e843a40062e2bd1e118c515aa159;p=email-validator.git diff --git a/src/Main.hs b/src/Main.hs index 3e99d0d..ab5f93a 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -1,62 +1,173 @@ {-# LANGUAGE DoAndIfThenElse #-} +{-# LANGUAGE RecordWildCards #-} -module Main +module Main (main) where -import Control.Monad (filterM) -import qualified Data.ByteString.UTF8 as BS +import Control.Concurrent.ParallelIO.Global ( + parallelInterleaved, + stopGlobalPool ) +import qualified Data.ByteString.Char8 as BS ( + hGetContents, + hPutStrLn, + lines, + null, + pack ) import Network.DNS ( Domain, Resolver, - ResolvConf(..), + ResolvConf( resolvTimeout ), defaultResolvConf, makeResolvSeed, - withResolver) -import Network.DNS.Lookup (lookupMX) -import System.IO (hGetContents, hFlush, hPutStrLn, stdin, stdout) + withResolver ) +import Network.DNS.Lookup ( lookupA, lookupMX ) +import System.IO ( + hFlush, + stdin, + stdout ) -type Address = BS.ByteString +import CommandLine ( + Args( Args, accept_a, rfc5322 ), + get_args ) +import EmailAddress( + Address, + parts, + validate_syntax ) + +-- | Resolver parameters. We increase the default timeout from 3 to 10 +-- seconds. resolv_conf :: ResolvConf -resolv_conf = defaultResolvConf { resolvTimeout = 5 * 1000 * 1000 } +resolv_conf = defaultResolvConf { resolvTimeout = 10 * 1000 * 1000 } + +-- | A list of common domains, there's no need to waste MX lookups on +-- these. This is a very limited list; I don't want to be in the +-- business of monitoring a million domains for MX record updates. +common_domains :: [Domain] +common_domains = map BS.pack [ "aol.com", + "comcast.net", + "cox.net", + "gmail.com", + "gmx.de", + "googlemail.com", + "hotmail.com", + "icloud.com", + "live.com", + "me.com", + "msn.com", + "outlook.com", + "proton.me", + "protonmail.ch", + "protonmail.com", + "yahoo.com", + "verizon.net" ] + -validate_mx :: Resolver -> Domain -> IO Bool -validate_mx resolver domain = do - result <- lookupMX resolver domain - case result of - Nothing -> return False - _ -> return True +-- | Check whether the given domain has a valid MX record. +-- +-- NULLMX (RFC7505) records consisting of a single period must not +-- be accepted. Moreover, the existence of a NULLMX must be reported +-- back to the caller because the whole point of a NULLMX is that +-- its existence should preempt an @A@ record check. We abuse the +-- return type for this, and return @Nothing@ in the event of a +-- NULLMX. Otherwise we return @Just True@ or @Just False@ to +-- indicate the existence (or not) of MX records. +-- +-- RFC7505 states that a domain MUST NOT have any other MX records +-- if it has a NULLMX record. We enforce this. If you have a NULLMX +-- record and some other MX record, we consider the set invalid. +-- +validate_mx :: Resolver -> Domain -> IO (Maybe Bool) +validate_mx resolver domain + | domain `elem` common_domains = return $ Just True + | otherwise = do + result <- lookupMX resolver domain + case result of + Left _ -> + return $ Just False + Right mxs -> + case mxs of + [] -> return $ Just False + _ -> if any (is_null) mxs + then return Nothing + else return $ Just True + where + nullmx :: Domain + nullmx = BS.pack "." -validate_syntax :: Address -> IO Bool -validate_syntax address = do - return True + is_null :: (Domain,Int) -> Bool + is_null (mx,prio) = mx == nullmx && prio == 0 -utf8_split = undefined +-- | Check whether the given domain has a valid A record. +validate_a :: Resolver -> Domain -> IO Bool +validate_a resolver domain + | domain `elem` common_domains = return True + | otherwise = do + result <- lookupA resolver domain + case result of + Right (_:_) -> return True + _ -> return False -validate :: Resolver -> Address -> IO Bool -validate resolver address = do - valid_syntax <- validate_syntax address + +-- | Validate an email address by doing some simple syntax checks and +-- (if those fail) an MX lookup. We don't count an @A@ record as a mail +-- exchanger unless @accept_a@ is True. And even then, the existence +-- of a NULLMX record will preclude the acceptance of an @A@ record. +-- The domain @example.com@ is a great test case for this behavior. +validate :: Resolver -> Bool -> Bool -> Address -> IO (Address, Bool) +validate resolver accept_a rfc5322 address = do + let valid_syntax = validate_syntax rfc5322 address if valid_syntax then do - let domain = utf8_split address (BS.fromString "@") - validate_mx resolver domain - else do - return False + let (_,domain) = parts address + mx_result <- validate_mx resolver domain + case mx_result of + Nothing -> + -- NULLMX, don't fall back to 'A' records under any + -- circumstances. + return (address, False) + Just mxr -> + if mxr + then return (address, True) + else + if accept_a + then do + a_result <- validate_a resolver domain + return (address, a_result) + else + return (address, False) + else + return (address, False) -empty_string :: BS.ByteString -empty_string = BS.fromString "" main :: IO () main = do - input <- hGetContents stdin + Args{..} <- get_args + + -- Split stdin into lines, which should result in a list of addresses. + input <- BS.hGetContents stdin + let addresses = BS.lines input - let addresses = BS.lines $ BS.fromString input - let nonempty_addresses = filter (/= empty_string) addresses + -- And remove the empty ones. + let nonempty_addresses = filter (not . BS.null) addresses rs <- makeResolvSeed resolv_conf - withResolver rs $ \resolver -> do - good_addresses <- filterM (validate resolver) nonempty_addresses - let good_address_strings = map BS.toString good_addresses - mapM_ (hPutStrLn stdout) good_address_strings + let validate' addr = withResolver rs $ \resolver -> + validate resolver accept_a rfc5322 addr + + -- Construct a list of [IO (Address, Bool)]. The withResolver calls + -- are the ones that should be run in parallel. + let actions = map validate' nonempty_addresses + + -- Run the lookup actions in parallel. + results <- parallelInterleaved actions + + -- Filter the bad ones. + let valid_results = filter snd results + + -- Output the results. + let valid_addresses = map fst valid_results + mapM_ (BS.hPutStrLn stdout) valid_addresses + stopGlobalPool hFlush stdout