X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FMain.hs;h=74789e70f4e2cedbf0be6584c3b14626046bed52;hb=432089f7b63e96b6e15ad7895f0e4b6aa1a52efc;hp=9b22f5cfa20afe00ac3b82160a743781061ae754;hpb=80e83309f0de0b4b89002564c94d9d988924bf9e;p=email-validator.git diff --git a/src/Main.hs b/src/Main.hs index 9b22f5c..74789e7 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -1,126 +1,164 @@ {-# LANGUAGE DoAndIfThenElse #-} {-# LANGUAGE RecordWildCards #-} -module Main +module Main (main) where -import Control.Concurrent.ParallelIO.Global (parallel, stopGlobalPool) -import Control.Monad (unless) -import qualified Data.ByteString as BS -import qualified Data.ByteString.UTF8 as BSU +import Control.Concurrent.ParallelIO.Global ( + parallelInterleaved, + stopGlobalPool ) +import qualified Data.ByteString.Char8 as BS ( + hGetContents, + hPutStrLn, + lines, + null, + pack ) import Network.DNS ( Domain, Resolver, - ResolvConf(..), + ResolvConf( resolvTimeout ), defaultResolvConf, makeResolvSeed, - withResolver) -import Network.DNS.Lookup (lookupMX) -import System.Directory (doesFileExist) -import System.Exit (exitWith, ExitCode(..)) + withResolver ) +import Network.DNS.Lookup ( lookupA, lookupMX ) import System.IO ( - Handle, - IOMode( WriteMode ), - hClose, hFlush, - openFile, stdin, - stdout) + stdout ) -import CommandLine (Args(..), apply_args) -import EmailAddress -import ExitCodes (exit_input_file_doesnt_exist) +import CommandLine ( + Args( Args, accept_a, rfc5322 ), + get_args ) +import EmailAddress( + Address, + parts, + validate_syntax ) --- | Resolver parameters. We increase the default timeout from 3 to 5 +-- | Resolver parameters. We increase the default timeout from 3 to 10 -- seconds. resolv_conf :: ResolvConf -resolv_conf = defaultResolvConf { resolvTimeout = 5 * 1000 * 1000 } +resolv_conf = defaultResolvConf { resolvTimeout = 10 * 1000 * 1000 } --- | A list of common domains, there's no need to waste MX lookups --- on these. +-- | A list of common domains, there's no need to waste MX lookups on +-- these. This is a very limited list; I don't want to be in the +-- business of monitoring a million domains for MX record updates. common_domains :: [Domain] -common_domains = map BSU.fromString [ "aol.com", - "comcast.net", - "gmail.com", - "msn.com", - "yahoo.com", - "verizon.net" ] +common_domains = map BS.pack [ "aol.com", + "comcast.net", + "cox.net", + "gmail.com", + "gmx.de", + "googlemail.com", + "hotmail.com", + "icloud.com", + "live.com", + "me.com", + "msn.com", + "outlook.com", + "proton.me", + "protonmail.ch", + "protonmail.com", + "yahoo.com", + "verizon.net" ] + + +-- | Check whether the given domain has a valid MX record. NULLMX +-- (RFC7505) records consisting of a single period must not be +-- accepted. +-- +-- Two points about NULLMX: +-- +-- * RFC7505 states that a domain MUST NOT have any other MX records +-- if it has a NULLMX record. We don't enforce this. If you have a +-- NULLMX record and some other MX record, we will reluctantly +-- consider the second one valid. +-- +-- * RFC7505 also states that a NULLMX record must have a priority +-- of 0. We do not enforce this either. We ignore any records +-- containing an empty label (i.e. a single dot). Such a record will +-- not be deliverable anyway, and in light of the first item, means +-- that we will not \"incorrectly\" reject batshit-crazy domains +-- that have a NULLMX record (but with a non-zero priority) in +-- addition to other, valid MX records. +-- - --- | Check whether the given domain has a valid MX record. validate_mx :: Resolver -> Domain -> IO Bool validate_mx resolver domain | domain `elem` common_domains = return True | otherwise = do result <- lookupMX resolver domain - case result of - Nothing -> return False - _ -> return True + let nullmx = BS.pack "." :: Domain + let non_null = (\(mx,_) -> mx /= nullmx) :: (Domain,Int) -> Bool + let non_null_mxs = fmap (filter non_null) result + case non_null_mxs of + Right (_:_) -> return True + _ -> return False +-- | Check whether the given domain has a valid A record. +validate_a :: Resolver -> Domain -> IO Bool +validate_a resolver domain + | domain `elem` common_domains = return True + | otherwise = do + result <- lookupA resolver domain + case result of + Right (_:_) -> return True + _ -> return False + -- | Validate an email address by doing some simple syntax checks and -- (if those fail) an MX lookup. We don't count an A record as a mail -- exchanger. -validate :: Resolver -> Address -> IO (Address, Bool) -validate resolver address = do - let valid_syntax = validate_syntax address +validate :: Resolver -> Bool -> Bool -> Address -> IO (Address, Bool) +validate resolver accept_a rfc5322 address = do + let valid_syntax = validate_syntax rfc5322 address if valid_syntax then do let (_,domain) = parts address mx_result <- validate_mx resolver domain - return (address, mx_result) + if mx_result + then return (address, True) + else + if accept_a + then do + a_result <- validate_a resolver domain + return (address, a_result) + else + return (address, False) else return (address, False) --- | Append a ByteString to a file Handle, followed by a newline. -append_handle_with_newline :: Handle -> BS.ByteString -> IO () -append_handle_with_newline h bs = do - BS.hPutStr h bs - BS.hPutStr h newline - where - newline = BSU.fromString "\n" - main :: IO () main = do - Args{..} <- apply_args - - -- Get the input from either stdin, or the file given on the command - -- line. - input <- case input_file of - Nothing -> BS.hGetContents stdin - Just path -> do - is_file <- doesFileExist path - unless is_file $ - exitWith (ExitFailure exit_input_file_doesnt_exist) - BS.readFile path - - -- Do the same for the output handle and stdout. - output_handle <- case output_file of - Nothing -> return stdout - Just path -> openFile path WriteMode - - -- Split the input into lines. - let addresses = BSU.lines input + Args{..} <- get_args + + -- Split stdin into lines, which should result in a list of addresses. + input <- BS.hGetContents stdin + let addresses = BS.lines input -- And remove the empty ones. let nonempty_addresses = filter (not . BS.null) addresses rs <- makeResolvSeed resolv_conf - withResolver rs $ \resolver -> do - -- Construst a list of [IO (Address, Bool)] - let actions = map (validate resolver) nonempty_addresses - -- And compute them in parallel. - results <- parallel actions - stopGlobalPool - -- Find the pairs with a True in the second position. - let good_pairs = filter snd results - -- And output the results. - mapM_ ((append_handle_with_newline output_handle) . fst) good_pairs - - -- Clean up. It's safe to try to close stdout. - hFlush output_handle - hClose output_handle + let validate' addr = withResolver rs $ \resolver -> + validate resolver accept_a rfc5322 addr + + -- Construct a list of [IO (Address, Bool)]. The withResolver calls + -- are the ones that should be run in parallel. + let actions = map validate' nonempty_addresses + + -- Run the lookup actions in parallel. + results <- parallelInterleaved actions + + -- Filter the bad ones. + let valid_results = filter snd results + + -- Output the results. + let valid_addresses = map fst valid_results + mapM_ (BS.hPutStrLn stdout) valid_addresses + + stopGlobalPool + hFlush stdout