X-Git-Url: https://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FMain.hs;h=1cc4c73aa47d963e56f30a7188f05781f010b76c;hb=2e8aab2a69a8b9887e4f037115b80a2063d3a6b7;hp=bd3cc45f5ff741e936243c9de0cf5baa84f0e1ce;hpb=c9c873eead8a45dd7df8e8ef1f5861a5aa741d9e;p=email-validator.git diff --git a/src/Main.hs b/src/Main.hs index bd3cc45..1cc4c73 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -1,114 +1,131 @@ {-# LANGUAGE DoAndIfThenElse #-} +{-# LANGUAGE RecordWildCards #-} -module Main +module Main (main) where -import Control.Concurrent.ParallelIO.Global (parallel, stopGlobalPool) -import qualified Data.ByteString as BS -import qualified Data.ByteString.UTF8 as BSU +import Control.Concurrent.ParallelIO.Global ( + parallelInterleaved, + stopGlobalPool ) +import qualified Data.ByteString.Char8 as BS ( + hGetContents, + hPutStrLn, + lines, + null, + pack ) import Network.DNS ( Domain, Resolver, - ResolvConf(..), + ResolvConf( resolvTimeout ), defaultResolvConf, makeResolvSeed, - withResolver) -import Network.DNS.Lookup (lookupMX) -import System.IO (hGetContents, hFlush, hPutStrLn, stdin, stdout) -import Text.Regex.PCRE.Light (compile, match, utf8) + withResolver ) +import Network.DNS.Lookup ( lookupA, lookupMX ) +import System.IO ( + hFlush, + stdin, + stdout ) -type Address = BSU.ByteString --- | Resolver parameters. We increase the default timeout from 3 to 5 +import CommandLine ( + Args( Args, accept_a, rfc5322 ), + get_args ) +import EmailAddress( + Address, + parts, + validate_syntax ) + + +-- | Resolver parameters. We increase the default timeout from 3 to 10 -- seconds. resolv_conf :: ResolvConf -resolv_conf = defaultResolvConf { resolvTimeout = 5 * 1000 * 1000 } +resolv_conf = defaultResolvConf { resolvTimeout = 10 * 1000 * 1000 } -- | A list of common domains, there's no need to waste MX lookups -- on these. common_domains :: [Domain] -common_domains = map BSU.fromString [ "aol.com", - "comcast.net", - "gmail.com", - "msn.com", - "yahoo.com", - "verizon.net" ] +common_domains = map BS.pack [ "aol.com", + "comcast.net", + "gmail.com", + "msn.com", + "yahoo.com", + "verizon.net" ] + +-- | Check whether the given domain has a valid MX record. validate_mx :: Resolver -> Domain -> IO Bool validate_mx resolver domain | domain `elem` common_domains = return True | otherwise = do result <- lookupMX resolver domain case result of - Nothing -> return False - _ -> return True - --- | Split an address into local/domain parts. -parts :: Address -> (BSU.ByteString, BSU.ByteString) -parts address = bytestring_split address '@' - --- | Check that the lengths of the local/domain parts are within spec. -validate_length :: Address -> Bool -validate_length address = - (BSU.length localpart <= 64) && (BSU.length domain <= 255) - where - (localpart, domain) = parts address - --- | Validate an email address against a simple regex. This should --- catch common addresses but disallows a lot of (legal) weird stuff. -validate_regex :: Address -> Bool -validate_regex address = - case matches of - Nothing -> False - _ -> True - where - regex_str = "(\\w+)([\\w\\-\\.]*)@(([0-9a-zA-Z\\-]+\\.)+)[a-zA-Z]{2,4}" - regex_bs = BSU.fromString regex_str - regex = compile regex_bs [utf8] - matches = match regex address [] - --- | Validate the syntax of an email address by checking its length --- and validating it against a simple regex. -validate_syntax :: Address -> Bool -validate_syntax address = - (validate_length address) && (validate_regex address) - --- | Split a 'ByteString' @s@ at the first occurrence of @character@. -bytestring_split :: BSU.ByteString -> Char -> (BSU.ByteString, BSU.ByteString) -bytestring_split s character = - (before, after) - where - break_func = (== character) - (before, rest) = BSU.break break_func s - after = BS.tail rest + -- A list of one or more elements? + Right (_:_) -> return True + _ -> return False + + +-- | Check whether the given domain has a valid A record. +validate_a :: Resolver -> Domain -> IO Bool +validate_a resolver domain + | domain `elem` common_domains = return True + | otherwise = do + result <- lookupA resolver domain + case result of + Right (_:_) -> return True + _ -> return False -- | Validate an email address by doing some simple syntax checks and -- (if those fail) an MX lookup. We don't count an A record as a mail -- exchanger. -validate :: Resolver -> Address -> IO (Address, Bool) -validate resolver address = do - let valid_syntax = validate_syntax address +validate :: Resolver -> Bool -> Bool -> Address -> IO (Address, Bool) +validate resolver accept_a rfc5322 address = do + let valid_syntax = validate_syntax rfc5322 address if valid_syntax then do let (_,domain) = parts address mx_result <- validate_mx resolver domain - return (address, mx_result) - else do + if mx_result + then return (address, True) + else + if accept_a + then do + a_result <- validate_a resolver domain + return (address, a_result) + else + return (address, False) + else return (address, False) + + main :: IO () main = do - input <- hGetContents stdin + Args{..} <- get_args - let addresses = BSU.lines $ BSU.fromString input + -- Split stdin into lines, which should result in a list of addresses. + input <- BS.hGetContents stdin + let addresses = BS.lines input + + -- And remove the empty ones. let nonempty_addresses = filter (not . BS.null) addresses rs <- makeResolvSeed resolv_conf - withResolver rs $ \resolver -> do - let actions = map (validate resolver) nonempty_addresses - results <- parallel actions - let good_pairs = filter snd results - mapM_ ((hPutStrLn stdout) . BSU.toString . fst) good_pairs + let validate' addr = withResolver rs $ \resolver -> + validate resolver accept_a rfc5322 addr + + -- Construct a list of [IO (Address, Bool)]. The withResolver calls + -- are the ones that should be run in parallel. + let actions = map validate' nonempty_addresses + + -- Run the lookup actions in parallel. + results <- parallelInterleaved actions + + -- Filter the bad ones. + let valid_results = filter snd results + + -- Output the results. + let valid_addresses = map fst valid_results + mapM_ (BS.hPutStrLn stdout) valid_addresses stopGlobalPool hFlush stdout