X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FMain.hs;h=9b22f5cfa20afe00ac3b82160a743781061ae754;hb=80e83309f0de0b4b89002564c94d9d988924bf9e;hp=bd3cc45f5ff741e936243c9de0cf5baa84f0e1ce;hpb=c9c873eead8a45dd7df8e8ef1f5861a5aa741d9e;p=email-validator.git diff --git a/src/Main.hs b/src/Main.hs index bd3cc45..9b22f5c 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -1,9 +1,11 @@ {-# LANGUAGE DoAndIfThenElse #-} +{-# LANGUAGE RecordWildCards #-} module Main where import Control.Concurrent.ParallelIO.Global (parallel, stopGlobalPool) +import Control.Monad (unless) import qualified Data.ByteString as BS import qualified Data.ByteString.UTF8 as BSU import Network.DNS ( @@ -14,10 +16,22 @@ import Network.DNS ( makeResolvSeed, withResolver) import Network.DNS.Lookup (lookupMX) -import System.IO (hGetContents, hFlush, hPutStrLn, stdin, stdout) -import Text.Regex.PCRE.Light (compile, match, utf8) +import System.Directory (doesFileExist) +import System.Exit (exitWith, ExitCode(..)) +import System.IO ( + Handle, + IOMode( WriteMode ), + hClose, + hFlush, + openFile, + stdin, + stdout) + + +import CommandLine (Args(..), apply_args) +import EmailAddress +import ExitCodes (exit_input_file_doesnt_exist) -type Address = BSU.ByteString -- | Resolver parameters. We increase the default timeout from 3 to 5 -- seconds. @@ -34,6 +48,8 @@ common_domains = map BSU.fromString [ "aol.com", "yahoo.com", "verizon.net" ] + +-- | Check whether the given domain has a valid MX record. validate_mx :: Resolver -> Domain -> IO Bool validate_mx resolver domain | domain `elem` common_domains = return True @@ -43,44 +59,6 @@ validate_mx resolver domain Nothing -> return False _ -> return True --- | Split an address into local/domain parts. -parts :: Address -> (BSU.ByteString, BSU.ByteString) -parts address = bytestring_split address '@' - --- | Check that the lengths of the local/domain parts are within spec. -validate_length :: Address -> Bool -validate_length address = - (BSU.length localpart <= 64) && (BSU.length domain <= 255) - where - (localpart, domain) = parts address - --- | Validate an email address against a simple regex. This should --- catch common addresses but disallows a lot of (legal) weird stuff. -validate_regex :: Address -> Bool -validate_regex address = - case matches of - Nothing -> False - _ -> True - where - regex_str = "(\\w+)([\\w\\-\\.]*)@(([0-9a-zA-Z\\-]+\\.)+)[a-zA-Z]{2,4}" - regex_bs = BSU.fromString regex_str - regex = compile regex_bs [utf8] - matches = match regex address [] - --- | Validate the syntax of an email address by checking its length --- and validating it against a simple regex. -validate_syntax :: Address -> Bool -validate_syntax address = - (validate_length address) && (validate_regex address) - --- | Split a 'ByteString' @s@ at the first occurrence of @character@. -bytestring_split :: BSU.ByteString -> Char -> (BSU.ByteString, BSU.ByteString) -bytestring_split s character = - (before, after) - where - break_func = (== character) - (before, rest) = BSU.break break_func s - after = BS.tail rest -- | Validate an email address by doing some simple syntax checks and @@ -93,22 +71,56 @@ validate resolver address = do let (_,domain) = parts address mx_result <- validate_mx resolver domain return (address, mx_result) - else do + else return (address, False) + +-- | Append a ByteString to a file Handle, followed by a newline. +append_handle_with_newline :: Handle -> BS.ByteString -> IO () +append_handle_with_newline h bs = do + BS.hPutStr h bs + BS.hPutStr h newline + where + newline = BSU.fromString "\n" + + main :: IO () main = do - input <- hGetContents stdin - - let addresses = BSU.lines $ BSU.fromString input + Args{..} <- apply_args + + -- Get the input from either stdin, or the file given on the command + -- line. + input <- case input_file of + Nothing -> BS.hGetContents stdin + Just path -> do + is_file <- doesFileExist path + unless is_file $ + exitWith (ExitFailure exit_input_file_doesnt_exist) + BS.readFile path + + -- Do the same for the output handle and stdout. + output_handle <- case output_file of + Nothing -> return stdout + Just path -> openFile path WriteMode + + -- Split the input into lines. + let addresses = BSU.lines input + + -- And remove the empty ones. let nonempty_addresses = filter (not . BS.null) addresses rs <- makeResolvSeed resolv_conf withResolver rs $ \resolver -> do + -- Construst a list of [IO (Address, Bool)] let actions = map (validate resolver) nonempty_addresses + -- And compute them in parallel. results <- parallel actions + stopGlobalPool + -- Find the pairs with a True in the second position. let good_pairs = filter snd results - mapM_ ((hPutStrLn stdout) . BSU.toString . fst) good_pairs + -- And output the results. + mapM_ ((append_handle_with_newline output_handle) . fst) good_pairs - stopGlobalPool - hFlush stdout + -- Clean up. It's safe to try to close stdout. + hFlush output_handle + hClose output_handle