1 {-# LANGUAGE DoAndIfThenElse #-}
2 {-# LANGUAGE RecordWildCards #-}
7 import Control.Concurrent.ParallelIO.Global (parallel, stopGlobalPool)
8 import Control.Monad (when)
9 import qualified Data.ByteString as BS
10 import qualified Data.ByteString.UTF8 as BSU
18 import Network.DNS.Lookup (lookupMX)
19 import System.Directory (doesFileExist)
20 import System.Exit (exitWith, ExitCode(..))
29 import Text.Regex.PCRE.Light (compile, match, utf8)
31 import CommandLine (Args(..), apply_args)
32 import ExitCodes (exit_input_file_doesnt_exist)
34 type Address = BSU.ByteString
36 -- | Resolver parameters. We increase the default timeout from 3 to 5
38 resolv_conf :: ResolvConf
39 resolv_conf = defaultResolvConf { resolvTimeout = 5 * 1000 * 1000 }
41 -- | A list of common domains, there's no need to waste MX lookups
43 common_domains :: [Domain]
44 common_domains = map BSU.fromString [ "aol.com",
51 validate_mx :: Resolver -> Domain -> IO Bool
52 validate_mx resolver domain
53 | domain `elem` common_domains = return True
55 result <- lookupMX resolver domain
57 Nothing -> return False
60 -- | Split an address into local/domain parts.
61 parts :: Address -> (BSU.ByteString, BSU.ByteString)
62 parts address = bytestring_split address '@'
64 -- | Check that the lengths of the local/domain parts are within spec.
65 validate_length :: Address -> Bool
66 validate_length address =
67 (BSU.length localpart <= 64) && (BSU.length domain <= 255)
69 (localpart, domain) = parts address
71 -- | Validate an email address against a simple regex. This should
72 -- catch common addresses but disallows a lot of (legal) weird stuff.
73 validate_regex :: Address -> Bool
74 validate_regex address =
79 regex_str = "(\\w+)([\\w\\-\\.]*)@(([0-9a-zA-Z\\-]+\\.)+)[a-zA-Z]{2,4}"
80 regex_bs = BSU.fromString regex_str
81 regex = compile regex_bs [utf8]
82 matches = match regex address []
84 -- | Validate the syntax of an email address by checking its length
85 -- and validating it against a simple regex.
86 validate_syntax :: Address -> Bool
87 validate_syntax address =
88 (validate_length address) && (validate_regex address)
90 -- | Split a 'ByteString' @s@ at the first occurrence of @character@.
91 bytestring_split :: BSU.ByteString -> Char -> (BSU.ByteString, BSU.ByteString)
92 bytestring_split s character =
95 break_func = (== character)
96 (before, rest) = BSU.break break_func s
100 -- | Validate an email address by doing some simple syntax checks and
101 -- (if those fail) an MX lookup. We don't count an A record as a mail
103 validate :: Resolver -> Address -> IO (Address, Bool)
104 validate resolver address = do
105 let valid_syntax = validate_syntax address
106 if valid_syntax then do
107 let (_,domain) = parts address
108 mx_result <- validate_mx resolver domain
109 return (address, mx_result)
111 return (address, False)
114 append_handle_with_newline :: Handle -> BS.ByteString -> IO ()
115 append_handle_with_newline h bs = do
119 newline = BSU.fromString "\n"
124 Args{..} <- apply_args
126 input <- case input_file of
127 Nothing -> BS.hGetContents stdin
129 is_file <- doesFileExist path
130 when (not is_file) $ do
131 exitWith (ExitFailure exit_input_file_doesnt_exist)
134 output_handle <- case output_file of
135 Nothing -> return stdout
136 Just path -> openFile path WriteMode
138 let addresses = BSU.lines input
139 let nonempty_addresses = filter (not . BS.null) addresses
141 rs <- makeResolvSeed resolv_conf
142 withResolver rs $ \resolver -> do
143 let actions = map (validate resolver) nonempty_addresses
144 results <- parallel actions
146 let good_pairs = filter snd results
147 mapM_ ((append_handle_with_newline output_handle) . fst) good_pairs