1 {-# LANGUAGE DoAndIfThenElse #-}
2 {-# LANGUAGE RecordWildCards #-}
7 import Control.Concurrent.ParallelIO.Global (parallel, stopGlobalPool)
8 import Control.Monad (when)
9 import qualified Data.ByteString as BS
10 import qualified Data.ByteString.UTF8 as BSU
18 import Network.DNS.Lookup (lookupMX)
19 import System.Directory (doesFileExist)
20 import System.Exit (exitWith, ExitCode(..))
29 import Text.Regex.PCRE.Light (compile, match, utf8)
31 import CommandLine (Args(..), apply_args)
32 import ExitCodes (exit_input_file_doesnt_exist)
34 type Address = BSU.ByteString
36 -- | Resolver parameters. We increase the default timeout from 3 to 5
38 resolv_conf :: ResolvConf
39 resolv_conf = defaultResolvConf { resolvTimeout = 5 * 1000 * 1000 }
41 -- | A list of common domains, there's no need to waste MX lookups
43 common_domains :: [Domain]
44 common_domains = map BSU.fromString [ "aol.com",
52 -- | Check whether the given domain has a valid MX record.
53 validate_mx :: Resolver -> Domain -> IO Bool
54 validate_mx resolver domain
55 | domain `elem` common_domains = return True
57 result <- lookupMX resolver domain
59 Nothing -> return False
62 -- | Split an address into local/domain parts.
63 parts :: Address -> (BSU.ByteString, BSU.ByteString)
64 parts address = bytestring_split address '@'
66 -- | Check that the lengths of the local/domain parts are within spec.
67 validate_length :: Address -> Bool
68 validate_length address =
69 (BSU.length localpart <= 64) && (BSU.length domain <= 255)
71 (localpart, domain) = parts address
73 -- | Validate an email address against a simple regex. This should
74 -- catch common addresses but disallows a lot of (legal) weird stuff.
75 validate_regex :: Address -> Bool
76 validate_regex address =
81 regex_str = "(\\w+)([\\w\\-\\.]*)@(([0-9a-zA-Z\\-]+\\.)+)[a-zA-Z]{2,4}"
82 regex_bs = BSU.fromString regex_str
83 regex = compile regex_bs [utf8]
84 matches = match regex address []
86 -- | Validate the syntax of an email address by checking its length
87 -- and validating it against a simple regex.
88 validate_syntax :: Address -> Bool
89 validate_syntax address =
90 (validate_length address) && (validate_regex address)
92 -- | Split a 'ByteString' @s@ at the first occurrence of @character@.
93 bytestring_split :: BSU.ByteString -> Char -> (BSU.ByteString, BSU.ByteString)
94 bytestring_split s character =
97 break_func = (== character)
98 (before, rest) = BSU.break break_func s
102 -- | Validate an email address by doing some simple syntax checks and
103 -- (if those fail) an MX lookup. We don't count an A record as a mail
105 validate :: Resolver -> Address -> IO (Address, Bool)
106 validate resolver address = do
107 let valid_syntax = validate_syntax address
108 if valid_syntax then do
109 let (_,domain) = parts address
110 mx_result <- validate_mx resolver domain
111 return (address, mx_result)
113 return (address, False)
116 -- | Append a ByteString to a file Handle, followed by a newline.
117 append_handle_with_newline :: Handle -> BS.ByteString -> IO ()
118 append_handle_with_newline h bs = do
122 newline = BSU.fromString "\n"
127 Args{..} <- apply_args
129 -- Get the input from either stdin, or the file given on the command
131 input <- case input_file of
132 Nothing -> BS.hGetContents stdin
134 is_file <- doesFileExist path
135 when (not is_file) $ do
136 exitWith (ExitFailure exit_input_file_doesnt_exist)
139 -- Do the same for the output handle and stdout.
140 output_handle <- case output_file of
141 Nothing -> return stdout
142 Just path -> openFile path WriteMode
144 -- Split the input into lines.
145 let addresses = BSU.lines input
147 -- And remove the empty ones.
148 let nonempty_addresses = filter (not . BS.null) addresses
150 rs <- makeResolvSeed resolv_conf
151 withResolver rs $ \resolver -> do
152 -- Construst a list of [IO (Address, Bool)]
153 let actions = map (validate resolver) nonempty_addresses
154 -- And compute them in parallel.
155 results <- parallel actions
157 -- Find the pairs with a True in the second position.
158 let good_pairs = filter snd results
159 -- And output the results.
160 mapM_ ((append_handle_with_newline output_handle) . fst) good_pairs
162 -- Clean up. It's safe to try to close stdout.