]> gitweb.michael.orlitzky.com - email-validator.git/blob - src/Main.hs
Add command-line processing.
[email-validator.git] / src / Main.hs
1 {-# LANGUAGE DoAndIfThenElse #-}
2 {-# LANGUAGE RecordWildCards #-}
3
4 module Main
5 where
6
7 import Control.Concurrent.ParallelIO.Global (parallel, stopGlobalPool)
8 import Control.Monad (when)
9 import qualified Data.ByteString as BS
10 import qualified Data.ByteString.UTF8 as BSU
11 import Network.DNS (
12 Domain,
13 Resolver,
14 ResolvConf(..),
15 defaultResolvConf,
16 makeResolvSeed,
17 withResolver)
18 import Network.DNS.Lookup (lookupMX)
19 import System.Directory (doesFileExist)
20 import System.Exit (exitWith, ExitCode(..))
21 import System.IO (
22 Handle,
23 IOMode( WriteMode ),
24 hClose,
25 hFlush,
26 openFile,
27 stdin,
28 stdout)
29 import Text.Regex.PCRE.Light (compile, match, utf8)
30
31 import CommandLine (Args(..), apply_args)
32 import ExitCodes (exit_input_file_doesnt_exist)
33
34 type Address = BSU.ByteString
35
36 -- | Resolver parameters. We increase the default timeout from 3 to 5
37 -- seconds.
38 resolv_conf :: ResolvConf
39 resolv_conf = defaultResolvConf { resolvTimeout = 5 * 1000 * 1000 }
40
41 -- | A list of common domains, there's no need to waste MX lookups
42 -- on these.
43 common_domains :: [Domain]
44 common_domains = map BSU.fromString [ "aol.com",
45 "comcast.net",
46 "gmail.com",
47 "msn.com",
48 "yahoo.com",
49 "verizon.net" ]
50
51 validate_mx :: Resolver -> Domain -> IO Bool
52 validate_mx resolver domain
53 | domain `elem` common_domains = return True
54 | otherwise = do
55 result <- lookupMX resolver domain
56 case result of
57 Nothing -> return False
58 _ -> return True
59
60 -- | Split an address into local/domain parts.
61 parts :: Address -> (BSU.ByteString, BSU.ByteString)
62 parts address = bytestring_split address '@'
63
64 -- | Check that the lengths of the local/domain parts are within spec.
65 validate_length :: Address -> Bool
66 validate_length address =
67 (BSU.length localpart <= 64) && (BSU.length domain <= 255)
68 where
69 (localpart, domain) = parts address
70
71 -- | Validate an email address against a simple regex. This should
72 -- catch common addresses but disallows a lot of (legal) weird stuff.
73 validate_regex :: Address -> Bool
74 validate_regex address =
75 case matches of
76 Nothing -> False
77 _ -> True
78 where
79 regex_str = "(\\w+)([\\w\\-\\.]*)@(([0-9a-zA-Z\\-]+\\.)+)[a-zA-Z]{2,4}"
80 regex_bs = BSU.fromString regex_str
81 regex = compile regex_bs [utf8]
82 matches = match regex address []
83
84 -- | Validate the syntax of an email address by checking its length
85 -- and validating it against a simple regex.
86 validate_syntax :: Address -> Bool
87 validate_syntax address =
88 (validate_length address) && (validate_regex address)
89
90 -- | Split a 'ByteString' @s@ at the first occurrence of @character@.
91 bytestring_split :: BSU.ByteString -> Char -> (BSU.ByteString, BSU.ByteString)
92 bytestring_split s character =
93 (before, after)
94 where
95 break_func = (== character)
96 (before, rest) = BSU.break break_func s
97 after = BS.tail rest
98
99
100 -- | Validate an email address by doing some simple syntax checks and
101 -- (if those fail) an MX lookup. We don't count an A record as a mail
102 -- exchanger.
103 validate :: Resolver -> Address -> IO (Address, Bool)
104 validate resolver address = do
105 let valid_syntax = validate_syntax address
106 if valid_syntax then do
107 let (_,domain) = parts address
108 mx_result <- validate_mx resolver domain
109 return (address, mx_result)
110 else do
111 return (address, False)
112
113
114 append_handle_with_newline :: Handle -> BS.ByteString -> IO ()
115 append_handle_with_newline h bs = do
116 BS.hPutStr h bs
117 BS.hPutStr h newline
118 where
119 newline = BSU.fromString "\n"
120
121
122 main :: IO ()
123 main = do
124 Args{..} <- apply_args
125
126 input <- case input_file of
127 Nothing -> BS.hGetContents stdin
128 Just path -> do
129 is_file <- doesFileExist path
130 when (not is_file) $ do
131 exitWith (ExitFailure exit_input_file_doesnt_exist)
132 BS.readFile path
133
134 output_handle <- case output_file of
135 Nothing -> return stdout
136 Just path -> openFile path WriteMode
137
138 let addresses = BSU.lines input
139 let nonempty_addresses = filter (not . BS.null) addresses
140
141 rs <- makeResolvSeed resolv_conf
142 withResolver rs $ \resolver -> do
143 let actions = map (validate resolver) nonempty_addresses
144 results <- parallel actions
145 stopGlobalPool
146 let good_pairs = filter snd results
147 mapM_ ((append_handle_with_newline output_handle) . fst) good_pairs
148 hFlush output_handle
149 hClose output_handle