]> gitweb.michael.orlitzky.com - email-validator.git/blob - src/Main.hs
bd3cc45f5ff741e936243c9de0cf5baa84f0e1ce
[email-validator.git] / src / Main.hs
1 {-# LANGUAGE DoAndIfThenElse #-}
2
3 module Main
4 where
5
6 import Control.Concurrent.ParallelIO.Global (parallel, stopGlobalPool)
7 import qualified Data.ByteString as BS
8 import qualified Data.ByteString.UTF8 as BSU
9 import Network.DNS (
10 Domain,
11 Resolver,
12 ResolvConf(..),
13 defaultResolvConf,
14 makeResolvSeed,
15 withResolver)
16 import Network.DNS.Lookup (lookupMX)
17 import System.IO (hGetContents, hFlush, hPutStrLn, stdin, stdout)
18 import Text.Regex.PCRE.Light (compile, match, utf8)
19
20 type Address = BSU.ByteString
21
22 -- | Resolver parameters. We increase the default timeout from 3 to 5
23 -- seconds.
24 resolv_conf :: ResolvConf
25 resolv_conf = defaultResolvConf { resolvTimeout = 5 * 1000 * 1000 }
26
27 -- | A list of common domains, there's no need to waste MX lookups
28 -- on these.
29 common_domains :: [Domain]
30 common_domains = map BSU.fromString [ "aol.com",
31 "comcast.net",
32 "gmail.com",
33 "msn.com",
34 "yahoo.com",
35 "verizon.net" ]
36
37 validate_mx :: Resolver -> Domain -> IO Bool
38 validate_mx resolver domain
39 | domain `elem` common_domains = return True
40 | otherwise = do
41 result <- lookupMX resolver domain
42 case result of
43 Nothing -> return False
44 _ -> return True
45
46 -- | Split an address into local/domain parts.
47 parts :: Address -> (BSU.ByteString, BSU.ByteString)
48 parts address = bytestring_split address '@'
49
50 -- | Check that the lengths of the local/domain parts are within spec.
51 validate_length :: Address -> Bool
52 validate_length address =
53 (BSU.length localpart <= 64) && (BSU.length domain <= 255)
54 where
55 (localpart, domain) = parts address
56
57 -- | Validate an email address against a simple regex. This should
58 -- catch common addresses but disallows a lot of (legal) weird stuff.
59 validate_regex :: Address -> Bool
60 validate_regex address =
61 case matches of
62 Nothing -> False
63 _ -> True
64 where
65 regex_str = "(\\w+)([\\w\\-\\.]*)@(([0-9a-zA-Z\\-]+\\.)+)[a-zA-Z]{2,4}"
66 regex_bs = BSU.fromString regex_str
67 regex = compile regex_bs [utf8]
68 matches = match regex address []
69
70 -- | Validate the syntax of an email address by checking its length
71 -- and validating it against a simple regex.
72 validate_syntax :: Address -> Bool
73 validate_syntax address =
74 (validate_length address) && (validate_regex address)
75
76 -- | Split a 'ByteString' @s@ at the first occurrence of @character@.
77 bytestring_split :: BSU.ByteString -> Char -> (BSU.ByteString, BSU.ByteString)
78 bytestring_split s character =
79 (before, after)
80 where
81 break_func = (== character)
82 (before, rest) = BSU.break break_func s
83 after = BS.tail rest
84
85
86 -- | Validate an email address by doing some simple syntax checks and
87 -- (if those fail) an MX lookup. We don't count an A record as a mail
88 -- exchanger.
89 validate :: Resolver -> Address -> IO (Address, Bool)
90 validate resolver address = do
91 let valid_syntax = validate_syntax address
92 if valid_syntax then do
93 let (_,domain) = parts address
94 mx_result <- validate_mx resolver domain
95 return (address, mx_result)
96 else do
97 return (address, False)
98
99 main :: IO ()
100 main = do
101 input <- hGetContents stdin
102
103 let addresses = BSU.lines $ BSU.fromString input
104 let nonempty_addresses = filter (not . BS.null) addresses
105
106 rs <- makeResolvSeed resolv_conf
107 withResolver rs $ \resolver -> do
108 let actions = map (validate resolver) nonempty_addresses
109 results <- parallel actions
110 let good_pairs = filter snd results
111 mapM_ ((hPutStrLn stdout) . BSU.toString . fst) good_pairs
112
113 stopGlobalPool
114 hFlush stdout