From 17dd116706c4a971e1f5c68daa1656af5eff5cd2 Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Tue, 28 Sep 2010 02:34:17 -0400 Subject: [PATCH] Initial commit. --- bin/.gitignore | 1 + makefile | 29 ++++++++ src/CommandLine.hs | 151 ++++++++++++++++++++++++++++++++++++++++++ src/Mail.hs | 62 +++++++++++++++++ src/Main.hs | 133 +++++++++++++++++++++++++++++++++++++ src/Twitter/Http.hs | 45 +++++++++++++ src/Twitter/Status.hs | 92 +++++++++++++++++++++++++ src/Twitter/User.hs | 23 +++++++ src/Twitter/Xml.hs | 53 +++++++++++++++ 9 files changed, 589 insertions(+) create mode 100644 bin/.gitignore create mode 100644 makefile create mode 100644 src/CommandLine.hs create mode 100644 src/Mail.hs create mode 100644 src/Main.hs create mode 100644 src/Twitter/Http.hs create mode 100644 src/Twitter/Status.hs create mode 100644 src/Twitter/User.hs create mode 100644 src/Twitter/Xml.hs diff --git a/bin/.gitignore b/bin/.gitignore new file mode 100644 index 0000000..13e4d83 --- /dev/null +++ b/bin/.gitignore @@ -0,0 +1 @@ +[^.]* diff --git a/makefile b/makefile new file mode 100644 index 0000000..da88076 --- /dev/null +++ b/makefile @@ -0,0 +1,29 @@ +GHC_WARNINGS := -Wall +GHC_WARNINGS += -fwarn-hi-shadowing +GHC_WARNINGS += -fwarn-missing-signatures +GHC_WARNINGS += -fwarn-name-shadowing +GHC_WARNINGS += -fwarn-orphans +GHC_WARNINGS += -fwarn-type-defaults + +BIN=bin/twat + +.PHONY : test + +all: $(BIN) + +$(BIN): src/Twitter/*.hs src/*.hs + ghc -O2 $(GHC_WARNINGS) --make -o $(BIN) src/Twitter/*.hs src/*.hs + +profile: src/Twitter/*.hs src/*.hs + ghc -O2 $(GHC_WARNINGS) -prof -auto-all --make -o $(BIN) src/Twitter/*.hs src/*.hs + +clean: + rm -f $(BIN) + rm -f src/*.hi + rm -f src/Twitter/*.hi + rm -f src/*.o + rm -f src/Twitter/*.o + rm -f *.prof + +test: + runghc -i"src" test/TestSuite.hs diff --git a/src/CommandLine.hs b/src/CommandLine.hs new file mode 100644 index 0000000..c5ddb4b --- /dev/null +++ b/src/CommandLine.hs @@ -0,0 +1,151 @@ +-- The CommandLine module handles parsing of the command-line options. +-- It should more or less be a black box, providing Main with only the +-- information it requires. + +module CommandLine +( help_set, + help_text, + from_email_address, + to_email_address, + parse_errors, + parse_usernames +) where + +import Data.Maybe (isJust, isNothing) +import System.Console.GetOpt +import System.Environment (getArgs) + + + +-- A record containing values for all available options. +data Options = Options { opt_help :: Bool, + opt_from :: Maybe String, + opt_to :: Maybe String } + + +-- This constructs an instance of Options, with each of its members +-- set to default values. +default_options :: Options +default_options = Options { opt_help = False, + opt_from = Nothing, + opt_to = Nothing } + + +-- The options list that we construct associates a function with each +-- option. This function is responsible for updating an Options record +-- with the appropriate value. +-- +-- For more information and an example of this idiom, see, +-- +-- http://www.haskell.org/haskellwiki/High-level_option_handling_with_GetOpt +-- +options :: [OptDescr (Options -> IO Options)] +options = + [ Option ['h'][] (NoArg set_help) "Prints this help message.", + Option ['t'][] (ReqArg set_to "email_address") "Send tweets TO email_address.", + Option ['f'][] (ReqArg set_from "email_address") "Send tweets FROM email_address." + ] + + +set_help :: Options -> IO Options +set_help opts = do + return opts { opt_help = True } + +set_to :: String -> Options -> IO Options +set_to arg opts = do + return opts { opt_to = Just arg } + +set_from :: String -> Options -> IO Options +set_from arg opts = do + return opts { opt_from = Just arg } + + +-- The usage header. +usage :: String +usage = "Usage: twat [-t to_address] [-f from_address] [username2, [username3]...]" + + +-- The usage header, and all available flags (as generated by GetOpt) +help_text :: String +help_text = usageInfo usage options + + +-- Return a list of options. +parse_options :: IO Options +parse_options = do + argv <- getArgs + let (actions, _, _) = getOpt Permute options argv + + -- This will execute each of the functions contained in our options + -- list, one after another, on a default_options record. The end + -- result should be an Options instance with all of its members set + -- correctly. + opts <- foldl (>>=) (return default_options) actions + + return opts + + +username_errors :: IO [String] +username_errors = do + argv <- getArgs + let (_, usernames, _) = getOpt Permute options argv + + if (null usernames) + then return ["No usernames provided."] + else return [] + + +to_errors :: IO [String] +to_errors = do + toaddr <- to_email_address + fromaddr <- from_email_address + if (isNothing toaddr) && (isJust fromaddr) + then return ["\"From\" address specified without \"To\" address."] + else return [] + +from_errors :: IO [String] +from_errors = do + toaddr <- to_email_address + fromaddr <- from_email_address + if (isJust toaddr) && (isNothing fromaddr) + then return ["\"To\" address specified without \"From\" address."] + else return [] + + +format_error :: String -> String +format_error err = "ERROR: " ++ err ++ "\n" + + +-- Return a list of errors. +parse_errors :: IO [String] +parse_errors = do + argv <- getArgs + let (_, _, errors) = getOpt Permute options argv + errs_username <- username_errors + errs_to <- to_errors + errs_from <- from_errors + return $ map format_error (errors ++ errs_username ++ errs_to ++ errs_from) + +-- Is the help option set? +help_set :: IO Bool +help_set = do + opts <- parse_options + return (opt_help opts) + + +to_email_address :: IO (Maybe String) +to_email_address = do + opts <- parse_options + return (opt_to opts) + +from_email_address :: IO (Maybe String) +from_email_address = do + opts <- parse_options + return (opt_from opts) + + +parse_usernames :: IO [String] +parse_usernames = do + argv <- getArgs + let (_, usernames, _) = getOpt Permute options argv + return usernames diff --git a/src/Mail.hs b/src/Mail.hs new file mode 100644 index 0000000..2b5445c --- /dev/null +++ b/src/Mail.hs @@ -0,0 +1,62 @@ +module Mail +where + +import Control.Concurrent +import Control.Concurrent.MVar +import Control.Exception (evaluate) +import Data.List (intercalate) +import System.Exit +import System.Process +import System.IO + +type Header = String + +data Message = Message { headers :: [Header], + subject :: String, + body :: String, + from :: String, + to :: String } + deriving (Eq) + +instance Show Message where + show m = + concat [ if (length (headers m) == 0) then "" else (intercalate "\n" (headers m)) ++ "\n", + "Subject: " ++ (subject m) ++ "\n", + "From: " ++ (from m) ++ "\n", + "To: " ++ (to m) ++ "\n", + "\n", + (body m) ] + + +sendmail :: Message -> IO (String, String, ExitCode) +sendmail message = do + let sendmail_args = ["-f", + (from message)] + + (inh, outh, errh, ph) <- + runInteractiveProcess "/usr/bin/sendmail" sendmail_args Nothing Nothing + + outm <- newEmptyMVar + outs <- hGetContents outh + + errm <- newEmptyMVar + errs <- hGetContents errh + + forkIO $ hPutStr inh (show message) >> hClose inh + forkIO $ evaluate (length outs) >> putMVar outm () + forkIO $ evaluate (length errs) >> putMVar errm () + + readMVar outm + readMVar errm + + ec <- waitForProcess ph + return (outs, errs, ec) + + +print_sendmail_result :: (String, String, ExitCode) -> IO () +print_sendmail_result (outs, errs, ec) = do + case ec of + ExitSuccess -> return () + _ -> putStrLn $ concat ["Output: " ++ outs, + "\nErrors: " ++ errs, + "\nExit Code: " ++ (show ec)] diff --git a/src/Main.hs b/src/Main.hs new file mode 100644 index 0000000..8c2f20b --- /dev/null +++ b/src/Main.hs @@ -0,0 +1,133 @@ +module Main +where + +import Control.Concurrent (forkIO, threadDelay) +import Control.Monad (forever, when) +import System.Exit (ExitCode(..), exitWith) +import System.IO (hPutStrLn, stderr) + +import CommandLine +import Mail +import Twitter.Http +import Twitter.Status +import Twitter.User + +exit_args_parse_failed :: Int +exit_args_parse_failed = 1 + +-- The length of all calls to sleep, in seconds. +heartbeat :: Int +heartbeat = 600 + +thread_sleep :: Int -> IO () +thread_sleep microseconds = do + let seconds = microseconds * (10 ^ (6 :: Int)) + threadDelay seconds + + +message_from_status :: Message -> Status -> Message +message_from_status message status = + message { subject = "Twat: " ++ (screen_name (user status)), + body = (pretty_print status) } + +recurse :: String -> Integer -> (Maybe Message) -> IO () +recurse username latest_status_id maybe_message = do + thread_sleep heartbeat + xmldata <- get_user_new_statuses username latest_status_id + + -- Parsing an empty result can blow up. Just pretend there are + -- no new statuses in that case. + let new_statuses = case xmldata of + Just xml -> parse_statuses xml + Nothing -> [] + + case (length new_statuses) of + 0 -> + recurse username latest_status_id maybe_message + _ -> do + let new_latest_status_id = get_max_status_id new_statuses + mapM (putStrLn . pretty_print) new_statuses + + case maybe_message of + Nothing -> do + recurse username new_latest_status_id maybe_message + return () + Just message -> do + let messages = map (message_from_status message) new_statuses + sendmail_results <- mapM sendmail messages + mapM print_sendmail_result sendmail_results + recurse username new_latest_status_id maybe_message + return () + + +get_latest_status_id :: String -> IO Integer +get_latest_status_id username = do + xmldata <- get_user_timeline username + + let initial_statuses = case xmldata of + Just xml -> parse_statuses xml + Nothing -> [] + + case (length initial_statuses) of + 0 -> do + -- If the HTTP part barfs, try again after a while. + putStrLn ("Couldn't retrieve " ++ username ++ "'s timeline. Retrying...") + thread_sleep heartbeat + get_latest_status_id username + _ -> return (get_max_status_id initial_statuses) + + + +run_twat :: Maybe Message -> String -> IO () +run_twat message username = do + latest_status_id <- get_latest_status_id username + recurse username latest_status_id message + return () + + +main :: IO () +main = do + errors <- parse_errors + + -- If there were errors parsing the command-line options, + -- print them and exit. + when (not (null errors)) $ do + hPutStrLn stderr (concat errors) + putStrLn help_text + exitWith (ExitFailure exit_args_parse_failed) + + -- Next, check to see if the 'help' option was passed to the + -- program. If it was, display the help, and exit successfully. + help_opt_set <- help_set + when help_opt_set $ do + putStrLn help_text + exitWith ExitSuccess + + usernames <- parse_usernames + + -- If we have both a "To" and "From" address, we'll create a + -- message object to be passed to all of our threads. + to_address <- to_email_address + from_address <- from_email_address + let message = case to_address of + Nothing -> Nothing + Just toaddr -> + case from_address of + Nothing -> Nothing + Just fromaddr -> + Just (Message { headers = [], + body = "", + subject = "", + to = toaddr, + from = fromaddr }) + + -- Execute run_twat on each username in a new thread. + mapM (forkIO . (run_twat message)) usernames + + forever $ do + -- This thread (the one executing main) doesn't do anything, + -- but when it terminates, so do all the threads we forked. + -- As a result, we need to keep this thread on life support. + thread_sleep heartbeat + + return () diff --git a/src/Twitter/Http.hs b/src/Twitter/Http.hs new file mode 100644 index 0000000..ddd6828 --- /dev/null +++ b/src/Twitter/Http.hs @@ -0,0 +1,45 @@ +module Twitter.Http +where + +import Network.Curl +import System.IO (hPutStrLn, stderr) + +-- +-- http://dev.twitter.com/doc/get/statuses/user_timeline +-- +user_timeline_url :: String -> String +user_timeline_url username = + concat [ "http://api.twitter.com/1/statuses/user_timeline.xml", + "?screen_name=" ++ username, + "&include_rts=true", + "&count=10" ] + +user_new_statuses_url :: String -> Integer -> String +user_new_statuses_url username last_status_id = + concat [ user_timeline_url username, + "&since_id=" ++ (show last_status_id) ] + + +get_user_timeline :: String -> IO (Maybe String) +get_user_timeline username = do + let uri = (user_timeline_url username) + timeline <- (http_get uri) + return timeline + + +get_user_new_statuses :: String -> Integer -> IO (Maybe String) +get_user_new_statuses username last_status_id = do + let uri = (user_new_statuses_url username last_status_id) + new_statuses <- (http_get uri) + return new_statuses + + +http_get :: String -> IO (Maybe String) +http_get uri = withCurlDo $ do + resp <- curlGetString uri [CurlTimeout 45] + + case resp of + (CurlOK, body) -> return (Just body) + (code, _) -> do + hPutStrLn stderr ("HTTP Error: " ++ (show code)) + return Nothing diff --git a/src/Twitter/Status.hs b/src/Twitter/Status.hs new file mode 100644 index 0000000..275e893 --- /dev/null +++ b/src/Twitter/Status.hs @@ -0,0 +1,92 @@ +module Twitter.Status +where + +import Data.Maybe +import Text.XML.HaXml + +import Twitter.User +import Twitter.Xml + +data Status = Status { status_id :: Integer, + created_at :: String, + text :: String, + user :: User } + deriving (Show, Eq) + + + + +status_from_content :: Content -> (Maybe Status) +status_from_content content = + + if (length status_ids) == 0 + || (length created_ats) == 0 + || (length texts) == 0 + || (length users) == 0 + then + Nothing + else + case first_status_id of + Nothing -> Nothing + (Just status_id_data) -> + case first_created_at of + Nothing -> Nothing + (Just created_at_data) -> + case first_user of + Nothing -> Nothing + (Just user_object) -> + case (reads status_id_data :: [(Integer, String)]) of + [] -> Nothing + parseresult:_ -> Just (Status (fst parseresult) created_at_data all_text user_object) + + where + status_ids = (unique_id content) + first_status_id = get_char_data (status_ids !! 0) + + created_ats = (status_created_at content) + first_created_at = get_char_data (created_ats !! 0) + + texts = (status_text content) + all_text = concat $ catMaybes (map get_char_data texts) + + users = (status_user content) + first_user = user_from_content (users !! 0) + + + +parse_statuses :: String -> [Status] +parse_statuses xml_data = + catMaybes maybe_statuses + where + (Document _ _ root _) = xmlParse xml_file_name xml_data + root_elem = CElem root + status_elements = (all_statuses root_elem) + maybe_statuses = map status_from_content status_elements + + +-- This is a required parameter to the xmlParse function used in +-- error reporting. We're not parsing a function, though, so we +-- leave it blank. +xml_file_name :: String +xml_file_name = "" + +pretty_print :: Status -> String +pretty_print status = + concat [ name, + " - ", + (created_at status), + "\n", + replicate ((length name) + 3 + (length (created_at status))) '-', + "\n", + replace_entities (text status), + "\n" ] + where + name = screen_name (user status) + + + +get_max_status_id :: [Status] -> Integer +get_max_status_id statuses = + maximum status_ids + where + status_ids = map status_id statuses \ No newline at end of file diff --git a/src/Twitter/User.hs b/src/Twitter/User.hs new file mode 100644 index 0000000..75cf4b6 --- /dev/null +++ b/src/Twitter/User.hs @@ -0,0 +1,23 @@ +module Twitter.User +where + +import Text.XML.HaXml + +import Twitter.Xml + +data User = User { screen_name :: String } + deriving (Show, Eq) + + +user_from_content :: Content -> (Maybe User) +user_from_content c = + if (length names) == 0 + then + Nothing + else + case (get_char_data (names !! 0)) of + Nothing -> Nothing + (Just content) -> Just (User (content)) + + where + names = user_screen_name c diff --git a/src/Twitter/Xml.hs b/src/Twitter/Xml.hs new file mode 100644 index 0000000..09013bf --- /dev/null +++ b/src/Twitter/Xml.hs @@ -0,0 +1,53 @@ +module Twitter.Xml +where + +import Data.Maybe +import Text.Regex (mkRegex, subRegex) +import Text.XML.HaXml + +get_char_data :: Content -> (Maybe CharData) +get_char_data (CString _ cd) = Just cd +get_char_data (CRef ref) = Just (verbatim ref) -- Entities. +get_char_data _ = Nothing + + +all_statuses :: CFilter +all_statuses = (tag "statuses" /> tag "status") + +-- Called unique_id here because status_id is used elsewhere. +unique_id :: CFilter +unique_id = keep /> (tag "id") /> txt + +status_created_at :: CFilter +status_created_at = keep /> (tag "created_at") /> txt + +status_text :: CFilter +status_text = keep /> (tag "text") /> txt + +status_user :: CFilter +status_user = keep /> (tag "user") + +user_screen_name :: CFilter +user_screen_name = keep /> (tag "screen_name") /> txt + + +xml_entities :: [(String, String)] +xml_entities = [("[lr]dquo", "\""), + ("[mn]dash", "-"), + ("nbsp", " "), + ("#8217", "'"), + ("amp", "&"), + ("lt", "<"), + ("gt", ">")] + +replace_entities :: String -> String +replace_entities target = unescape_recursive xml_entities target + +unescape_recursive :: [(String, String)] -> String -> String +unescape_recursive [] target = target +unescape_recursive replacements target = + unescape_recursive (tail replacements) (subRegex (mkRegex from) target to) + where + replacement = (replacements !! 0) + from = "&" ++ (fst replacement) ++ ";" + to = (snd replacement) -- 2.44.2