From: Michael Orlitzky Date: Thu, 4 Jul 2013 00:31:06 +0000 (-0400) Subject: Rewrite everything to use the JSON API with OAuth authentication. X-Git-Url: https://gitweb.michael.orlitzky.com/?a=commitdiff_plain;h=9b6d95a82745ced2a58d9bc4ded555ee36b36673;p=dead%2Fhalcyon.git Rewrite everything to use the JSON API with OAuth authentication. --- diff --git a/src/Mail.hs b/src/Mail.hs index c54255a..e1884af 100644 --- a/src/Mail.hs +++ b/src/Mail.hs @@ -15,7 +15,7 @@ import System.IO (hClose, hGetContents, hPutStr) type Header = String --- |A crude model of an RFC821 email message. +-- | A crude model of an RFC821 email message. data Message = Message { headers :: [Header], subject :: String, body :: String, @@ -23,35 +23,39 @@ data Message = Message { headers :: [Header], to :: String } deriving (Eq) --- |The default headers attached to each message. --- The MIME junk is needed for UTF-8 to work properly. --- Note that your mail server should support the 8BITMIME extension. +-- | The default headers attached to each message. The MIME junk is +-- needed for UTF-8 to work properly. Note that your mail server +-- should support the 8BITMIME extension. default_headers :: [Header] default_headers = ["MIME-Version: 1.0", "Content-Type: text/plain; charset=UTF-8", "Content-Transfer-Encoding: 8bit"] --- |Showing a message will print it in roughly RFC-compliant --- form. This form is sufficient for handing the message off to --- sendmail (or compatible). +-- | Showing a message will print it in roughly RFC-compliant +-- form. This form is sufficient for handing the message off to +-- sendmail (or compatible). instance Show Message where - show m = - concat [ if (length (headers m) == 0) then "" else (intercalate "\n" (headers m)) ++ "\n", - "Subject: " ++ (subject m) ++ "\n", - "From: " ++ (from m) ++ "\n", - "To: " ++ (to m) ++ "\n", - "\n", - (body m) ] - + show m = + concat [ formatted_headers, + "Subject: " ++ (subject m) ++ "\n", + "From: " ++ (from m) ++ "\n", + "To: " ++ (to m) ++ "\n", + "\n", + (body m) ] + where + formatted_headers = + if (length (headers m) == 0) + then "" + else (intercalate "\n" (headers m)) ++ "\n" -- |Pad a string on the left with zeros until the entire string has -- length n. pad_left :: String -> Int -> String pad_left str n - | n < (length str) = str - | otherwise = (replicate num_zeros '0') ++ str - where num_zeros = n - (length str) + | n < (length str) = str + | otherwise = (replicate num_zeros '0') ++ str + where num_zeros = n - (length str) @@ -97,8 +101,8 @@ sendmail sendmail_path message = do -- three-tuples. print_sendmail_result :: (String, String, ExitCode) -> IO () print_sendmail_result (outs, errs, ec) = do - case ec of - ExitSuccess -> return () - _ -> putStrLn $ concat ["Output: " ++ outs, - "\nErrors: " ++ errs, - "\nExit Code: " ++ (show ec)] + case ec of + ExitSuccess -> return () + _ -> putStrLn $ concat ["Output: " ++ outs, + "\nErrors: " ++ errs, + "\nExit Code: " ++ (show ec)] diff --git a/src/Main.hs b/src/Main.hs index a52a73f..bf99fe8 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -3,7 +3,9 @@ where import Control.Concurrent (forkIO, threadDelay) import Control.Monad (forever, when) +import Data.Aeson (decode) import Data.List ((\\)) +import Data.Time.LocalTime (TimeZone, getCurrentTimeZone) import System.Exit (ExitCode(..), exitWith) import System.IO (hPutStrLn, stderr) @@ -16,38 +18,37 @@ import Twitter.Status import Twitter.User --- |A wrapper around threadDelay which takes seconds instead of --- microseconds as its argument. +-- | A wrapper around threadDelay which takes seconds instead of +-- microseconds as its argument. thread_sleep :: Int -> IO () thread_sleep seconds = do let microseconds = seconds * (10 ^ (6 :: Int)) threadDelay microseconds --- |Given a 'Message', 'Status', and default date, update that --- message's body and subject with the information contained in the --- status. Adds a /Date: / header, and returns the updated message. -message_from_status :: Message -> String -> Status -> Message -message_from_status message default_date status = +-- | Given a 'Message', 'Status', and date, update that message's body +-- and subject with the information contained in the status. Adds a +-- /Date: / header, and returns the updated message. +message_from_status :: Maybe TimeZone -> Message -> String -> Status -> Message +message_from_status mtz message default_date status = message { subject = "Twat: " ++ (screen_name (user status)), - body = (pretty_print status), + body = (pretty_print mtz status), headers = ((headers message) ++ ["Date: " ++ date])} where - -- Use the Status' created_at date if it can be coerced into - -- RFC822 format. - date = case (created_at_to_rfc822 $ created_at status) of - Nothing -> default_date - Just c -> c + date = + case created_at status of + Nothing -> default_date + Just c -> utc_time_to_rfc822 mtz c -- | If the given Message is not Nothing, send a copy of it for every -- Status in the list. -send_messages :: Cfg -> Maybe Message -> [Status] -> IO () -send_messages cfg maybe_message statuses = +send_messages :: Cfg -> Maybe TimeZone -> Maybe Message -> [Status] -> IO () +send_messages cfg mtz maybe_message statuses = case maybe_message of Nothing -> return () Just message -> do default_date <- rfc822_now - let mfs = message_from_status message (default_date) + let mfs = message_from_status mtz message default_date let messages = map mfs statuses sendmail_results <- mapM sendmail' messages _ <- mapM print_sendmail_result sendmail_results @@ -69,7 +70,7 @@ mention_replies cfg ss = do -- and verbose is enabled. mention_retweets :: Cfg -> [Status] -> IO () mention_retweets cfg ss = do - let retweets = filter retweet ss + let retweets = filter retweeted ss when ((ignore_retweets cfg) && (verbose cfg)) $ do let countstr = show $ length retweets putStrLn $ "Ignoring " ++ countstr ++ " retweets." @@ -82,7 +83,7 @@ filter_statuses cfg ss = good_statuses where replies = filter reply ss - retweets = filter retweet ss + retweets = filter retweeted ss good_statuses' = case (ignore_replies cfg) of True -> ss \\ replies @@ -103,13 +104,10 @@ filter_statuses cfg ss = recurse :: Cfg -> String -> Integer -> (Maybe Message) -> IO () recurse cfg username latest_status_id maybe_message = do thread_sleep (heartbeat cfg) - xmldata <- get_user_new_statuses username latest_status_id + timeline <- get_user_new_statuses username latest_status_id - -- Parsing an empty result can blow up. Just pretend there are - -- no new statuses in that case. - let new_statuses = case xmldata of - Just xml -> parse_statuses xml - Nothing -> [] + -- FIXME + let Just new_statuses = decode timeline :: Maybe Timeline case (length new_statuses) of 0 -> @@ -121,9 +119,11 @@ recurse cfg username latest_status_id maybe_message = do let good_statuses = filter_statuses cfg new_statuses - _ <- mapM (putStrLn . pretty_print) good_statuses + tz <- getCurrentTimeZone + let mtz = Just tz + mapM_ (putStrLn . (pretty_print mtz)) good_statuses - send_messages cfg maybe_message good_statuses + send_messages cfg mtz maybe_message good_statuses let new_latest_status_id = get_max_status_id new_statuses do_recurse new_latest_status_id @@ -139,19 +139,16 @@ recurse cfg username latest_status_id maybe_message = do -- latest status id to be posted once we have done so. get_latest_status_id :: Int -> String -> IO Integer get_latest_status_id delay username = do - xmldata <- get_user_timeline username + timeline <- get_user_timeline username + let Just initial_timeline = decode timeline :: Maybe Timeline - let initial_statuses = case xmldata of - Just xml -> parse_statuses xml - Nothing -> [] - - case (length initial_statuses) of + case (length initial_timeline) of 0 -> do -- If the HTTP part barfs, try again after a while. putStrLn ("Couldn't retrieve " ++ username ++ "'s timeline. Retrying...") thread_sleep delay get_latest_status_id delay username - _ -> return (get_max_status_id initial_statuses) + _ -> return (get_max_status_id initial_timeline) @@ -224,36 +221,3 @@ main = do thread_sleep (heartbeat cfg) return () - - --- | A debugging tool that will parse, print, and email a single --- status (given by its id). -twat_single_status :: Cfg -> Integer -> (Maybe Message) -> IO () -twat_single_status cfg the_status_id maybe_message = do - xmldata <- get_status the_status_id - - -- Parsing an empty result can blow up. Just pretend there are - -- no new statuses in that case. - let statuses = case xmldata of - Just xml -> parse_status xml - Nothing -> [] - - case (length statuses) of - 0 -> do - putStrLn "No statuses returned." - return () - _ -> do - _ <- mapM (putStrLn . pretty_print) statuses - - case maybe_message of - Nothing -> do - putStrLn "No message object given." - return () - Just message -> do - default_date <- rfc822_now - let messages = map (message_from_status message (default_date)) statuses - sendmail_results <- mapM sendmail' messages - _ <- mapM print_sendmail_result sendmail_results - return () - where - sendmail' = sendmail (sendmail_path cfg) \ No newline at end of file diff --git a/src/StringUtils.hs b/src/StringUtils.hs index b7d291c..f9d7dfc 100644 --- a/src/StringUtils.hs +++ b/src/StringUtils.hs @@ -5,11 +5,13 @@ where import Test.HUnit --- |Takes a list of strings, call them string1, string2, etc. and --- numbers them like a list. So, --- 1. string1 --- 2. string2 --- 3. etc. +-- | Takes a list of strings, call them string1, string2, etc. and +-- numbers them like a list. So, +-- +-- 1. string1 +-- 2. string2 +-- 3. etc. +-- listify :: [String] -> [String] listify items = zipWith (++) list_numbers items diff --git a/src/Twitter/Http.hs b/src/Twitter/Http.hs index 4c7c608..112c2ea 100644 --- a/src/Twitter/Http.hs +++ b/src/Twitter/Http.hs @@ -1,92 +1,98 @@ module Twitter.Http where -import Network.Curl -import System.IO (hPutStrLn, stderr) +import qualified Data.ByteString.Lazy as B +import qualified Data.ByteString.Char8 as BC +import qualified Data.Conduit as C +import Data.Conduit.Binary (sinkLbs) +import Network.HTTP.Conduit +import Web.Authenticate.OAuth ( + OAuth(..), + Credential, + newCredential, + newOAuth, + signOAuth) -- |The API URL of username's timeline. -- -- See, -- --- +-- -- user_timeline_url :: String -> String user_timeline_url username = - concat [ "http://api.twitter.com/1/statuses/user_timeline.xml", - "?screen_name=" ++ username, - "&include_rts=true", - "&count=10" ] + concat [ "https://api.twitter.com/", + "1.1/", + "statuses/", + "user_timeline.json?", + "screen_name=", + username, + "&include_rts=true&", + "count=10" ] status_url :: Integer -> String status_url status_id = - concat [ "http://api.twitter.com/1/statuses/show/", - (show status_id), - ".xml" ] - --- |Given username's last status id, constructs the API URL for --- username's new statuses. Essentially, 'user_timeline_url' with a --- "since_id" parameter tacked on. + concat [ "https://api.twitter.com/", + "1.1/", + "statuses/", + "show.json?id=", + (show status_id) ] + +-- | Given username's last status id, constructs the API URL for +-- username's new statuses. Essentially, 'user_timeline_url' with a +-- "since_id" parameter tacked on. user_new_statuses_url :: String -> Integer -> String user_new_statuses_url username last_status_id = - concat [ user_timeline_url username, - "&since_id=" ++ (show last_status_id) ] + concat [ user_timeline_url username, + "&since_id=" ++ (show last_status_id) ] -get_status :: Integer -> IO (Maybe String) +get_status :: Integer -> IO B.ByteString get_status status_id = do - let uri = (status_url status_id) - status <- (http_get uri) - return status + let uri = (status_url status_id) + status <- (http_get uri) + return status --- |Return's username's timeline, or 'Nothing' if there was an error. -get_user_timeline :: String -> IO (Maybe String) +-- | Return's username's timeline. +get_user_timeline :: String -> IO B.ByteString get_user_timeline username = do let uri = (user_timeline_url username) timeline <- (http_get uri) return timeline --- | Returns the XML representing all of username's statuses that are +-- | Returns the JSON representing all of username's statuses that are -- newer than last_status_id. -get_user_new_statuses :: String -> Integer -> IO (Maybe String) +get_user_new_statuses :: String -> Integer -> IO B.ByteString get_user_new_statuses username last_status_id = do let uri = (user_new_statuses_url username last_status_id) new_statuses <- (http_get uri) return new_statuses --- | Options that will be passed to every curl request. -curl_options :: [CurlOption] -curl_options = - [ CurlTimeout 45, - -- The Global cache is not thread-friendly. - CurlDNSUseGlobalCache False, - -- And we don't want to use a DNS cache anyway. - CurlDNSCacheTimeout 0 ] - - --- | Uses the CURL API to retrieve uri. Returns 'Nothing' if there was --- an error. -http_get :: String -> IO (Maybe String) -http_get uri = - withCurlDo $ do - -- Create a Curl instance. - curl <- initialize - - -- Perform the request, and get back a CurlResponse object. - -- The cast is needed to specify how we would like our headers - -- and body returned (Strings). - resp <- do_curl_ curl uri curl_options :: IO CurlResponse - - -- Pull out the response code as a CurlCode. - let code = respCurlCode resp - - case code of - CurlOK -> return $ Just (respBody resp) - error_code -> do - hPutStrLn stderr ("HTTP Error: " ++ (show error_code)) - -- If an error occurred, we want to dump as much information as - -- possible. If this becomes a problem, we can use respGetInfo to - -- query the response object for more information - return Nothing +-- | Retrieve a URL, or crash. +http_get :: String -> IO B.ByteString +http_get url = do + manager <- newManager def + request <- parseUrl url + + C.runResourceT $ do + signed_request <- signOAuth oauth credential request + response <- http signed_request manager + responseBody response C.$$+- sinkLbs + + where + consumer_key = BC.pack "" + consumer_secret = BC.pack "" + access_token = BC.pack "" + access_secret = BC.pack "" + + oauth :: OAuth + oauth = newOAuth { + oauthConsumerKey = consumer_key, + oauthConsumerSecret = consumer_secret + } + + credential :: Credential + credential = newCredential access_token access_secret diff --git a/src/Twitter/Status.hs b/src/Twitter/Status.hs index 3414d7c..ef4e103 100644 --- a/src/Twitter/Status.hs +++ b/src/Twitter/Status.hs @@ -1,178 +1,136 @@ --- |Functions and data for working with Twitter statuses. +-- | Functions and data for working with Twitter statuses. module Twitter.Status where -import Data.Maybe +import Control.Applicative ((<$>), (<*>)) +import Control.Monad (liftM) +import Data.Aeson ((.:), FromJSON(..), Value(Object)) +import Data.Maybe (catMaybes, isJust) +import Data.Monoid (mempty) import Data.String.Utils (join, splitWs) -import Data.Time (ZonedTime, formatTime, readsTime) +import Data.Text (pack) +import Data.Time (formatTime) +import Data.Time.Clock (UTCTime) +import Data.Time.Format (parseTime) +import Data.Time.LocalTime (TimeZone, utcToZonedTime) import System.Locale (defaultTimeLocale, rfc822DateFormat) import Test.HUnit import Text.Regex (matchRegex, mkRegex) -import Text.XML.HaXml -import Text.XML.HaXml.Posn (noPos) import StringUtils (listify) import Twitter.User -import Twitter.Xml - --- |Represents one Twitter status. We don't care about any of their --- other properties. -data Status = Status { status_id :: Integer, - created_at :: String, - text :: String, - user :: User, - reply :: Bool, - retweet :: Bool } - deriving (Show, Eq) - - --- |Given some XML content, create a 'Status' from it. -status_from_content :: Content i -> Maybe Status -status_from_content content = - if (length status_ids) == 0 - || (length created_ats) == 0 - || (length texts) == 0 - || (length users) == 0 - || (length retweeteds) == 0 - then - Nothing - else do - first_status_id <- get_char_data (status_ids !! 0) - integer_status_id <- parse_status_id first_status_id - first_created_at <- get_char_data (created_ats !! 0) - first_user <- user_from_content (users !! 0) - first_retweeted <- get_char_data (retweeteds !! 0) - - let is_reply = case (length reply_to_status_ids) of - 0 -> False - _ -> True - - let is_retweet = case first_retweeted of - "true" -> True - _ -> False - - return (Status - integer_status_id - first_created_at - all_text - first_user - is_reply - is_retweet) - where - status_ids = (unique_id content) - created_ats = (status_created_at content) - texts = (status_text content) - users = (status_user content) - retweeteds = (status_retweeted content) - reply_to_status_ids = (status_reply_to_status_id content) - - all_text = concat $ catMaybes (map get_char_data texts) - - parse_status_id :: String -> Maybe Integer - parse_status_id s = - case (reads s) of - [] -> Nothing - parseresult:_ -> Just (fst parseresult) - --- |Takes an XML String as an argument, and returns the --- status that was parsed from it. Should only be used --- on XML string where a is a top-level element. -parse_status :: String -> [Status] -parse_status xml_data = - catMaybes maybe_status - where - (Document _ _ root _) = xmlParse xml_file_name xml_data - root_elem = CElem root noPos - status_element = (single_status root_elem) - maybe_status = map status_from_content status_element - --- |Takes an XML String as an argument, and returns the list of --- statuses that can be parsed from it. -parse_statuses :: String -> [Status] -parse_statuses xml_data = - catMaybes maybe_statuses +data Status = Status { + created_at :: Maybe UTCTime, + status_id :: Integer, + reply :: Bool, + retweeted :: Bool, + text :: String, + user :: User + } deriving (Show, Eq) + +type Timeline = [Status] + +instance FromJSON Status where + parseJSON (Object t) = + Status <$> + liftM parse_status_time (t .: created_at_field) <*> + (t .: id_field) <*> + liftM isJustInt (t .: in_reply_to_status_id_field) <*> + (t .: retweeted_field) <*> + (t .: text_field) <*> + (t .: user_field) where - (Document _ _ root _) = xmlParse xml_file_name xml_data - root_elem = CElem root noPos - status_elements = (all_statuses root_elem) - maybe_statuses = map status_from_content status_elements - - --- |This is a required parameter to the xmlParse function used in --- error reporting. We're not parsing a function, though, so we leave --- it blank. -xml_file_name :: String -xml_file_name = "" + -- The typechecker flips out without this. + isJustInt :: Maybe Int -> Bool + isJustInt = isJust + + created_at_field = pack "created_at" + id_field = pack "id" + in_reply_to_status_id_field = pack "in_reply_to_status_id" + retweeted_field = pack "retweeted" + text_field = pack "text" + user_field = pack "user" + + -- Do whatever. + parseJSON _ = mempty + +parse_status_time :: String -> Maybe UTCTime +parse_status_time = + parseTime defaultTimeLocale status_format + where + -- | Should match e.g. "Sun Oct 24 18:21:41 +0000 2010" + status_format :: String + status_format = "%a %b %d %H:%M:%S %z %Y" + +utc_time_to_rfc822 :: Maybe TimeZone -> UTCTime -> String +utc_time_to_rfc822 mtz utc = + case mtz of + Nothing -> foo utc + Just tz -> foo $ utcToZonedTime tz utc + where + foo = formatTime defaultTimeLocale rfc822DateFormat + + +show_created_at :: Maybe TimeZone -> Status -> String +show_created_at mtz = + (maybe "" id) . (fmap $ utc_time_to_rfc822 mtz) . created_at + +-- | Returns a nicely-formatted String representing the given 'Status' +-- object. +pretty_print :: Maybe TimeZone -> Status -> String +pretty_print mtz status = + concat [ name, + " - ", + sca, + "\n", + replicate bar_length '-', + "\n", + text status, + "\n\n", + join "\n" user_timeline_urls, + "\n" ] + where + sca = show_created_at mtz status + name = screen_name (user status) + user_timeline_urls = listify (make_user_timeline_urls status) + bar_length = (length name) + 3 + (length sca) -created_at_to_rfc822 :: String -> Maybe String -created_at_to_rfc822 s = - case reads_result of - [(t,_)] -> - Just $ formatTime defaultTimeLocale rfc822DateFormat t - _ -> Nothing - where - -- Should match e.g. "Sun Oct 24 18:21:41 +0000 2010" - fmt :: String - fmt = "%a %b %d %H:%M:%S %z %Y" - - reads_result :: [(ZonedTime, String)] - reads_result = readsTime defaultTimeLocale fmt s - --- |Returns a nicely-formatted String representing the given 'Status' --- object. -pretty_print :: Status -> String -pretty_print status = - concat [ name, - " - ", - (created_at status), - "\n", - replicate ((length name) + 3 + (length (created_at status))) '-', - "\n", - replace_entities (text status), - "\n\n", - join "\n" user_timeline_urls, - "\n" ] - where - name = screen_name (user status) - user_timeline_urls = listify (make_user_timeline_urls status) - - --- |Given a list of statuses, returns the greatest status_id belonging --- to one of the statuses in the list. -get_max_status_id :: [Status] -> Integer +-- | Given a list of statuses, returns the greatest status_id +-- belonging to one of the statuses in the list. +get_max_status_id :: Timeline -> Integer get_max_status_id statuses = maximum status_ids where status_ids = map status_id statuses --- |Parse one username from a word. +-- | Parse one username from a word. parse_username :: String -> Maybe String parse_username word = - case matches of - Nothing -> Nothing - Just [] -> Nothing - Just (first_match:_) -> Just first_match - where - username_regex = mkRegex "@([a-zA-Z0-9_]+)" - matches = matchRegex username_regex word + case matches of + Nothing -> Nothing + Just [] -> Nothing + Just (first_match:_) -> Just first_match + where + username_regex = mkRegex "@([a-zA-Z0-9_]+)" + matches = matchRegex username_regex word --- |Parse all usernames of the form \@username from a status. +-- | Parse all usernames of the form \@username from a status. parse_usernames_from_status :: Status -> [String] parse_usernames_from_status status = - catMaybes (map parse_username status_words) - where - status_words = splitWs (text status) + catMaybes (map parse_username status_words) + where + status_words = splitWs (text status) --- |Get all referenced users' timeline URLs. +-- | Get all referenced users' timeline URLs. make_user_timeline_urls :: Status -> [String] make_user_timeline_urls status = - map screen_name_to_timeline_url usernames - where - usernames = parse_usernames_from_status status + map screen_name_to_timeline_url usernames + where + usernames = parse_usernames_from_status status status_tests :: [Test] @@ -189,11 +147,11 @@ test_parse_usernames = where dummy_user = User { screen_name = "nobody" } dummy_status = Status { status_id = 1, - created_at = "never", + created_at = Nothing, text = "Hypothesis: @donsbot and @bonus500 are two personalities belonging to the same person.", user = dummy_user, reply = False, - retweet = False + retweeted = False } actual_usernames = parse_usernames_from_status dummy_status diff --git a/src/Twitter/User.hs b/src/Twitter/User.hs index 2c199c9..a1eed3a 100644 --- a/src/Twitter/User.hs +++ b/src/Twitter/User.hs @@ -2,32 +2,25 @@ module Twitter.User where -import Text.XML.HaXml - -import Twitter.Xml - --- |Represents a Twitter user, and contains the only attribute thereof --- that we care about: the screen (user) name. -data User = User { screen_name :: String } - deriving (Show, Eq) - - --- |Create a 'User' from HaXML 'Content'. -user_from_content :: Content i -> (Maybe User) -user_from_content c = - if (length names) == 0 - then - Nothing - else - case (get_char_data (names !! 0)) of - Nothing -> Nothing - (Just content) -> Just (User (content)) - +import Control.Applicative ((<$>)) +import Data.Aeson ((.:), FromJSON(..), Value(Object)) +import Data.Text (pack) +import Data.Monoid (mempty) + +-- | Represents a Twitter user, and contains the only attribute +-- thereof that we care about: the screen (user) name. +data User = User { screen_name :: String } deriving (Eq, Show) + +instance FromJSON User where + parseJSON (Object u) = + User <$> (u .: screen_name_field) where - names = user_screen_name c + screen_name_field = pack "screen_name" + -- Do whatever. + parseJSON _ = mempty -- |Get the URL for the given screen name's timeline. screen_name_to_timeline_url :: String -> String -screen_name_to_timeline_url sn = - "http://twitter.com/" ++ sn +screen_name_to_timeline_url = + ("http://twitter.com/" ++) diff --git a/src/Twitter/Xml.hs b/src/Twitter/Xml.hs deleted file mode 100644 index 871d216..0000000 --- a/src/Twitter/Xml.hs +++ /dev/null @@ -1,154 +0,0 @@ --- |Application-specific XML functions. -module Twitter.Xml -where - -import Data.Char (chr) -import Test.HUnit -import Text.Regex (matchRegex, mkRegex, subRegex) -import Text.XML.HaXml - --- |Returns the 'CharData' contained within the given 'Content', or --- 'Nothing' if no acceptable CharData was found. It will parse either --- a 'CString' ('String') or 'CRef' (XML entity reference). -get_char_data :: Content i -> (Maybe CharData) -get_char_data (CString _ cd _) = Just cd -get_char_data (CRef ref _) = Just (verbatim ref) -- Entities. -get_char_data _ = Nothing - - --- |A 'CFilter' returning all top-level elements. --- The name is due to the fact that if we retrieve more than --- one status, they will be wrapped in a tag, and --- thus not be top-level. -single_status :: CFilter i -single_status = (tag "status") - --- |A 'CFilter' returning all tags within . -all_statuses :: CFilter i -all_statuses = (tag "statuses" /> tag "status") - --- |Finds the text of the element contained within some other --- content. Called unique_id here because status_id is used elsewhere. -unique_id :: CFilter i -unique_id = keep /> (tag "id") /> txt - --- |Finds the text of the element contained within some --- other element. -status_created_at :: CFilter i -status_created_at = keep /> (tag "created_at") /> txt - --- |Finds the text of the element contained within some --- other element. -status_text :: CFilter i -status_text = keep /> (tag "text") /> txt - --- |Finds the XML of the element contained within some other --- element. -status_user :: CFilter i -status_user = keep /> (tag "user") - --- | Finds the text of the element contained within some --- other element. -status_retweeted :: CFilter i -status_retweeted = keep /> (tag "retweeted") /> txt - --- | Finds the text of the element contained --- within some other element. -status_reply_to_status_id :: CFilter i -status_reply_to_status_id = keep /> (tag "in_reply_to_status_id") /> txt - --- |Finds the text of the element contained within some --- other element. -user_screen_name :: CFilter i -user_screen_name = keep /> (tag "screen_name") /> txt - --- |A wrapper around the 'read' function which returns either Nothing --- or (Just ). -maybe_read :: (Read a) => String -> Maybe a -maybe_read str = - case (reads str) of - [] -> Nothing - ((y,_):_) -> Just y - --- |Takes a unicode codepoint in decimal and returns it as a --- one-character string. -entity_from_codepoint :: String -> String -entity_from_codepoint codepoint = - case (maybe_read codepoint) of - Nothing -> "" - Just num -> [(chr num)] - - --- | A list of tuples whose first entry is a regular expression --- matching XML entities, and whose second entry is the ASCII --- character represented by that entity. --- --- For some reason, ampersands are escaped twice in the status --- text. Rather than unescape everything twice, we just stick "amp" --- in the list again. -xml_entities :: [(String, String)] -xml_entities = [("[lr]dquo", "\""), - ("quot", "\""), - ("[mn]dash", "-"), - ("nbsp", " "), - ("amp", "&"), - ("amp", "&"), - ("lt", "<"), - ("gt", ">"), - ("hellip", "…")] - --- |Replace all of the XML entities in target. -replace_entities :: String -> String -replace_entities target = - unescape_numeric (unescape_recursive xml_entities target) - --- |Recursively unescape all numeric entities in the given String. -unescape_numeric :: String -> String -unescape_numeric target = - case match of - Nothing -> target - Just subexprs -> - case subexprs of - [] -> target - s1:_ -> - let this_entity_regex = mkRegex ("&#" ++ s1 ++ ";") in - let replacement = entity_from_codepoint s1 in - let new_target = subRegex this_entity_regex target replacement in - unescape_numeric new_target - where - from = "&#([0-9]+);" - match = matchRegex (mkRegex from) target - - - --- |The recursive function which does the real work for --- 'replace_entities'. -unescape_recursive :: [(String, String)] -> String -> String -unescape_recursive [] target = target -unescape_recursive replacements target = - unescape_recursive (tail replacements) (subRegex (mkRegex from) target to) - where - replacement = (replacements !! 0) - from = "&" ++ (fst replacement) ++ ";" - to = (snd replacement) - - - -xml_tests :: [Test] -xml_tests = [ test_replace_entities, test_double_unescape ] - - -test_replace_entities :: Test -test_replace_entities = - TestCase $ assertEqual "All entities are replaced correctly." expected_text actual_text - where - actual_text = (replace_entities ""The moon is gay……" said <insert the current president of the United States of America>. “It’s OK—–he’s not a real doctor.”") - expected_text = "\"The moon is gay……\" said . \"It’s OK--he’s not a real doctor.\"" - - -test_double_unescape :: Test -test_double_unescape = - TestCase $ assertEqual "The status text is unescaped twice." expected_text actual_text - where - actual_text = (replace_entities "As a kid, I'd pull a girl's hair to let her know I liked her, but now that I'm older &amp; wiser I simply hit her with my car.") - expected_text = "As a kid, I'd pull a girl's hair to let her know I liked her, but now that I'm older & wiser I simply hit her with my car." diff --git a/twat.cabal b/twat.cabal index 6d968de..8a11081 100644 --- a/twat.cabal +++ b/twat.cabal @@ -9,15 +9,20 @@ build-type: Simple executable twat build-depends: + aeson == 0.6.*, + authenticate-oauth == 1.4.*, base == 4.*, - curl == 1.3.*, - directory == 1.1.*, - HaXml == 1.23.*, + bytestring == 0.10.*, + conduit == 1.*, + directory == 1.2.*, + HaXml == 1.24.*, + http-conduit == 1.9.*, HUnit == 1.2.*, MissingH == 1.*, process == 1.*, old-locale == 1.*, regex-compat == 0.*, + text == 0.11.*, time == 1.*