X-Git-Url: http://gitweb.michael.orlitzky.com/?p=dead%2Fhalcyon.git;a=blobdiff_plain;f=src%2FTwitter%2FStatus.hs;h=3414d7c926d57a32eabd96096d35d342c53f76e8;hp=275e89352fea47e02c453404474c5e6041bfd110;hb=ec4132ae41860a77f97b382acad5cf45b117e75d;hpb=17dd116706c4a971e1f5c68daa1656af5eff5cd2 diff --git a/src/Twitter/Status.hs b/src/Twitter/Status.hs index 275e893..3414d7c 100644 --- a/src/Twitter/Status.hs +++ b/src/Twitter/Status.hs @@ -1,75 +1,127 @@ +-- |Functions and data for working with Twitter statuses. module Twitter.Status where import Data.Maybe +import Data.String.Utils (join, splitWs) +import Data.Time (ZonedTime, formatTime, readsTime) +import System.Locale (defaultTimeLocale, rfc822DateFormat) +import Test.HUnit +import Text.Regex (matchRegex, mkRegex) import Text.XML.HaXml +import Text.XML.HaXml.Posn (noPos) +import StringUtils (listify) import Twitter.User import Twitter.Xml +-- |Represents one Twitter status. We don't care about any of their +-- other properties. data Status = Status { status_id :: Integer, created_at :: String, text :: String, - user :: User } + user :: User, + reply :: Bool, + retweet :: Bool } deriving (Show, Eq) - - -status_from_content :: Content -> (Maybe Status) +-- |Given some XML content, create a 'Status' from it. +status_from_content :: Content i -> Maybe Status status_from_content content = - if (length status_ids) == 0 || (length created_ats) == 0 || (length texts) == 0 || (length users) == 0 + || (length retweeteds) == 0 then Nothing - else - case first_status_id of - Nothing -> Nothing - (Just status_id_data) -> - case first_created_at of - Nothing -> Nothing - (Just created_at_data) -> - case first_user of - Nothing -> Nothing - (Just user_object) -> - case (reads status_id_data :: [(Integer, String)]) of - [] -> Nothing - parseresult:_ -> Just (Status (fst parseresult) created_at_data all_text user_object) - + else do + first_status_id <- get_char_data (status_ids !! 0) + integer_status_id <- parse_status_id first_status_id + first_created_at <- get_char_data (created_ats !! 0) + first_user <- user_from_content (users !! 0) + first_retweeted <- get_char_data (retweeteds !! 0) + + let is_reply = case (length reply_to_status_ids) of + 0 -> False + _ -> True + + let is_retweet = case first_retweeted of + "true" -> True + _ -> False + + return (Status + integer_status_id + first_created_at + all_text + first_user + is_reply + is_retweet) where status_ids = (unique_id content) - first_status_id = get_char_data (status_ids !! 0) - created_ats = (status_created_at content) - first_created_at = get_char_data (created_ats !! 0) - texts = (status_text content) - all_text = concat $ catMaybes (map get_char_data texts) - users = (status_user content) - first_user = user_from_content (users !! 0) + retweeteds = (status_retweeted content) + reply_to_status_ids = (status_reply_to_status_id content) + all_text = concat $ catMaybes (map get_char_data texts) + parse_status_id :: String -> Maybe Integer + parse_status_id s = + case (reads s) of + [] -> Nothing + parseresult:_ -> Just (fst parseresult) + +-- |Takes an XML String as an argument, and returns the +-- status that was parsed from it. Should only be used +-- on XML string where a is a top-level element. +parse_status :: String -> [Status] +parse_status xml_data = + catMaybes maybe_status + where + (Document _ _ root _) = xmlParse xml_file_name xml_data + root_elem = CElem root noPos + status_element = (single_status root_elem) + maybe_status = map status_from_content status_element + +-- |Takes an XML String as an argument, and returns the list of +-- statuses that can be parsed from it. parse_statuses :: String -> [Status] parse_statuses xml_data = catMaybes maybe_statuses where (Document _ _ root _) = xmlParse xml_file_name xml_data - root_elem = CElem root + root_elem = CElem root noPos status_elements = (all_statuses root_elem) maybe_statuses = map status_from_content status_elements --- This is a required parameter to the xmlParse function used in --- error reporting. We're not parsing a function, though, so we --- leave it blank. +-- |This is a required parameter to the xmlParse function used in +-- error reporting. We're not parsing a function, though, so we leave +-- it blank. xml_file_name :: String xml_file_name = "" + +created_at_to_rfc822 :: String -> Maybe String +created_at_to_rfc822 s = + case reads_result of + [(t,_)] -> + Just $ formatTime defaultTimeLocale rfc822DateFormat t + _ -> Nothing + where + -- Should match e.g. "Sun Oct 24 18:21:41 +0000 2010" + fmt :: String + fmt = "%a %b %d %H:%M:%S %z %Y" + + reads_result :: [(ZonedTime, String)] + reads_result = readsTime defaultTimeLocale fmt s + +-- |Returns a nicely-formatted String representing the given 'Status' +-- object. pretty_print :: Status -> String pretty_print status = concat [ name, @@ -79,14 +131,70 @@ pretty_print status = replicate ((length name) + 3 + (length (created_at status))) '-', "\n", replace_entities (text status), + "\n\n", + join "\n" user_timeline_urls, "\n" ] where name = screen_name (user status) + user_timeline_urls = listify (make_user_timeline_urls status) - +-- |Given a list of statuses, returns the greatest status_id belonging +-- to one of the statuses in the list. get_max_status_id :: [Status] -> Integer -get_max_status_id statuses = +get_max_status_id statuses = maximum status_ids where - status_ids = map status_id statuses \ No newline at end of file + status_ids = map status_id statuses + + +-- |Parse one username from a word. +parse_username :: String -> Maybe String +parse_username word = + case matches of + Nothing -> Nothing + Just [] -> Nothing + Just (first_match:_) -> Just first_match + where + username_regex = mkRegex "@([a-zA-Z0-9_]+)" + matches = matchRegex username_regex word + + +-- |Parse all usernames of the form \@username from a status. +parse_usernames_from_status :: Status -> [String] +parse_usernames_from_status status = + catMaybes (map parse_username status_words) + where + status_words = splitWs (text status) + +-- |Get all referenced users' timeline URLs. +make_user_timeline_urls :: Status -> [String] +make_user_timeline_urls status = + map screen_name_to_timeline_url usernames + where + usernames = parse_usernames_from_status status + + +status_tests :: [Test] +status_tests = [ test_parse_usernames ] + + +test_parse_usernames :: Test +test_parse_usernames = + TestCase $ + assertEqual + "All usernames are parsed." + expected_usernames + actual_usernames + where + dummy_user = User { screen_name = "nobody" } + dummy_status = Status { status_id = 1, + created_at = "never", + text = "Hypothesis: @donsbot and @bonus500 are two personalities belonging to the same person.", + user = dummy_user, + reply = False, + retweet = False + } + + actual_usernames = parse_usernames_from_status dummy_status + expected_usernames = ["donsbot", "bonus500"]