-- |Functions and data for working with Twitter statuses. module Twitter.Status where import Data.Maybe import Data.String.Utils (join, splitWs) import Data.Time (ZonedTime, formatTime, readsTime) import System.Locale (defaultTimeLocale, rfc822DateFormat) import Test.HUnit import Text.Regex (matchRegex, mkRegex) import Text.XML.HaXml import Text.XML.HaXml.Posn (noPos) import StringUtils (listify) import Twitter.User import Twitter.Xml -- |Represents one Twitter status. We don't care about any of their -- other properties. data Status = Status { status_id :: Integer, created_at :: String, text :: String, user :: User } deriving (Show, Eq) -- |Given some XML content, create a 'Status' from it. status_from_content :: Content i -> (Maybe Status) status_from_content content = if (length status_ids) == 0 || (length created_ats) == 0 || (length texts) == 0 || (length users) == 0 then Nothing else case first_status_id of Nothing -> Nothing (Just status_id_data) -> case first_created_at of Nothing -> Nothing (Just created_at_data) -> case first_user of Nothing -> Nothing (Just user_object) -> case (reads status_id_data :: [(Integer, String)]) of [] -> Nothing parseresult:_ -> Just (Status (fst parseresult) created_at_data all_text user_object) where status_ids = (unique_id content) first_status_id = get_char_data (status_ids !! 0) created_ats = (status_created_at content) first_created_at = get_char_data (created_ats !! 0) texts = (status_text content) all_text = concat $ catMaybes (map get_char_data texts) users = (status_user content) first_user = user_from_content (users !! 0) -- |Takes an XML String as an argument, and returns the -- status that was parsed from it. Should only be used -- on XML string where a is a top-level element. parse_status :: String -> [Status] parse_status xml_data = catMaybes maybe_status where (Document _ _ root _) = xmlParse xml_file_name xml_data root_elem = CElem root noPos status_element = (single_status root_elem) maybe_status = map status_from_content status_element -- |Takes an XML String as an argument, and returns the list of -- statuses that can be parsed from it. parse_statuses :: String -> [Status] parse_statuses xml_data = catMaybes maybe_statuses where (Document _ _ root _) = xmlParse xml_file_name xml_data root_elem = CElem root noPos status_elements = (all_statuses root_elem) maybe_statuses = map status_from_content status_elements -- |This is a required parameter to the xmlParse function used in -- error reporting. We're not parsing a function, though, so we leave -- it blank. xml_file_name :: String xml_file_name = "" created_at_to_rfc822 :: String -> Maybe String created_at_to_rfc822 s = case reads_result of [(t,_)] -> Just $ formatTime defaultTimeLocale rfc822DateFormat t _ -> Nothing where -- Should match e.g. "Sun Oct 24 18:21:41 +0000 2010" fmt :: String fmt = "%a %b %d %H:%M:%S %z %Y" reads_result :: [(ZonedTime, String)] reads_result = readsTime defaultTimeLocale fmt s -- |Returns a nicely-formatted String representing the given 'Status' -- object. pretty_print :: Status -> String pretty_print status = concat [ name, " - ", (created_at status), "\n", replicate ((length name) + 3 + (length (created_at status))) '-', "\n", replace_entities (text status), "\n\n", join "\n" user_timeline_urls, "\n" ] where name = screen_name (user status) user_timeline_urls = listify (make_user_timeline_urls status) -- |Given a list of statuses, returns the greatest status_id belonging -- to one of the statuses in the list. get_max_status_id :: [Status] -> Integer get_max_status_id statuses = maximum status_ids where status_ids = map status_id statuses -- |Parse one username from a word. parse_username :: String -> Maybe String parse_username word = case matches of Nothing -> Nothing Just [] -> Nothing Just (first_match:_) -> Just first_match where username_regex = mkRegex "@([a-zA-Z0-9_]+)" matches = matchRegex username_regex word -- |Parse all usernames of the form \@username from a status. parse_usernames_from_status :: Status -> [String] parse_usernames_from_status status = catMaybes (map parse_username status_words) where status_words = splitWs (text status) -- |Get all referenced users' timeline URLs. make_user_timeline_urls :: Status -> [String] make_user_timeline_urls status = map screen_name_to_timeline_url usernames where usernames = parse_usernames_from_status status status_tests :: [Test] status_tests = [ test_parse_usernames ] test_parse_usernames :: Test test_parse_usernames = TestCase $ assertEqual "All usernames are parsed." expected_usernames actual_usernames where dummy_user = User { screen_name = "nobody" } dummy_status = Status { status_id = 1, created_at = "never", text = "Hypothesis: @donsbot and @bonus500 are two personalities belonging to the same person.", user = dummy_user } actual_usernames = parse_usernames_from_status dummy_status expected_usernames = ["donsbot", "bonus500"]