-- | Functions and data for working with Twitter statuses. module Twitter.Status where import Control.Applicative ((<$>), (<*>)) import Control.Monad (liftM) import Data.Aeson ((.:), FromJSON(..), Value(Object)) import Data.Maybe (catMaybes, isJust) import Data.Monoid (mempty) import Data.String.Utils (join, splitWs) import Data.Text (pack) import Data.Time (formatTime) import Data.Time.Clock (UTCTime) import Data.Time.Format (parseTime) import Data.Time.LocalTime (TimeZone, utcToZonedTime) import System.Locale (defaultTimeLocale, rfc822DateFormat) import Test.HUnit import Text.Regex (matchRegex, mkRegex) import StringUtils (listify) import Twitter.User data Status = Status { created_at :: Maybe UTCTime, status_id :: Integer, reply :: Bool, retweeted :: Bool, text :: String, user :: User } deriving (Show, Eq) type Timeline = [Status] instance FromJSON Status where parseJSON (Object t) = Status <$> liftM parse_status_time (t .: created_at_field) <*> (t .: id_field) <*> liftM isJustInt (t .: in_reply_to_status_id_field) <*> (t .: retweeted_field) <*> (t .: text_field) <*> (t .: user_field) where -- The typechecker flips out without this. isJustInt :: Maybe Int -> Bool isJustInt = isJust created_at_field = pack "created_at" id_field = pack "id" in_reply_to_status_id_field = pack "in_reply_to_status_id" retweeted_field = pack "retweeted" text_field = pack "text" user_field = pack "user" -- Do whatever. parseJSON _ = mempty parse_status_time :: String -> Maybe UTCTime parse_status_time = parseTime defaultTimeLocale status_format where -- | Should match e.g. "Sun Oct 24 18:21:41 +0000 2010" status_format :: String status_format = "%a %b %d %H:%M:%S %z %Y" utc_time_to_rfc822 :: Maybe TimeZone -> UTCTime -> String utc_time_to_rfc822 mtz utc = case mtz of Nothing -> foo utc Just tz -> foo $ utcToZonedTime tz utc where foo = formatTime defaultTimeLocale rfc822DateFormat show_created_at :: Maybe TimeZone -> Status -> String show_created_at mtz = (maybe "" id) . (fmap $ utc_time_to_rfc822 mtz) . created_at -- | Returns a nicely-formatted String representing the given 'Status' -- object. pretty_print :: Maybe TimeZone -> Status -> String pretty_print mtz status = concat [ name, " - ", sca, "\n", replicate bar_length '-', "\n", text status, "\n\n", join "\n" user_timeline_urls, "\n" ] where sca = show_created_at mtz status name = screen_name (user status) user_timeline_urls = listify (make_user_timeline_urls status) bar_length = (length name) + 3 + (length sca) -- | Given a list of statuses, returns the greatest status_id -- belonging to one of the statuses in the list. get_max_status_id :: Timeline -> Integer get_max_status_id statuses = maximum status_ids where status_ids = map status_id statuses -- | Parse one username from a word. parse_username :: String -> Maybe String parse_username word = case matches of Nothing -> Nothing Just [] -> Nothing Just (first_match:_) -> Just first_match where username_regex = mkRegex "@([a-zA-Z0-9_]+)" matches = matchRegex username_regex word -- | Parse all usernames of the form \@username from a status. parse_usernames_from_status :: Status -> [String] parse_usernames_from_status status = catMaybes (map parse_username status_words) where status_words = splitWs (text status) -- | Get all referenced users' timeline URLs. make_user_timeline_urls :: Status -> [String] make_user_timeline_urls status = map screen_name_to_timeline_url usernames where usernames = parse_usernames_from_status status status_tests :: [Test] status_tests = [ test_parse_usernames ] test_parse_usernames :: Test test_parse_usernames = TestCase $ assertEqual "All usernames are parsed." expected_usernames actual_usernames where dummy_user = User { screen_name = "nobody" } dummy_status = Status { status_id = 1, created_at = Nothing, text = "Hypothesis: @donsbot and @bonus500 are two personalities belonging to the same person.", user = dummy_user, reply = False, retweeted = False } actual_usernames = parse_usernames_from_status dummy_status expected_usernames = ["donsbot", "bonus500"]