X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FTwitter%2FStatus.hs;h=640072c69102da982d3a02a77c5ef527509783b6;hb=HEAD;hp=93b3f4e1cc2621cebac388766d567fa2401a0f0d;hpb=94484087fbfe98d6735aa82798a9bf506f97fd19;p=dead%2Fhalcyon.git diff --git a/src/Twitter/Status.hs b/src/Twitter/Status.hs index 93b3f4e..640072c 100644 --- a/src/Twitter/Status.hs +++ b/src/Twitter/Status.hs @@ -1,91 +1,292 @@ -module Twitter.Status +{-# LANGUAGE NoMonomorphismRestriction #-} + +-- | Functions and data for working with Twitter statuses. +module Twitter.Status ( + Status(..), + Timeline, + get_max_status_id, + pretty_print, + status_tests, + utc_time_to_rfc822 ) where -import Data.Maybe -import Text.XML.HaXml - -import Twitter.User -import Twitter.Xml - -data Status = Status { status_id :: Integer, - created_at :: String, - text :: String, - user :: User } - deriving (Show, Eq) - - - -status_from_content :: Content -> (Maybe Status) -status_from_content content = - - if (length status_ids) == 0 - || (length created_ats) == 0 - || (length texts) == 0 - || (length users) == 0 - then - Nothing - else - case first_status_id of - Nothing -> Nothing - (Just status_id_data) -> - case first_created_at of - Nothing -> Nothing - (Just created_at_data) -> - case first_user of - Nothing -> Nothing - (Just user_object) -> - case (reads status_id_data :: [(Integer, String)]) of - [] -> Nothing - parseresult:_ -> Just (Status (fst parseresult) created_at_data all_text user_object) +import Control.Applicative ( (<$>), (<*>) ) +import Control.Monad ( liftM ) +import Data.Aeson ( (.:), FromJSON(..), Value(Object) ) +import Data.Maybe ( mapMaybe, isJust ) +import Data.Monoid ( mempty ) +import Data.String.Utils ( join, splitWs ) +import Data.Text ( pack ) +import Data.Time ( formatTime ) +import Data.Time.Clock ( UTCTime ) +import Data.Time.Format ( parseTime ) +import Data.Time.LocalTime ( TimeZone, utcToZonedTime ) +import System.Locale ( defaultTimeLocale, rfc822DateFormat ) +import Test.Tasty ( TestTree, testGroup ) +import Test.Tasty.HUnit ( (@?=), testCase ) + +import Html ( replace_entities ) +import StringUtils ( listify ) +import Twitter.User ( User(..), screen_name_to_timeline_url ) + + +-- | Representation of a Twitter user status. We only care about a few +-- of the fields, and those are all that we bother to include in the +-- representation. +-- +data Status = Status { + created_at :: Maybe UTCTime, + status_id :: Integer, + reply :: Bool, + retweeted :: Bool, + text :: String, + user :: User } + deriving (Eq, Show) + +type Timeline = [Status] + +instance FromJSON Status where + -- | Use a bunch of applicative magic to parse a 'Status' out of the + -- JSON that we get from the Twitter API. + -- + parseJSON (Object t) = + Status <$> + liftM parse_status_time (t .: created_at_field) <*> + (t .: id_field) <*> + liftM isJustInt (t .: in_reply_to_status_id_field) <*> + (t .: retweeted_field) <*> + liftM replace_entities (t .: text_field) <*> + (t .: user_field) where - status_ids = (unique_id content) - first_status_id = get_char_data (status_ids !! 0) + -- | The typechecker flips out without this; it's just a copy if + -- 'isJust' specialized to the 'Int' type. + -- + isJustInt :: Maybe Int -> Bool + isJustInt = isJust - created_ats = (status_created_at content) - first_created_at = get_char_data (created_ats !! 0) + created_at_field = pack "created_at" + id_field = pack "id" + in_reply_to_status_id_field = pack "in_reply_to_status_id" + retweeted_field = pack "retweeted" + text_field = pack "text" + user_field = pack "user" - texts = (status_text content) - all_text = concat $ catMaybes (map get_char_data texts) + -- Do whatever. + parseJSON _ = mempty - users = (status_user content) - first_user = user_from_content (users !! 0) +-- | Parse a timestamp from a status into a UTCTime (or Nothing). +-- +-- Examples: +-- +-- >>> let s = "Sun Oct 24 18:21:41 +0000 2010" +-- >>> parse_status_time s +-- Just 2010-10-24 18:21:41 UTC +-- +-- >>> parse_status_time "what's up dawg" +-- Nothing +-- +parse_status_time :: String -> Maybe UTCTime +parse_status_time = + parseTime defaultTimeLocale status_format + where + status_format :: String + status_format = "%a %b %d %H:%M:%S %z %Y" -parse_statuses :: String -> [Status] -parse_statuses xml_data = - catMaybes maybe_statuses - where - (Document _ _ root _) = xmlParse xml_file_name xml_data - root_elem = CElem root - status_elements = (all_statuses root_elem) - maybe_statuses = map status_from_content status_elements - - --- This is a required parameter to the xmlParse function used in --- error reporting. We're not parsing a function, though, so we --- leave it blank. -xml_file_name :: String -xml_file_name = "" - -pretty_print :: Status -> String -pretty_print status = - concat [ name, - " - ", - (created_at status), - "\n", - replicate ((length name) + 3 + (length (created_at status))) '-', - "\n", - replace_entities (text status), - "\n" ] - where - name = screen_name (user status) - - - -get_max_status_id :: [Status] -> Integer +-- | Given a 'TimeZone', convert a 'UTCTime' into an RFC822-format +-- time string. If no 'TimeZone' is given, assume UTC. +-- +-- Examples: +-- +-- >>> let s = "Sun Oct 24 18:21:41 +0000 2010" +-- >>> let Just t = parse_status_time s +-- >>> utc_time_to_rfc822 Nothing t +-- "Sun, 24 Oct 2010 18:21:41 UTC" +-- +utc_time_to_rfc822 :: Maybe TimeZone -> UTCTime -> String +utc_time_to_rfc822 mtz utc = + case mtz of + Nothing -> foo utc + Just tz -> foo $ utcToZonedTime tz utc + where + foo = formatTime defaultTimeLocale rfc822DateFormat + + +-- | Get the 'created_at' time out of a 'Status' and display it as an +-- RFC822-format time string. If there's no created-at time in the +-- status, you'll get an empty string instead. +-- +-- >>> let u = User "washington_irving" +-- >>> let created = parse_status_time "Sun Oct 24 18:21:41 +0000 2010" +-- >>> let s = Status created 8675309 False False "IM TWITTERING" u +-- >>> show_created_at Nothing s +-- "Sun, 24 Oct 2010 18:21:41 UTC" +-- >>> show_created_at Nothing s{ created_at = Nothing } +-- "" +-- +show_created_at :: Maybe TimeZone -> Status -> String +show_created_at mtz = + (maybe "" (utc_time_to_rfc822 mtz)) . created_at + + +-- | Returns a nicely-formatted String representing the given 'Status' +-- object. +-- +-- Examples: +-- +-- >>> let u = User "washington_irving" +-- >>> let created = parse_status_time "Sun Oct 24 18:21:41 +0000 2010" +-- >>> let s = Status created 8675309 False False "IM TWITTERING" u +-- >>> putStr $ pretty_print Nothing s +-- washington_irving - Sun, 24 Oct 2010 18:21:41 UTC +-- ------------------------------------------------- +-- IM TWITTERING +-- +-- +-- +pretty_print :: Maybe TimeZone -> Status -> String +pretty_print mtz status = + concat [ name, + " - ", + sca, + "\n", + replicate bar_length '-', + "\n", + text status, + "\n\n", + join "\n" user_timeline_urls, + "\n" ] + where + sca = show_created_at mtz status + name = screen_name (user status) + user_timeline_urls = listify (make_user_timeline_urls status) + bar_length = (length name) + 3 + (length sca) + + +-- | Given a list of statuses, returns the greatest status_id +-- belonging to one of the statuses in the list. +-- +-- Examples: +-- +-- >>> let u = User "washington_irving" +-- >>> let created = parse_status_time "Sun Oct 24 18:21:41 +0000 2010" +-- >>> let s = Status created 8675309 False False "IM TWITTERING" u +-- >>> let timeline = [s,s,s,s,s] +-- >>> get_max_status_id timeline +-- 8675309 +-- +get_max_status_id :: Timeline -> Integer get_max_status_id statuses = maximum status_ids where - status_ids = map status_id statuses \ No newline at end of file + status_ids = map status_id statuses + + +-- | Parse one username from a 'String'. +-- +-- Examples: +-- +-- >>> parse_username "@washington_irving" +-- Just "washington_irving" +-- >>> parse_username "washington_irving" +-- Nothing +-- >>> parse_username "Everbody loves @washington_irving, even Raymond" +-- Just "washington_irving" +-- +-- >>> parse_username "herp @@@ derp @washington_irving foo@@BAR" +-- Just "washington_irving" +-- +-- >>> parse_username "tailing at sign y'all @" +-- Nothing +-- +parse_username :: String -> Maybe String +parse_username s + | null parse_result = Nothing + | otherwise = Just parse_result + where + -- | A list of characters valid in a Twitter username. + -- + username_chars :: String + username_chars = ['a'..'z'] ++ ['A'..'Z'] ++ ['0'..'9'] ++ "_" + + -- | Take a string and drop everything (including the \'@\') up to + -- the first character of the first username (if one exists). + -- + start_name :: String -> String + start_name w = + case dropWhile (/= '@') w of + [] -> [] + (_:xs) -> xs + + parse_userchars :: String -> String + parse_userchars = takeWhile (`elem` username_chars) + + -- | Parse a username from the given String by dropping all + -- characters that don't belong to it. This function calls + -- itself recursively until it gets a username or runs out of + -- string. + -- + parse_name :: String -> String + parse_name [] = [] + parse_name rest@(_:xs) = + let ucs = (parse_userchars . start_name) rest in + case ucs of + [] -> parse_name xs + _ -> ucs + + parse_result :: String + parse_result = parse_name s + + +-- | Parse all usernames of the form \@username from a status. +-- +-- Examples: +-- +-- >>> let u = User "washington_irving" +-- >>> let b = "YO WHERE'S @BONUS500 and @@@ I LOVE @AT SIGNS@" +-- >>> let s = Status Nothing 8675309 False False b u +-- >>> parse_usernames_from_status s +-- ["BONUS500","AT"] +-- +parse_usernames_from_status :: Status -> [String] +parse_usernames_from_status status = + mapMaybe parse_username status_words + where + status_words = splitWs (text status) + + +-- | Get all referenced users' timeline URLs. +-- +make_user_timeline_urls :: Status -> [String] +make_user_timeline_urls status = + map screen_name_to_timeline_url usernames + where + usernames = parse_usernames_from_status status + + +status_tests :: TestTree +status_tests = + testGroup "Status Tests" [ test_parse_usernames ] + + +test_parse_usernames :: TestTree +test_parse_usernames = + testCase description $ actual @?= expected + where + description = "all usernames are parsed" + + dummy_user = User { screen_name = "nobody" } + dummy_text = "Hypothesis: @donsbot and @bonus500 are two " ++ + "personalities belonging to the same person." + dummy_status = Status { status_id = 1, + created_at = Nothing, + text = dummy_text, + user = dummy_user, + reply = False, + retweeted = False + } + + actual = parse_usernames_from_status dummy_status + expected = ["donsbot", "bonus500"]