--- |Functions and data for working with Twitter statuses.
-module Twitter.Status
+{-# LANGUAGE NoMonomorphismRestriction #-}
+
+-- | Functions and data for working with Twitter statuses.
+module Twitter.Status (
+ Status(..),
+ Timeline,
+ get_max_status_id,
+ pretty_print,
+ status_tests,
+ utc_time_to_rfc822 )
where
-import Data.Maybe
-import Text.XML.HaXml
-
-import Twitter.User
-import Twitter.Xml
-
--- |Represents one Twitter status. We don't care about any of their
--- other properties.
-data Status = Status { status_id :: Integer,
- created_at :: String,
- text :: String,
- user :: User }
- deriving (Show, Eq)
-
-
--- |Given some XML content, create a 'Status' from it.
-status_from_content :: Content -> (Maybe Status)
-status_from_content content =
-
- if (length status_ids) == 0
- || (length created_ats) == 0
- || (length texts) == 0
- || (length users) == 0
- then
- Nothing
- else
- case first_status_id of
- Nothing -> Nothing
- (Just status_id_data) ->
- case first_created_at of
- Nothing -> Nothing
- (Just created_at_data) ->
- case first_user of
- Nothing -> Nothing
- (Just user_object) ->
- case (reads status_id_data :: [(Integer, String)]) of
- [] -> Nothing
- parseresult:_ -> Just (Status (fst parseresult) created_at_data all_text user_object)
+import Control.Applicative ( (<$>), (<*>) )
+import Control.Monad ( liftM )
+import Data.Aeson ( (.:), FromJSON(..), Value(Object) )
+import Data.Maybe ( mapMaybe, isJust )
+import Data.Monoid ( mempty )
+import Data.String.Utils ( join, splitWs )
+import Data.Text ( pack )
+import Data.Time ( formatTime )
+import Data.Time.Clock ( UTCTime )
+import Data.Time.Format ( parseTime )
+import Data.Time.LocalTime ( TimeZone, utcToZonedTime )
+import System.Locale ( defaultTimeLocale, rfc822DateFormat )
+import Test.Tasty ( TestTree, testGroup )
+import Test.Tasty.HUnit ( (@?=), testCase )
- where
- status_ids = (unique_id content)
- first_status_id = get_char_data (status_ids !! 0)
+import Html ( replace_entities )
+import StringUtils ( listify )
+import Twitter.User ( User(..), screen_name_to_timeline_url )
- created_ats = (status_created_at content)
- first_created_at = get_char_data (created_ats !! 0)
- texts = (status_text content)
- all_text = concat $ catMaybes (map get_char_data texts)
+-- | Representation of a Twitter user status. We only care about a few
+-- of the fields, and those are all that we bother to include in the
+-- representation.
+--
+data Status = Status {
+ created_at :: Maybe UTCTime,
+ status_id :: Integer,
+ reply :: Bool,
+ retweeted :: Bool,
+ text :: String,
+ user :: User }
+ deriving (Eq, Show)
- users = (status_user content)
- first_user = user_from_content (users !! 0)
+type Timeline = [Status]
--- |Takes an XML String as an argument, and returns the
--- status that was parsed from it. Should only be used
--- on XML string where a <status> is a top-level element.
-parse_status :: String -> [Status]
-parse_status xml_data =
- catMaybes maybe_status
+instance FromJSON Status where
+ -- | Use a bunch of applicative magic to parse a 'Status' out of the
+ -- JSON that we get from the Twitter API.
+ --
+ parseJSON (Object t) =
+ Status <$>
+ liftM parse_status_time (t .: created_at_field) <*>
+ (t .: id_field) <*>
+ liftM isJustInt (t .: in_reply_to_status_id_field) <*>
+ (t .: retweeted_field) <*>
+ liftM replace_entities (t .: text_field) <*>
+ (t .: user_field)
where
- (Document _ _ root _) = xmlParse xml_file_name xml_data
- root_elem = CElem root
- status_element = (single_status root_elem)
- maybe_status = map status_from_content status_element
+ -- | The typechecker flips out without this; it's just a copy if
+ -- 'isJust' specialized to the 'Int' type.
+ --
+ isJustInt :: Maybe Int -> Bool
+ isJustInt = isJust
+ created_at_field = pack "created_at"
+ id_field = pack "id"
+ in_reply_to_status_id_field = pack "in_reply_to_status_id"
+ retweeted_field = pack "retweeted"
+ text_field = pack "text"
+ user_field = pack "user"
--- |Takes an XML String as an argument, and returns the list of
--- statuses that can be parsed from it.
-parse_statuses :: String -> [Status]
-parse_statuses xml_data =
- catMaybes maybe_statuses
- where
- (Document _ _ root _) = xmlParse xml_file_name xml_data
- root_elem = CElem root
- status_elements = (all_statuses root_elem)
- maybe_statuses = map status_from_content status_elements
-
-
--- |This is a required parameter to the xmlParse function used in
--- error reporting. We're not parsing a function, though, so we leave
--- it blank.
-xml_file_name :: String
-xml_file_name = ""
-
--- |Returns a nicely-formatted String representing the given 'Status'
--- object.
-pretty_print :: Status -> String
-pretty_print status =
- concat [ name,
- " - ",
- (created_at status),
- "\n",
- replicate ((length name) + 3 + (length (created_at status))) '-',
- "\n",
- replace_entities (text status),
- "\n" ]
- where
- name = screen_name (user status)
-
-
-
--- |Given a list of statuses, returns the greatest status_id belonging
--- to one of the statuses in the list.
-get_max_status_id :: [Status] -> Integer
+ -- Do whatever.
+ parseJSON _ = mempty
+
+
+-- | Parse a timestamp from a status into a UTCTime (or Nothing).
+--
+-- Examples:
+--
+-- >>> let s = "Sun Oct 24 18:21:41 +0000 2010"
+-- >>> parse_status_time s
+-- Just 2010-10-24 18:21:41 UTC
+--
+-- >>> parse_status_time "what's up dawg"
+-- Nothing
+--
+parse_status_time :: String -> Maybe UTCTime
+parse_status_time =
+ parseTime defaultTimeLocale status_format
+ where
+ status_format :: String
+ status_format = "%a %b %d %H:%M:%S %z %Y"
+
+
+-- | Given a 'TimeZone', convert a 'UTCTime' into an RFC822-format
+-- time string. If no 'TimeZone' is given, assume UTC.
+--
+-- Examples:
+--
+-- >>> let s = "Sun Oct 24 18:21:41 +0000 2010"
+-- >>> let Just t = parse_status_time s
+-- >>> utc_time_to_rfc822 Nothing t
+-- "Sun, 24 Oct 2010 18:21:41 UTC"
+--
+utc_time_to_rfc822 :: Maybe TimeZone -> UTCTime -> String
+utc_time_to_rfc822 mtz utc =
+ case mtz of
+ Nothing -> foo utc
+ Just tz -> foo $ utcToZonedTime tz utc
+ where
+ foo = formatTime defaultTimeLocale rfc822DateFormat
+
+
+-- | Get the 'created_at' time out of a 'Status' and display it as an
+-- RFC822-format time string. If there's no created-at time in the
+-- status, you'll get an empty string instead.
+--
+-- >>> let u = User "washington_irving"
+-- >>> let created = parse_status_time "Sun Oct 24 18:21:41 +0000 2010"
+-- >>> let s = Status created 8675309 False False "IM TWITTERING" u
+-- >>> show_created_at Nothing s
+-- "Sun, 24 Oct 2010 18:21:41 UTC"
+-- >>> show_created_at Nothing s{ created_at = Nothing }
+-- ""
+--
+show_created_at :: Maybe TimeZone -> Status -> String
+show_created_at mtz =
+ (maybe "" (utc_time_to_rfc822 mtz)) . created_at
+
+
+-- | Returns a nicely-formatted String representing the given 'Status'
+-- object.
+--
+-- Examples:
+--
+-- >>> let u = User "washington_irving"
+-- >>> let created = parse_status_time "Sun Oct 24 18:21:41 +0000 2010"
+-- >>> let s = Status created 8675309 False False "IM TWITTERING" u
+-- >>> putStr $ pretty_print Nothing s
+-- washington_irving - Sun, 24 Oct 2010 18:21:41 UTC
+-- -------------------------------------------------
+-- IM TWITTERING
+-- <BLANKLINE>
+-- <BLANKLINE>
+--
+pretty_print :: Maybe TimeZone -> Status -> String
+pretty_print mtz status =
+ concat [ name,
+ " - ",
+ sca,
+ "\n",
+ replicate bar_length '-',
+ "\n",
+ text status,
+ "\n\n",
+ join "\n" user_timeline_urls,
+ "\n" ]
+ where
+ sca = show_created_at mtz status
+ name = screen_name (user status)
+ user_timeline_urls = listify (make_user_timeline_urls status)
+ bar_length = (length name) + 3 + (length sca)
+
+
+-- | Given a list of statuses, returns the greatest status_id
+-- belonging to one of the statuses in the list.
+--
+-- Examples:
+--
+-- >>> let u = User "washington_irving"
+-- >>> let created = parse_status_time "Sun Oct 24 18:21:41 +0000 2010"
+-- >>> let s = Status created 8675309 False False "IM TWITTERING" u
+-- >>> let timeline = [s,s,s,s,s]
+-- >>> get_max_status_id timeline
+-- 8675309
+--
+get_max_status_id :: Timeline -> Integer
get_max_status_id statuses =
maximum status_ids
where
status_ids = map status_id statuses
+
+
+-- | Parse one username from a 'String'.
+--
+-- Examples:
+--
+-- >>> parse_username "@washington_irving"
+-- Just "washington_irving"
+-- >>> parse_username "washington_irving"
+-- Nothing
+-- >>> parse_username "Everbody loves @washington_irving, even Raymond"
+-- Just "washington_irving"
+--
+-- >>> parse_username "herp @@@ derp @washington_irving foo@@BAR"
+-- Just "washington_irving"
+--
+-- >>> parse_username "tailing at sign y'all @"
+-- Nothing
+--
+parse_username :: String -> Maybe String
+parse_username s
+ | null parse_result = Nothing
+ | otherwise = Just parse_result
+ where
+ -- | A list of characters valid in a Twitter username.
+ --
+ username_chars :: String
+ username_chars = ['a'..'z'] ++ ['A'..'Z'] ++ ['0'..'9'] ++ "_"
+
+ -- | Take a string and drop everything (including the \'@\') up to
+ -- the first character of the first username (if one exists).
+ --
+ start_name :: String -> String
+ start_name w =
+ case dropWhile (/= '@') w of
+ [] -> []
+ (_:xs) -> xs
+
+ parse_userchars :: String -> String
+ parse_userchars = takeWhile (`elem` username_chars)
+
+ -- | Parse a username from the given String by dropping all
+ -- characters that don't belong to it. This function calls
+ -- itself recursively until it gets a username or runs out of
+ -- string.
+ --
+ parse_name :: String -> String
+ parse_name [] = []
+ parse_name rest@(_:xs) =
+ let ucs = (parse_userchars . start_name) rest in
+ case ucs of
+ [] -> parse_name xs
+ _ -> ucs
+
+ parse_result :: String
+ parse_result = parse_name s
+
+
+-- | Parse all usernames of the form \@username from a status.
+--
+-- Examples:
+--
+-- >>> let u = User "washington_irving"
+-- >>> let b = "YO WHERE'S @BONUS500 and @@@ I LOVE @AT SIGNS@"
+-- >>> let s = Status Nothing 8675309 False False b u
+-- >>> parse_usernames_from_status s
+-- ["BONUS500","AT"]
+--
+parse_usernames_from_status :: Status -> [String]
+parse_usernames_from_status status =
+ mapMaybe parse_username status_words
+ where
+ status_words = splitWs (text status)
+
+
+-- | Get all referenced users' timeline URLs.
+--
+make_user_timeline_urls :: Status -> [String]
+make_user_timeline_urls status =
+ map screen_name_to_timeline_url usernames
+ where
+ usernames = parse_usernames_from_status status
+
+
+status_tests :: TestTree
+status_tests =
+ testGroup "Status Tests" [ test_parse_usernames ]
+
+
+test_parse_usernames :: TestTree
+test_parse_usernames =
+ testCase description $ actual @?= expected
+ where
+ description = "all usernames are parsed"
+
+ dummy_user = User { screen_name = "nobody" }
+ dummy_text = "Hypothesis: @donsbot and @bonus500 are two " ++
+ "personalities belonging to the same person."
+ dummy_status = Status { status_id = 1,
+ created_at = Nothing,
+ text = dummy_text,
+ user = dummy_user,
+ reply = False,
+ retweeted = False
+ }
+
+ actual = parse_usernames_from_status dummy_status
+ expected = ["donsbot", "bonus500"]