+{-# LANGUAGE NoMonomorphismRestriction #-}
+
-- | Functions and data for working with Twitter statuses.
-module Twitter.Status
+module Twitter.Status (
+ Status(..),
+ Timeline,
+ get_max_status_id,
+ pretty_print,
+ status_tests,
+ utc_time_to_rfc822 )
where
-import Control.Applicative ((<$>), (<*>))
-import Control.Monad (liftM)
-import Data.Aeson ((.:), FromJSON(..), Value(Object))
-import Data.Maybe (catMaybes, isJust)
-import Data.Monoid (mempty)
-import Data.String.Utils (join, splitWs)
-import Data.Text (pack)
-import Data.Time (formatTime)
-import Data.Time.Clock (UTCTime)
-import Data.Time.Format (parseTime)
-import Data.Time.LocalTime (TimeZone, utcToZonedTime)
-import System.Locale (defaultTimeLocale, rfc822DateFormat)
-import Test.HUnit
-import Text.Regex (matchRegex, mkRegex)
-
-import StringUtils (listify)
-import Twitter.User
+import Control.Applicative ( (<$>), (<*>) )
+import Control.Monad ( liftM )
+import Data.Aeson ( (.:), FromJSON(..), Value(Object) )
+import Data.Maybe ( mapMaybe, isJust )
+import Data.Monoid ( mempty )
+import Data.String.Utils ( join, splitWs )
+import Data.Text ( pack )
+import Data.Time ( formatTime )
+import Data.Time.Clock ( UTCTime )
+import Data.Time.Format ( parseTime )
+import Data.Time.LocalTime ( TimeZone, utcToZonedTime )
+import System.Locale ( defaultTimeLocale, rfc822DateFormat )
+import Test.Tasty ( TestTree, testGroup )
+import Test.Tasty.HUnit ( (@?=), testCase )
+
+import Html ( replace_entities )
+import StringUtils ( listify )
+import Twitter.User ( User(..), screen_name_to_timeline_url )
+
+-- | Representation of a Twitter user status. We only care about a few
+-- of the fields, and those are all that we bother to include in the
+-- representation.
+--
data Status = Status {
created_at :: Maybe UTCTime,
status_id :: Integer,
reply :: Bool,
retweeted :: Bool,
text :: String,
- user :: User
- } deriving (Show, Eq)
+ user :: User }
+ deriving (Eq, Show)
type Timeline = [Status]
+
instance FromJSON Status where
+ -- | Use a bunch of applicative magic to parse a 'Status' out of the
+ -- JSON that we get from the Twitter API.
+ --
parseJSON (Object t) =
Status <$>
liftM parse_status_time (t .: created_at_field) <*>
(t .: id_field) <*>
liftM isJustInt (t .: in_reply_to_status_id_field) <*>
(t .: retweeted_field) <*>
- (t .: text_field) <*>
+ liftM replace_entities (t .: text_field) <*>
(t .: user_field)
where
- -- The typechecker flips out without this.
+ -- | The typechecker flips out without this; it's just a copy if
+ -- 'isJust' specialized to the 'Int' type.
+ --
isJustInt :: Maybe Int -> Bool
isJustInt = isJust
-- Do whatever.
parseJSON _ = mempty
+
+-- | Parse a timestamp from a status into a UTCTime (or Nothing).
+--
+-- Examples:
+--
+-- >>> let s = "Sun Oct 24 18:21:41 +0000 2010"
+-- >>> parse_status_time s
+-- Just 2010-10-24 18:21:41 UTC
+--
+-- >>> parse_status_time "what's up dawg"
+-- Nothing
+--
parse_status_time :: String -> Maybe UTCTime
parse_status_time =
parseTime defaultTimeLocale status_format
where
- -- | Should match e.g. "Sun Oct 24 18:21:41 +0000 2010"
status_format :: String
status_format = "%a %b %d %H:%M:%S %z %Y"
+
+-- | Given a 'TimeZone', convert a 'UTCTime' into an RFC822-format
+-- time string. If no 'TimeZone' is given, assume UTC.
+--
+-- Examples:
+--
+-- >>> let s = "Sun Oct 24 18:21:41 +0000 2010"
+-- >>> let Just t = parse_status_time s
+-- >>> utc_time_to_rfc822 Nothing t
+-- "Sun, 24 Oct 2010 18:21:41 UTC"
+--
utc_time_to_rfc822 :: Maybe TimeZone -> UTCTime -> String
utc_time_to_rfc822 mtz utc =
case mtz of
foo = formatTime defaultTimeLocale rfc822DateFormat
+-- | Get the 'created_at' time out of a 'Status' and display it as an
+-- RFC822-format time string. If there's no created-at time in the
+-- status, you'll get an empty string instead.
+--
+-- >>> let u = User "washington_irving"
+-- >>> let created = parse_status_time "Sun Oct 24 18:21:41 +0000 2010"
+-- >>> let s = Status created 8675309 False False "IM TWITTERING" u
+-- >>> show_created_at Nothing s
+-- "Sun, 24 Oct 2010 18:21:41 UTC"
+-- >>> show_created_at Nothing s{ created_at = Nothing }
+-- ""
+--
show_created_at :: Maybe TimeZone -> Status -> String
show_created_at mtz =
- (maybe "" id) . (fmap $ utc_time_to_rfc822 mtz) . created_at
+ (maybe "" (utc_time_to_rfc822 mtz)) . created_at
+
-- | Returns a nicely-formatted String representing the given 'Status'
-- object.
+--
+-- Examples:
+--
+-- >>> let u = User "washington_irving"
+-- >>> let created = parse_status_time "Sun Oct 24 18:21:41 +0000 2010"
+-- >>> let s = Status created 8675309 False False "IM TWITTERING" u
+-- >>> putStr $ pretty_print Nothing s
+-- washington_irving - Sun, 24 Oct 2010 18:21:41 UTC
+-- -------------------------------------------------
+-- IM TWITTERING
+-- <BLANKLINE>
+-- <BLANKLINE>
+--
pretty_print :: Maybe TimeZone -> Status -> String
pretty_print mtz status =
concat [ name,
-- | Given a list of statuses, returns the greatest status_id
-- belonging to one of the statuses in the list.
+--
+-- Examples:
+--
+-- >>> let u = User "washington_irving"
+-- >>> let created = parse_status_time "Sun Oct 24 18:21:41 +0000 2010"
+-- >>> let s = Status created 8675309 False False "IM TWITTERING" u
+-- >>> let timeline = [s,s,s,s,s]
+-- >>> get_max_status_id timeline
+-- 8675309
+--
get_max_status_id :: Timeline -> Integer
get_max_status_id statuses =
maximum status_ids
status_ids = map status_id statuses
--- | Parse one username from a word.
+-- | Parse one username from a 'String'.
+--
+-- Examples:
+--
+-- >>> parse_username "@washington_irving"
+-- Just "washington_irving"
+-- >>> parse_username "washington_irving"
+-- Nothing
+-- >>> parse_username "Everbody loves @washington_irving, even Raymond"
+-- Just "washington_irving"
+--
+-- >>> parse_username "herp @@@ derp @washington_irving foo@@BAR"
+-- Just "washington_irving"
+--
+-- >>> parse_username "tailing at sign y'all @"
+-- Nothing
+--
parse_username :: String -> Maybe String
-parse_username word =
- case matches of
- Nothing -> Nothing
- Just [] -> Nothing
- Just (first_match:_) -> Just first_match
+parse_username s
+ | null parse_result = Nothing
+ | otherwise = Just parse_result
where
- username_regex = mkRegex "@([a-zA-Z0-9_]+)"
- matches = matchRegex username_regex word
+ -- | A list of characters valid in a Twitter username.
+ --
+ username_chars :: String
+ username_chars = ['a'..'z'] ++ ['A'..'Z'] ++ ['0'..'9'] ++ "_"
+
+ -- | Take a string and drop everything (including the \'@\') up to
+ -- the first character of the first username (if one exists).
+ --
+ start_name :: String -> String
+ start_name w =
+ case dropWhile (/= '@') w of
+ [] -> []
+ (_:xs) -> xs
+
+ parse_userchars :: String -> String
+ parse_userchars = takeWhile (`elem` username_chars)
+
+ -- | Parse a username from the given String by dropping all
+ -- characters that don't belong to it. This function calls
+ -- itself recursively until it gets a username or runs out of
+ -- string.
+ --
+ parse_name :: String -> String
+ parse_name [] = []
+ parse_name rest@(_:xs) =
+ let ucs = (parse_userchars . start_name) rest in
+ case ucs of
+ [] -> parse_name xs
+ _ -> ucs
+
+ parse_result :: String
+ parse_result = parse_name s
-- | Parse all usernames of the form \@username from a status.
+--
+-- Examples:
+--
+-- >>> let u = User "washington_irving"
+-- >>> let b = "YO WHERE'S @BONUS500 and @@@ I LOVE @AT SIGNS@"
+-- >>> let s = Status Nothing 8675309 False False b u
+-- >>> parse_usernames_from_status s
+-- ["BONUS500","AT"]
+--
parse_usernames_from_status :: Status -> [String]
parse_usernames_from_status status =
- catMaybes (map parse_username status_words)
+ mapMaybe parse_username status_words
where
status_words = splitWs (text status)
+
-- | Get all referenced users' timeline URLs.
+--
make_user_timeline_urls :: Status -> [String]
make_user_timeline_urls status =
map screen_name_to_timeline_url usernames
usernames = parse_usernames_from_status status
-status_tests :: [Test]
-status_tests = [ test_parse_usernames ]
+status_tests :: TestTree
+status_tests =
+ testGroup "Status Tests" [ test_parse_usernames ]
-test_parse_usernames :: Test
+test_parse_usernames :: TestTree
test_parse_usernames =
- TestCase $
- assertEqual
- "All usernames are parsed."
- expected_usernames
- actual_usernames
- where
- dummy_user = User { screen_name = "nobody" }
- dummy_status = Status { status_id = 1,
- created_at = Nothing,
- text = "Hypothesis: @donsbot and @bonus500 are two personalities belonging to the same person.",
- user = dummy_user,
- reply = False,
- retweeted = False
- }
-
- actual_usernames = parse_usernames_from_status dummy_status
- expected_usernames = ["donsbot", "bonus500"]
+ testCase description $ actual @?= expected
+ where
+ description = "all usernames are parsed"
+
+ dummy_user = User { screen_name = "nobody" }
+ dummy_text = "Hypothesis: @donsbot and @bonus500 are two " ++
+ "personalities belonging to the same person."
+ dummy_status = Status { status_id = 1,
+ created_at = Nothing,
+ text = dummy_text,
+ user = dummy_user,
+ reply = False,
+ retweeted = False
+ }
+
+ actual = parse_usernames_from_status dummy_status
+ expected = ["donsbot", "bonus500"]