X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2FTwitter%2FStatus.hs;h=640072c69102da982d3a02a77c5ef527509783b6;hb=HEAD;hp=191b8e85b734820bf73541ec9e55d621a0e0ba73;hpb=4cc476a2714260980899ca5358196bbf5226b3c2;p=dead%2Fhalcyon.git diff --git a/src/Twitter/Status.hs b/src/Twitter/Status.hs index 191b8e8..640072c 100644 --- a/src/Twitter/Status.hs +++ b/src/Twitter/Status.hs @@ -1,42 +1,55 @@ {-# LANGUAGE NoMonomorphismRestriction #-} -- | Functions and data for working with Twitter statuses. -module Twitter.Status +module Twitter.Status ( + Status(..), + Timeline, + get_max_status_id, + pretty_print, + status_tests, + utc_time_to_rfc822 ) where -import Control.Applicative ((<$>), (<*>)) -import Control.Monad (liftM) -import Data.Aeson ((.:), FromJSON(..), Value(Object)) -import Data.Maybe (fromMaybe, mapMaybe, isJust) -import Data.Monoid (mempty) -import Data.String.Utils (join, splitWs) -import Data.Text (pack) -import Data.Time (formatTime) -import Data.Time.Clock (UTCTime) -import Data.Time.Format (parseTime) -import Data.Time.LocalTime (TimeZone, utcToZonedTime) -import System.Locale (defaultTimeLocale, rfc822DateFormat) -import Test.Framework (Test, testGroup) -import Test.Framework.Providers.HUnit (testCase) -import Test.HUnit (Assertion, assertEqual) -import Text.Regex (matchRegex, mkRegex) - -import Html (replace_entities) -import StringUtils (listify) -import Twitter.User +import Control.Applicative ( (<$>), (<*>) ) +import Control.Monad ( liftM ) +import Data.Aeson ( (.:), FromJSON(..), Value(Object) ) +import Data.Maybe ( mapMaybe, isJust ) +import Data.Monoid ( mempty ) +import Data.String.Utils ( join, splitWs ) +import Data.Text ( pack ) +import Data.Time ( formatTime ) +import Data.Time.Clock ( UTCTime ) +import Data.Time.Format ( parseTime ) +import Data.Time.LocalTime ( TimeZone, utcToZonedTime ) +import System.Locale ( defaultTimeLocale, rfc822DateFormat ) +import Test.Tasty ( TestTree, testGroup ) +import Test.Tasty.HUnit ( (@?=), testCase ) +import Html ( replace_entities ) +import StringUtils ( listify ) +import Twitter.User ( User(..), screen_name_to_timeline_url ) + + +-- | Representation of a Twitter user status. We only care about a few +-- of the fields, and those are all that we bother to include in the +-- representation. +-- data Status = Status { created_at :: Maybe UTCTime, status_id :: Integer, reply :: Bool, retweeted :: Bool, text :: String, - user :: User - } deriving (Show, Eq) + user :: User } + deriving (Eq, Show) type Timeline = [Status] + instance FromJSON Status where + -- | Use a bunch of applicative magic to parse a 'Status' out of the + -- JSON that we get from the Twitter API. + -- parseJSON (Object t) = Status <$> liftM parse_status_time (t .: created_at_field) <*> @@ -46,7 +59,9 @@ instance FromJSON Status where liftM replace_entities (t .: text_field) <*> (t .: user_field) where - -- The typechecker flips out without this. + -- | The typechecker flips out without this; it's just a copy if + -- 'isJust' specialized to the 'Int' type. + -- isJustInt :: Maybe Int -> Bool isJustInt = isJust @@ -60,14 +75,36 @@ instance FromJSON Status where -- Do whatever. parseJSON _ = mempty + +-- | Parse a timestamp from a status into a UTCTime (or Nothing). +-- +-- Examples: +-- +-- >>> let s = "Sun Oct 24 18:21:41 +0000 2010" +-- >>> parse_status_time s +-- Just 2010-10-24 18:21:41 UTC +-- +-- >>> parse_status_time "what's up dawg" +-- Nothing +-- parse_status_time :: String -> Maybe UTCTime parse_status_time = parseTime defaultTimeLocale status_format where - -- | Should match e.g. "Sun Oct 24 18:21:41 +0000 2010" status_format :: String status_format = "%a %b %d %H:%M:%S %z %Y" + +-- | Given a 'TimeZone', convert a 'UTCTime' into an RFC822-format +-- time string. If no 'TimeZone' is given, assume UTC. +-- +-- Examples: +-- +-- >>> let s = "Sun Oct 24 18:21:41 +0000 2010" +-- >>> let Just t = parse_status_time s +-- >>> utc_time_to_rfc822 Nothing t +-- "Sun, 24 Oct 2010 18:21:41 UTC" +-- utc_time_to_rfc822 :: Maybe TimeZone -> UTCTime -> String utc_time_to_rfc822 mtz utc = case mtz of @@ -77,12 +114,38 @@ utc_time_to_rfc822 mtz utc = foo = formatTime defaultTimeLocale rfc822DateFormat +-- | Get the 'created_at' time out of a 'Status' and display it as an +-- RFC822-format time string. If there's no created-at time in the +-- status, you'll get an empty string instead. +-- +-- >>> let u = User "washington_irving" +-- >>> let created = parse_status_time "Sun Oct 24 18:21:41 +0000 2010" +-- >>> let s = Status created 8675309 False False "IM TWITTERING" u +-- >>> show_created_at Nothing s +-- "Sun, 24 Oct 2010 18:21:41 UTC" +-- >>> show_created_at Nothing s{ created_at = Nothing } +-- "" +-- show_created_at :: Maybe TimeZone -> Status -> String show_created_at mtz = - (fromMaybe "") . (fmap $ utc_time_to_rfc822 mtz) . created_at + (maybe "" (utc_time_to_rfc822 mtz)) . created_at + -- | Returns a nicely-formatted String representing the given 'Status' -- object. +-- +-- Examples: +-- +-- >>> let u = User "washington_irving" +-- >>> let created = parse_status_time "Sun Oct 24 18:21:41 +0000 2010" +-- >>> let s = Status created 8675309 False False "IM TWITTERING" u +-- >>> putStr $ pretty_print Nothing s +-- washington_irving - Sun, 24 Oct 2010 18:21:41 UTC +-- ------------------------------------------------- +-- IM TWITTERING +-- +-- +-- pretty_print :: Maybe TimeZone -> Status -> String pretty_print mtz status = concat [ name, @@ -104,6 +167,16 @@ pretty_print mtz status = -- | Given a list of statuses, returns the greatest status_id -- belonging to one of the statuses in the list. +-- +-- Examples: +-- +-- >>> let u = User "washington_irving" +-- >>> let created = parse_status_time "Sun Oct 24 18:21:41 +0000 2010" +-- >>> let s = Status created 8675309 False False "IM TWITTERING" u +-- >>> let timeline = [s,s,s,s,s] +-- >>> get_max_status_id timeline +-- 8675309 +-- get_max_status_id :: Timeline -> Integer get_max_status_id statuses = maximum status_ids @@ -111,26 +184,81 @@ get_max_status_id statuses = status_ids = map status_id statuses --- | Parse one username from a word. +-- | Parse one username from a 'String'. +-- +-- Examples: +-- +-- >>> parse_username "@washington_irving" +-- Just "washington_irving" +-- >>> parse_username "washington_irving" +-- Nothing +-- >>> parse_username "Everbody loves @washington_irving, even Raymond" +-- Just "washington_irving" +-- +-- >>> parse_username "herp @@@ derp @washington_irving foo@@BAR" +-- Just "washington_irving" +-- +-- >>> parse_username "tailing at sign y'all @" +-- Nothing +-- parse_username :: String -> Maybe String -parse_username word = - case matches of - Nothing -> Nothing - Just [] -> Nothing - Just (first_match:_) -> Just first_match +parse_username s + | null parse_result = Nothing + | otherwise = Just parse_result where - username_regex = mkRegex "@([a-zA-Z0-9_]+)" - matches = matchRegex username_regex word + -- | A list of characters valid in a Twitter username. + -- + username_chars :: String + username_chars = ['a'..'z'] ++ ['A'..'Z'] ++ ['0'..'9'] ++ "_" + + -- | Take a string and drop everything (including the \'@\') up to + -- the first character of the first username (if one exists). + -- + start_name :: String -> String + start_name w = + case dropWhile (/= '@') w of + [] -> [] + (_:xs) -> xs + + parse_userchars :: String -> String + parse_userchars = takeWhile (`elem` username_chars) + + -- | Parse a username from the given String by dropping all + -- characters that don't belong to it. This function calls + -- itself recursively until it gets a username or runs out of + -- string. + -- + parse_name :: String -> String + parse_name [] = [] + parse_name rest@(_:xs) = + let ucs = (parse_userchars . start_name) rest in + case ucs of + [] -> parse_name xs + _ -> ucs + + parse_result :: String + parse_result = parse_name s -- | Parse all usernames of the form \@username from a status. +-- +-- Examples: +-- +-- >>> let u = User "washington_irving" +-- >>> let b = "YO WHERE'S @BONUS500 and @@@ I LOVE @AT SIGNS@" +-- >>> let s = Status Nothing 8675309 False False b u +-- >>> parse_usernames_from_status s +-- ["BONUS500","AT"] +-- parse_usernames_from_status :: Status -> [String] parse_usernames_from_status status = mapMaybe parse_username status_words where status_words = splitWs (text status) + -- | Get all referenced users' timeline URLs. +-- make_user_timeline_urls :: Status -> [String] make_user_timeline_urls status = map screen_name_to_timeline_url usernames @@ -138,20 +266,17 @@ make_user_timeline_urls status = usernames = parse_usernames_from_status status -status_tests :: Test +status_tests :: TestTree status_tests = - testGroup "Status Tests" [ tc1 ] - where - tc1 = testCase "All usernames are parsed." test_parse_usernames + testGroup "Status Tests" [ test_parse_usernames ] -test_parse_usernames :: Assertion +test_parse_usernames :: TestTree test_parse_usernames = - assertEqual - "All usernames are parsed." - expected_usernames - actual_usernames + testCase description $ actual @?= expected where + description = "all usernames are parsed" + dummy_user = User { screen_name = "nobody" } dummy_text = "Hypothesis: @donsbot and @bonus500 are two " ++ "personalities belonging to the same person." @@ -163,5 +288,5 @@ test_parse_usernames = retweeted = False } - actual_usernames = parse_usernames_from_status dummy_status - expected_usernames = ["donsbot", "bonus500"] + actual = parse_usernames_from_status dummy_status + expected = ["donsbot", "bonus500"]