where
import Data.Maybe
+import Data.String.Utils (join, splitWs)
+import Data.Time (ZonedTime, formatTime, readsTime)
+import System.Locale (defaultTimeLocale, rfc822DateFormat)
+import Test.HUnit
+import Text.Regex (matchRegex, mkRegex)
import Text.XML.HaXml
+import Text.XML.HaXml.Posn (noPos)
+import StringUtils (listify)
import Twitter.User
import Twitter.Xml
data Status = Status { status_id :: Integer,
created_at :: String,
text :: String,
- user :: User }
+ user :: User,
+ reply :: Bool,
+ retweet :: Bool }
deriving (Show, Eq)
-- |Given some XML content, create a 'Status' from it.
-status_from_content :: Content -> (Maybe Status)
+status_from_content :: Content i -> Maybe Status
status_from_content content =
-
if (length status_ids) == 0
|| (length created_ats) == 0
|| (length texts) == 0
|| (length users) == 0
+ || (length retweeteds) == 0
then
Nothing
- else
- case first_status_id of
- Nothing -> Nothing
- (Just status_id_data) ->
- case first_created_at of
- Nothing -> Nothing
- (Just created_at_data) ->
- case first_user of
- Nothing -> Nothing
- (Just user_object) ->
- case (reads status_id_data :: [(Integer, String)]) of
- [] -> Nothing
- parseresult:_ -> Just (Status (fst parseresult) created_at_data all_text user_object)
-
+ else do
+ first_status_id <- get_char_data (status_ids !! 0)
+ integer_status_id <- parse_status_id first_status_id
+ first_created_at <- get_char_data (created_ats !! 0)
+ first_user <- user_from_content (users !! 0)
+ first_retweeted <- get_char_data (retweeteds !! 0)
+
+ let is_reply = case (length reply_to_status_ids) of
+ 0 -> False
+ _ -> True
+
+ let is_retweet = case first_retweeted of
+ "true" -> True
+ _ -> False
+
+ return (Status
+ integer_status_id
+ first_created_at
+ all_text
+ first_user
+ is_reply
+ is_retweet)
where
status_ids = (unique_id content)
- first_status_id = get_char_data (status_ids !! 0)
-
created_ats = (status_created_at content)
- first_created_at = get_char_data (created_ats !! 0)
-
texts = (status_text content)
+ users = (status_user content)
+ retweeteds = (status_retweeted content)
+ reply_to_status_ids = (status_reply_to_status_id content)
+
all_text = concat $ catMaybes (map get_char_data texts)
- users = (status_user content)
- first_user = user_from_content (users !! 0)
+ parse_status_id :: String -> Maybe Integer
+ parse_status_id s =
+ case (reads s) of
+ [] -> Nothing
+ parseresult:_ -> Just (fst parseresult)
+
+-- |Takes an XML String as an argument, and returns the
+-- status that was parsed from it. Should only be used
+-- on XML string where a <status> is a top-level element.
+parse_status :: String -> [Status]
+parse_status xml_data =
+ catMaybes maybe_status
+ where
+ (Document _ _ root _) = xmlParse xml_file_name xml_data
+ root_elem = CElem root noPos
+ status_element = (single_status root_elem)
+ maybe_status = map status_from_content status_element
-- |Takes an XML String as an argument, and returns the list of
catMaybes maybe_statuses
where
(Document _ _ root _) = xmlParse xml_file_name xml_data
- root_elem = CElem root
+ root_elem = CElem root noPos
status_elements = (all_statuses root_elem)
maybe_statuses = map status_from_content status_elements
xml_file_name :: String
xml_file_name = ""
+
+created_at_to_rfc822 :: String -> Maybe String
+created_at_to_rfc822 s =
+ case reads_result of
+ [(t,_)] ->
+ Just $ formatTime defaultTimeLocale rfc822DateFormat t
+ _ -> Nothing
+ where
+ -- Should match e.g. "Sun Oct 24 18:21:41 +0000 2010"
+ fmt :: String
+ fmt = "%a %b %d %H:%M:%S %z %Y"
+
+ reads_result :: [(ZonedTime, String)]
+ reads_result = readsTime defaultTimeLocale fmt s
+
-- |Returns a nicely-formatted String representing the given 'Status'
-- object.
pretty_print :: Status -> String
replicate ((length name) + 3 + (length (created_at status))) '-',
"\n",
replace_entities (text status),
+ "\n\n",
+ join "\n" user_timeline_urls,
"\n" ]
where
name = screen_name (user status)
-
+ user_timeline_urls = listify (make_user_timeline_urls status)
-- |Given a list of statuses, returns the greatest status_id belonging
maximum status_ids
where
status_ids = map status_id statuses
+
+
+-- |Parse one username from a word.
+parse_username :: String -> Maybe String
+parse_username word =
+ case matches of
+ Nothing -> Nothing
+ Just [] -> Nothing
+ Just (first_match:_) -> Just first_match
+ where
+ username_regex = mkRegex "@([a-zA-Z0-9_]+)"
+ matches = matchRegex username_regex word
+
+
+-- |Parse all usernames of the form \@username from a status.
+parse_usernames_from_status :: Status -> [String]
+parse_usernames_from_status status =
+ catMaybes (map parse_username status_words)
+ where
+ status_words = splitWs (text status)
+
+-- |Get all referenced users' timeline URLs.
+make_user_timeline_urls :: Status -> [String]
+make_user_timeline_urls status =
+ map screen_name_to_timeline_url usernames
+ where
+ usernames = parse_usernames_from_status status
+
+
+status_tests :: [Test]
+status_tests = [ test_parse_usernames ]
+
+
+test_parse_usernames :: Test
+test_parse_usernames =
+ TestCase $
+ assertEqual
+ "All usernames are parsed."
+ expected_usernames
+ actual_usernames
+ where
+ dummy_user = User { screen_name = "nobody" }
+ dummy_status = Status { status_id = 1,
+ created_at = "never",
+ text = "Hypothesis: @donsbot and @bonus500 are two personalities belonging to the same person.",
+ user = dummy_user,
+ reply = False,
+ retweet = False
+ }
+
+ actual_usernames = parse_usernames_from_status dummy_status
+ expected_usernames = ["donsbot", "bonus500"]