type Header = String
--- |A crude model of an RFC821 email message.
+-- | A crude model of an RFC821 email message.
data Message = Message { headers :: [Header],
subject :: String,
body :: String,
to :: String }
deriving (Eq)
--- |The default headers attached to each message.
--- The MIME junk is needed for UTF-8 to work properly.
--- Note that your mail server should support the 8BITMIME extension.
+-- | The default headers attached to each message. The MIME junk is
+-- needed for UTF-8 to work properly. Note that your mail server
+-- should support the 8BITMIME extension.
default_headers :: [Header]
default_headers = ["MIME-Version: 1.0",
"Content-Type: text/plain; charset=UTF-8",
"Content-Transfer-Encoding: 8bit"]
--- |Showing a message will print it in roughly RFC-compliant
--- form. This form is sufficient for handing the message off to
--- sendmail (or compatible).
+-- | Showing a message will print it in roughly RFC-compliant
+-- form. This form is sufficient for handing the message off to
+-- sendmail (or compatible).
instance Show Message where
- show m =
- concat [ if (length (headers m) == 0) then "" else (intercalate "\n" (headers m)) ++ "\n",
- "Subject: " ++ (subject m) ++ "\n",
- "From: " ++ (from m) ++ "\n",
- "To: " ++ (to m) ++ "\n",
- "\n",
- (body m) ]
-
+ show m =
+ concat [ formatted_headers,
+ "Subject: " ++ (subject m) ++ "\n",
+ "From: " ++ (from m) ++ "\n",
+ "To: " ++ (to m) ++ "\n",
+ "\n",
+ (body m) ]
+ where
+ formatted_headers =
+ if (length (headers m) == 0)
+ then ""
+ else (intercalate "\n" (headers m)) ++ "\n"
-- |Pad a string on the left with zeros until the entire string has
-- length n.
pad_left :: String -> Int -> String
pad_left str n
- | n < (length str) = str
- | otherwise = (replicate num_zeros '0') ++ str
- where num_zeros = n - (length str)
+ | n < (length str) = str
+ | otherwise = (replicate num_zeros '0') ++ str
+ where num_zeros = n - (length str)
-- three-tuples.
print_sendmail_result :: (String, String, ExitCode) -> IO ()
print_sendmail_result (outs, errs, ec) = do
- case ec of
- ExitSuccess -> return ()
- _ -> putStrLn $ concat ["Output: " ++ outs,
- "\nErrors: " ++ errs,
- "\nExit Code: " ++ (show ec)]
+ case ec of
+ ExitSuccess -> return ()
+ _ -> putStrLn $ concat ["Output: " ++ outs,
+ "\nErrors: " ++ errs,
+ "\nExit Code: " ++ (show ec)]
import Control.Concurrent (forkIO, threadDelay)
import Control.Monad (forever, when)
+import Data.Aeson (decode)
import Data.List ((\\))
+import Data.Time.LocalTime (TimeZone, getCurrentTimeZone)
import System.Exit (ExitCode(..), exitWith)
import System.IO (hPutStrLn, stderr)
import Twitter.User
--- |A wrapper around threadDelay which takes seconds instead of
--- microseconds as its argument.
+-- | A wrapper around threadDelay which takes seconds instead of
+-- microseconds as its argument.
thread_sleep :: Int -> IO ()
thread_sleep seconds = do
let microseconds = seconds * (10 ^ (6 :: Int))
threadDelay microseconds
--- |Given a 'Message', 'Status', and default date, update that
--- message's body and subject with the information contained in the
--- status. Adds a /Date: / header, and returns the updated message.
-message_from_status :: Message -> String -> Status -> Message
-message_from_status message default_date status =
+-- | Given a 'Message', 'Status', and date, update that message's body
+-- and subject with the information contained in the status. Adds a
+-- /Date: / header, and returns the updated message.
+message_from_status :: Maybe TimeZone -> Message -> String -> Status -> Message
+message_from_status mtz message default_date status =
message { subject = "Twat: " ++ (screen_name (user status)),
- body = (pretty_print status),
+ body = (pretty_print mtz status),
headers = ((headers message) ++ ["Date: " ++ date])}
where
- -- Use the Status' created_at date if it can be coerced into
- -- RFC822 format.
- date = case (created_at_to_rfc822 $ created_at status) of
- Nothing -> default_date
- Just c -> c
+ date =
+ case created_at status of
+ Nothing -> default_date
+ Just c -> utc_time_to_rfc822 mtz c
-- | If the given Message is not Nothing, send a copy of it for every
-- Status in the list.
-send_messages :: Cfg -> Maybe Message -> [Status] -> IO ()
-send_messages cfg maybe_message statuses =
+send_messages :: Cfg -> Maybe TimeZone -> Maybe Message -> [Status] -> IO ()
+send_messages cfg mtz maybe_message statuses =
case maybe_message of
Nothing -> return ()
Just message -> do
default_date <- rfc822_now
- let mfs = message_from_status message (default_date)
+ let mfs = message_from_status mtz message default_date
let messages = map mfs statuses
sendmail_results <- mapM sendmail' messages
_ <- mapM print_sendmail_result sendmail_results
-- and verbose is enabled.
mention_retweets :: Cfg -> [Status] -> IO ()
mention_retweets cfg ss = do
- let retweets = filter retweet ss
+ let retweets = filter retweeted ss
when ((ignore_retweets cfg) && (verbose cfg)) $ do
let countstr = show $ length retweets
putStrLn $ "Ignoring " ++ countstr ++ " retweets."
good_statuses
where
replies = filter reply ss
- retweets = filter retweet ss
+ retweets = filter retweeted ss
good_statuses' = case (ignore_replies cfg) of
True -> ss \\ replies
recurse :: Cfg -> String -> Integer -> (Maybe Message) -> IO ()
recurse cfg username latest_status_id maybe_message = do
thread_sleep (heartbeat cfg)
- xmldata <- get_user_new_statuses username latest_status_id
+ timeline <- get_user_new_statuses username latest_status_id
- -- Parsing an empty result can blow up. Just pretend there are
- -- no new statuses in that case.
- let new_statuses = case xmldata of
- Just xml -> parse_statuses xml
- Nothing -> []
+ -- FIXME
+ let Just new_statuses = decode timeline :: Maybe Timeline
case (length new_statuses) of
0 ->
let good_statuses = filter_statuses cfg new_statuses
- _ <- mapM (putStrLn . pretty_print) good_statuses
+ tz <- getCurrentTimeZone
+ let mtz = Just tz
+ mapM_ (putStrLn . (pretty_print mtz)) good_statuses
- send_messages cfg maybe_message good_statuses
+ send_messages cfg mtz maybe_message good_statuses
let new_latest_status_id = get_max_status_id new_statuses
do_recurse new_latest_status_id
-- latest status id to be posted once we have done so.
get_latest_status_id :: Int -> String -> IO Integer
get_latest_status_id delay username = do
- xmldata <- get_user_timeline username
+ timeline <- get_user_timeline username
+ let Just initial_timeline = decode timeline :: Maybe Timeline
- let initial_statuses = case xmldata of
- Just xml -> parse_statuses xml
- Nothing -> []
-
- case (length initial_statuses) of
+ case (length initial_timeline) of
0 -> do
-- If the HTTP part barfs, try again after a while.
putStrLn ("Couldn't retrieve " ++ username ++ "'s timeline. Retrying...")
thread_sleep delay
get_latest_status_id delay username
- _ -> return (get_max_status_id initial_statuses)
+ _ -> return (get_max_status_id initial_timeline)
thread_sleep (heartbeat cfg)
return ()
-
-
--- | A debugging tool that will parse, print, and email a single
--- status (given by its id).
-twat_single_status :: Cfg -> Integer -> (Maybe Message) -> IO ()
-twat_single_status cfg the_status_id maybe_message = do
- xmldata <- get_status the_status_id
-
- -- Parsing an empty result can blow up. Just pretend there are
- -- no new statuses in that case.
- let statuses = case xmldata of
- Just xml -> parse_status xml
- Nothing -> []
-
- case (length statuses) of
- 0 -> do
- putStrLn "No statuses returned."
- return ()
- _ -> do
- _ <- mapM (putStrLn . pretty_print) statuses
-
- case maybe_message of
- Nothing -> do
- putStrLn "No message object given."
- return ()
- Just message -> do
- default_date <- rfc822_now
- let messages = map (message_from_status message (default_date)) statuses
- sendmail_results <- mapM sendmail' messages
- _ <- mapM print_sendmail_result sendmail_results
- return ()
- where
- sendmail' = sendmail (sendmail_path cfg)
\ No newline at end of file
import Test.HUnit
--- |Takes a list of strings, call them string1, string2, etc. and
--- numbers them like a list. So,
--- 1. string1
--- 2. string2
--- 3. etc.
+-- | Takes a list of strings, call them string1, string2, etc. and
+-- numbers them like a list. So,
+--
+-- 1. string1
+-- 2. string2
+-- 3. etc.
+--
listify :: [String] -> [String]
listify items =
zipWith (++) list_numbers items
module Twitter.Http
where
-import Network.Curl
-import System.IO (hPutStrLn, stderr)
+import qualified Data.ByteString.Lazy as B
+import qualified Data.ByteString.Char8 as BC
+import qualified Data.Conduit as C
+import Data.Conduit.Binary (sinkLbs)
+import Network.HTTP.Conduit
+import Web.Authenticate.OAuth (
+ OAuth(..),
+ Credential,
+ newCredential,
+ newOAuth,
+ signOAuth)
-- |The API URL of username's timeline.
--
-- See,
--
--- <http://dev.twitter.com/doc/get/statuses/user_timeline>
+-- <https://dev.twitter.com/docs/api/1.1/get/statuses/user_timeline>
--
user_timeline_url :: String -> String
user_timeline_url username =
- concat [ "http://api.twitter.com/1/statuses/user_timeline.xml",
- "?screen_name=" ++ username,
- "&include_rts=true",
- "&count=10" ]
+ concat [ "https://api.twitter.com/",
+ "1.1/",
+ "statuses/",
+ "user_timeline.json?",
+ "screen_name=",
+ username,
+ "&include_rts=true&",
+ "count=10" ]
status_url :: Integer -> String
status_url status_id =
- concat [ "http://api.twitter.com/1/statuses/show/",
- (show status_id),
- ".xml" ]
-
--- |Given username's last status id, constructs the API URL for
--- username's new statuses. Essentially, 'user_timeline_url' with a
--- "since_id" parameter tacked on.
+ concat [ "https://api.twitter.com/",
+ "1.1/",
+ "statuses/",
+ "show.json?id=",
+ (show status_id) ]
+
+-- | Given username's last status id, constructs the API URL for
+-- username's new statuses. Essentially, 'user_timeline_url' with a
+-- "since_id" parameter tacked on.
user_new_statuses_url :: String -> Integer -> String
user_new_statuses_url username last_status_id =
- concat [ user_timeline_url username,
- "&since_id=" ++ (show last_status_id) ]
+ concat [ user_timeline_url username,
+ "&since_id=" ++ (show last_status_id) ]
-get_status :: Integer -> IO (Maybe String)
+get_status :: Integer -> IO B.ByteString
get_status status_id = do
- let uri = (status_url status_id)
- status <- (http_get uri)
- return status
+ let uri = (status_url status_id)
+ status <- (http_get uri)
+ return status
--- |Return's username's timeline, or 'Nothing' if there was an error.
-get_user_timeline :: String -> IO (Maybe String)
+-- | Return's username's timeline.
+get_user_timeline :: String -> IO B.ByteString
get_user_timeline username = do
let uri = (user_timeline_url username)
timeline <- (http_get uri)
return timeline
--- | Returns the XML representing all of username's statuses that are
+-- | Returns the JSON representing all of username's statuses that are
-- newer than last_status_id.
-get_user_new_statuses :: String -> Integer -> IO (Maybe String)
+get_user_new_statuses :: String -> Integer -> IO B.ByteString
get_user_new_statuses username last_status_id = do
let uri = (user_new_statuses_url username last_status_id)
new_statuses <- (http_get uri)
return new_statuses
--- | Options that will be passed to every curl request.
-curl_options :: [CurlOption]
-curl_options =
- [ CurlTimeout 45,
- -- The Global cache is not thread-friendly.
- CurlDNSUseGlobalCache False,
- -- And we don't want to use a DNS cache anyway.
- CurlDNSCacheTimeout 0 ]
-
-
--- | Uses the CURL API to retrieve uri. Returns 'Nothing' if there was
--- an error.
-http_get :: String -> IO (Maybe String)
-http_get uri =
- withCurlDo $ do
- -- Create a Curl instance.
- curl <- initialize
-
- -- Perform the request, and get back a CurlResponse object.
- -- The cast is needed to specify how we would like our headers
- -- and body returned (Strings).
- resp <- do_curl_ curl uri curl_options :: IO CurlResponse
-
- -- Pull out the response code as a CurlCode.
- let code = respCurlCode resp
-
- case code of
- CurlOK -> return $ Just (respBody resp)
- error_code -> do
- hPutStrLn stderr ("HTTP Error: " ++ (show error_code))
- -- If an error occurred, we want to dump as much information as
- -- possible. If this becomes a problem, we can use respGetInfo to
- -- query the response object for more information
- return Nothing
+-- | Retrieve a URL, or crash.
+http_get :: String -> IO B.ByteString
+http_get url = do
+ manager <- newManager def
+ request <- parseUrl url
+
+ C.runResourceT $ do
+ signed_request <- signOAuth oauth credential request
+ response <- http signed_request manager
+ responseBody response C.$$+- sinkLbs
+
+ where
+ consumer_key = BC.pack ""
+ consumer_secret = BC.pack ""
+ access_token = BC.pack ""
+ access_secret = BC.pack ""
+
+ oauth :: OAuth
+ oauth = newOAuth {
+ oauthConsumerKey = consumer_key,
+ oauthConsumerSecret = consumer_secret
+ }
+
+ credential :: Credential
+ credential = newCredential access_token access_secret
--- |Functions and data for working with Twitter statuses.
+-- | Functions and data for working with Twitter statuses.
module Twitter.Status
where
-import Data.Maybe
+import Control.Applicative ((<$>), (<*>))
+import Control.Monad (liftM)
+import Data.Aeson ((.:), FromJSON(..), Value(Object))
+import Data.Maybe (catMaybes, isJust)
+import Data.Monoid (mempty)
import Data.String.Utils (join, splitWs)
-import Data.Time (ZonedTime, formatTime, readsTime)
+import Data.Text (pack)
+import Data.Time (formatTime)
+import Data.Time.Clock (UTCTime)
+import Data.Time.Format (parseTime)
+import Data.Time.LocalTime (TimeZone, utcToZonedTime)
import System.Locale (defaultTimeLocale, rfc822DateFormat)
import Test.HUnit
import Text.Regex (matchRegex, mkRegex)
-import Text.XML.HaXml
-import Text.XML.HaXml.Posn (noPos)
import StringUtils (listify)
import Twitter.User
-import Twitter.Xml
-
--- |Represents one Twitter status. We don't care about any of their
--- other properties.
-data Status = Status { status_id :: Integer,
- created_at :: String,
- text :: String,
- user :: User,
- reply :: Bool,
- retweet :: Bool }
- deriving (Show, Eq)
-
-
--- |Given some XML content, create a 'Status' from it.
-status_from_content :: Content i -> Maybe Status
-status_from_content content =
- if (length status_ids) == 0
- || (length created_ats) == 0
- || (length texts) == 0
- || (length users) == 0
- || (length retweeteds) == 0
- then
- Nothing
- else do
- first_status_id <- get_char_data (status_ids !! 0)
- integer_status_id <- parse_status_id first_status_id
- first_created_at <- get_char_data (created_ats !! 0)
- first_user <- user_from_content (users !! 0)
- first_retweeted <- get_char_data (retweeteds !! 0)
-
- let is_reply = case (length reply_to_status_ids) of
- 0 -> False
- _ -> True
-
- let is_retweet = case first_retweeted of
- "true" -> True
- _ -> False
-
- return (Status
- integer_status_id
- first_created_at
- all_text
- first_user
- is_reply
- is_retweet)
- where
- status_ids = (unique_id content)
- created_ats = (status_created_at content)
- texts = (status_text content)
- users = (status_user content)
- retweeteds = (status_retweeted content)
- reply_to_status_ids = (status_reply_to_status_id content)
-
- all_text = concat $ catMaybes (map get_char_data texts)
-
- parse_status_id :: String -> Maybe Integer
- parse_status_id s =
- case (reads s) of
- [] -> Nothing
- parseresult:_ -> Just (fst parseresult)
-
--- |Takes an XML String as an argument, and returns the
--- status that was parsed from it. Should only be used
--- on XML string where a <status> is a top-level element.
-parse_status :: String -> [Status]
-parse_status xml_data =
- catMaybes maybe_status
- where
- (Document _ _ root _) = xmlParse xml_file_name xml_data
- root_elem = CElem root noPos
- status_element = (single_status root_elem)
- maybe_status = map status_from_content status_element
-
--- |Takes an XML String as an argument, and returns the list of
--- statuses that can be parsed from it.
-parse_statuses :: String -> [Status]
-parse_statuses xml_data =
- catMaybes maybe_statuses
+data Status = Status {
+ created_at :: Maybe UTCTime,
+ status_id :: Integer,
+ reply :: Bool,
+ retweeted :: Bool,
+ text :: String,
+ user :: User
+ } deriving (Show, Eq)
+
+type Timeline = [Status]
+
+instance FromJSON Status where
+ parseJSON (Object t) =
+ Status <$>
+ liftM parse_status_time (t .: created_at_field) <*>
+ (t .: id_field) <*>
+ liftM isJustInt (t .: in_reply_to_status_id_field) <*>
+ (t .: retweeted_field) <*>
+ (t .: text_field) <*>
+ (t .: user_field)
where
- (Document _ _ root _) = xmlParse xml_file_name xml_data
- root_elem = CElem root noPos
- status_elements = (all_statuses root_elem)
- maybe_statuses = map status_from_content status_elements
-
-
--- |This is a required parameter to the xmlParse function used in
--- error reporting. We're not parsing a function, though, so we leave
--- it blank.
-xml_file_name :: String
-xml_file_name = ""
+ -- The typechecker flips out without this.
+ isJustInt :: Maybe Int -> Bool
+ isJustInt = isJust
+
+ created_at_field = pack "created_at"
+ id_field = pack "id"
+ in_reply_to_status_id_field = pack "in_reply_to_status_id"
+ retweeted_field = pack "retweeted"
+ text_field = pack "text"
+ user_field = pack "user"
+
+ -- Do whatever.
+ parseJSON _ = mempty
+
+parse_status_time :: String -> Maybe UTCTime
+parse_status_time =
+ parseTime defaultTimeLocale status_format
+ where
+ -- | Should match e.g. "Sun Oct 24 18:21:41 +0000 2010"
+ status_format :: String
+ status_format = "%a %b %d %H:%M:%S %z %Y"
+
+utc_time_to_rfc822 :: Maybe TimeZone -> UTCTime -> String
+utc_time_to_rfc822 mtz utc =
+ case mtz of
+ Nothing -> foo utc
+ Just tz -> foo $ utcToZonedTime tz utc
+ where
+ foo = formatTime defaultTimeLocale rfc822DateFormat
+
+
+show_created_at :: Maybe TimeZone -> Status -> String
+show_created_at mtz =
+ (maybe "" id) . (fmap $ utc_time_to_rfc822 mtz) . created_at
+
+-- | Returns a nicely-formatted String representing the given 'Status'
+-- object.
+pretty_print :: Maybe TimeZone -> Status -> String
+pretty_print mtz status =
+ concat [ name,
+ " - ",
+ sca,
+ "\n",
+ replicate bar_length '-',
+ "\n",
+ text status,
+ "\n\n",
+ join "\n" user_timeline_urls,
+ "\n" ]
+ where
+ sca = show_created_at mtz status
+ name = screen_name (user status)
+ user_timeline_urls = listify (make_user_timeline_urls status)
+ bar_length = (length name) + 3 + (length sca)
-created_at_to_rfc822 :: String -> Maybe String
-created_at_to_rfc822 s =
- case reads_result of
- [(t,_)] ->
- Just $ formatTime defaultTimeLocale rfc822DateFormat t
- _ -> Nothing
- where
- -- Should match e.g. "Sun Oct 24 18:21:41 +0000 2010"
- fmt :: String
- fmt = "%a %b %d %H:%M:%S %z %Y"
-
- reads_result :: [(ZonedTime, String)]
- reads_result = readsTime defaultTimeLocale fmt s
-
--- |Returns a nicely-formatted String representing the given 'Status'
--- object.
-pretty_print :: Status -> String
-pretty_print status =
- concat [ name,
- " - ",
- (created_at status),
- "\n",
- replicate ((length name) + 3 + (length (created_at status))) '-',
- "\n",
- replace_entities (text status),
- "\n\n",
- join "\n" user_timeline_urls,
- "\n" ]
- where
- name = screen_name (user status)
- user_timeline_urls = listify (make_user_timeline_urls status)
-
-
--- |Given a list of statuses, returns the greatest status_id belonging
--- to one of the statuses in the list.
-get_max_status_id :: [Status] -> Integer
+-- | Given a list of statuses, returns the greatest status_id
+-- belonging to one of the statuses in the list.
+get_max_status_id :: Timeline -> Integer
get_max_status_id statuses =
maximum status_ids
where
status_ids = map status_id statuses
--- |Parse one username from a word.
+-- | Parse one username from a word.
parse_username :: String -> Maybe String
parse_username word =
- case matches of
- Nothing -> Nothing
- Just [] -> Nothing
- Just (first_match:_) -> Just first_match
- where
- username_regex = mkRegex "@([a-zA-Z0-9_]+)"
- matches = matchRegex username_regex word
+ case matches of
+ Nothing -> Nothing
+ Just [] -> Nothing
+ Just (first_match:_) -> Just first_match
+ where
+ username_regex = mkRegex "@([a-zA-Z0-9_]+)"
+ matches = matchRegex username_regex word
--- |Parse all usernames of the form \@username from a status.
+-- | Parse all usernames of the form \@username from a status.
parse_usernames_from_status :: Status -> [String]
parse_usernames_from_status status =
- catMaybes (map parse_username status_words)
- where
- status_words = splitWs (text status)
+ catMaybes (map parse_username status_words)
+ where
+ status_words = splitWs (text status)
--- |Get all referenced users' timeline URLs.
+-- | Get all referenced users' timeline URLs.
make_user_timeline_urls :: Status -> [String]
make_user_timeline_urls status =
- map screen_name_to_timeline_url usernames
- where
- usernames = parse_usernames_from_status status
+ map screen_name_to_timeline_url usernames
+ where
+ usernames = parse_usernames_from_status status
status_tests :: [Test]
where
dummy_user = User { screen_name = "nobody" }
dummy_status = Status { status_id = 1,
- created_at = "never",
+ created_at = Nothing,
text = "Hypothesis: @donsbot and @bonus500 are two personalities belonging to the same person.",
user = dummy_user,
reply = False,
- retweet = False
+ retweeted = False
}
actual_usernames = parse_usernames_from_status dummy_status
module Twitter.User
where
-import Text.XML.HaXml
-
-import Twitter.Xml
-
--- |Represents a Twitter user, and contains the only attribute thereof
--- that we care about: the screen (user) name.
-data User = User { screen_name :: String }
- deriving (Show, Eq)
-
-
--- |Create a 'User' from HaXML 'Content'.
-user_from_content :: Content i -> (Maybe User)
-user_from_content c =
- if (length names) == 0
- then
- Nothing
- else
- case (get_char_data (names !! 0)) of
- Nothing -> Nothing
- (Just content) -> Just (User (content))
-
+import Control.Applicative ((<$>))
+import Data.Aeson ((.:), FromJSON(..), Value(Object))
+import Data.Text (pack)
+import Data.Monoid (mempty)
+
+-- | Represents a Twitter user, and contains the only attribute
+-- thereof that we care about: the screen (user) name.
+data User = User { screen_name :: String } deriving (Eq, Show)
+
+instance FromJSON User where
+ parseJSON (Object u) =
+ User <$> (u .: screen_name_field)
where
- names = user_screen_name c
+ screen_name_field = pack "screen_name"
+ -- Do whatever.
+ parseJSON _ = mempty
-- |Get the URL for the given screen name's timeline.
screen_name_to_timeline_url :: String -> String
-screen_name_to_timeline_url sn =
- "http://twitter.com/" ++ sn
+screen_name_to_timeline_url =
+ ("http://twitter.com/" ++)
+++ /dev/null
--- |Application-specific XML functions.
-module Twitter.Xml
-where
-
-import Data.Char (chr)
-import Test.HUnit
-import Text.Regex (matchRegex, mkRegex, subRegex)
-import Text.XML.HaXml
-
--- |Returns the 'CharData' contained within the given 'Content', or
--- 'Nothing' if no acceptable CharData was found. It will parse either
--- a 'CString' ('String') or 'CRef' (XML entity reference).
-get_char_data :: Content i -> (Maybe CharData)
-get_char_data (CString _ cd _) = Just cd
-get_char_data (CRef ref _) = Just (verbatim ref) -- Entities.
-get_char_data _ = Nothing
-
-
--- |A 'CFilter' returning all top-level <status> elements.
--- The name is due to the fact that if we retrieve more than
--- one status, they will be wrapped in a <statuses> tag, and
--- thus not be top-level.
-single_status :: CFilter i
-single_status = (tag "status")
-
--- |A 'CFilter' returning all <status> tags within <statuses>.
-all_statuses :: CFilter i
-all_statuses = (tag "statuses" /> tag "status")
-
--- |Finds the text of the <id> element contained within some other
--- content. Called unique_id here because status_id is used elsewhere.
-unique_id :: CFilter i
-unique_id = keep /> (tag "id") /> txt
-
--- |Finds the text of the <created_at> element contained within some
--- other element.
-status_created_at :: CFilter i
-status_created_at = keep /> (tag "created_at") /> txt
-
--- |Finds the text of the <text> element contained within some
--- other element.
-status_text :: CFilter i
-status_text = keep /> (tag "text") /> txt
-
--- |Finds the XML of the <user> element contained within some other
--- element.
-status_user :: CFilter i
-status_user = keep /> (tag "user")
-
--- | Finds the text of the <retweeted> element contained within some
--- other element.
-status_retweeted :: CFilter i
-status_retweeted = keep /> (tag "retweeted") /> txt
-
--- | Finds the text of the <in_reply_to_status_id> element contained
--- within some other element.
-status_reply_to_status_id :: CFilter i
-status_reply_to_status_id = keep /> (tag "in_reply_to_status_id") /> txt
-
--- |Finds the text of the <screen_name> element contained within some
--- other element.
-user_screen_name :: CFilter i
-user_screen_name = keep /> (tag "screen_name") /> txt
-
--- |A wrapper around the 'read' function which returns either Nothing
--- or (Just <the thing that could be read>).
-maybe_read :: (Read a) => String -> Maybe a
-maybe_read str =
- case (reads str) of
- [] -> Nothing
- ((y,_):_) -> Just y
-
--- |Takes a unicode codepoint in decimal and returns it as a
--- one-character string.
-entity_from_codepoint :: String -> String
-entity_from_codepoint codepoint =
- case (maybe_read codepoint) of
- Nothing -> ""
- Just num -> [(chr num)]
-
-
--- | A list of tuples whose first entry is a regular expression
--- matching XML entities, and whose second entry is the ASCII
--- character represented by that entity.
---
--- For some reason, ampersands are escaped twice in the status
--- text. Rather than unescape everything twice, we just stick "amp"
--- in the list again.
-xml_entities :: [(String, String)]
-xml_entities = [("[lr]dquo", "\""),
- ("quot", "\""),
- ("[mn]dash", "-"),
- ("nbsp", " "),
- ("amp", "&"),
- ("amp", "&"),
- ("lt", "<"),
- ("gt", ">"),
- ("hellip", "…")]
-
--- |Replace all of the XML entities in target.
-replace_entities :: String -> String
-replace_entities target =
- unescape_numeric (unescape_recursive xml_entities target)
-
--- |Recursively unescape all numeric entities in the given String.
-unescape_numeric :: String -> String
-unescape_numeric target =
- case match of
- Nothing -> target
- Just subexprs ->
- case subexprs of
- [] -> target
- s1:_ ->
- let this_entity_regex = mkRegex ("&#" ++ s1 ++ ";") in
- let replacement = entity_from_codepoint s1 in
- let new_target = subRegex this_entity_regex target replacement in
- unescape_numeric new_target
- where
- from = "&#([0-9]+);"
- match = matchRegex (mkRegex from) target
-
-
-
--- |The recursive function which does the real work for
--- 'replace_entities'.
-unescape_recursive :: [(String, String)] -> String -> String
-unescape_recursive [] target = target
-unescape_recursive replacements target =
- unescape_recursive (tail replacements) (subRegex (mkRegex from) target to)
- where
- replacement = (replacements !! 0)
- from = "&" ++ (fst replacement) ++ ";"
- to = (snd replacement)
-
-
-
-xml_tests :: [Test]
-xml_tests = [ test_replace_entities, test_double_unescape ]
-
-
-test_replace_entities :: Test
-test_replace_entities =
- TestCase $ assertEqual "All entities are replaced correctly." expected_text actual_text
- where
- actual_text = (replace_entities ""The moon is gay……" said <insert the current president of the United States of America>. “It’s OK—–he’s not a real doctor.”")
- expected_text = "\"The moon is gay……\" said <insert the current president of the United States of America>. \"It’s OK--he’s not a real doctor.\""
-
-
-test_double_unescape :: Test
-test_double_unescape =
- TestCase $ assertEqual "The status text is unescaped twice." expected_text actual_text
- where
- actual_text = (replace_entities "As a kid, I'd pull a girl's hair to let her know I liked her, but now that I'm older &amp; wiser I simply hit her with my car.")
- expected_text = "As a kid, I'd pull a girl's hair to let her know I liked her, but now that I'm older & wiser I simply hit her with my car."
executable twat
build-depends:
+ aeson == 0.6.*,
+ authenticate-oauth == 1.4.*,
base == 4.*,
- curl == 1.3.*,
- directory == 1.1.*,
- HaXml == 1.23.*,
+ bytestring == 0.10.*,
+ conduit == 1.*,
+ directory == 1.2.*,
+ HaXml == 1.24.*,
+ http-conduit == 1.9.*,
HUnit == 1.2.*,
MissingH == 1.*,
process == 1.*,
old-locale == 1.*,
regex-compat == 0.*,
+ text == 0.11.*,
time == 1.*