From 57795a766ea4e59592f84426da3f34b3261ffb3b Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Tue, 10 Jan 2012 20:18:39 -0500 Subject: [PATCH] Rewrite most of Bliptv to work with their new scheme. --- src/websites/bliptv.rb | 154 +++++++++++++++++++---------------------- 1 file changed, 73 insertions(+), 81 deletions(-) diff --git a/src/websites/bliptv.rb b/src/websites/bliptv.rb index d9e4b53..3320587 100644 --- a/src/websites/bliptv.rb +++ b/src/websites/bliptv.rb @@ -19,115 +19,107 @@ require 'src/website' require 'cgi' -class Bliptv < Website - - VALID_BLIPTV_URL_REGEX = /^(http:\/\/)?([[:alnum:]\-]+\.)?blip\.tv\/file\/(\d+)(.*)?$/ - VALID_BLIPTV_REDIR_URL_REGEX = /^(http:\/\/)?([[:alnum:]\-]+\.)?blip\.tv\/play\/[[:alnum:]_]+$/ - - def self.owns_url?(url) - return (url =~ VALID_BLIPTV_URL_REGEX || - url =~ VALID_BLIPTV_REDIR_URL_REGEX) - end - - - def get_video_url() - page_data = '' - - if (@url =~ VALID_BLIPTV_URL_REGEX) - page_data = self.get_page_data(@url) - else - # It's a redirect. Figure out the RSS page URL from the redirect. - redir_url = get_redirect_url - rss_page_url = parse_rss_url(redir_url) - rss_data = get_page_data(rss_page_url) - - # The "real" page URL is embedded in the RSS feed. Once we get the - # real URL, we can proceed as if we were given that URL in the first - # place. - real_page_url = parse_page_url(rss_data) - page_data = self.get_page_data(real_page_url) +class BliptvMediaFormat + # This is just a convenience class for parsing two parameters out of + # an RSS feed: 'url' and 'blip:role'. + def initialize(line) + @url = nil + @role = nil + + url_regex = /url=\"([^\"]+)\"/ + role_regex = /blip:role=\"([^\"]+)\"/ + url_matches = url_regex.match(line) + role_matches = role_regex.match(line) + + if not url_matches.nil? and (url_matches.length) > 1 + @url = url_matches[1] end - filepath = parse_video_url(page_data) + if not role_matches.nil? and (role_matches.length > 1) + @role = role_matches[1] + end + end - return filepath + def role + return @role end + def url + return @url + end +end - protected; - def parse_page_url(data) - # A simplified VALID_BLIPTV_URL_REGEX. - page_url_regex = /http:\/\/blip\.tv\/file\/\d+/ - matches = page_url_regex.match(data) +class Bliptv < Website - if matches.nil? - raise StandardError.new("Couldn't parse the real page URL from the RSS page.") - end + VALID_BLIPTV_URL_REGEX = /^(http:\/\/)?blip\.tv\/.*?(\d+)$/ - return matches[0] + def self.owns_url?(url) + return url =~ VALID_BLIPTV_URL_REGEX end - def parse_rss_url(url) - rss_id_regex = /\/flash\/(\d+)/ - matches = rss_id_regex.match(url) - if matches.nil? or (matches.length < 2) - raise StandardError.new("Couldn't parse the video ID from the redirect URL: #{url}") - end + def get_video_url() + video_id = self.parse_video_id() + rss_page_url = "http://blip.tv/rss/flash/#{video_id}" + rss_data = get_page_data(rss_page_url) + video_url = parse_video_url(rss_data) - return "http://blip.tv/rss/flash/#{matches[1]}" + return video_url end - def get_redirect_url - uri = URI.parse(@url) - - response = Net::HTTP.start(uri.host, uri.port) do |http| - http.get(uri.request_uri, {}) - end - - return CGI::unescape(response['location']) - end + protected; - def parse_video_url(page_data) - # First, try to find the MOV video. The source videos are usually - # encoded with MOV. - video_url_regex = /"Quicktime \(\.mov\)", "attribute" : "(.*?\.mov)/i - matches = video_url_regex.match(page_data) + def parse_video_id() + video_id_regex = /(\d+)$/ + matches = video_id_regex.match(@url) - if not matches.nil? and (matches.length > 1) + if matches.nil? or (matches.length < 2) + raise StandardError.new("Couldn't parse the video id from the URL.") + else return matches[1] end + end - # I've seen some free software videos encoded as OGG/Vorbis, too. - video_url_regex = /"Ogg Theora\/Vorbis \(\.og[gv]\)", "attribute" : "(.*?\.og[gv])/i - matches = video_url_regex.match(page_data) - - if not matches.nil? and (matches.length > 1) - return matches[1] - end - # If that didn't work, try the WMV format, which is occasionally - # used for the source as well. - video_url_regex = /"Windows Media \(\.wmv\)", "attribute" : "(.*?\.wmv)/i - matches = video_url_regex.match(page_data) + def choose_best_format(formats) + # 'formats' is assumed to be an array of BliptvMediaFormat. We + # return the best one (in terms of video quality). + formats.each do |f| + if f.url.nil? or f.role.nil? + formats.delete(f) + next + end - if not matches.nil? and (matches.length > 1) - return matches[1] + return f if f.role == "Source" end + if formats.length == 0 + raise StandardError.new("No valid formats in the RSS feed.") + else + # Return whatever's left if we don't have a 'Source' video. + return formats[0] + end + end - # If neither of the source formats are present, just grab the - # video URL from the Flash variable and be done with it. - video_url_regex = /setPrimaryMediaUrl\("(.*?\.(flv|mov|wmv|mp4|og[gv]))/i - matches = video_url_regex.match(page_data) - - if matches.nil? or (matches.length < 2) - raise StandardError.new("Couldn't parse any of the video format URLs.") + def parse_video_url(page_data) + # All of the elements containing video URLs begin like this. + media_regex = /^\s*