From b440a5f61db2c0dbf5266ce6885d2240b6fb02e9 Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Sun, 17 Oct 2010 23:19:47 -0400 Subject: [PATCH] Add blip.tv support for URLs of the form http://blip.tv/play/. --- src/websites/bliptv.rb | 75 +++++++++++++++++++++++++++++++++++------- 1 file changed, 63 insertions(+), 12 deletions(-) diff --git a/src/websites/bliptv.rb b/src/websites/bliptv.rb index b3bd8bb..116d4f7 100644 --- a/src/websites/bliptv.rb +++ b/src/websites/bliptv.rb @@ -21,38 +21,89 @@ require 'src/website' class Bliptv < Website VALID_BLIPTV_URL_REGEX = /^(http:\/\/)?([[:alnum:]\-]+\.)?blip\.tv\/file\/(\d+)(.*)?$/ + VALID_BLIPTV_REDIR_URL_REGEX = /^(http:\/\/)?([[:alnum:]\-]+\.)?blip\.tv\/play\/[[:alnum:]]+$/ def self.owns_url?(url) - return url =~ VALID_BLIPTV_URL_REGEX + return (url =~ VALID_BLIPTV_URL_REGEX || + url =~ VALID_BLIPTV_REDIR_URL_REGEX) end - + def get_video_url() - page_data = self.get_page_data(@url) + page_data = '' + + if (@url =~ VALID_BLIPTV_URL_REGEX) + page_data = self.get_page_data(@url) + else + # It's a redirect. Figure out the RSS page URL from the redirect. + redir_url = get_redirect_url + rss_page_url = parse_rss_url(redir_url) + rss_data = get_page_data(rss_page_url) + + # The "real" page URL is embedded in the RSS feed. Once we get the + # real URL, we can proceed as if we were given that URL in the first + # place. + real_page_url = parse_page_url(rss_data) + page_data = self.get_page_data(real_page_url) + end + filepath = parse_video_url(page_data) - + return filepath end - + protected; - + def parse_page_url(data) + # A simplified VALID_BLIPTV_URL_REGEX. + page_url_regex = /http:\/\/blip\.tv\/file\/\d+/ + matches = page_url_regex.match(data) + + if matches.nil? + raise StandardError.new("Couldn't parse the real page URL from the RSS page.") + end + + return matches[0] + end + + def parse_rss_url(url) + rss_id_regex = /\/flash\/(\d+)/ + matches = rss_id_regex.match(url) + + if matches.nil? or (matches.length < 2) + raise StandardError.new("Couldn't parse the video ID from the redirect URL: #{url}") + end + + return "http://blip.tv/rss/flash/#{matches[1]}" + end + + def get_redirect_url + uri = URI.parse(@url) + + response = Net::HTTP.start(uri.host, uri.port) do |http| + http.get(uri.request_uri, {}) + end + + return response['location'] + end + + def parse_video_url(page_data) # First, try to find the MOV video. The source videos are usually # encoded with MOV. video_url_regex = /"Quicktime \(\.mov\)", "attribute" : "(.*?\.mov)/i matches = video_url_regex.match(page_data) - if not matches.nil? + if not matches.nil? and (matches.length > 1) return matches[1] end - # I've seen some free software videos encoded as OGG/Vorbis, too. + # I've seen some free software videos encoded as OGG/Vorbis, too. video_url_regex = /"Ogg Theora\/Vorbis \(\.og[gv]\)", "attribute" : "(.*?\.og[gv])/i matches = video_url_regex.match(page_data) - if not matches.nil? + if not matches.nil? and (matches.length > 1) return matches[1] end @@ -61,17 +112,17 @@ class Bliptv < Website video_url_regex = /"Windows Media \(\.wmv\)", "attribute" : "(.*?\.wmv)/i matches = video_url_regex.match(page_data) - if not matches.nil? + if not matches.nil? and (matches.length > 1) return matches[1] end - + # If neither of the source formats are present, just grab the # video URL from the Flash variable and be done with it. video_url_regex = /setPrimaryMediaUrl\("(.*?\.(flv|mov|wmv|mp4|og[gv]))/i matches = video_url_regex.match(page_data) - if matches.nil? + if matches.nil? or (matches.length < 2) raise StandardError.new("Couldn't parse any of the video format URLs.") end -- 2.44.2