X-Git-Url: http://gitweb.michael.orlitzky.com/?p=dead%2Fwhatever-dl.git;a=blobdiff_plain;f=src%2Fwebsites%2Fbliptv.rb;h=d9e4b53682dc4445347bae0aca871960963e5cf4;hp=9322356a9e92353a31782e3a89c27500946b3939;hb=b45a395047ef46761b58b12df1afd9e948a3f786;hpb=d56c687299c6db6d4571c78ddb54e5da0d8b96ca diff --git a/src/websites/bliptv.rb b/src/websites/bliptv.rb index 9322356..d9e4b53 100644 --- a/src/websites/bliptv.rb +++ b/src/websites/bliptv.rb @@ -17,34 +17,94 @@ # require 'src/website' +require 'cgi' class Bliptv < Website VALID_BLIPTV_URL_REGEX = /^(http:\/\/)?([[:alnum:]\-]+\.)?blip\.tv\/file\/(\d+)(.*)?$/ + VALID_BLIPTV_REDIR_URL_REGEX = /^(http:\/\/)?([[:alnum:]\-]+\.)?blip\.tv\/play\/[[:alnum:]_]+$/ def self.owns_url?(url) - return url =~ VALID_BLIPTV_URL_REGEX + return (url =~ VALID_BLIPTV_URL_REGEX || + url =~ VALID_BLIPTV_REDIR_URL_REGEX) end - + def get_video_url() - page_data = self.get_page_data(@url) + page_data = '' + + if (@url =~ VALID_BLIPTV_URL_REGEX) + page_data = self.get_page_data(@url) + else + # It's a redirect. Figure out the RSS page URL from the redirect. + redir_url = get_redirect_url + rss_page_url = parse_rss_url(redir_url) + rss_data = get_page_data(rss_page_url) + + # The "real" page URL is embedded in the RSS feed. Once we get the + # real URL, we can proceed as if we were given that URL in the first + # place. + real_page_url = parse_page_url(rss_data) + page_data = self.get_page_data(real_page_url) + end + filepath = parse_video_url(page_data) - + return filepath end - + protected; - + def parse_page_url(data) + # A simplified VALID_BLIPTV_URL_REGEX. + page_url_regex = /http:\/\/blip\.tv\/file\/\d+/ + matches = page_url_regex.match(data) + + if matches.nil? + raise StandardError.new("Couldn't parse the real page URL from the RSS page.") + end + + return matches[0] + end + + def parse_rss_url(url) + rss_id_regex = /\/flash\/(\d+)/ + matches = rss_id_regex.match(url) + + if matches.nil? or (matches.length < 2) + raise StandardError.new("Couldn't parse the video ID from the redirect URL: #{url}") + end + + return "http://blip.tv/rss/flash/#{matches[1]}" + end + + def get_redirect_url + uri = URI.parse(@url) + + response = Net::HTTP.start(uri.host, uri.port) do |http| + http.get(uri.request_uri, {}) + end + + return CGI::unescape(response['location']) + end + + def parse_video_url(page_data) # First, try to find the MOV video. The source videos are usually # encoded with MOV. video_url_regex = /"Quicktime \(\.mov\)", "attribute" : "(.*?\.mov)/i matches = video_url_regex.match(page_data) - if not matches.nil? + if not matches.nil? and (matches.length > 1) + return matches[1] + end + + # I've seen some free software videos encoded as OGG/Vorbis, too. + video_url_regex = /"Ogg Theora\/Vorbis \(\.og[gv]\)", "attribute" : "(.*?\.og[gv])/i + matches = video_url_regex.match(page_data) + + if not matches.nil? and (matches.length > 1) return matches[1] end @@ -53,17 +113,17 @@ class Bliptv < Website video_url_regex = /"Windows Media \(\.wmv\)", "attribute" : "(.*?\.wmv)/i matches = video_url_regex.match(page_data) - if not matches.nil? + if not matches.nil? and (matches.length > 1) return matches[1] end - + # If neither of the source formats are present, just grab the # video URL from the Flash variable and be done with it. - video_url_regex = /setPrimaryMediaUrl\("(.*?\.(flv|mov|wmv|mp4))/i + video_url_regex = /setPrimaryMediaUrl\("(.*?\.(flv|mov|wmv|mp4|og[gv]))/i matches = video_url_regex.match(page_data) - if matches.nil? + if matches.nil? or (matches.length < 2) raise StandardError.new("Couldn't parse any of the video format URLs.") end