Rewrite most of Bliptv to work with blip.tv's new scheme.

author Michael Orlitzky <michael@orlitzky.com>

Wed, 11 Jan 2012 01:18:39 +0000 (20:18 -0500)

committer Michael Orlitzky <michael@orlitzky.com>

Wed, 11 Jan 2012 01:18:39 +0000 (20:18 -0500)
author Michael Orlitzky <michael@orlitzky.com>
Wed, 11 Jan 2012 01:18:39 +0000 (20:18 -0500)
committer Michael Orlitzky <michael@orlitzky.com>
Wed, 11 Jan 2012 01:18:39 +0000 (20:18 -0500)
diff --git a/src/websites/bliptv.rb b/src/websites/bliptv.rb

index d9e4b53682dc4445347bae0aca871960963e5cf4..33205877c61152a2974412a27b0e2a5356f023c5 100644 (file)
--- a/src/websites/bliptv.rb
+++ b/src/websites/bliptv.rb
@@ -19,115 +19,107 @@
  require 'src/website'
  require 'cgi'
  
-class Bliptv < Website
-
-  VALID_BLIPTV_URL_REGEX = /^(http:\/\/)?([[:alnum:]\-]+\.)?blip\.tv\/file\/(\d+)(.*)?$/
-  VALID_BLIPTV_REDIR_URL_REGEX = /^(http:\/\/)?([[:alnum:]\-]+\.)?blip\.tv\/play\/[[:alnum:]_]+$/
-
-  def self.owns_url?(url)
-    return (url =~ VALID_BLIPTV_URL_REGEX ||
-            url =~ VALID_BLIPTV_REDIR_URL_REGEX)
-  end
-
-
-  def get_video_url()
-    page_data = ''
-
-    if (@url =~ VALID_BLIPTV_URL_REGEX)
-      page_data = self.get_page_data(@url)
-    else
-      # It's a redirect. Figure out the RSS page URL from the redirect.
-      redir_url = get_redirect_url
-      rss_page_url = parse_rss_url(redir_url)
-      rss_data = get_page_data(rss_page_url)
-
-      # The "real" page URL is embedded in the RSS feed. Once we get the
-      # real URL, we can proceed as if we were given that URL in the first
-      # place.
-      real_page_url = parse_page_url(rss_data)
-      page_data = self.get_page_data(real_page_url)
+class BliptvMediaFormat
+  # This is just a convenience class for parsing two parameters out of
+  # an RSS feed: 'url' and 'blip:role'.
+  def initialize(line)
+    @url = nil
+    @role = nil
+
+    url_regex  = /url=\"([^\"]+)\"/
+    role_regex = /blip:role=\"([^\"]+)\"/
+    url_matches  = url_regex.match(line)
+    role_matches = role_regex.match(line)
+
+    if not url_matches.nil? and (url_matches.length) > 1
+      @url = url_matches[1]
      end
  
-    filepath = parse_video_url(page_data)
+    if not role_matches.nil? and (role_matches.length > 1)
+      @role = role_matches[1]
+    end
+  end
  
-    return filepath
+  def role
+    return @role
    end
  
+  def url
+    return @url
+  end
+end
  
-  protected;
  
-  def parse_page_url(data)
-    # A simplified VALID_BLIPTV_URL_REGEX.
-    page_url_regex = /http:\/\/blip\.tv\/file\/\d+/
-    matches = page_url_regex.match(data)
+class Bliptv < Website
  
-    if matches.nil?
-      raise StandardError.new("Couldn't parse the real page URL from the RSS page.")
-    end
+  VALID_BLIPTV_URL_REGEX = /^(http:\/\/)?blip\.tv\/.*?(\d+)$/
  
-    return matches[0]
+  def self.owns_url?(url)
+    return url =~ VALID_BLIPTV_URL_REGEX
    end
  
-  def parse_rss_url(url)
-    rss_id_regex = /\/flash\/(\d+)/
-    matches = rss_id_regex.match(url)
  
-    if matches.nil? or (matches.length < 2)
-      raise StandardError.new("Couldn't parse the video ID from the redirect URL: #{url}")
-    end
+  def get_video_url()
+    video_id = self.parse_video_id()
+    rss_page_url = "http://blip.tv/rss/flash/#{video_id}"
+    rss_data = get_page_data(rss_page_url)
+    video_url = parse_video_url(rss_data)
  
-    return "http://blip.tv/rss/flash/#{matches[1]}"
+    return video_url
    end
  
-  def get_redirect_url
-    uri = URI.parse(@url)
-
-    response = Net::HTTP.start(uri.host, uri.port) do |http|
-      http.get(uri.request_uri, {})
-    end
-
-    return CGI::unescape(response['location'])
-  end
  
+  protected;
  
-  def parse_video_url(page_data)
-    # First, try to find the MOV video. The source videos are usually
-    # encoded with MOV.
-    video_url_regex = /"Quicktime \(\.mov\)", "attribute" : "(.*?\.mov)/i
-    matches = video_url_regex.match(page_data)
+  def parse_video_id()
+    video_id_regex = /(\d+)$/
+    matches = video_id_regex.match(@url)
  
-    if not matches.nil? and (matches.length > 1)
+    if matches.nil? or (matches.length < 2)
+      raise StandardError.new("Couldn't parse the video id from the URL.")
+    else
        return matches[1]
      end
+  end
  
-    # I've seen some free software videos encoded as OGG/Vorbis, too.
-    video_url_regex = /"Ogg Theora\/Vorbis \(\.og[gv]\)", "attribute" : "(.*?\.og[gv])/i
-    matches = video_url_regex.match(page_data)
-
-    if not matches.nil? and (matches.length > 1)
-      return matches[1]
-    end
  
-    # If that didn't work, try the WMV format, which is occasionally
-    # used for the source as well.
-    video_url_regex = /"Windows Media \(\.wmv\)", "attribute" : "(.*?\.wmv)/i
-    matches = video_url_regex.match(page_data)
+  def choose_best_format(formats)
+    # 'formats' is assumed to be an array of BliptvMediaFormat. We
+    # return the best one (in terms of video quality).
+    formats.each do |f|
+      if f.url.nil? or f.role.nil?
+        formats.delete(f)
+        next
+      end
  
-    if not matches.nil? and (matches.length > 1)
-      return matches[1]
+      return f if f.role == "Source"
      end
  
+    if formats.length == 0
+      raise StandardError.new("No valid formats in the RSS feed.")
+    else
+      # Return whatever's left if we don't have a 'Source' video.
+      return formats[0]
+    end
+  end
  
-    # If neither of the source formats are present, just grab the
-    # video URL from the Flash variable and be done with it.
-    video_url_regex = /setPrimaryMediaUrl\("(.*?\.(flv|mov|wmv|mp4|og[gv]))/i
-    matches = video_url_regex.match(page_data)
-
-    if matches.nil? or (matches.length < 2)
-      raise StandardError.new("Couldn't parse any of the video format URLs.")
+  def parse_video_url(page_data)
+    # All of the elements containing video URLs begin like this.
+    media_regex = /^\s*<media:content/
+
+    formats = []
+    # Create an array of BliptvMediaFormat from lines matching
+    # media_regex.
+    page_data.lines.each do |line|
+      if (line =~ media_regex)
+        bp = BliptvMediaFormat.new(line)
+        formats << bp
+      end
      end
  
-    return matches[1]
+    # And return the URL from the best one.
+    # choose_best_format will raise an error if need be.
+    return choose_best_format(formats).url
    end
  
  end
author	Michael Orlitzky <michael@orlitzky.com>
	Wed, 11 Jan 2012 01:18:39 +0000 (20:18 -0500)
committer	Michael Orlitzky <michael@orlitzky.com>
	Wed, 11 Jan 2012 01:18:39 +0000 (20:18 -0500)