src/websites/bliptv.rb

   1 #
   2 # Copyright Michael Orlitzky
   3 #
   4 # http://michael.orlitzky.com/
   5 #
   6 # This program is free software: you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation, either version 3 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14 # GNU General Public License for more details.
  15 #
  16 # http://www.fsf.org/licensing/licenses/gpl.html
  17 #
  18
  19 require 'src/website'
  20 require 'cgi'
  21
  22 class Bliptv < Website
  23
  24   VALID_BLIPTV_URL_REGEX = /^(http:\/\/)?([[:alnum:]\-]+\.)?blip\.tv\/file\/(\d+)(.*)?$/
  25   VALID_BLIPTV_REDIR_URL_REGEX = /^(http:\/\/)?([[:alnum:]\-]+\.)?blip\.tv\/play\/[[:alnum:]_]+$/
  26
  27   def self.owns_url?(url)
  28     return (url =~ VALID_BLIPTV_URL_REGEX ||
  29             url =~ VALID_BLIPTV_REDIR_URL_REGEX)
  30   end
  31
  32
  33   def get_video_url()
  34     page_data = ''
  35
  36     if (@url =~ VALID_BLIPTV_URL_REGEX)
  37       page_data = self.get_page_data(@url)
  38     else
  39       # It's a redirect. Figure out the RSS page URL from the redirect.
  40       redir_url = get_redirect_url
  41       rss_page_url = parse_rss_url(redir_url)
  42       rss_data = get_page_data(rss_page_url)
  43
  44       # The "real" page URL is embedded in the RSS feed. Once we get the
  45       # real URL, we can proceed as if we were given that URL in the first
  46       # place.
  47       real_page_url = parse_page_url(rss_data)
  48       page_data = self.get_page_data(real_page_url)
  49     end
  50
  51     filepath = parse_video_url(page_data)
  52
  53     return filepath
  54   end
  55
  56
  57   protected;
  58
  59   def parse_page_url(data)
  60     # A simplified VALID_BLIPTV_URL_REGEX.
  61     page_url_regex = /http:\/\/blip\.tv\/file\/\d+/
  62     matches = page_url_regex.match(data)
  63
  64     if matches.nil?
  65       raise StandardError.new("Couldn't parse the real page URL from the RSS page.")
  66     end
  67
  68     return matches[0]
  69   end
  70
  71   def parse_rss_url(url)
  72     rss_id_regex = /\/flash\/(\d+)/
  73     matches = rss_id_regex.match(url)
  74
  75     if matches.nil? or (matches.length < 2)
  76       raise StandardError.new("Couldn't parse the video ID from the redirect URL: #{url}")
  77     end
  78
  79     return "http://blip.tv/rss/flash/#{matches[1]}"
  80   end
  81
  82   def get_redirect_url
  83     uri = URI.parse(@url)
  84
  85     response = Net::HTTP.start(uri.host, uri.port) do |http|
  86       http.get(uri.request_uri, {})
  87     end
  88
  89     return CGI::unescape(response['location'])
  90   end
  91
  92
  93   def parse_video_url(page_data)
  94     # First, try to find the MOV video. The source videos are usually
  95     # encoded with MOV.
  96     video_url_regex = /"Quicktime \(\.mov\)", "attribute" : "(.*?\.mov)/i
  97     matches = video_url_regex.match(page_data)
  98
  99     if not matches.nil? and (matches.length > 1)
 100       return matches[1]
 101     end
 102
 103     # I've seen some free software videos encoded as OGG/Vorbis, too.
 104     video_url_regex = /"Ogg Theora\/Vorbis \(\.og[gv]\)", "attribute" : "(.*?\.og[gv])/i
 105     matches = video_url_regex.match(page_data)
 106
 107     if not matches.nil? and (matches.length > 1)
 108       return matches[1]
 109     end
 110
 111     # If that didn't work, try the WMV format, which is occasionally
 112     # used for the source as well.
 113     video_url_regex = /"Windows Media \(\.wmv\)", "attribute" : "(.*?\.wmv)/i
 114     matches = video_url_regex.match(page_data)
 115
 116     if not matches.nil? and (matches.length > 1)
 117       return matches[1]
 118     end
 119
 120
 121     # If neither of the source formats are present, just grab the
 122     # video URL from the Flash variable and be done with it.
 123     video_url_regex = /setPrimaryMediaUrl\("(.*?\.(flv|mov|wmv|mp4|og[gv]))/i
 124     matches = video_url_regex.match(page_data)
 125
 126     if matches.nil? or (matches.length < 2)
 127       raise StandardError.new("Couldn't parse any of the video format URLs.")
 128     end
 129
 130     return matches[1]
 131   end
 132
 133 end