require 'src/website'
require 'cgi'
-class Bliptv < Website
-
- VALID_BLIPTV_URL_REGEX = /^(http:\/\/)?([[:alnum:]\-]+\.)?blip\.tv\/file\/(\d+)(.*)?$/
- VALID_BLIPTV_REDIR_URL_REGEX = /^(http:\/\/)?([[:alnum:]\-]+\.)?blip\.tv\/play\/[[:alnum:]_]+$/
-
- def self.owns_url?(url)
- return (url =~ VALID_BLIPTV_URL_REGEX ||
- url =~ VALID_BLIPTV_REDIR_URL_REGEX)
- end
-
-
- def get_video_url()
- page_data = ''
-
- if (@url =~ VALID_BLIPTV_URL_REGEX)
- page_data = self.get_page_data(@url)
- else
- # It's a redirect. Figure out the RSS page URL from the redirect.
- redir_url = get_redirect_url
- rss_page_url = parse_rss_url(redir_url)
- rss_data = get_page_data(rss_page_url)
-
- # The "real" page URL is embedded in the RSS feed. Once we get the
- # real URL, we can proceed as if we were given that URL in the first
- # place.
- real_page_url = parse_page_url(rss_data)
- page_data = self.get_page_data(real_page_url)
+class BliptvMediaFormat
+ # This is just a convenience class for parsing two parameters out of
+ # an RSS feed: 'url' and 'blip:role'.
+ def initialize(line)
+ @url = nil
+ @role = nil
+
+ url_regex = /url=\"([^\"]+)\"/
+ role_regex = /blip:role=\"([^\"]+)\"/
+ url_matches = url_regex.match(line)
+ role_matches = role_regex.match(line)
+
+ if not url_matches.nil? and (url_matches.length) > 1
+ @url = url_matches[1]
end
- filepath = parse_video_url(page_data)
+ if not role_matches.nil? and (role_matches.length > 1)
+ @role = role_matches[1]
+ end
+ end
- return filepath
+ def role
+ return @role
end
+ def url
+ return @url
+ end
+end
- protected;
- def parse_page_url(data)
- # A simplified VALID_BLIPTV_URL_REGEX.
- page_url_regex = /http:\/\/blip\.tv\/file\/\d+/
- matches = page_url_regex.match(data)
+class Bliptv < Website
- if matches.nil?
- raise StandardError.new("Couldn't parse the real page URL from the RSS page.")
- end
+ VALID_BLIPTV_URL_REGEX = /^(http:\/\/)?blip\.tv\/.*?(\d+)$/
- return matches[0]
+ def self.owns_url?(url)
+ return url =~ VALID_BLIPTV_URL_REGEX
end
- def parse_rss_url(url)
- rss_id_regex = /\/flash\/(\d+)/
- matches = rss_id_regex.match(url)
- if matches.nil? or (matches.length < 2)
- raise StandardError.new("Couldn't parse the video ID from the redirect URL: #{url}")
- end
+ def get_video_url()
+ video_id = self.parse_video_id()
+ rss_page_url = "http://blip.tv/rss/flash/#{video_id}"
+ rss_data = get_page_data(rss_page_url)
+ video_url = parse_video_url(rss_data)
- return "http://blip.tv/rss/flash/#{matches[1]}"
+ return video_url
end
- def get_redirect_url
- uri = URI.parse(@url)
-
- response = Net::HTTP.start(uri.host, uri.port) do |http|
- http.get(uri.request_uri, {})
- end
-
- return CGI::unescape(response['location'])
- end
+ protected;
- def parse_video_url(page_data)
- # First, try to find the MOV video. The source videos are usually
- # encoded with MOV.
- video_url_regex = /"Quicktime \(\.mov\)", "attribute" : "(.*?\.mov)/i
- matches = video_url_regex.match(page_data)
+ def parse_video_id()
+ video_id_regex = /(\d+)$/
+ matches = video_id_regex.match(@url)
- if not matches.nil? and (matches.length > 1)
+ if matches.nil? or (matches.length < 2)
+ raise StandardError.new("Couldn't parse the video id from the URL.")
+ else
return matches[1]
end
+ end
- # I've seen some free software videos encoded as OGG/Vorbis, too.
- video_url_regex = /"Ogg Theora\/Vorbis \(\.og[gv]\)", "attribute" : "(.*?\.og[gv])/i
- matches = video_url_regex.match(page_data)
-
- if not matches.nil? and (matches.length > 1)
- return matches[1]
- end
- # If that didn't work, try the WMV format, which is occasionally
- # used for the source as well.
- video_url_regex = /"Windows Media \(\.wmv\)", "attribute" : "(.*?\.wmv)/i
- matches = video_url_regex.match(page_data)
+ def choose_best_format(formats)
+ # 'formats' is assumed to be an array of BliptvMediaFormat. We
+ # return the best one (in terms of video quality).
+ formats.each do |f|
+ if f.url.nil? or f.role.nil?
+ formats.delete(f)
+ next
+ end
- if not matches.nil? and (matches.length > 1)
- return matches[1]
+ return f if f.role == "Source"
end
+ if formats.length == 0
+ raise StandardError.new("No valid formats in the RSS feed.")
+ else
+ # Return whatever's left if we don't have a 'Source' video.
+ return formats[0]
+ end
+ end
- # If neither of the source formats are present, just grab the
- # video URL from the Flash variable and be done with it.
- video_url_regex = /setPrimaryMediaUrl\("(.*?\.(flv|mov|wmv|mp4|og[gv]))/i
- matches = video_url_regex.match(page_data)
-
- if matches.nil? or (matches.length < 2)
- raise StandardError.new("Couldn't parse any of the video format URLs.")
+ def parse_video_url(page_data)
+ # All of the elements containing video URLs begin like this.
+ media_regex = /^\s*<media:content/
+
+ formats = []
+ # Create an array of BliptvMediaFormat from lines matching
+ # media_regex.
+ page_data.lines.each do |line|
+ if (line =~ media_regex)
+ bp = BliptvMediaFormat.new(line)
+ formats << bp
+ end
end
- return matches[1]
+ # And return the URL from the best one.
+ # choose_best_format will raise an error if need be.
+ return choose_best_format(formats).url
end
end