From 14b76291067609e454d5c18e9ce8bc2a0d09987e Mon Sep 17 00:00:00 2001
From: Michael Orlitzky
Date: Wed, 13 Jan 2010 23:12:50 -0500
Subject: [PATCH] Added high-definition video support to the Youtube class.

---
 src/websites/youtube.rb | 85 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 82 insertions(+), 3 deletions(-)

diff --git a/src/websites/youtube.rb b/src/websites/youtube.rb
index c8a1cad..532d8ca 100644
--- a/src/websites/youtube.rb
+++ b/src/websites/youtube.rb
@@ -17,7 +17,7 @@
 #
 
 require 'src/website'
-
+require 'cgi'
 
 class Youtube < Website
 
@@ -27,6 +27,16 @@ class Youtube < Website
     return url =~ VALID_YOUTUBE_URL_REGEX
   end
 
+
+  def initialize(url)
+    super
+
+    # The @format variable just caches the format of the video we're
+    # downloading. Storing it will prevent us from having to calculate
+    # it twice.
+    @format = 0
+  end
+
 
   def get_video_url()
     video_id = self.parse_video_id()
@@ -43,13 +53,40 @@ class Youtube < Website
 
     t_parameter = self.parse_t_parameter(page_data)
     video_url = "http://www.youtube.com/get_video?video_id=#{video_id}&t=#{t_parameter}"
-
+
+    # Figure out which formats are available, and if any are,
+    # choose the best one.
+    available_formats = get_available_formats(page_data)
+    desired_format = get_desired_format(available_formats)
+
+    if not desired_format.nil?
+      # First we cache the format so that when we're asked for the
+      # video filename later, we don't have to recompute the format.
+      @format = desired_format
+
+      # And then stick the format parameter on the end of the URL.
+      video_url = video_url + "&fmt=#{desired_format}"
+    end
+
     return video_url
   end
 
 
   def get_video_filename()
-    return (self.parse_video_id() + '.flv')
+    # The format -> extension mapping is available on Wikipedia:
+    #
+    #   http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
+    #
+    # The default extension is .flv.
+    extension = '.flv'
+
+    if [18, 22, 35, 37].include?(@format)
+      extension = '.mp4'
+    elsif (@format == 17)
+      extension = '.3gp'
+    end
+
+    return (self.parse_video_id() + extension)
   end
 
 
@@ -93,5 +130,47 @@ class Youtube < Website
 
     return t_parameter
   end
+
+  def get_available_formats(page_data)
+    # Parse the list of available formats from the "fmt_list" Flash
+    # variable. Returns an empty array when the page has no fmt_list.
+    available_formats = []
+    fmt_list_regex = /\"fmt_list\"\:[[:space:]]\"([^\"]+?)\"/
+    matches = fmt_list_regex.match(page_data)
+
+    if matches.nil?
+      return []
+    else
+      fmts_string = CGI::unescape(matches[1])
+
+      fmts_string.split(',').each do |fmt|
+        # Each "fmt" will look something like,
+        #
+        #   35/640000/9/0/115
+        #
+        # with the format identifier before the first slash (if any).
+        first_slash_idx = fmt.index('/') || fmt.length
+        available_formats << fmt[0...first_slash_idx].to_i
+      end
+
+    end
+
+    return available_formats
+  end
+
+
+  def get_desired_format(available_formats)
+    # Check for the presence of formats, in order of preference
+    # (quality). That is, we check for the best formats first. As soon
+    # as a format is found to be available, we return it as the
+    # desired format, since the first format we find is going to be
+    # the best available format.
+    return 37 if available_formats.include?(37)
+    return 22 if available_formats.include?(22)
+    return 35 if available_formats.include?(35)
+    return 18 if available_formats.include?(18)
+    return 34 if available_formats.include?(34)
+    return 17 if available_formats.include?(17)
+  end
 
 end
-- 
2.44.2