From 3bf657c5b1de3e1a9f718212ed7090a5e073b57d Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Fri, 14 Oct 2011 16:10:24 -0400 Subject: [PATCH] Add a fix for Youtube's itag parameter. Make the fmt_stream_url selection smarter. --- src/websites/youtube.rb | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/src/websites/youtube.rb b/src/websites/youtube.rb index c51ba92..f9fd05a 100644 --- a/src/websites/youtube.rb +++ b/src/websites/youtube.rb @@ -66,20 +66,16 @@ class Youtube < Website # We assume that all available formats will have an entry in the # fmt_url_map hash. video_url = fmt_url_map[desired_format] - return video_url rescue StandardError => e # If at first you do not succeed, maybe someone decided to # change some shit. This alternate method parses # url_encoded_fmt_stream_map. fmt_streams = get_fmt_stream_list(page_data) - video_url = self.unicode_unescape(fmt_streams[0]) - video_url = CGI::unescape(video_url) + video_url = self.choose_best_fmt_stream_url(fmt_streams) - # Strip off everything after the first space in the URL. - # I don't know why this works, but if we leave the space - # in (encoded, even), Youtube throws us 403 errors. - video_url.gsub!(/ .+$/, '') + # The "itag" parameter makes the 403 happen. + video_url.gsub!(/itag=\d+&/, '') end return video_url @@ -106,12 +102,30 @@ class Youtube < Website protected; + def choose_best_fmt_stream_url(fmt_stream_urls) + # Take a list, generated by get_fmt_stream_list(), and choose the + # best URL out of the bunch based on the video format. + fmt_stream_urls.each do |fs| + if fs =~ /video\/mp4/ and fs =~ /quality=large/ + return fs + elsif fs =~ /quality=large/ + return fs + elsif fs =~ /video\/mp4/ + return fs + else + return fs + end + end + end + + def unicode_unescape(string) # Unescape sequences like '\u0026'. # Ok, only '\u0026' for now. return string.gsub('\u0026', '&') end + def get_fmt_stream_list(page_data) # This is another (new?) method of embedding the video URLs. # The url_encoded_fmt_stream_map variable contains a list of URLs @@ -130,6 +144,16 @@ class Youtube < Website urlstring = matches[1] urlstring.gsub!('url=', '') urls = urlstring.split(',') + + urls.each_index do |idx| + urls[idx] = self.unicode_unescape(urls[idx]) + urls[idx] = CGI::unescape(urls[idx]) + # Strip off everything after the first space in the URL. + # I don't know why this works, but if we leave the space + # in (encoded, even), Youtube throws us 403 errors. + urls[idx].gsub!(/ .+$/, '') + end + return urls end -- 2.44.2