X-Git-Url: https://gitweb.michael.orlitzky.com/?a=blobdiff_plain;ds=sidebyside;f=src%2Fwebsites%2Fyoutube.rb;h=2ab4678c11dae6660c0128765281eefffd38e61b;hb=8e886df259246365023322b78f58e4037cb536a4;hp=c51ba9264e1ac056845e3c8c3d67dafe44bc1dd3;hpb=e72d484c8bf3e719f3f65ada1398772853836a56;p=dead%2Fwhatever-dl.git

diff --git a/src/websites/youtube.rb b/src/websites/youtube.rb
index c51ba92..2ab4678 100644
--- a/src/websites/youtube.rb
+++ b/src/websites/youtube.rb
@@ -66,20 +66,22 @@ class Youtube < Website
       # We assume that all available formats will have an entry in the
       # fmt_url_map hash.
       video_url = fmt_url_map[desired_format]
-
       return video_url
-    rescue StandardError => e
+    rescue StandardError
       # If at first you do not succeed, maybe someone decided to
       # change some shit. This alternate method parses
       # url_encoded_fmt_stream_map.
       fmt_streams = get_fmt_stream_list(page_data)
-      video_url = self.unicode_unescape(fmt_streams[0])
-      video_url = CGI::unescape(video_url)
+      video_url = self.choose_best_fmt_stream_url(fmt_streams)
 
-      # Strip off everything after the first space in the URL.
-      # I don't know why this works, but if we leave the space
-      # in (encoded, even), Youtube throws us 403 errors.
-      video_url.gsub!(/ .+$/, '')
+      # A duplicated "itag" parameter results in a 403.
+      itag_regex = /&itag=\d+/
+      matches = video_url.scan(itag_regex)
+
+      if matches.length > 1
+        # Get rid of the first occurrence.
+        video_url.sub!(itag_regex, '')
+      end
     end
 
     return video_url
@@ -106,12 +108,30 @@ class Youtube < Website
 
   protected;
 
+  def choose_best_fmt_stream_url(fmt_stream_urls)
+    # Take a list, generated by get_fmt_stream_list(), and choose the
+    # best URL out of the bunch based on the video format. The whole
+    # list is searched once per preference, best first: a large mp4,
+    # then anything large, then any mp4. If nothing matches, we fall
+    # back to the first URL in the list (nil if the list is empty).
+    mp4_regex = /video\/mp4/
+    large_regex = /quality=large/
+
+    large_urls = fmt_stream_urls.select { |fs| fs =~ large_regex }
+    mp4_urls = fmt_stream_urls.select { |fs| fs =~ mp4_regex }
+
+    best = large_urls.find { |fs| fs =~ mp4_regex }
+    return best || large_urls.first || mp4_urls.first || fmt_stream_urls.first
+  end
+
+
   def unicode_unescape(string)
     # Unescape sequences like '\u0026'.
     # Ok, only '\u0026' for now.
     return string.gsub('\u0026', '&')
   end
 
+
   def get_fmt_stream_list(page_data)
     # This is another (new?) method of embedding the video URLs.
     # The url_encoded_fmt_stream_map variable contains a list of URLs
@@ -130,6 +150,16 @@ class Youtube < Website
     urlstring = matches[1]
     urlstring.gsub!('url=', '')
     urls = urlstring.split(',')
+
+    urls.each_index do |idx|
+      urls[idx] = self.unicode_unescape(urls[idx])
+      urls[idx] = CGI::unescape(urls[idx])
+      # Strip off everything after the first space in the URL.
+      # I don't know why this works, but if we leave the space
+      # in (encoded, even), Youtube throws us 403 errors.
+      urls[idx].gsub!(/ .+$/, '')
+    end
+
     return urls
   end
 
@@ -143,20 +173,23 @@ class Youtube < Website
     # We'll call /watch?v=video_id the "first form."
     first_form_video_id_regex = /v=([0-9a-z_\-]+)/i
     first_form_matches = first_form_video_id_regex.match(@url)
-    return first_form_matches[1] if not (first_form_matches.nil? ||
-                                         first_form_matches.length < 2)
+    if not first_form_matches.nil? || first_form_matches.length < 2
+      return first_form_matches[1]
+    end
 
     # First form didn't work? Try the second.
     second_form_video_id_regex = /\/v\/([0-9a-z_\-]+)/i
     second_form_matches = second_form_video_id_regex.match(@url)
-    return second_form_matches[1] if not (second_form_matches.nil? ||
-                                          second_form_matches.length < 2)
+    if not second_form_matches.nil? || second_form_matches.length < 2
+      return second_form_matches[1]
+    end
 
     # ...and the third.
     third_form_video_id_regex = /\/([[:alnum:]]+)$/i
     third_form_matches = third_form_video_id_regex.match(@url)
-    return third_form_matches[1] if not (third_form_matches.nil? ||
-                                         third_form_matches.length < 2)
+    if not third_form_matches.nil? || third_form_matches.length < 2
+      return third_form_matches[1]
+    end
 
     # If we made it here, we couldn't figure out the video id. Yes,
     # this is fatal, since we don't know where the video file is