# We assume that all available formats will have an entry in the
# fmt_url_map hash.
video_url = fmt_url_map[desired_format]
-
return video_url
- rescue StandardError => e
+ rescue StandardError
# If at first you do not succeed, maybe someone decided to
# change some shit. This alternate method parses
# url_encoded_fmt_stream_map.
fmt_streams = get_fmt_stream_list(page_data)
- video_url = self.unicode_unescape(fmt_streams[0])
- video_url = CGI::unescape(video_url)
+ video_url = self.choose_best_fmt_stream_url(fmt_streams)
- # Strip off everything after the first space in the URL.
- # I don't know why this works, but if we leave the space
- # in (encoded, even), Youtube throws us 403 errors.
- video_url.gsub!(/ .+$/, '')
+ # A duplicated "itag" parameter results in a 403.
+ itag_regex = /&itag=\d+/
+ matches = video_url.scan(itag_regex)
+
+ if matches.length > 1
+ # Get rid of the first occurrence.
+ video_url.sub!(itag_regex, '')
+ end
end
return video_url
protected;
def choose_best_fmt_stream_url(fmt_stream_urls)
  # Take a list, generated by get_fmt_stream_list(), and choose the
  # best URL out of the bunch based on the video format.
  #
  # Preference order (first match in list order wins within each tier):
  #   1. an MP4 stream with quality=large
  #   2. any stream with quality=large
  #   3. any MP4 stream
  #   4. whatever comes first in the list
  #
  # Returns the chosen URL string, or nil if the list is empty.
  mp4_pattern = /video\/mp4/
  large_pattern = /quality=large/

  # Each tier scans the whole list before falling through to the next
  # one; returning from inside a single pass would always yield the
  # first element regardless of its format.
  fmt_stream_urls.find { |fs| fs =~ mp4_pattern && fs =~ large_pattern } ||
    fmt_stream_urls.find { |fs| fs =~ large_pattern } ||
    fmt_stream_urls.find { |fs| fs =~ mp4_pattern } ||
    fmt_stream_urls.first
end
+
+
def unicode_unescape(string)
  # Turn the literal escape sequence '\u0026' (backslash, 'u', four
  # digits) back into an ampersand. No other escape sequence is
  # handled. A new string is returned; the argument is left untouched.
  escaped_ampersand = /\\u0026/
  string.gsub(escaped_ampersand) { '&' }
end
+
def get_fmt_stream_list(page_data)
# This is another (new?) method of embedding the video URLs.
# The url_encoded_fmt_stream_map variable contains a list of URLs
urlstring = matches[1]
urlstring.gsub!('url=', '')
urls = urlstring.split(',')
+
+ urls.each_index do |idx|
+ urls[idx] = self.unicode_unescape(urls[idx])
+ urls[idx] = CGI::unescape(urls[idx])
+ # Strip off everything after the first space in the URL.
+ # I don't know why this works, but if we leave the space
+ # in (encoded, even), Youtube throws us 403 errors.
+ urls[idx].gsub!(/ .+$/, '')
+ end
+
return urls
end
# We'll call /watch?v=video_id the "first form."
first_form_video_id_regex = /v=([0-9a-z_\-]+)/i
first_form_matches = first_form_video_id_regex.match(@url)
- return first_form_matches[1] if not (first_form_matches.nil? ||
- first_form_matches.length < 2)
+ if not first_form_matches.nil? || first_form_matches.length < 2
+ return first_form_matches[1]
+ end
# First form didn't work? Try the second.
second_form_video_id_regex = /\/v\/([0-9a-z_\-]+)/i
second_form_matches = second_form_video_id_regex.match(@url)
- return second_form_matches[1] if not (second_form_matches.nil? ||
- second_form_matches.length < 2)
+ if not second_form_matches.nil? || second_form_matches.length < 2
+ return second_form_matches[1]
+ end
# ...and the third.
third_form_video_id_regex = /\/([[:alnum:]]+)$/i
third_form_matches = third_form_video_id_regex.match(@url)
- return third_form_matches[1] if not (third_form_matches.nil? ||
- third_form_matches.length < 2)
+ if not third_form_matches.nil? || third_form_matches.length < 2
+ return third_form_matches[1]
+ end
# If we made it here, we couldn't figure out the video id. Yes,
# this is fatal, since we don't know where the video file is