X-Git-Url: http://gitweb.michael.orlitzky.com/?p=dead%2Fwhatever-dl.git;a=blobdiff_plain;f=src%2Fwebsites%2Fyoutube.rb;h=c51ba9264e1ac056845e3c8c3d67dafe44bc1dd3;hp=c2a37d603e59c1e210c52c0a33627b81df37c711;hb=e72d484c8bf3e719f3f65ada1398772853836a56;hpb=9a0f311962f0348bb676e513e18b378e4ad11086 diff --git a/src/websites/youtube.rb b/src/websites/youtube.rb index c2a37d6..c51ba92 100644 --- a/src/websites/youtube.rb +++ b/src/websites/youtube.rb @@ -49,22 +49,38 @@ class Youtube < Website page_data_url = "http://www.youtube.com/watch?v=#{video_id}" page_data = self.get_page_data(page_data_url) - # Get the URL map from the page. - fmt_url_map = get_format_url_map(page_data) - - # Figure out which formats are available, and if any are, - # choose the best one. - available_formats = fmt_url_map.keys() - desired_format = get_desired_format(available_formats) - - # First we cache the format so that when we're asked for the - # video filename later, we don't have to recompute the format. - @format = desired_format - - # And then use whatever URL is available for the desired format. - # We assume that all available formats will have an entry in the - # fmt_url_map hash. - video_url = fmt_url_map[desired_format] + begin + # Get the URL map from the page. + fmt_url_map = get_format_url_map(page_data) + + # Figure out which formats are available, and if any are, + # choose the best one. + available_formats = fmt_url_map.keys() + desired_format = get_desired_format(available_formats) + + # First we cache the format so that when we're asked for the + # video filename later, we don't have to recompute the format. + @format = desired_format + + # And then use whatever URL is available for the desired format. + # We assume that all available formats will have an entry in the + # fmt_url_map hash. + video_url = fmt_url_map[desired_format] + + return video_url + rescue StandardError => e + # If at first you do not succeed, maybe someone decided to + # change some shit. This alternate method parses + # url_encoded_fmt_stream_map. + fmt_streams = get_fmt_stream_list(page_data) + video_url = self.unicode_unescape(fmt_streams[0]) + video_url = CGI::unescape(video_url) + + # Strip off everything after the first space in the URL. + # I don't know why this works, but if we leave the space + # in (encoded, even), Youtube throws us 403 errors. + video_url.gsub!(/ .+$/, '') + end return video_url end @@ -90,6 +106,34 @@ class Youtube < Website protected; + def unicode_unescape(string) + # Unescape sequences like '\u0026'. + # Ok, only '\u0026' for now. + return string.gsub('\u0026', '&') + end + + def get_fmt_stream_list(page_data) + # This is another (new?) method of embedding the video URLs. + # The url_encoded_fmt_stream_map variable contains a list of URLs + # in the form url=foo1,url=foo2... + # + # It looks like the first one in the list is the highest + # quality? Let's just take that one for now. + fmt_stream_regex = /\"url_encoded_fmt_stream_map\": \"(.+?)\"/ + + matches = fmt_stream_regex.match(page_data) + + if (matches.nil? || matches.length < 2) + raise StandardError.new("Could not parse the url_encoded_fmt_stream_map Flash variable.") + end + + urlstring = matches[1] + urlstring.gsub!('url=', '') + urls = urlstring.split(',') + return urls + end + + # Get the video id from the URL. Should be relatively easy, # unless Youtube supports some URL formats of which I'm unaware. def parse_video_id()