X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2Fwebsites%2Fyoutube.rb;h=ed5ab4b8a96efa62ee49379caf205f9b89c5e69c;hb=dbde07faafd93f8a9503c1fedfe317723da7cac5;hp=ee9a1152c2a6c9e05f230a1c9dfacc12823bb545;hpb=3dbd698397f041569cb729822a5ea7e909af62a5;p=dead%2Fwhatever-dl.git diff --git a/src/websites/youtube.rb b/src/websites/youtube.rb index ee9a115..ed5ab4b 100644 --- a/src/websites/youtube.rb +++ b/src/websites/youtube.rb @@ -95,9 +95,6 @@ class Youtube < Website # Get the video id from the URL. Should be relatively easy, # unless Youtube supports some URL formats of which I'm unaware. def parse_video_id() - # Return nil if we get no matches below. - video_id = nil - # Both URLs are fairly easy to parse if you handle # them one at a time. The only tricky situation is when # parameters like "&hl=en" are tacked on to the end. @@ -110,49 +107,68 @@ class Youtube < Website # First form didn't work? Try the second. second_form_video_id_regex = /\/v\/([0-9a-z_\-]+)/i second_form_matches = second_form_video_id_regex.match(@url) - video_id = second_form_matches[1] if not (second_form_matches.nil? || - second_form_matches.length < 2) - - return video_id + return second_form_matches[1] if not (second_form_matches.nil? || + second_form_matches.length < 2) + + # ...and the third. + third_form_video_id_regex = /\/([[:alnum:]]+)$/i + third_form_matches = third_form_video_id_regex.match(@url) + return third_form_matches[1] if not (third_form_matches.nil? || + third_form_matches.length < 2) + + # If we made it here, we couldn't figure out the video id. Yes, + # this is fatal, since we don't know where the video file is + # located. + raise StandardError.new("Could not parse the video id.") end # Parse out the "t" parameter from the video's page. I'm not sure - # what "t" stands for, but it's located in some JSON, and is required - # for the final video URL to work. + # what "t" stands for, but it's required for the final video URL to + # work. It can be stored in either JSON or URL parameters. def parse_t_parameter(page_data) t_parameter = nil - t_parameter_regex = /\"t\"\:[[:space:]]\"([^\"]+?)\"/ - matches = t_parameter_regex.match(page_data) - t_parameter = matches[1] if not (matches.nil? || matches.length < 2) + t_parameter_regexes = [ /\"t\"\:[[:space:]]\"([^\"]+?)\"/, + /&t=([^&\"\\]+)/ ] + matches = t_parameter_regexes.map { |tpr| tpr.match(page_data) } + + if matches.nitems == 0 + raise StandardError.new("Could not parse the 't' parameter.") + end + + first_match = matches.compact[0] + t_parameter = CGI::unescape(first_match[1]) return t_parameter end def get_available_formats(page_data) - # Parse the list of available formats from the "fmt_list" Flash - # variable. + # Parse the list of available formats from the "fmt_list" + # variable. It can be stored as either a Flash variable (JSON + # notation), or as URL parameter. available_formats = [] - fmt_list_regex = /\"fmt_list\"\:[[:space:]]\"([^\"]+?)\"/ - matches = fmt_list_regex.match(page_data) - - if matches.nil? - return nil - else - fmts_string = CGI::unescape(matches[1]) - - fmts_string.split(',').each do |fmt| - # Each "fmt" will look something like, - # - # 35/640000/9/0/115 - # - # with the format identifier coming before the first slash. - first_slash_idx = fmt.index('/') - available_formats << fmt[0...first_slash_idx].to_i - end - + fmt_list_regexes = [ /\"fmt_list\"\:[[:space:]]\"([^\"]+?)\"/, + /fmt_list=([^&\"\\]+)/ ] + + matches = fmt_list_regexes.map { |flr| flr.match(page_data) } + + if matches.nitems == 0 + raise StandardError.new("Could not find any valid formats.") + end + + first_match = matches.compact[0] + fmts_string = CGI::unescape(first_match[1]) + + fmts_string.split(',').each do |fmt| + # Each "fmt" will look something like, + # + # 35/640000/9/0/115 + # + # with the format identifier coming before the first slash. + first_slash_idx = fmt.index('/') + available_formats << fmt[0...first_slash_idx].to_i end return available_formats