From a557fae384ed2c8f8782ed09e5428e2cf701acf6 Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Wed, 31 Mar 2010 14:57:08 -0400 Subject: [PATCH] Raise errors if the Youtube class can't parse either the video id or the format list. Add video id parsing for the new URL format. --- src/websites/youtube.rb | 46 ++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/src/websites/youtube.rb b/src/websites/youtube.rb index ee9a115..5f87754 100644 --- a/src/websites/youtube.rb +++ b/src/websites/youtube.rb @@ -95,9 +95,6 @@ class Youtube < Website # Get the video id from the URL. Should be relatively easy, # unless Youtube supports some URL formats of which I'm unaware. def parse_video_id() - # Return nil if we get no matches below. - video_id = nil - # Both URLs are fairly easy to parse if you handle # them one at a time. The only tricky situation is when # parameters like "&hl=en" are tacked on to the end. @@ -110,10 +107,19 @@ class Youtube < Website # First form didn't work? Try the second. second_form_video_id_regex = /\/v\/([0-9a-z_\-]+)/i second_form_matches = second_form_video_id_regex.match(@url) - video_id = second_form_matches[1] if not (second_form_matches.nil? || - second_form_matches.length < 2) - - return video_id + return second_form_matches[1] if not (second_form_matches.nil? || + second_form_matches.length < 2) + + # ...and the third. + third_form_video_id_regex = /\/([[:alnum:]]+)$/i + third_form_matches = third_form_video_id_regex.match(@url) + return third_form_matches[1] if not (third_form_matches.nil? || + third_form_matches.length < 2) + + # If we made it here, we couldn't figure out the video id. Yes, + # this is fatal, since we don't know where the video file is + # located. + raise StandardError.new("Could not parse the video id.") end @@ -139,20 +145,18 @@ class Youtube < Website matches = fmt_list_regex.match(page_data) if matches.nil? - return nil - else - fmts_string = CGI::unescape(matches[1]) - - fmts_string.split(',').each do |fmt| - # Each "fmt" will look something like, - # - # 35/640000/9/0/115 - # - # with the format identifier coming before the first slash. - first_slash_idx = fmt.index('/') - available_formats << fmt[0...first_slash_idx].to_i - end - + raise StandardError.new("Could not find any valid formats.") + end + + fmts_string = CGI::unescape(matches[1]) + fmts_string.split(',').each do |fmt| + # Each "fmt" will look something like, + # + # 35/640000/9/0/115 + # + # with the format identifier coming before the first slash. + first_slash_idx = fmt.index('/') + available_formats << fmt[0...first_slash_idx].to_i end return available_formats -- 2.44.2