# Get the video id from the URL. Should be relatively easy,
# unless Youtube supports some URL formats of which I'm unaware.
def parse_video_id()
- # Return nil if we get no matches below.
- video_id = nil
-
# Both URLs are fairly easy to parse if you handle
# them one at a time. The only tricky situation is when
# parameters like "&hl=en" are tacked on to the end.
# First form didn't work? Try the second.
second_form_video_id_regex = /\/v\/([0-9a-z_\-]+)/i
second_form_matches = second_form_video_id_regex.match(@url)
- video_id = second_form_matches[1] if not (second_form_matches.nil? ||
- second_form_matches.length < 2)
-
- return video_id
+ return second_form_matches[1] if not (second_form_matches.nil? ||
+ second_form_matches.length < 2)
+
+ # ...and the third: the id is the final path segment of the URL.
+ # Accept '_' and '-' in addition to alphanumerics so that this form
+ # recognizes the same id characters as the second form above;
+ # otherwise an id containing either character would fall through
+ # to the fatal error below.
+ third_form_video_id_regex = /\/([[:alnum:]_\-]+)$/i
+ third_form_matches = third_form_video_id_regex.match(@url)
+ return third_form_matches[1] if not (third_form_matches.nil? ||
+ third_form_matches.length < 2)
+
+ # If we made it here, we couldn't figure out the video id. Yes,
+ # this is fatal, since we don't know where the video file is
+ # located.
+ raise StandardError.new("Could not parse the video id.")
end
# Parse out the "t" parameter from the video's page. I'm not sure
- # what "t" stands for, but it's located in some JSON, and is required
- # for the final video URL to work.
+ # what "t" stands for, but it's required for the final video URL to
+ # work. It can be stored in either JSON or URL parameters.
def parse_t_parameter(page_data)
t_parameter = nil
- t_parameter_regex = /\"t\"\:[[:space:]]\"([^\"]+?)\"/
- matches = t_parameter_regex.match(page_data)
- t_parameter = matches[1] if not (matches.nil? || matches.length < 2)
+ # Try the JSON form ("t": "...") first, then the URL-parameter
+ # form (&t=...). Each entry of "matches" is a MatchData, or nil
+ # if that regex did not match.
+ t_parameter_regexes = [ /\"t\"\:[[:space:]]\"([^\"]+?)\"/,
+ /&t=([^&\"\\]+)/ ]
+ matches = t_parameter_regexes.map { |tpr| tpr.match(page_data) }
+
+ # Use the first regex that matched. Array#nitems is unavailable
+ # on Ruby 1.9 and later, so look at the compacted list instead of
+ # counting the non-nil entries.
+ first_match = matches.compact[0]
+
+ if first_match.nil?
+ raise StandardError.new("Could not parse the 't' parameter.")
+ end
+
+ # The captured value may be URL-encoded; decode it before use.
+ t_parameter = CGI::unescape(first_match[1])
return t_parameter
end
def get_available_formats(page_data)
- # Parse the list of available formats from the "fmt_list" Flash
- # variable.
+ # Parse the list of available formats from the "fmt_list"
+ # variable. It can be stored as either a Flash variable (JSON
+ # notation), or as URL parameter.
available_formats = []
- fmt_list_regex = /\"fmt_list\"\:[[:space:]]\"([^\"]+?)\"/
- matches = fmt_list_regex.match(page_data)
-
- if matches.nil?
- return nil
- else
- fmts_string = CGI::unescape(matches[1])
-
- fmts_string.split(',').each do |fmt|
- # Each "fmt" will look something like,
- #
- # 35/640000/9/0/115
- #
- # with the format identifier coming before the first slash.
- first_slash_idx = fmt.index('/')
- available_formats << fmt[0...first_slash_idx].to_i
- end
-
+ fmt_list_regexes = [ /\"fmt_list\"\:[[:space:]]\"([^\"]+?)\"/,
+ /fmt_list=([^&\"\\]+)/ ]
+
+ matches = fmt_list_regexes.map { |flr| flr.match(page_data) }
+
+ if matches.nitems == 0
+ raise StandardError.new("Could not find any valid formats.")
+ end
+
+ first_match = matches.compact[0]
+ fmts_string = CGI::unescape(first_match[1])
+
+ fmts_string.split(',').each do |fmt|
+ # Each "fmt" will look something like,
+ #
+ # 35/640000/9/0/115
+ #
+ # with the format identifier coming before the first slash.
+ first_slash_idx = fmt.index('/')
+ available_formats << fmt[0...first_slash_idx].to_i
end
return available_formats