2 # Copyright Michael Orlitzky
4 # http://michael.orlitzky.com/
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # http://www.fsf.org/licensing/licenses/gpl.html
22 class Youtube
< Website
# Matches the YouTube URL shapes this class can handle, with or without
# the "http://" scheme and with any subdomain:
#
#   youtube.com/watch?v=<id>
#   youtube.com/v/<id>
#   youtube.com/<name>#<x>/<y>/<n>/<id>
#
# Optional "&..." parameters and a trailing "#" may follow the id.
#
# The pattern is anchored with \A and \z rather than ^ and $. In Ruby,
# ^ and $ match at every line boundary, so the old pattern accepted any
# multi-line string that merely contained a YouTube URL on one line.
VALID_YOUTUBE_URL_REGEX = %r{\A(http://)?([a-z0-9]+\.)?youtube\.com/((watch\?v=)|(v/)|([a-z]+\#[a-z]/[a-z]/[0-9]/))[a-z0-9_\-]+(&.*)?\#?\z}i

# Report whether +url+ is a YouTube URL that this class can download.
#
# url - the candidate URL (a String).
#
# Returns the match offset (always 0, which is truthy in Ruby) when the
# whole string is a supported YouTube URL, and nil otherwise.
def self.owns_url?(url)
  return url =~ VALID_YOUTUBE_URL_REGEX
end
# The @format variable just caches the format of the video we're
# downloading. Storing it will prevent us from having to calculate
# it more than once.
# Builds the download URL for the video: parses the video id, fetches
# the /watch?v= page, extracts the "t" parameter from it, and, when a
# preferred format is available, caches that format in @format and
# appends it to the URL as "&fmt=".
# NOTE(review): the enclosing `def` line, the closing `end` lines and
# whatever is done with video_url afterwards are not visible in this
# copy of the file -- confirm against the upstream source.
42 video_id
= self.parse_video_id()
44 # The video's URL (the "page data" URL) may be different from the
45 # URL that was passed to the program. We support the /v/video_id
46 # URL format, but that is *not* the main video page where we can
47 # retrieve the "t" parameter. We can only get that from the
48 # /watch?v=video_id form.
49 page_data_url
= "http://www.youtube.com/watch?v=#{video_id}"
50 page_data
= self.get_page_data(page_data_url
)
53 t_parameter
= self.parse_t_parameter(page_data
)
55 video_url
= "http://www.youtube.com/get_video?video_id=#{video_id}&t=#{t_parameter}"
57 # Figure out which formats are available, and if any are,
58 # choose the best one.
59 available_formats
= get_available_formats(page_data
)
60 desired_format
= get_desired_format(available_formats
)
62 if not desired_format
.nil?
63 # First we cache the format so that when we're asked for the
64 # video filename later, we don't have to recompute the format.
65 @format = desired_format
67 # And then stick the format parameter on the end of the URL.
68 video_url
= video_url +
"&fmt=#{desired_format}"
# Return the video's filename: the parsed video id followed by a file
# extension. The extension depends on the cached @format value; the
# check below singles out formats 18, 22, 35 and 37.
# NOTE(review): the statements that assign `extension` (including the
# .flv default mentioned below) and the closing `end` lines are not
# visible in this copy of the file -- confirm against the upstream
# source before relying on this method.
75 def get_video_filename()
76 # The format -> extension mapping is available on Wikipedia:
78 # http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
80 # The default extension is .flv.
83 if [18, 22, 35, 37].include?(@format)
89 return (self.parse_video_id() + extension
)
# Get the video id from the URL stored in @url. Should be relatively
# easy, unless Youtube supports some URL formats of which I'm unaware.
#
# Returns the video id as a String.
# Raises StandardError if no known URL form yields an id.
def parse_video_id()
  # The URL forms are fairly easy to parse if you handle them one at
  # a time, in order. The only tricky situation is when parameters
  # like "&hl=en" are tacked on to the end.
  url_forms = [
    # First form: /watch?v=video_id. The \b keeps "v=" from matching
    # the tail of a longer parameter name, e.g. the "rev=2" in
    # /v/video_id&rev=2.
    /\bv=([0-9a-z_\-]+)/i,

    # Second form: /v/video_id.
    /\/v\/([0-9a-z_\-]+)/i,

    # Third form: the id is the final path segment of the URL. The
    # underscore and dash are allowed here because the other two
    # forms accept them in ids as well.
    /\/([[:alnum:]_\-]+)$/i
  ]

  url_forms.each do |form_regex|
    form_matches = form_regex.match(@url)
    return form_matches[1] unless form_matches.nil?
  end

  # If we made it here, we couldn't figure out the video id. Yes,
  # this is fatal, since we don't know where the video file is
  # located.
  raise StandardError.new("Could not parse the video id.")
end
# Parse out the "t" parameter from the video's page. I'm not sure
# what "t" stands for, but it's located in some JSON, and is required
# for the final video URL to work.
#
# page_data - the text of the video's /watch?v= page.
#
# Returns the value of the "t" key as a String, or nil when the page
# contains no such key.
def parse_t_parameter(page_data)
  # JSON allows any amount of whitespace (including none) around the
  # colon, so don't insist on exactly one space after it.
  t_parameter_regex = /"t"[[:space:]]*:[[:space:]]*"([^"]+?)"/
  matches = t_parameter_regex.match(page_data)

  # The pattern has exactly one capture group, so a successful match
  # always carries the value in matches[1].
  return matches.nil? ? nil : matches[1]
end
# Parse the list of available formats from the "fmt_list" Flash
# variable embedded in the video's page.
#
# page_data - the text of the video's /watch?v= page.
#
# Returns an Array of Integer format identifiers, in page order.
# Raises StandardError if the page contains no "fmt_list" entry.
def get_available_formats(page_data)
  # JSON allows any amount of whitespace (including none) around the
  # colon, so don't insist on exactly one space after it.
  fmt_list_regex = /"fmt_list"[[:space:]]*:[[:space:]]*"([^"]+?)"/
  matches = fmt_list_regex.match(page_data)

  if matches.nil?
    raise StandardError.new("Could not find any valid formats.")
  end

  fmts_string = CGI::unescape(matches[1])

  # Each comma-separated "fmt" is a slash-delimited record (for
  # example "22/2000000/9/0/115") with the format identifier coming
  # before the first slash. String#to_i reads the leading digits and
  # stops at the first non-digit, so it extracts the identifier
  # without having to locate the slash -- and still works for an
  # entry that has no slash at all.
  return fmts_string.split(',').map { |fmt| fmt.to_i }
end
# Choose the format to download from the formats a video offers.
#
# available_formats - an Array of Integer format identifiers.
#
# Returns the most preferred identifier that is available, or nil if
# none of the known formats is present.
def get_desired_format(available_formats)
  # Known formats, in order of preference (quality), best first. The
  # first one found to be available is therefore the best available
  # format.
  formats_by_preference = [37, 22, 35, 18, 34, 17]

  return formats_by_preference.find do |candidate|
    available_formats.include?(candidate)
  end
end