]> gitweb.michael.orlitzky.com - dead/whatever-dl.git/blob - src/websites/youtube.rb
0d9bf398338538a3dfd8e128e851a98680ca46ae
[dead/whatever-dl.git] / src / websites / youtube.rb
1 #
2 # Copyright Michael Orlitzky
3 #
4 # http://michael.orlitzky.com/
5 #
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
15 #
16 # http://www.fsf.org/licensing/licenses/gpl.html
17 #
18
19 require 'src/website'
20
21 # Needed to download the page, which is in turn
22 # needed because it contains the video URL.
23 require 'net/http'
24 require 'uri'
25
26
27 class Youtube < Website
28
# Matches the two Youtube URL shapes this class understands:
#
#   [http(s)://][www.]youtube.com/watch?v=video_id[&params][#]
#   [http(s)://][www.]youtube.com/v/video_id[&params][#]
#
# The pattern is anchored with \A and \z rather than ^ and $. The
# latter match at every line boundary, so a multi-line string that
# merely contains a Youtube URL on one of its lines would otherwise
# be accepted.
VALID_YOUTUBE_URL_REGEX = %r{\A(https?://)?(www\.)?youtube\.com/((watch\?v=)|(v/))[[:alnum:]]+(&.*)?\#?\z}

# Decide whether or not this class can handle the given URL.
#
# url - the URL string to test.
#
# Returns a truthy value (the match offset, which is always 0) when
# the URL is a supported Youtube URL, and nil otherwise.
def self.owns_url?(url)
  url =~ VALID_YOUTUBE_URL_REGEX
end
34
35
# Build the direct download URL for the video that +url+ points to.
#
# The video id is taken from +url+, but the page that gets fetched is
# always the canonical /watch?v=video_id page. The /v/video_id form is
# accepted as input, yet only the watch page contains the "t"
# parameter that the final URL needs.
#
# url - a Youtube URL in either supported form.
#
# Returns the download URL as a String.
def get_video_url(url)
  id = parse_video_id(url)

  # Fetch the watch page and dig the "t" token out of it.
  watch_page = get_page_data("http://www.youtube.com/watch?v=#{id}")
  token = parse_t_parameter(watch_page)

  "http://www.youtube.com/get_video?video_id=#{id}&t=#{token}"
end
54
55
56 protected;
57
# Get the video id from the URL.
#
# Two URL shapes are understood, tried in this order:
#
#   1. /watch?v=video_id  (the "first form")
#   2. /v/video_id        (the "second form")
#
# Either one may have extra parameters such as "&hl=en" tacked on to
# the end, so the id is deliberately NOT anchored to the end of the
# string; it simply stops at the first character that cannot be part
# of an id. Dashes and underscores are treated as id characters so
# that an id containing them is never cut short.
#
# url - the URL string to parse.
#
# Returns the video id as a String, or nil if neither form matches.
def parse_video_id(url)
  # Requiring "?" or "&" before "v=" keeps us from picking up some
  # other parameter whose name merely ends in "v".
  matches = %r{[?&]v=([[:alnum:]_-]+)}.match(url) ||
            %r{/v/([[:alnum:]_-]+)}.match(url)

  matches && matches[1]
end
81
82
# Pull the "t" parameter out of the video page's contents. The value
# lives in a chunk of JSON on the page, in the form "t": "value", and
# the final video URL needs it.
#
# page_data - the contents of the video's page, as a String.
#
# Returns the value of the first "t" entry found, or nil if the page
# contains none.
def parse_t_parameter(page_data)
  found = /\"t\"\:[[:space:]]\"([^\"]+?)\"/.match(page_data)
  found ? found[1] : nil
end
95
96
# Download the page at +url+ with a plain HTTP GET.
#
# url - the address of the page, as a String.
#
# Returns the body of the response.
def get_page_data(url)
  target = URI.parse(url)

  reply = Net::HTTP.start(target.host, target.port) { |connection| connection.get(target.request_uri) }

  reply.body
end
106
107 end