#
# Copyright Michael Orlitzky
#
# http://michael.orlitzky.com/
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# http://www.fsf.org/licensing/licenses/gpl.html
#

require 'src/website'

# Needed to download the page, which is in turn
# needed because it contains the video URL.
require 'net/http'
require 'uri'


# Website implementation for Youtube. Given a /watch?v=video_id or
# /v/video_id URL, it downloads the video's page, extracts the "t"
# parameter from it, and builds the get_video URL from the two.
class Youtube < Website

  # Matches the two URL forms we support, /watch?v=video_id and
  # /v/video_id, with an optional scheme, an optional "www." and
  # optional trailing "&..." parameters. Video ids may contain
  # hyphens and underscores in addition to letters and digits.
  VALID_YOUTUBE_URL_REGEX = /^(http:\/\/)?(www\.)?youtube\.com\/((watch\?v=)|(v\/))[[:alnum:]_-]+(\&.*)?$/

  # Does this class know how to handle +url+?
  #
  # Returns the result of matching +url+ against
  # VALID_YOUTUBE_URL_REGEX with =~, i.e. the (truthy) offset of the
  # match, or nil if there was none.
  def self.owns_url?(url)
    return url =~ VALID_YOUTUBE_URL_REGEX
  end


  # Build the URL of the video file belonging to the page at +url+.
  #
  # Performs one HTTP GET (via get_page_data) to fetch the video's
  # page. Returns the get_video URL as a string. If the video id or
  # the "t" parameter can't be parsed, the corresponding part of the
  # returned URL is simply empty.
  def get_video_url(url)
    video_id = self.parse_video_id(url)

    # The video's URL (the "page data" URL) may be different from the
    # URL that was passed to the program. We support the /v/video_id
    # URL format, but that is *not* the main video page where we can
    # retrieve the "t" parameter. We can only get that from the
    # /watch?v=video_id form.
    page_data_url = "http://www.youtube.com/watch?v=#{video_id}"
    page_data = self.get_page_data(page_data_url)

    # The "t" parameter is buried in the page's JSON, and the
    # final URL doesn't work without it.
    t_parameter = self.parse_t_parameter(page_data)

    return "http://www.youtube.com/get_video?video_id=#{video_id}&t=#{t_parameter}"
  end


  protected

  # Get the video id from the URL. Should be relatively easy,
  # unless Youtube supports some URL formats of which I'm unaware.
  #
  # Returns the video id as a string, or nil if +url+ is in neither
  # of the supported forms.
  def parse_video_id(url)
    # Both URLs are fairly easy to parse if you handle them one at a
    # time. We'll call /watch?v=video_id the "first form."
    #
    # The match is deliberately *not* anchored to the end of the
    # string: parameters like "&hl=en" can be tacked on after the id,
    # and an end anchor would either miss the id entirely or pick up
    # the value of some later parameter whose name ends in "v".
    # Requiring a "?" or "&" before the "v" ensures that we match the
    # parameter named "v" and not, say, "rv".
    first_form_matches = /[?&]v=([[:alnum:]_-]+)/.match(url)
    return first_form_matches[1] if first_form_matches

    # First form didn't work? Try the second, /v/video_id.
    second_form_matches = /\/v\/([[:alnum:]_-]+)/.match(url)
    return second_form_matches[1] if second_form_matches

    # Neither form matched.
    return nil
  end


  # Parse out the "t" parameter from the video's page. I'm not sure
  # what "t" stands for, but it's located in some JSON, and is required
  # for the final video URL to work.
  #
  # Returns the parameter's value as a string, or nil if it can't be
  # found in +page_data+.
  def parse_t_parameter(page_data)
    matches = /\"t\"\:[[:space:]]\"([[:alnum:]]+)\"/.match(page_data)
    return nil if matches.nil?

    return matches[1]
  end


  # Download the page at +url+ with an HTTP GET and return the body
  # of the response. Exceptions raised by URI.parse or Net::HTTP are
  # not rescued here.
  def get_page_data(url)
    uri = URI.parse(url)

    response = Net::HTTP.start(uri.host, uri.port) do |http|
      http.get(uri.request_uri)
    end

    return response.body
  end

end