#
# Copyright Michael Orlitzky
#
# http://michael.orlitzky.com/
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# http://www.fsf.org/licensing/licenses/gpl.html
#

require 'src/website'

# Website subclass that knows how to turn a Youtube page URL into a
# downloadable video URL and a local filename.
#
# NOTE(review): @url and get_page_data() are not defined in this file;
# presumably both are provided by the Website superclass -- confirm
# against src/website.
class Youtube < Website

  # Accepts an optional "http://" scheme, an optional subdomain, and
  # either the /watch?v=video_id or the /v/video_id path, optionally
  # followed by "&"-separated parameters and a trailing "#".
  #
  # The pattern is anchored with \A and \Z rather than ^ and $. In
  # Ruby, ^ and $ match at the start/end of any *line*, so a multi-line
  # string containing one valid line would otherwise be accepted. \Z
  # still tolerates a single trailing newline, as $ did.
  VALID_YOUTUBE_URL_REGEX = /\A(http:\/\/)?([a-z0-9]+\.)?youtube\.com\/((watch\?v=)|(v\/))[a-z0-9_\-]+(\&.*)?\#?\Z/i

  # Does this class know how to handle the given URL?
  #
  # Returns the result of the regex match: the match offset (truthy)
  # when +url+ looks like a supported Youtube URL, nil otherwise.
  def self.owns_url?(url)
    return url =~ VALID_YOUTUBE_URL_REGEX
  end


  # Build the URL from which the video itself can be retrieved.
  #
  # Returns a get_video URL string containing the video id and the "t"
  # parameter scraped from the video's page. If either one could not
  # be parsed it is nil and interpolates as an empty string.
  def get_video_url()
    video_id = self.parse_video_id()

    # The video's URL (the "page data" URL) may be different from the
    # URL that was passed to the program. We support the /v/video_id
    # URL format, but that is *not* the main video page where we can
    # retrieve the "t" parameter. We can only get that from the
    # /watch?v=video_id form.
    page_data_url = "http://www.youtube.com/watch?v=#{video_id}"
    page_data = self.get_page_data(page_data_url)

    # Magic.
    t_parameter = self.parse_t_parameter(page_data)

    return "http://www.youtube.com/get_video?video_id=#{video_id}&t=#{t_parameter}"
  end


  # The local filename for the video: the video id plus ".flv".
  #
  # Raises NoMethodError if no video id can be parsed from the URL,
  # since parse_video_id() returns nil in that case.
  def get_video_filename()
    return (self.parse_video_id() + '.flv')
  end


  protected

  # Get the video id from the URL. Should be relatively easy,
  # unless Youtube supports some URL formats of which I'm unaware.
  #
  # Both URL forms are fairly easy to parse if you handle them one at
  # a time. The only tricky situation is when parameters like "&hl=en"
  # are tacked on to the end.
  #
  # Returns the video id string, or nil if neither form matches.
  def parse_video_id()
    # We'll call /watch?v=video_id the "first form." The "v" must be a
    # complete parameter name, i.e. directly preceded by "?" or "&".
    # Otherwise a parameter whose name merely ends in "v" (for example
    # "&rev=2" appended to a /v/video_id URL) would be mistaken for
    # the video id.
    first_form_matches = /[?&]v=([0-9a-z_\-]+)/i.match(@url)
    return first_form_matches[1] unless first_form_matches.nil?

    # First form didn't work? Try the second, /v/video_id.
    second_form_matches = /\/v\/([0-9a-z_\-]+)/i.match(@url)
    return second_form_matches[1] unless second_form_matches.nil?

    # Neither form matched.
    return nil
  end


  # Parse out the "t" parameter from the video's page. I'm not sure
  # what "t" stands for, but it's located in some JSON, and is required
  # for the final video URL to work.
  #
  # Returns the value string, or nil if it cannot be found in
  # +page_data+.
  def parse_t_parameter(page_data)
    matches = /\"t\"\:[[:space:]]\"([^\"]+?)\"/.match(page_data)
    return matches.nil? ? nil : matches[1]
  end

end