X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2Fwebsite.rb;h=2f03e6cb4c383fa63093553a60317925bee51183;hb=11aa7c3e702f4ea466152b706f60e154e60e188e;hp=1239712a696a3b855ec3ad79472865a608e7f937;hpb=9390083b114048a6782454a37f799733707ee5dc;p=dead%2Fwhatever-dl.git diff --git a/src/website.rb b/src/website.rb index 1239712..2f03e6c 100644 --- a/src/website.rb +++ b/src/website.rb @@ -16,10 +16,26 @@ # http://www.fsf.org/licensing/licenses/gpl.html # +# Needed for the default implementation of get_page_data. +require 'net/http' + +# Necessary in a lot of subclasses; plus, we need it +# to parse the server name out of our URL. +require 'uri' + +# Needed to download.. things. +require 'net/http' + # This class keeps track of all its subclasses # We use this to loop through every "website" in an # attempt to determine to which site a URL belongs. class Website + + protected; + + @url = nil + + def self.inherited(subclass) if superclass.respond_to? :inherited superclass.inherited(subclass) @@ -31,18 +47,82 @@ class Website @subclasses << subclass end - def self.subclasses - @subclasses + + def server + # Get the HTTP server portion of our URI + uri = URI.parse(@url) + return uri.host end - # This should be overridden in any class that wants - # to claim ownership of a URL. - def self.owns_url?(url) - return false + + + def get_page_data(url, headers = {}) + # A naive implementation that just grabs the + # data from a page. + uri = URI.parse(url) + + response = Net::HTTP.start(uri.host, uri.port) do |http| + http.get(uri.request_uri, headers) + end + + return response.body + end + + + + public; + + def initialize(url) + @url = url end - # Same here. We want to default to nil unless overridden. - def get_video_url(url) + + def self.create(url) + # Factory method returning an instance of + # the appropriate subclass. + + # Check the URL against each website's class. + # The class will know whether or not the URL + # "belongs" to its website. + @subclasses.each do |w| + if w.owns_url?(url) + return w.new(url) + end + end + + # If nothing matched, we don't return an instance + # of anything. return nil end + + + # Abstract definition. Each subclass of Website + # should support it on its own. + def self.owns_url?(url) + raise NotImplementedError + end + + + # Same here. Abstract. + def get_video_url() + raise NotImplementedError + end + + + # The website class should be responsible for determining the + # video's filename. By default, we can take the last component + # of the video URL, but in some cases, subclasses will want + # to override this behavior. + def get_video_filename() + # Use whatever comes after the final front slash. + file_and_params = get_video_url().split('/').pop() + + # Unless it contains URL parameters. We don't want those. + return file_and_params unless file_and_params.include?('?') + + # There must be some parameters. Strip them off. + param_start_idx = file_and_params.index('?') + return file_and_params[0...(param_start_idx)] + end + end