# Provenance: src/website.rb from whatever-dl.git, recovered from a gitweb
# blobdiff that records the file's deletion (mode 100644, index
# e9e65ca..0000000).
# X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2Fwebsite.rb;fp=src%2Fwebsite.rb;h=0000000000000000000000000000000000000000;hb=6de408333ceb0d142f8fa0fef2571228e89c8fc1;hp=e9e65ca1909f7add3c8b2c6ecc800a3856145528;hpb=8e886df259246365023322b78f58e4037cb536a4;p=dead%2Fwhatever-dl.git
#
# Copyright Michael Orlitzky
#
# http://michael.orlitzky.com/
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# http://www.fsf.org/licensing/licenses/gpl.html
#

# Needed for the default implementation of get_page_data, and
# to download.. things.
require 'net/http'

# Necessary in a lot of subclasses; plus, we need it
# to parse the server name out of our URL.
require 'uri'

# Base class for every supported "website".
#
# This class keeps track of all its subclasses. We use this to loop
# through every "website" in an attempt to determine to which site a
# URL belongs (see Website.create).
#
# Subclasses are expected to implement:
#
#   * self.owns_url?(url) -- does the URL belong to this site?
#   * get_video_url       -- the URL of the actual video file.
#
# and may override get_video_filename when the default (last path
# component of the video URL) is not good enough.
class Website

  # Additional headers used when requesting data from the website.
  # These aren't passed as a parameter because the (final)
  # downloaders need them as well.
  attr_accessor :headers


  # Hook invoked by Ruby whenever this class (or one of its
  # descendants) is subclassed.
  #
  # The notification is first forwarded to the superclass, provided
  # the superclass responds to a public +inherited+, and then the
  # new subclass is appended to the receiver's own list.
  def self.inherited(subclass)
    if superclass.respond_to? :inherited
      superclass.inherited(subclass)
    end

    # Every time we're subclassed, add the new
    # subclass to our list of subclasses.
    @subclasses ||= []
    @subclasses << subclass
  end


  # Factory method returning an instance of the appropriate subclass
  # for +url+.
  #
  # Every registered subclass is asked, in registration order, whether
  # it owns the URL. The first owner that is not named 'Generic' wins.
  # If only Generic claimed the URL, a Generic instance is returned.
  #
  # Raises NoMethodError if no subclass has been registered yet, or if
  # nothing claimed the URL (there is then no Generic class to fall
  # back on).
  def self.create(url)
    # Remembered while looping, used only as a last resort.
    generic = nil

    @subclasses.each do |site_class|
      next unless site_class.owns_url?(url)

      if site_class.to_s == 'Generic'
        # Don't return Generic right away: some other subclass
        # further down the list might still match the URL.
        generic = site_class
      else
        return site_class.new(url)
      end
    end

    # Nothing specific matched, so try the generic parser.
    generic.new(url)
  end


  # Abstract. Each subclass of Website should answer whether or
  # not +url+ belongs to its site.
  def self.owns_url?(url)
    raise NotImplementedError
  end


  # Remember the page URL and start out with a header set that
  # contains only the configured User-Agent.
  def initialize(url)
    @url = url
    self.headers = { 'User-Agent' => Configuration::USER_AGENT }
  end


  # Abstract. Subclasses return the URL of the video itself.
  def get_video_url
    raise NotImplementedError
  end


  # The website class should be responsible for determining the
  # video's filename. By default, we take the last path component
  # of the video URL with any URL parameters removed; subclasses
  # can override this when that isn't appropriate.
  #
  # Returns an empty String when the URL has no path component.
  def get_video_filename
    # Throw the parameters away *before* looking for the final
    # slash. Parameter values can contain slashes themselves
    # (e.g. "...?ref=http://host/page"), and those must not be
    # mistaken for path separators.
    without_params = get_video_url.split('?', 2).first.to_s

    # Use whatever comes after the final front slash.
    without_params.split('/').last.to_s
  end


  protected

  # The HTTP server (host) portion of the URL we were created with.
  def server
    URI.parse(@url).host
  end


  # A naive implementation that just grabs the data from a page and
  # returns the response body.
  #
  # The request is sent with our current headers. Afterwards the
  # Referer header is set to the request URI (path plus query) of
  # +url+, in case it is needed for some later request.
  def get_page_data(url)
    uri = URI.parse(url)

    # Net::HTTP speaks plain HTTP unless told otherwise, so TLS has
    # to be requested explicitly for https:// URLs.
    options = { :use_ssl => (uri.scheme == 'https') }

    response = Net::HTTP.start(uri.host, uri.port, options) do |http|
      http.get(uri.request_uri, self.headers)
    end

    self.headers['Referer'] = uri.request_uri

    response.body
  end

end