X-Git-Url: http://gitweb.michael.orlitzky.com/?a=blobdiff_plain;f=src%2Fwebsite.rb;h=e9e65ca1909f7add3c8b2c6ecc800a3856145528;hb=0662d93e5088ecfd2ce351910ab9a1d3568f8359;hp=bfa1b3d618a04163856fee297dd65ce4b0e380b8;hpb=4c1c22529da73bf80cb95d5602236b9ba6c3f0d5;p=dead%2Fwhatever-dl.git diff --git a/src/website.rb b/src/website.rb index bfa1b3d..e9e65ca 100644 --- a/src/website.rb +++ b/src/website.rb @@ -30,12 +30,12 @@ require 'net/http' # We use this to loop through every "website" in an # attempt to determine to which site a URL belongs. class Website - + protected; - + @url = nil - + def self.inherited(subclass) if superclass.respond_to? :inherited superclass.inherited(subclass) @@ -55,44 +55,63 @@ class Website end - + def get_page_data(url) # A naive implementation that just grabs the # data from a page. uri = URI.parse(url) response = Net::HTTP.start(uri.host, uri.port) do |http| - http.get(uri.request_uri) + http.get(uri.request_uri, self.headers) end + # Set the referer in case it is needed for some later request. + self.headers['Referer'] = uri.request_uri + return response.body end - - + + public; + # Additional headers used when requesting data from the website. + # These aren't passed as a parameter because the (final) + # downloaders need them as well. + attr_accessor :headers + def initialize(url) @url = url + self.headers = { 'User-Agent' => Configuration::USER_AGENT } end - + def self.create(url) # Factory method returning an instance of # the appropriate subclass. - + + # While we're looping through the list of subclasses, + # we'll set this to the Generic class. + generic = nil + # Check the URL against each website's class. # The class will know whether or not the URL # "belongs" to its website. @subclasses.each do |w| if w.owns_url?(url) - return w.new(url) + if w.to_s == 'Generic' + generic = w + else + # We don't want to return Generic here because some + # other subclasses further down the list might match + # the URL. + return w.new(url) + end end end - # If nothing matched, we don't return an instance - # of anything. - return nil + # If nothing matched, try the generic parser. + return generic.new(url) end @@ -102,13 +121,13 @@ class Website raise NotImplementedError end - + # Same here. Abstract. def get_video_url() raise NotImplementedError end - + # The website class should be responsible for determining the # video's filename. By default, we can take the last component # of the video URL, but in some cases, subclasses will want @@ -119,10 +138,10 @@ class Website # Unless it contains URL parameters. We don't want those. return file_and_params unless file_and_params.include?('?') - + # There must be some parameters. Strip them off. param_start_idx = file_and_params.index('?') return file_and_params[0...(param_start_idx)] end - + end