+++ /dev/null
-#
-# Copyright Michael Orlitzky
-#
-# http://michael.orlitzky.com/
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# http://www.fsf.org/licensing/licenses/gpl.html
-#
-
-# Needed for the default implementation of get_page_data.
-require 'net/http'
-
-# Necessary in a lot of subclasses; plus, we need it
-# to parse the server name out of our URL.
-require 'uri'
-
-# Needed to download.. things.
-require 'net/http'
-
# Base class for every supported video website.
#
# Website records each class that inherits from it. The factory
# method Website.create walks that list and asks every class, via
# owns_url?, whether a given URL belongs to its site.
class Website

  # Additional headers used when requesting data from the website.
  # These aren't passed as a parameter because the (final)
  # downloaders need them as well.
  attr_accessor :headers


  # Hook invoked by Ruby each time a class inherits from Website or
  # from one of its descendants.
  #
  # subclass - the newly defined class.
  def self.inherited(subclass)
    # When a descendant of Website is itself subclassed, "self" is
    # that descendant. Forward the notification to its parent so the
    # parent's list records the new class as well. Object does not
    # respond publicly to :inherited, so the chain stops at Website.
    superclass.inherited(subclass) if superclass.respond_to?(:inherited)

    # Every time we're subclassed, add the new subclass to our list.
    @subclasses ||= []
    @subclasses << subclass
  end


  # Factory method returning an instance of the appropriate subclass.
  #
  # url - the URL (String) handed to each class's owns_url? and then
  #       to the constructor of the class that is chosen.
  #
  # The class whose name is 'Generic' is only used as a fallback when
  # no other registered class claims the URL.
  #
  # Raises ArgumentError if no registered class (Generic included)
  # claims the URL.
  def self.create(url)
    fallback = nil

    # @subclasses is still nil if nothing has inherited from us yet.
    (@subclasses || []).each do |site|
      next unless site.owns_url?(url)

      # Don't return Generic right away: a more specific class
      # further down the list might also match the URL.
      return site.new(url) unless site.to_s == 'Generic'

      fallback = site
    end

    unless fallback
      raise ArgumentError, "No website class recognizes the URL: #{url}"
    end

    fallback.new(url)
  end


  # Abstract. Each subclass should report whether or not the given
  # URL belongs to its website.
  def self.owns_url?(url)
    raise NotImplementedError
  end


  # url - the address of the page on the website, stored for later
  #       use by the instance.
  def initialize(url)
    @url = url
    self.headers = { 'User-Agent' => Configuration::USER_AGENT }
  end


  # Abstract. Each subclass should return the URL of the video.
  def get_video_url
    raise NotImplementedError
  end


  # The website class is responsible for determining the video's
  # filename. By default we take the last path component of the
  # video URL, but subclasses may override this behavior.
  #
  # Returns the text after the final slash of the video URL, with
  # any URL parameters removed.
  def get_video_filename
    # Strip the parameters *before* looking for the last slash;
    # otherwise a slash inside a parameter value (for example
    # "?next=/other/page") would be mistaken for a path separator.
    without_params = get_video_url.sub(/\?.*\z/m, '')

    without_params.split('/').last
  end


  protected

  # Returns the host portion of the URL this instance was
  # constructed with.
  def server
    URI.parse(@url).host
  end


  # A naive implementation that just grabs the data from a page.
  #
  # url - the URL (String) to fetch with an HTTP GET request; the
  #       current headers are sent along with it.
  #
  # Returns the body of the response.
  def get_page_data(url)
    uri = URI.parse(url)

    # Without :use_ssl, an https URL would be spoken to in plain
    # HTTP on the TLS port and the request would fail.
    secure = (uri.scheme == 'https')

    response = Net::HTTP.start(uri.host, uri.port, :use_ssl => secure) do |http|
      http.get(uri.request_uri, headers)
    end

    # Set the referer in case it is needed for some later request.
    headers['Referer'] = uri.request_uri

    response.body
  end

end