+
+
# Fetches +url+ with a single GET request and returns the response body.
#
# The request is sent with the current +headers+. Afterwards the
# 'Referer' header is set to the request URI (path plus query) of +url+
# so that later requests made with the same headers carry it.
#
# This is a naive implementation: the response status is not inspected
# and redirects are not followed; whatever body comes back is returned.
#
# url:: String holding the address of the page (http or https).
#
# Returns the body of the response.
def get_page_data(url)
  uri = URI.parse(url)

  # Net::HTTP.start speaks plain HTTP unless told otherwise, so TLS has
  # to be switched on explicitly for https URLs.
  response = Net::HTTP.start(uri.host, uri.port,
                             :use_ssl => (uri.scheme == 'https')) do |http|
    http.get(uri.request_uri, headers)
  end

  # Set the referer in case it is needed for some later request.
  headers['Referer'] = uri.request_uri

  response.body
end
+
+
+
# Methods defined from here on are public.
public

# Additional headers used when requesting data from the website.
# They are kept on the instance instead of being passed as a
# parameter because the (final) downloaders need them as well.
attr_accessor :headers
+
# Remembers the URL this instance was created for and seeds the request
# headers with the configured User-Agent.
#
# url:: the address this instance will work with.
def initialize(url)
  @url = url

  # Go through the writer so the headers are reachable via #headers.
  default_headers = { 'User-Agent' => Configuration::USER_AGENT }
  self.headers = default_headers
end
+
+
# Factory method returning an instance of the appropriate subclass.
#
# Every registered subclass is asked, in registration order, whether it
# owns +url+. The first owner that is not the Generic class wins
# immediately. Generic (recognised by its class name) is only remembered
# while scanning, because a more specific subclass further down the list
# might still match; it is used as the fallback once the scan is over.
#
# url:: the address to find a website class for.
#
# Returns a new instance of the selected class, built with +url+.
# Raises ArgumentError when no registered class (Generic included)
# owns +url+, or when no subclasses have been registered at all.
def self.create(url)
  generic = nil

  # @subclasses may still be unset if nothing has been registered yet.
  (@subclasses || []).each do |klass|
    next unless klass.owns_url?(url)
    return klass.new(url) unless klass.to_s == 'Generic'

    generic = klass
  end

  # Without this guard the fallback below would blow up with an opaque
  # NoMethodError on nil.
  if generic.nil?
    raise ArgumentError, "no website class accepts URL: #{url.inspect}"
  end

  # Nothing specific matched, so fall back to the generic parser.
  generic.new(url)
end
+
+
+ # Abstract definition. Each subclass of Website
+ # should support it on its own.