]>
gitweb.michael.orlitzky.com - dead/whatever-dl.git/blob - website.rb
e9e65ca1909f7add3c8b2c6ecc800a3856145528
2 # Copyright Michael Orlitzky
4 # http://michael.orlitzky.com/
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # http://www.fsf.org/licensing/licenses/gpl.html
19 # Needed for the default implementation of get_page_data.
22 # Necessary in a lot of subclasses; plus, we need it
23 # to parse the server name out of our URL.
26 # Needed to download.. things.
29 # This class keeps track of all its subclasses
30 # We use this to loop through every "website" in an
31 # attempt to determine to which site a URL belongs.
39 def self.inherited(subclass
)
40 if superclass
.respond_to
? :inherited
41 superclass
.inherited(subclass
)
44 # Every time we're subclassed, add the new
45 # subclass to our list of subclasses.
47 @subclasses << subclass
52 # Get the HTTP server portion of our URI
59 def get_page_data(url
)
60 # A naive implementation that just grabs the
64 response
= Net
::HTTP.start(uri
.host
, uri
.port
) do |http
|
65 http
.get(uri
.request_uri
, self.headers
)
68 # Set the referer in case it is needed for some later request.
69 self.headers
['Referer'] = uri
.request_uri
78 # Additional headers used when requesting data from the website.
79 # These aren't passed as a parameter because the (final)
80 # downloaders need them as well.
81 attr_accessor
:headers
85 self.headers
= { 'User-Agent' => Configuration
::USER_AGENT }
90 # Factory method returning an instance of
91 # the appropriate subclass.
93 # While we're looping through the list of subclasses,
94 # we'll set this to the Generic class.
97 # Check the URL against each website's class.
98 # The class will know whether or not the URL
99 # "belongs" to its website.
100 @subclasses.each
do |w
|
102 if w
.to_s
== 'Generic'
105 # We don't want to return Generic here because some
106 # other subclasses further down the list might match
113 # If nothing matched, try the generic parser.
114 return generic
.new(url
)
# Abstract class method: reports whether or not the given URL
# belongs to this website. There is no default implementation;
# every subclass of Website is expected to supply its own, and
# calling this version raises NotImplementedError.
def self.owns_url?(url)
  raise(NotImplementedError)
end
125 # Same here. Abstract.
127 raise NotImplementedError
# The website class is responsible for determining the video's
# filename. By default, we take the last component of the video URL
# (as returned by get_video_url) with any URL parameters removed.
# Subclasses can override this when that default is not suitable.
#
# A slash that directly precedes the parameters is ignored, exactly
# as a trailing slash is for a URL without parameters.
def get_video_filename()
  # Discard the URL parameters *before* looking for the final forward
  # slash. Parameter values may themselves contain slashes (for
  # example "?next=/home/index"), and splitting on '/' first would
  # mistake part of a parameter for the filename.
  url_without_params = get_video_url().split('?', 2).first

  # Use whatever comes after the final forward slash.
  return url_without_params.split('/').pop()
end