]> gitweb.michael.orlitzky.com - dead/whatever-dl.git/blob - src/website.rb
4a3f2afe36a0c009a095202e9cb2eb7e405a2395
[dead/whatever-dl.git] / src / website.rb
1 #
2 # Copyright Michael Orlitzky
3 #
4 # http://michael.orlitzky.com/
5 #
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
15 #
16 # http://www.fsf.org/licensing/licenses/gpl.html
17 #
18
19 # Needed for the default implementation of get_page_data.
20 require 'net/http'
21
22 # Necessary in a lot of subclasses; plus, we need it
23 # to parse the server name out of our URL.
24 require 'uri'
25
26 # Needed to download.. things.
27 require 'net/http'
28
29 # This class keeps track of all its subclasses
30 # We use this to loop through every "website" in an
31 # attempt to determine to which site a URL belongs.
class Website

  # Hook invoked by Ruby whenever a class inherits from this one
  # (or from one of its descendants, which inherit this hook).
  def self.inherited(subclass)
    # Pass the notification up the chain first, so that a subclass
    # of a subclass is also recorded by every ancestor that exposes
    # a public inherited hook. Do not replace this with a bare
    # `super`: that would skip the ancestors' own lists.
    if superclass.respond_to? :inherited
      superclass.inherited(subclass)
    end

    # Every time we're subclassed, add the new
    # subclass to our list of subclasses.
    @subclasses ||= []
    @subclasses << subclass
  end


  # Additional headers used when requesting data from the website.
  # These aren't passed as a parameter because the (final)
  # downloaders need them as well.
  attr_accessor :headers


  # Remember the page URL and start out with a header set that
  # contains only the configured User-Agent.
  def initialize(url)
    @url = url
    self.headers = { 'User-Agent' => Configuration::USER_AGENT }
  end


  # Factory method returning an instance of the appropriate
  # subclass, or nil if no known website claims the URL.
  #
  # Each registered class is asked, in registration order, whether
  # the URL "belongs" to its website; the first one to say yes is
  # instantiated.
  def self.create(url)
    # The list only comes into existence once something has been
    # subclassed, so treat "no list yet" as "no candidates" rather
    # than calling a method on nil.
    candidates = @subclasses || []

    owner = candidates.find { |site| site.owns_url?(url) }

    # If nothing matched, we don't return an instance
    # of anything.
    return owner ? owner.new(url) : nil
  end


  # Abstract definition. Each subclass of Website
  # should support it on its own.
  def self.owns_url?(url)
    raise NotImplementedError
  end


  # Same here. Abstract.
  def get_video_url()
    raise NotImplementedError
  end


  # The website class should be responsible for determining the
  # video's filename. By default, we can take the last component
  # of the video URL, but in some cases, subclasses will want
  # to override this behavior.
  def get_video_filename()
    # Drop the URL parameters before looking for the final slash;
    # parameter values may themselves contain slashes, and those
    # must not be mistaken for path separators.
    url_without_params = get_video_url().partition('?').first

    # Use whatever comes after the final front slash.
    return url_without_params.split('/').last
  end


  protected

  # Get the HTTP server (host) portion of the URL this
  # instance was created with.
  def server
    return URI.parse(@url).host
  end


  # A naive implementation that just grabs the data from a page.
  # Sends the current headers along with a GET request for +url+
  # and returns the body of the response.
  def get_page_data(url)
    uri = URI.parse(url)

    # Without :use_ssl, an https URL would be spoken to in plain
    # text on its (default 443) port and the request would fail.
    response = Net::HTTP.start(uri.host, uri.port,
                               :use_ssl => (uri.scheme == 'https')) do |http|
      http.get(uri.request_uri, self.headers)
    end

    # Set the referer in case it is needed for some later request.
    # NOTE(review): only the path/query part of the URL is stored
    # here, not the absolute URL -- confirm that is what the
    # downloaders expect.
    self.headers['Referer'] = uri.request_uri

    return response.body
  end

end