gitweb.michael.orlitzky.com - dead/whatever-dl.git/blob - lib/whatever-dl/website.rb
Use strings instead of symbols for the downloader type.
[dead/whatever-dl.git] / lib / whatever-dl / website.rb
1 #
2 # Copyright Michael Orlitzky
3 #
4 # http://michael.orlitzky.com/
5 #
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
15 #
16 # http://www.fsf.org/licensing/licenses/gpl.html
17 #
18
19 # Needed for the default implementation of get_page_data.
20 require 'net/http'
21
22 # Necessary in a lot of subclasses; plus, we need it
23 # to parse the server name out of our URL.
24 require 'uri'
25
26 # Needed to download.. things.
27 require 'net/http'
28
# Base class for every supported website.
#
# This class keeps track of all its subclasses. Website.create uses
# that list to loop through every "website" in an attempt to determine
# to which site a URL belongs.
class Website

  # Additional headers used when requesting data from the website.
  # These aren't passed as a parameter because the (final)
  # downloaders need them as well.
  attr_accessor :headers


  # Hook invoked by Ruby whenever a class inherits from this one.
  #
  # When a subclass of a subclass is defined, this hook runs with the
  # intermediate class as the receiver. Forwarding the call to the
  # receiver's superclass (when that class exposes a public
  # 'inherited') passes the new class up the chain, so it ends up in
  # Website's own list as well.
  def self.inherited(subclass)
    if superclass.respond_to? :inherited
      superclass.inherited(subclass)
    end

    # Every time we're subclassed, add the new
    # subclass to our list of subclasses.
    @subclasses ||= []
    @subclasses << subclass
  end


  # Remember the page URL and build the default request headers.
  # The User-Agent value is read from a fresh Configuration object.
  def initialize(url)
    @url = url
    cfg = Configuration.new
    self.headers = { 'User-Agent' => cfg.user_agent }
  end


  # Factory method returning an instance of the appropriate subclass.
  #
  # Each registered subclass is asked (via owns_url?) whether the URL
  # "belongs" to its website, and the first specific match wins. The
  # class named 'Generic' is only used as a fallback once every other
  # subclass has declined, regardless of where it sits in the list.
  #
  # Returns nil when no subclass has been registered yet, or when
  # nothing claims the URL and there is no Generic class to fall
  # back on.
  def self.create(url)
    generic = nil

    (@subclasses || []).each do |w|
      next unless w.owns_url?(url)

      # We don't want to return Generic right away because some
      # other subclass further down the list might match the URL.
      if w.to_s == 'Generic'
        generic = w
      else
        return w.new(url)
      end
    end

    # If nothing matched, try the generic parser (if there is one).
    return nil if generic.nil?
    generic.new(url)
  end


  # Abstract definition. Each subclass of Website
  # should support it on its own.
  def self.owns_url?(url)
    raise NotImplementedError
  end


  # Same here. Abstract.
  def get_video_url
    raise NotImplementedError
  end


  # The website class should be responsible for determining the
  # video's filename. By default, we take the last component of the
  # video URL's path, but in some cases, subclasses will want to
  # override this behavior.
  #
  # URL parameters are removed *before* the URL is split on '/', so
  # a slash inside a parameter value (for example, a URL passed as a
  # parameter) cannot be mistaken for a path separator.
  def get_video_filename
    # Drop everything from the first '?' onwards.
    without_params = get_video_url.sub(/\?.*\z/m, '')

    # Use whatever comes after the final front slash.
    without_params.split('/').last
  end


  protected

  # Get the HTTP server (host) portion of our URL.
  def server
    URI.parse(@url).host
  end


  # A naive implementation that just grabs the data from a page and
  # returns the response body. The request is sent with self.headers.
  #
  # TLS is enabled when the URL's scheme is https. Afterwards, the
  # absolute URL of the page is stored as the 'Referer' header in
  # case it is needed for some later request; the absolute form stays
  # meaningful even if that later request goes to a different host.
  def get_page_data(url)
    uri = URI.parse(url)
    use_ssl = (uri.scheme == 'https')

    response = Net::HTTP.start(uri.host, uri.port, :use_ssl => use_ssl) do |http|
      http.get(uri.request_uri, self.headers)
    end

    self.headers['Referer'] = uri.to_s

    response.body
  end

end