]> gitweb.michael.orlitzky.com - dead/whatever-dl.git/blob - src/website.rb
Make headers a property of the website class rather than passing them to get_page_data.
[dead/whatever-dl.git] / src / website.rb
1 #
2 # Copyright Michael Orlitzky
3 #
4 # http://michael.orlitzky.com/
5 #
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
15 #
16 # http://www.fsf.org/licensing/licenses/gpl.html
17 #
18
19 # Needed for the default implementation of get_page_data.
20 require 'net/http'
21
22 # Necessary in a lot of subclasses; plus, we need it
23 # to parse the server name out of our URL.
24 require 'uri'
25
26 # Needed to download.. things.
27 require 'net/http'
28
# Base class for every supported video website.
#
# This class keeps track of all its subclasses. We use this to loop
# through every "website" in an attempt to determine to which site
# a URL belongs (see Website.create).
class Website

  # Additional headers used when requesting data from the website.
  # These aren't passed as a parameter because the (final)
  # downloaders need them as well.
  attr_accessor :headers

  # Hook invoked by Ruby whenever a class inherits from this one.
  # It has to stay public: the respond_to? check below only sees
  # public methods, and that is how a descendant's hook finds ours.
  def self.inherited(subclass)
    # Forward to the parent class's hook when it exposes one, so
    # that a subclass of a subclass is also registered with the
    # classes above it.
    superclass.inherited(subclass) if superclass.respond_to?(:inherited)

    # Every time we're subclassed, add the new
    # subclass to our list of subclasses.
    @subclasses ||= []
    @subclasses << subclass
  end


  # Factory method returning an instance of the appropriate
  # subclass for +url+, or nil if no registered subclass claims it.
  def self.create(url)
    # The list only exists once something has inherited from us;
    # treat "no subclasses yet" the same as "nothing matched".
    candidates = @subclasses || []

    # Check the URL against each website's class. The class will
    # know whether or not the URL "belongs" to its website.
    owner = candidates.find { |site| site.owns_url?(url) }

    # If nothing matched, we don't return an instance of anything.
    owner ? owner.new(url) : nil
  end


  # Abstract definition. Each subclass of Website
  # should support it on its own.
  def self.owns_url?(url)
    raise NotImplementedError
  end


  # Remember the page URL and set up the default request headers
  # (just the configured User-Agent).
  def initialize(url)
    @url = url
    self.headers = { 'User-Agent' => Configuration::USER_AGENT }
  end


  # Abstract. Each subclass returns the URL of the video itself.
  def get_video_url
    raise NotImplementedError
  end


  # The website class should be responsible for determining the
  # video's filename. By default, we can take the last component
  # of the video URL, but in some cases, subclasses will want
  # to override this behavior.
  def get_video_filename
    # Use whatever comes after the final front slash.
    file_and_params = get_video_url.split('/').pop

    # Drop any URL parameters: keep only the text before the first
    # question mark (the whole string when there isn't one).
    file_and_params.partition('?').first
  end


  protected

  # Get the HTTP server (host) portion of our URL.
  def server
    URI.parse(@url).host
  end


  # A naive implementation that just grabs the data from a page,
  # sending our headers along with the request. Returns the
  # response body.
  def get_page_data(url)
    uri = URI.parse(url)

    # Without :use_ssl, an https:// URL would be spoken to in plain
    # HTTP on port 443 and the request would fail.
    use_ssl = (uri.scheme == 'https')

    response = Net::HTTP.start(uri.host, uri.port, :use_ssl => use_ssl) do |http|
      http.get(uri.request_uri, headers)
    end

    response.body
  end

end