]> gitweb.michael.orlitzky.com - dead/whatever-dl.git/blob - src/website.rb
Added tests for the new Veoh URL formats.
[dead/whatever-dl.git] / src / website.rb
1 #
2 # Copyright Michael Orlitzky
3 #
4 # http://michael.orlitzky.com/
5 #
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
15 #
16 # http://www.fsf.org/licensing/licenses/gpl.html
17 #
18
19 # Needed for the default implementation of get_page_data.
20 require 'net/http'
21
22 # Necessary in a lot of subclasses; plus, we need it
23 # to parse the server name out of our URL.
24 require 'uri'
25
26 # Needed to download.. things.
27 require 'net/http'
28
29 # This class keeps track of all its subclasses
30 # We use this to loop through every "website" in an
31 # attempt to determine to which site a URL belongs.
class Website

  protected

  # NOTE: this assignment runs in the class body, so it sets an
  # instance variable on the Website class object itself, not on
  # instances. The per-instance @url is assigned in initialize.
  @url = nil


  # Hook invoked by Ruby each time a class inherits from this one.
  # Records the new subclass so that create() can later search it.
  # (Being a class method, it is unaffected by "protected" above.)
  def self.inherited(subclass)
    # Inside this method, self is whichever class is being
    # subclassed. Forward the notification to our parent class
    # explicitly, so that every ancestor which publicly responds
    # to "inherited" also records the new subclass in its own list.
    if superclass.respond_to? :inherited
      superclass.inherited(subclass)
    end

    # Every time we're subclassed, add the new
    # subclass to our list of subclasses.
    @subclasses ||= []
    @subclasses << subclass
  end


  # Returns the host portion of the URL we were constructed with.
  def server
    uri = URI.parse(@url)
    return uri.host
  end


  # A naive implementation that just grabs the data from a page
  # and returns the response body. No redirect or error handling
  # is performed.
  def get_page_data(url)
    uri = URI.parse(url)

    # Enable TLS for https URLs; without it, the request would be
    # sent in plain text to the HTTPS port and fail.
    response = Net::HTTP.start(uri.host,
                               uri.port,
                               :use_ssl => (uri.scheme == 'https')) do |http|
      http.get(uri.request_uri)
    end

    return response.body
  end


  public

  # Remember the URL that this instance represents.
  def initialize(url)
    @url = url
  end


  # Factory method returning an instance of the appropriate
  # subclass, or nil if no known subclass claims the URL.
  def self.create(url)
    # Check the URL against each website's class. The class will
    # know whether or not the URL "belongs" to its website. The
    # list does not exist until the first subclass is defined, so
    # fall back to an empty one instead of calling each() on nil.
    (@subclasses || []).each do |w|
      return w.new(url) if w.owns_url?(url)
    end

    # If nothing matched, we don't return an instance
    # of anything.
    return nil
  end


  # Abstract definition. Each subclass of Website
  # should support it on its own.
  def self.owns_url?(url)
    raise NotImplementedError
  end


  # Same here. Abstract; the default always raises
  # NotImplementedError.
  def get_video_url()
    raise NotImplementedError
  end


  # The website class should be responsible for determining the
  # video's filename. By default, we can take the last component
  # of the video URL, but in some cases, subclasses will want
  # to override this behavior.
  def get_video_filename()
    # Use whatever comes after the final front slash.
    return get_video_url().split('/').pop()
  end

end