gitweb.michael.orlitzky.com - dead/whatever-dl.git/commitdiff
Add a generic parser that will hopefully supplant some site-specific subclasses.
author Michael Orlitzky <michael@orlitzky.com>
Fri, 11 Mar 2011 02:53:05 +0000 (21:53 -0500)
committer Michael Orlitzky <michael@orlitzky.com>
Fri, 11 Mar 2011 02:53:05 +0000 (21:53 -0500)
bin/whatever-dl
src/website.rb
src/websites/generic.rb [new file with mode: 0644]

index f042b21b80e55f1d76dd0b74a40c91d8733ed645..787e9dbb93c748ed5f949bce8e50052a15968081 100755 (executable)
@@ -120,9 +120,9 @@ if (__FILE__ == $0) then
   video_url = site.get_video_url()
 
   if video_url.nil?
-    puts 'Error retrieving video URL.'
-    exit(EXIT_COULDNT_GET_VIDEO_URL)
-  end
+    puts 'Error retrieving video URL:'
+    puts "Site not supported, and the generic parser couldn't find any videos."
+    exit(EXIT_COULDNT_GET_VIDEO_URL) end
 
   # The Downloader class is a factory; it should decide
   # which subclass we get.
index 4a3f2afe36a0c009a095202e9cb2eb7e405a2395..e9e65ca1909f7add3c8b2c6ecc800a3856145528 100644 (file)
@@ -90,18 +90,28 @@ class Website
     # Factory method returning an instance of
     # the appropriate subclass.
 
+    # While we're looping through the list of subclasses,
+    # we'll set this to the Generic class.
+    generic = nil
+
     # Check the URL against each website's class.
     # The class will know whether or not the URL
     # "belongs" to its website.
     @subclasses.each do |w|
       if w.owns_url?(url)
-        return w.new(url)
+        if w.to_s == 'Generic'
+          generic = w
+        else
+          # We don't want to return Generic here because some
+          # other subclasses further down the list might match
+          # the URL.
+          return w.new(url)
+        end
       end
     end
 
-    # If nothing matched, we don't return an instance
-    # of anything.
-    return nil
+    # If nothing matched, try the generic parser.
+    return generic.new(url)
   end
 
 
diff --git a/src/websites/generic.rb b/src/websites/generic.rb
new file mode 100644 (file)
index 0000000..88fa515
--- /dev/null
@@ -0,0 +1,69 @@
+#
+# Copyright Michael Orlitzky
+#
+# http://michael.orlitzky.com/
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# http://www.fsf.org/licensing/licenses/gpl.html
+#
+
+require 'src/website'
+
+class Generic < Website
+
+  VALID_GENERIC_URL_REGEX = /^(http:\/\/)?(www\.)?(.+)$/
+
+  def self.owns_url?(url)
+    return url =~ VALID_GENERIC_URL_REGEX
+  end
+
+
+  def get_video_url()
+    page_data = self.get_page_data(@url)
+    video_url = self.parse_video_url(page_data)
+
+    return video_url
+  end
+
+
+  protected;
+
+  def base_url
+    # Return the website portion of the URL, e.g.
+    # http://www.example.com/
+    base_regex = /(http:\/\/.+?\/)/
+    matches = base_regex.match(@url)
+
+    # It's assumed that this will work, since @url is valid.
+    return matches[1]
+  end
+
+  def parse_video_url(page_data)
+    full_video_url_regex = /(http:\/\/.+?\.(flv|mp4))/i
+    matches = full_video_url_regex.match(page_data)
+
+    if not (matches.nil? || matches.length < 2)
+      return matches[1]
+    end
+
+    partial_video_url_regex = /([^\=\"\']+\.(flv|mp4))/i
+    matches = partial_video_url_regex.match(page_data)
+
+    if not (matches.nil? || matches.length < 2)
+      return base_url + matches[1]
+    end
+
+    return nil
+  end
+
+
+end