From 0662d93e5088ecfd2ce351910ab9a1d3568f8359 Mon Sep 17 00:00:00 2001
From: Michael Orlitzky <michael@orlitzky.com>
Date: Thu, 10 Mar 2011 21:53:05 -0500
Subject: [PATCH] Add a generic parser that will hopefully supplant some
 site-specific subclasses.

---
 bin/whatever-dl         |  6 ++--
 src/website.rb          | 18 ++++++++---
 src/websites/generic.rb | 69 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 86 insertions(+), 7 deletions(-)
 create mode 100644 src/websites/generic.rb

diff --git a/bin/whatever-dl b/bin/whatever-dl
index f042b21..787e9db 100755
--- a/bin/whatever-dl
+++ b/bin/whatever-dl
@@ -120,9 +120,9 @@ if (__FILE__ == $0) then
   video_url = site.get_video_url()
 
   if video_url.nil?
-    puts 'Error retrieving video URL.'
-    exit(EXIT_COULDNT_GET_VIDEO_URL)
-  end
+    puts 'Error retrieving video URL:', "Site not supported, and the generic parser couldn't find any videos."
+    exit(EXIT_COULDNT_GET_VIDEO_URL)
+  end
 
   # The Downloader class is a factory; it should decide
   # which subclass we get.
diff --git a/src/website.rb b/src/website.rb
index 4a3f2af..e9e65ca 100644
--- a/src/website.rb
+++ b/src/website.rb
@@ -90,18 +90,28 @@ class Website
     # Factory method returning an instance of
     # the appropriate subclass.
 
+    # While we're looping through the list of subclasses,
+    # we'll set this to the Generic class.
+    generic = nil
+
     # Check the URL against each website's class.
     # The class will know whether or not the URL
     # "belongs" to its website.
     @subclasses.each do |w|
       if w.owns_url?(url)
-        return w.new(url)
+        if w.to_s == 'Generic'
+          generic = w
+        else
+          # We don't want to return Generic here because some
+          # other subclasses further down the list might match
+          # the URL.
+          return w.new(url)
+        end
       end
     end
 
-    # If nothing matched, we don't return an instance
-    # of anything.
-    return nil
+    # Fall back to Generic; it's nil if it didn't claim the URL either.
+    return generic.nil? ? nil : generic.new(url)
   end
 
 
diff --git a/src/websites/generic.rb b/src/websites/generic.rb
new file mode 100644
index 0000000..88fa515
--- /dev/null
+++ b/src/websites/generic.rb
@@ -0,0 +1,69 @@
+#
+# Copyright Michael Orlitzky
+#
+# http://michael.orlitzky.com/
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# http://www.fsf.org/licensing/licenses/gpl.html
+#
+
+require 'src/website'
+
+class Generic < Website
+  # Last-resort parser for sites that have no dedicated subclass: it
+  # looks for the first .flv/.mp4 link in the page source.
+
+  # The trailing (.+) makes this match any non-empty URL.
+  VALID_GENERIC_URL_REGEX = /^(http:\/\/)?(www\.)?(.+)$/
+
+  # Returns a truthy value (the match position) if +url+ matches.
+  def self.owns_url?(url)
+    return url =~ VALID_GENERIC_URL_REGEX
+  end
+
+  # Hands whatever get_page_data returns for @url to parse_video_url
+  # and returns the result: a video URL, or nil if none was found.
+  def get_video_url()
+    page_data = self.get_page_data(@url)
+    return self.parse_video_url(page_data)
+  end
+
+  protected
+
+  # Returns the website portion of @url with a trailing slash, e.g.
+  # http://www.example.com/. owns_url? accepts URLs without a scheme
+  # or a path, so neither is required here. Returns '' if @url has
+  # no recognizable host.
+  def base_url
+    matches = /\A((https?:\/\/)?[^\/]+)/i.match(@url.to_s)
+    return matches.nil? ? '' : matches[1] + '/'
+  end
+
+  # Returns the first video URL in +page_data+ (a String), or nil.
+  # Absolute http(s) URLs are preferred; failing that, the first bare
+  # .flv/.mp4 path is appended to base_url.
+  def parse_video_url(page_data)
+    # A URL can't contain raw whitespace, quotes or angle brackets, so
+    # excluding them keeps a match from running across attributes.
+    full_video_url_regex = /(https?:\/\/[^\s"'<>]+?\.(flv|mp4))/i
+    matches = full_video_url_regex.match(page_data)
+    return matches[1] unless matches.nil?
+
+    partial_video_url_regex = /([^\s=\"\'<>]+\.(flv|mp4))/i
+    matches = partial_video_url_regex.match(page_data)
+    return nil if matches.nil?
+
+    # base_url already ends in a slash; don't double it.
+    return base_url + matches[1].sub(/\A\/+/, '')
+  end
+
+end
-- 
2.53.0