From 0662d93e5088ecfd2ce351910ab9a1d3568f8359 Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Thu, 10 Mar 2011 21:53:05 -0500 Subject: [PATCH] Add a generic parser that will hopefully supplant some site-specific subclasses. --- bin/whatever-dl | 6 ++-- src/website.rb | 18 ++++++++--- src/websites/generic.rb | 69 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 7 deletions(-) create mode 100644 src/websites/generic.rb diff --git a/bin/whatever-dl b/bin/whatever-dl index f042b21..787e9db 100755 --- a/bin/whatever-dl +++ b/bin/whatever-dl @@ -120,9 +120,9 @@ if (__FILE__ == $0) then video_url = site.get_video_url() if video_url.nil? - puts 'Error retrieving video URL.' - exit(EXIT_COULDNT_GET_VIDEO_URL) - end + puts 'Error retrieving video URL:' + puts "Site not supported, and the generic parser couldn't find any videos." + exit(EXIT_COULDNT_GET_VIDEO_URL) end # The Downloader class is a factory; it should decide # which subclass we get. diff --git a/src/website.rb b/src/website.rb index 4a3f2af..e9e65ca 100644 --- a/src/website.rb +++ b/src/website.rb @@ -90,18 +90,28 @@ class Website # Factory method returning an instance of # the appropriate subclass. + # While we're looping through the list of subclasses, + # we'll set this to the Generic class. + generic = nil + # Check the URL against each website's class. # The class will know whether or not the URL # "belongs" to its website. @subclasses.each do |w| if w.owns_url?(url) - return w.new(url) + if w.to_s == 'Generic' + generic = w + else + # We don't want to return Generic here because some + # other subclasses further down the list might match + # the URL. + return w.new(url) + end end end - # If nothing matched, we don't return an instance - # of anything. - return nil + # If nothing matched, try the generic parser. 
#
# Copyright Michael Orlitzky
#
# http://michael.orlitzky.com/
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# http://www.fsf.org/licensing/licenses/gpl.html
#

require 'src/website'

# A catch-all parser. It fetches the page at @url and looks for
# the first thing that resembles a link to an flv/mp4 file, either
# as an absolute URL or as a path relative to the site root.
class Generic < Website

  # Deliberately permissive: matches any string containing a
  # non-empty line, with or without an "http://" or "www." prefix.
  VALID_GENERIC_URL_REGEX = /^(http:\/\/)?(www\.)?(.+)$/

  # An absolute link to a video file. The character class keeps the
  # match inside a single attribute/token; a bare ".+?" could start
  # at an unrelated "http://" earlier on the same line and run
  # through quotes and markup until the first ".flv"/".mp4".
  FULL_VIDEO_URL_REGEX = /(https?:\/\/[^\s"'<>]+?\.(flv|mp4))/i

  # A relative (or protocol-relative) link to a video file: the
  # longest run of characters ending in ".flv"/".mp4" that contains
  # no "=", quote, whitespace or angle bracket.
  PARTIAL_VIDEO_URL_REGEX = /([^=\s"'<>]+\.(flv|mp4))/i

  # Returns true if this class is willing to handle +url+, and
  # false otherwise (including when +url+ is nil or empty).
  def self.owns_url?(url)
    return !(url =~ VALID_GENERIC_URL_REGEX).nil?
  end


  # Download the page at @url and return the first video URL found
  # in it, or nil if there isn't one. get_page_data is not defined
  # in this class; presumably it is inherited from Website.
  def get_video_url()
    page_data = self.get_page_data(@url)
    return self.parse_video_url(page_data)
  end


  protected

  # Return the website portion of @url including the trailing
  # slash, e.g. "http://www.example.com/". Returns nil only when
  # no host can be found in @url at all.
  def base_url
    url = @url.to_s

    matches = /(http:\/\/.+?\/)/.match(url)
    return matches[1] unless matches.nil?

    # The pattern above needs both a literal "http://" and a slash
    # after the host. URLs such as "http://example.com",
    # "www.example.com/clip" or "https://example.com/clip" don't
    # have both, so pick the (optional) scheme and the host apart
    # by hand rather than indexing into a nil match.
    matches = /\A\s*([a-z][a-z0-9+.\-]*:\/\/)?([^\/\s]+)/i.match(url)
    return nil if matches.nil?

    scheme = matches[1] || 'http://'
    return scheme + matches[2] + '/'
  end

  # Search +page_data+ (the page source, as a String) for a link to
  # an flv or mp4 file. Absolute URLs are preferred and returned
  # as-is; otherwise the first relative link is resolved against
  # base_url. Returns nil if nothing suitable is found.
  def parse_video_url(page_data)
    return nil if page_data.nil?

    matches = FULL_VIDEO_URL_REGEX.match(page_data)
    return matches[1] unless matches.nil?

    matches = PARTIAL_VIDEO_URL_REGEX.match(page_data)
    return nil if matches.nil?

    site = base_url
    return nil if site.nil?

    partial = matches[1]

    if partial[0, 2] == '//'
      # Protocol-relative link ("//cdn.example.com/clip.flv"): it
      # already names its own host, so only the scheme is borrowed.
      return site[/\A[a-z][a-z0-9+.\-]*:/i] + partial
    end

    # base_url already ends with a slash; drop any leading ones from
    # the path so we don't build "http://example.com//clip.flv".
    return site + partial.sub(/\A\/+/, '')
  end

end