Add a generic parser that will hopefully supplant some site-specific subclasses.

[dead/whatever-dl.git] / bin / whatever-dl
diff --git a/bin/whatever-dl b/bin/whatever-dl

index c20b8bd7922e761c0bf553109381425f30830bee..787e9dbb93c748ed5f949bce8e50052a15968081 100755 (executable)
--- a/bin/whatever-dl
+++ b/bin/whatever-dl
@@ -1,4 +1,4 @@
-#!/usr/bin/ruby -w
+#!/usr/bin/ruby -wKU
  #
  # whatever-dl, a script to download online (web-based) videos.
  #
@@ -19,48 +19,139 @@
  # http://www.fsf.org/licensing/licenses/gpl.html
  #
  
+# We need Pathname to get the real filesystem path
+# of this script (and not, for example, the path of
+# a symlink which points to it).
+require 'pathname'
+
+# And getoptlong to parse our command-line options (--continue and --help).
+require 'getoptlong'
+
+# This bit of magic adds the parent directory (the
+# project root) to the list of ruby load paths.
+# Thus, our require statements will work regardless of
+# how or from where the script was run.
+executable = Pathname.new(__FILE__).realpath.to_s
+$: << File.dirname(executable) + '/../'
+
+# Load our config file.
+require 'bin/configuration'
+
+# And the downloaders...
+require 'src/downloader'
+
+# The Dir.glob that's coming up doesn't use the
+# Ruby library path so we need to tell it where to
+# look explicitly.
+websites_pattern = File.dirname(executable) + '/../src/websites/*.rb'
+
  # All of the website classes are located in one
  # directory, so we can 'require' them automatically.
-Dir.glob('src/websites/*.rb').each do |r|
+Dir.glob(websites_pattern).each do |r|
    require r
  end
  
  
+EXIT_SUCCESS = 0
+EXIT_NO_URL = 1
+EXIT_INVALID_URL = 2
+EXIT_COULDNT_GET_VIDEO_URL = 3
+EXIT_IO_ERROR = 4
+EXIT_ERROR_READING_FROM_VIDEO_URL = 5
+EXIT_CONNECTION_REFUSED = 6
+EXIT_HTTP_ERROR = 7
+EXIT_ACCESS_DENIED = 8
+
+def usage()
+  puts <<EOF
+
+Usage: whatever-dl [options] <url>
+
+Options:
+  -c, --continue        Continue downloading a previously-attempted file.
+
+EOF
+
+end
+
  # Only actually do something if this script was called
  # directly (i.e. not from the tests).
  if (__FILE__ == $0) then
+  # Default options.
+  options = { :continue => false }
+
+  # Parse the command-line options into the options hash.
+  opts = GetoptLong.new(["--continue", "-c", GetoptLong::NO_ARGUMENT],
+                        ["--help", "-h", GetoptLong::NO_ARGUMENT])
+
+  opts.each do |opt, arg|
+    case opt
+    when '--help'
+      usage()
+      Kernel.exit(EXIT_SUCCESS)
+    when '--continue'
+      options[:continue] = true
+    end
+  end
+
+  # Warn about nonsensical options.
+  if options[:continue] and not (Configuration::DOWNLOAD_METHOD == :wget)
+    puts 'WARNING: The --continue flag does nothing unless DOWNLOAD_METHOD is :wget.'
+  end
+
+  # Note that GetoptLong steals its arguments from ARGV, so we don't need
+  # to take optional arguments into account when figuring out whether or not
+  # we were passed a URL.
    if (ARGV.length < 1) then
      # If the user didn't give us a URL, yell
      # at him or her.
-    puts 'Usage: whatever-dl <url>'
-    Kernel.exit(1)
+    usage()
+    Kernel.exit(EXIT_NO_URL)
    end
  
-  # Check the URL against each website's class.
-  # The class will know whether or not the URL
-  # "belongs" to its website.
-
-  site = nil
-  
-  Website.subclasses.each do |w|
-    if w.owns_url?(ARGV[0])
-      site = w.new()
-      break
-    end
-  end
+  # Factory method.
+  site = Website.create(ARGV[0])
  
    if site.nil?
      puts 'Invalid URL.'
-    exit(1)
+    exit(EXIT_INVALID_URL)
    end
    
-  video_url = site.get_video_url(ARGV[0])
+  video_url = site.get_video_url()
  
    if video_url.nil?
-    puts 'Error retrieving video URL.'
-    exit(2)
+    puts 'Error retrieving video URL:'
+    puts "Site not supported, and the generic parser couldn't find any videos."
+    exit(EXIT_COULDNT_GET_VIDEO_URL) end
+
+  # The Downloader class is a factory; it should decide
+  # which subclass we get.
+  downloader = Downloader.create(Configuration::DOWNLOAD_METHOD)
+  
+  # Attempt to download the file, and rescue and report
+  # any (predictable) exceptions. The wget downloader will
+  # naturally not report any of these, since it will die in
+  # its own process.
+  begin
+    downloader.download(video_url,
+                        site.get_video_filename(),
+                        site.headers(),
+                        continue=options[:continue])
+  rescue Errno::ECONNREFUSED => e
+    puts 'The connection to the server (to download the video file) was refused. Check your connection, and try again later.'
+    Kernel.exit(EXIT_CONNECTION_REFUSED)
+  rescue Errno::EACCES => e
+    puts "Access denied. Check that you have write permission to the output file/directory. Details: #{e.message}."
+  rescue OpenURI::HTTPError => e
+    puts "An HTTP error occurred while downloading the video file: #{e.message}."
+    Kernel.exit(EXIT_HTTP_ERROR)
+  rescue IOError => e
+    puts "Input/Output Error: #{e.message}"
+    Kernel.exit(EXIT_IO_ERROR)
    end
  
-  # *classy*
-  Kernel.exec("wget \"#{video_url}\"")
+  # Write an empty line at the end for aesthetic reasons.
+  puts ''
+  
+  Kernel.exit(EXIT_SUCCESS)
  end