Allow passing headers (default: {}) to the Website get_page_data method.

[dead/whatever-dl.git] / src / website.rb
diff --git a/src/website.rb b/src/website.rb

index 96290dee3de99ac7d00f34e14298110be82455a0..2f03e6cb4c383fa63093553a60317925bee51183 100644 (file)
--- a/src/website.rb
+++ b/src/website.rb
@@ -16,6 +16,16 @@
  # http://www.fsf.org/licensing/licenses/gpl.html
  #
  
+# Needed for the default implementation of get_page_data.
+require 'net/http'
+
+# Necessary in a lot of subclasses; plus, we need it
+# to parse the server name out of our URL.
+require 'uri'
+
+# Needed to download things (net/http is already required above; repeating it is harmless).
+require 'net/http'
+
  # This class keeps track of all its subclasses
  # We use this to loop through every "website" in an
  # attempt to determine to which site a URL belongs.
@@ -37,6 +47,28 @@ class Website
      @subclasses << subclass
    end
  
+
+  def server
+    # Get the HTTP server portion of our URI
+    uri = URI.parse(@url)
+    return uri.host
+  end
+
+
+  
+  def get_page_data(url, headers = {})
+    # Naive fetch: one GET for url, sent with the optional headers hash; the
+    # response body is returned as-is, without inspecting the response status.
+    uri = URI.parse(url)
+
+    response = Net::HTTP.start(uri.host, uri.port) do |http|
+      http.get(uri.request_uri, headers)
+    end
+
+    return response.body
+  end
+
+  
    
    public;
  
@@ -82,8 +114,15 @@ class Website
    # of the video URL, but in some cases, subclasses will want
    # to override this behavior.
    def get_video_filename()
-    # Use whatever comes after the final front slash.    
-    return get_video_url().split('/').pop()
+    # Use whatever comes after the final front slash.
+    file_and_params = get_video_url().split('/').pop()
+
+    # Unless it contains URL parameters. We don't want those.
+    return file_and_params unless file_and_params.include?('?')
+    
+    # There must be some parameters. Strip them off.
+    param_start_idx = file_and_params.index('?')
+    return file_and_params[0...(param_start_idx)]
    end
    
  end