# Post-change view of the regions of src/website.rb touched by the
# 96290de..bfa1b3d diff. Code outside those regions is not shown here.
#
# http://www.fsf.org/licensing/licenses/gpl.html
#

# Needed to download things; the default implementation of
# get_page_data relies on it.
require 'net/http'

# Necessary in a lot of subclasses; plus, we need it
# to parse the server name out of our URL.
require 'uri'

# This class keeps track of all its subclasses
# We use this to loop through every "website" in an
# attempt to determine to which site a URL belongs.
class Website

  # NOTE(review): in the full file, server and get_page_data are defined
  # above a `public` marker, so they may belong to a non-public section
  # that is not visible in this excerpt -- confirm before relying on
  # their visibility.

  # Returns the host portion of @url (e.g. "www.example.com").
  #
  # Raises URI::InvalidURIError if @url cannot be parsed.
  def server
    URI.parse(@url).host
  end


  # A naive implementation that just grabs the data from a page.
  #
  # url:: absolute http:// or https:// address of the page.
  #
  # Returns the body of the response to a single GET request. The
  # response status is not inspected and redirects are not followed.
  def get_page_data(url)
    uri = URI.parse(url)

    # Without :use_ssl an https URL would be spoken to in plain HTTP
    # on port 443, so switch TLS on whenever the URL asks for it.
    options = { :use_ssl => uri.is_a?(URI::HTTPS) }

    response = Net::HTTP.start(uri.host, uri.port, options) do |http|
      http.get(uri.request_uri)
    end

    response.body
  end


  public

  # Returns the name under which the video should be saved. By default
  # this is the last path segment of get_video_url with any URL
  # parameters removed, but subclasses may override this behavior.
  #
  # Returns nil when the URL contains no path segment at all.
  def get_video_filename
    # Drop the parameters *before* looking for the final slash;
    # otherwise a parameter value containing '/' would be mistaken
    # for the file name.
    without_params = get_video_url.partition('?').first

    # Use whatever comes after the final front slash.
    without_params.split('/').last
  end

end