Added the ability to download videos from http://www.yikers.com/.

[dead/whatever-dl.git] / src / websites / yikers.rb
diff --git a/src/websites/yikers.rb b/src/websites/yikers.rb

new file mode 100644 (file)

index 0000000..b602f3a
--- /dev/null
+++ b/src/websites/yikers.rb
@@ -0,0 +1,101 @@
+#
+# Copyright Michael Orlitzky
+#
+# http://michael.orlitzky.com/
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# http://www.fsf.org/licensing/licenses/gpl.html
+#
+
+require 'src/website'
+
+# Needed to download the page, which is in turn
+# needed because it contains the video URL.
+require 'net/http'
+
+
+class Yikers < Website
+
+  VALID_YIKERS_URL_REGEX = /^(http:\/\/)?(www\.)?yikers\.com\/([[:alnum:]_]+)\.html$/
+  
+  def self.owns_url?(url)
+    return url =~ VALID_YIKERS_URL_REGEX
+  end
+
+  
+  def get_video_url()
+    # First we download the page. Each page contains a flash parameter
+    # called 'xml' which contains the relative path to the video's XML file.
+    # We download their XML file (passing it the video id as a side effect),
+    # and that contains the URL of the video file we want.
+    page_data = get_page_data(@url)
+    xml_path = self.parse_xml_path(page_data)
+
+    # The XML path parameter doesn't contain a host name.
+    xml_url = "http://#{self.server}#{xml_path}"
+    xml_data = get_page_data(xml_url)
+    
+    video_url = parse_video_url(xml_data)
+
+    return video_url
+  end
+
+
+  def get_video_filename()
+    # Default to whatever comes after the final frontslash
+    # in the main URL.
+    filename = @url.split('/').pop()
+    
+    # These page URLs are actually descriptive, so we can use
+    # the file name of the HTML page as our video file name.
+    filename_regex = /\/([[:alnum:]_]+)\.html$/
+    matches = filename_regex.match(@url)
+
+    # Overwrite the default if our regex worked.
+    filename = matches[1] if not (matches.nil? || matches.length < 1)
+    
+    return (filename + '.flv')
+  end
+  
+  protected;
+
+  def parse_video_url(data)
+    video_url_regex = /http:\/\/(cdn\.)?yikers\.com\/([[:alnum:]_\/]+)\.flv/
+    matches = video_url_regex.match(data)
+    video_url = matches[0] if not matches.nil?
+    
+    return video_url
+  end
+
+  
+  def parse_xml_path(data)
+    xml_path_regex = /addVariable\(\'xml\', \'(.*?)\'\)/
+    matches = xml_path_regex.match(data)
+    xml_path = matches[1] if not (matches.nil? || matches.length < 2)
+    
+    return xml_path
+  end
+  
+  
+  def get_page_data(url)
+    uri = URI.parse(url)
+
+    response = Net::HTTP.start(uri.host, uri.port) do |http|
+      http_path = uri.path
+      http_path += ('?' + uri.query) if not uri.query.nil?
+      http.get(http_path)
+    end
+    
+    return response.body
+  end
+  
+end