From 168ef3b2ccf5b97d561a3c542a18e8e7587de291 Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Thu, 12 Jun 2008 23:17:16 -0400 Subject: [PATCH 1/1] Initial commit. --- bin/whatever-dl | 66 + makefile | 7 + src/string.rb | 31 + src/website.rb | 46 + src/websites/howcast.rb | 43 + src/websites/redtube.rb | 130 ++ src/websites/youporn.rb | 76 + test/fixtures/youporn/page_data-65778.html | 1747 ++++++++++++++++++++ test/howcast_test.rb | 30 + test/redtube_test.rb | 38 + test/remote_test_suite.rb | 1 + test/test_suite.rb | 4 + test/website_test.rb | 20 + test/youporn_remote_test.rb | 21 + test/youporn_test.rb | 22 + 15 files changed, 2282 insertions(+) create mode 100755 bin/whatever-dl create mode 100644 makefile create mode 100644 src/string.rb create mode 100644 src/website.rb create mode 100644 src/websites/howcast.rb create mode 100644 src/websites/redtube.rb create mode 100644 src/websites/youporn.rb create mode 100644 test/fixtures/youporn/page_data-65778.html create mode 100644 test/howcast_test.rb create mode 100644 test/redtube_test.rb create mode 100644 test/remote_test_suite.rb create mode 100644 test/test_suite.rb create mode 100644 test/website_test.rb create mode 100644 test/youporn_remote_test.rb create mode 100644 test/youporn_test.rb diff --git a/bin/whatever-dl b/bin/whatever-dl new file mode 100755 index 0000000..c20b8bd --- /dev/null +++ b/bin/whatever-dl @@ -0,0 +1,66 @@ +#!/usr/bin/ruby -w +# +# whatever-dl, a script to download online (web-based) videos. +# +# Copyright Michael Orlitzky +# +# http://michael.orlitzky.com/ +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# http://www.fsf.org/licensing/licenses/gpl.html
#

# All of the website classes are located in one directory, so we can
# 'require' them automatically. The directory is resolved relative to
# this script (not the current working directory) so the tool can be
# started from anywhere. Sorting makes the load order -- and therefore
# the order in which sites get to claim a URL -- deterministic.
websites_glob = File.expand_path('../src/websites/*.rb', File.dirname(__FILE__))

Dir.glob(websites_glob).sort.each do |r|
  require r
end


# Only actually do something if this script was called
# directly (i.e. not from the tests).
#
# Exit statuses:
#   1 - no URL was given, or no website class claims the URL
#   2 - the website class could not determine the video URL
if __FILE__ == $PROGRAM_NAME
  if ARGV.empty?
    # If the user didn't give us a URL, yell
    # at him or her.
    puts 'Usage: whatever-dl <url>'
    Kernel.exit(1)
  end

  url = ARGV[0]

  # Check the URL against each website's class.
  # The class will know whether or not the URL
  # "belongs" to its website. The first class to claim it wins.
  # Website.subclasses may be nil when no website class was loaded,
  # hence the fallback to an empty list.
  site_class = (Website.subclasses || []).find { |w| w.owns_url?(url) }

  if site_class.nil?
    puts 'Invalid URL.'
    exit(1)
  end

  video_url = site_class.new.get_video_url(url)

  if video_url.nil?
    puts 'Error retrieving video URL.'
    exit(2)
  end

  # *classy*
  # Hand the URL to wget as its own argument. The multi-argument form of
  # exec never goes through a shell, so quotes, backticks or '$(...)' in a
  # scraped URL cannot be interpreted as shell syntax.
  Kernel.exec('wget', video_url)
end

# ==== makefile (new file, mode 100644) ====
#
# .PHONY : test
#
# test:
#	ruby test/test_suite.rb
#
# remote_test:
#	ruby test/remote_test_suite.rb

# ==== src/string.rb (new file, mode 100644) ====
#
# Copyright Michael Orlitzky
#
# http://michael.orlitzky.com/
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# http://www.fsf.org/licensing/licenses/gpl.html
#

# Augment the String class with a (left) pad method
class String
  # Left-pad the string up to a total length.
  #
  # pad_char  - String used as padding (normally a single character).
  # to_length - Integer, the desired total length of the result.
  #
  # Returns the receiver itself when it is already long enough (or when
  # pad_char is empty); otherwise a new String of exactly to_length
  # characters. A multi-character pad is repeated and cut off as needed.
  def pad_left(pad_char, to_length)
    # Don't do anything if the string is already long enough
    return self if to_length <= length || pad_char.empty?

    rjust(to_length, pad_char)
  end
end

# ==== src/website.rb (new file, mode 100644) ====
#
# Copyright Michael Orlitzky
#
# http://michael.orlitzky.com/
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# http://www.fsf.org/licensing/licenses/gpl.html
#

# This class keeps track of all its subclasses
# We use this to loop through every "website" in an
# attempt to determine to which site a URL belongs.
class Website
  # The registry lives on Website itself (a class-level instance
  # variable), never on a subclass.
  @subclasses ||= []

  # Ruby hook, called whenever a class inherits from Website, either
  # directly or through another website class. Every descendant ends up
  # in the single registry held by Website.
  def self.inherited(subclass)
    super
    registry << subclass
  end

  # Returns an Array (possibly empty, never nil) of every website class
  # loaded so far, in load order. The Array is a copy, so callers cannot
  # corrupt the registry.
  def self.subclasses
    registry.dup
  end

  # The one shared list of website classes. Always read from Website, no
  # matter which subclass the call arrives through.
  def self.registry
    Website.instance_variable_get(:@subclasses)
  end
  private_class_method :registry

  # This should be overridden in any class that wants
  # to claim ownership of a URL.
  def self.owns_url?(url)
    false
  end

  # Same here. We want to default to nil unless overridden.
  def get_video_url(url)
    nil
  end
end

# ==== src/websites/howcast.rb (new file, mode 100644) ====
#
# Copyright Michael Orlitzky
#
# http://michael.orlitzky.com/
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# http://www.fsf.org/licensing/licenses/gpl.html
#

# Located relative to this file, because '.' is not on the load path of
# current Ruby versions. Skipped when Website is already defined.
require File.expand_path('../website', File.dirname(__FILE__)) unless defined?(Website)

class Howcast < Website

  # \A and \z anchor the whole string. (^ and $ only anchor a line, which
  # would let a multi-line string with one good line pass validation.)
  VALID_HOWCAST_URL_REGEX = %r{\A(http://)?(www\.)?howcast\.com/videos/(\d+)-(.+)\z}

  # Returns true when the URL is a Howcast video page, false otherwise.
  def self.owns_url?(url)
    !!(url =~ VALID_HOWCAST_URL_REGEX)
  end


  # Build the FLV location from the numeric video id in the page URL.
  #
  # Returns the video URL as a String.
  # Raises StandardError when no "/<digits>-" segment is present.
  def get_video_url(url)
    # This regex just pulls out the video id
    matches = %r{/(\d+)-}.match(url)

    if matches.nil?
      raise StandardError, 'The URL is a valid Howcast URL, but does not match on the digit portion of the regex. Since the digit portion is a subset of the "valid" regex, this should never occur.'
    end

    video_id = matches[1]
    "http://media.howcast.com/system/videos/#{video_id}/#{video_id}.flv"
  end

end

# ==== src/websites/redtube.rb (new file, mode 100644) ====
#
# Copyright Michael Orlitzky
#
# http://michael.orlitzky.com/
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# http://www.fsf.org/licensing/licenses/gpl.html
#
#
# NOTE:
#
# All credit belongs to whomever reverse-engineered the Redtube
# Flash applet in the first place. I took the algorithm from this
# script:
#
# http://userscripts.org/scripts/review/8691
#
# and merely cleaned it up a bit while porting it to Ruby.
#

# The Redtube class needs the extra string methods..
require File.expand_path('../string', File.dirname(__FILE__)) unless String.method_defined?(:pad_left)
require File.expand_path('../website', File.dirname(__FILE__)) unless defined?(Website)

# This class handles the algorithm magic needed to get
# the URL from a Redtube video id.
class Redtube < Website

  VALID_REDTUBE_URL_REGEX = %r{\A(http://)?(www\.)?redtube\.com/(\d+)\z}

  VIDEO_FILE_EXTENSION = '.flv'

  # Substitution table used to build the obfuscated file name.
  FILE_NAME_MAP = %w[
    R 1 5 3 4 2 O 7 K 9 H B C D X F G A
    I J 8 L M Z 6 P Q 0 S T U V W E Y N
  ].freeze

  # Returns true when the URL is a Redtube video page, false otherwise.
  def self.owns_url?(url)
    !!(url =~ VALID_REDTUBE_URL_REGEX)
  end


  # The only public method. This calls the other parts
  # of the algorithm and, with any luck, we wind up with
  # the URL to the video.
  #
  # Returns the video URL as a String, or nil when the URL contains no
  # numeric video id at all.
  def get_video_url(url)
    # First, parse the video ID out of the URL.
    id_match = /\d+/.match(url)
    return nil if id_match.nil?

    video_id = id_match[0]
    padded_id = video_id.pad_left('0', 7)

    video_dir = get_video_dir(video_id)
    file_name = get_file_name(padded_id)

    # This mess is actually the only directory out of
    # which they serve videos.
    'http://dl.redtube.com/_videos_t4vn23s9jc5498tgj49icfj4678/' +
      "#{video_dir}/#{file_name}"
  end


  protected

  # Not sure what they're thinking with this one.
  # The directory is the id divided by 1000 (rounded down), zero-padded
  # to seven characters.
  def get_video_dir(video_id)
    (video_id.to_i / 1000).to_s.pad_left('0', 7)
  end


  # The first part of the algorithmic magic. Multiply each
  # digit of the padded video id by the index of the
  # following digit, and sum them up.
  def int_magic(padded_video_id)
    (0..6).inject(0) do |sum, i|
      sum + padded_video_id[i, 1].to_i * (i + 1)
    end
  end


  # Part 2 of the magic. Sum the digits of the result
  # of the first magic.
  def more_magic(file_string)
    int_magic(file_string).to_s.split('').inject(0) do |sum, digit|
      sum + digit.to_i
    end
  end


  # Complete fricking mystery
  #
  # file_string - the seven-character, zero-padded video id.
  #
  # Returns the obfuscated file name, extension included.
  def get_file_name(file_string)
    my_int = more_magic(file_string)

    # The digit sum as exactly two characters ('05', '12', ...).
    new_char = my_int.to_s.pad_left('0', 2)

    # Numeric value of the id digit at position i. A one-character slice
    # plus to_i gives the same answer on every Ruby version, unlike
    # "string[i] - 48", which depends on String#[] returning a char code.
    digit = lambda { |i| file_string[i, 1].to_i }

    [
      FILE_NAME_MAP[digit.call(3) + my_int + 3],
      new_char[1, 1],
      FILE_NAME_MAP[digit.call(0) + my_int + 2],
      FILE_NAME_MAP[digit.call(2) + my_int + 1],
      FILE_NAME_MAP[digit.call(5) + my_int + 6],
      FILE_NAME_MAP[digit.call(1) + my_int + 5],
      new_char[0, 1],
      FILE_NAME_MAP[digit.call(4) + my_int + 7],
      FILE_NAME_MAP[digit.call(6) + my_int + 4]
    ].join + VIDEO_FILE_EXTENSION
  end

end

# ==== src/websites/youporn.rb (new file, mode 100644) ====
#
# Copyright Michael Orlitzky
#
# http://michael.orlitzky.com/
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# http://www.fsf.org/licensing/licenses/gpl.html
#

require File.expand_path('../website', File.dirname(__FILE__)) unless defined?(Website)

# Needed to download the page, which is in turn
# needed because it contains the video URL.
require 'net/http'
require 'uri'


class Youporn < Website

  VALID_YOUPORN_URL_REGEX = %r{\A(http://)?(www\.)?youporn\.com/watch/(\d+)\z}

  # Returns true when the URL is a Youporn video page, false otherwise.
  def self.owns_url?(url)
    !!(url =~ VALID_YOUPORN_URL_REGEX)
  end


  # Download the video page and pull the FLV URL out of it.
  #
  # Returns the video URL as a String, or nil when the page contains none.
  def get_video_url(url)
    parse_video_url(get_page_data(url))
  end


  protected

  # Get the FLV file URL from the HTML page for this movie.
  # They don't obfuscate it or anything, so we assume here
  # that the first "download" url ending in ".flv" is the
  # movie file we want.
  #
  # Returns the URL as a String, or nil when nothing matches.
  def parse_video_url(page_data)
    matches = %r{http://download\.youporn\.com/.*?\.flv}.match(page_data)
    matches && matches[0]
  end


  # owns_url? also accepts URLs without a scheme, but URI.parse leaves the
  # host empty for those. Prefix them with http:// so the page can
  # actually be fetched. URLs that already carry a scheme are unchanged.
  def absolute_url(url)
    (url =~ %r{\A[a-z][a-z0-9+.-]*://}i) ? url : "http://#{url}"
  end


  # POST to the video page and return the response body (the page HTML).
  def get_page_data(url)
    full_url = absolute_url(url)
    uri = URI.parse(full_url)
    path = uri.path.to_s.empty? ? '/' : uri.path

    response = Net::HTTP.start(uri.host, uri.port) do |http|
      # Bypass the stupid age verification.
      form_data = 'user_choice=Enter'
      http.post(path, form_data, get_headers(full_url))
    end

    response.body
  end

  # Build the header hash from the URL we're requesting.
  def get_headers(url)
    { 'Referer' => url }
  end


end

# ==== test/fixtures/youporn/page_data-65778.html (new file, mode 100644) ====
# The saved copy of the video page starts here and continues on the
# lines that follow.
+ + + + + + + + + + + + + + + + + +
+
+ + Video | + + + Premium | + + + Dating | + + + + + Cams + + + | + + + Chat + | + + + + SexBlogs + +
+
+ + +
+
+ +
+ +
+ +
+ + +
+ + + + +
+
+ + + +
+
+
+ + + +
+ + + + +
+ + +
+

+ . + moaning mom fucked at night +

+ +
+ + +
+

+ + +
+ + +
+
+
+
+
+ +
+ + + +
+ +
+ +
+
+
+
+
+

Download:

+ + +
+ +
+

Details:

+
    +
  • Duration: 07min 46sec
  • +
  • Views: 988,949 total (587 today) +
  • Rating: 3.22 / 5.00 (1,049 ratings)
  • + +
  • Submitted: Amateur Videos
  • +
  • Date: Tue Sep 25 15:33:19 2007
  • +
+
+
+
+ + + +
+ +
+
+ +
+ + + Share on Facebook +
+
+
+ +
+ +
+
+ + +
+ +
+ + + +
+
+ + +
+ +
+ + + + +
+
+ + + +
+ +
+
+ +

Watch over 4000 DVDs or 25000 videos at YouPorn Plus+ for only 9.95 per month.

+ + + + + + + + + + + + + + + +
+ +
+ + +
+ +
+
+ +

Comments have been disabled for this video.

+ +
+ +
+
+
+ +
+ + + +
+ +
+
+
+ + +
+ +
# (End of the test/fixtures/youporn/page_data-65778.html fixture.)

# ==== test/howcast_test.rb (new file, mode 100644) ====
# Unit tests for the Howcast class.
# get_video_url is just a string interpolation, so a
# single known-answer check is enough for it.

require 'test/unit'

# Resolved relative to this file: '.' is not on the load path of current
# Ruby versions, and the tests should not depend on the directory they
# are started from.
require File.expand_path('../src/websites/howcast', File.dirname(__FILE__))

class HowcastTest < Test::Unit::TestCase

  def test_owns_howcast_urls
    assert(Howcast.owns_url?('http://www.howcast.com/videos/6807-2twr'))
    assert(Howcast.owns_url?('www.howcast.com/videos/6807-2dgfdg'))
    assert(Howcast.owns_url?('http://howcast.com/videos/6807-cse'))
    assert(Howcast.owns_url?('howcast.com/videos/6807-asdgasd'))
  end


  def test_doesnt_own_redtube_urls
    assert(!Howcast.owns_url?('http://www.redtube.com/6807'))
    assert(!Howcast.owns_url?('www.redtube.com/6807'))
    assert(!Howcast.owns_url?('http://redtube.com/6807'))
    assert(!Howcast.owns_url?('redtube.com/6807'))
  end

  def test_doesnt_own_misc_urls
    assert(!Howcast.owns_url?('http://www.howcast.com/abc'))
  end


  # The video id from the page URL appears twice in the media URL.
  def test_get_video_url
    hc = Howcast.new

    test_result = hc.get_video_url('http://www.howcast.com/videos/6807-2twr')
    assert_equal('http://media.howcast.com/system/videos/6807/6807.flv', test_result)
  end
end

# ==== test/redtube_test.rb (new file, mode 100644) ====
# Unit tests for the Redtube class. Basically just checking
# the results of get_video_url for known ids.

require 'test/unit'

# Resolved relative to this file: '.' is not on the load path of current
# Ruby versions, and the tests should not depend on the directory they
# are started from.
require File.expand_path('../src/websites/redtube', File.dirname(__FILE__))

class RedtubeTest < Test::Unit::TestCase

  def test_owns_redtube_urls
    assert(Redtube.owns_url?('http://www.redtube.com/6807'))
    assert(Redtube.owns_url?('www.redtube.com/6807'))
    assert(Redtube.owns_url?('http://redtube.com/6807'))
    assert(Redtube.owns_url?('redtube.com/6807'))
  end


  def test_doesnt_own_howcast_urls
    assert(!Redtube.owns_url?('http://www.howcast.com/6807'))
    assert(!Redtube.owns_url?('www.howcast.com/6807'))
    assert(!Redtube.owns_url?('http://howcast.com/6807'))
    assert(!Redtube.owns_url?('howcast.com/6807'))
  end


  def test_doesnt_own_misc_urls
    assert(!Redtube.owns_url?('http://redtube/123'))
    assert(!Redtube.owns_url?('www.redtube.com/abc'))
  end


  # Known-answer check of the whole id -> file name algorithm.
  def test_get_video_url
    rt = Redtube.new

    test_result = rt.get_video_url('http://www.redtube.com/6807')
    assert_equal("http://dl.redtube.com/_videos_t4vn23s9jc5498tgj49icfj4678/0000006/X57OBH08G.flv", test_result)
  end

end

# ==== test/remote_test_suite.rb (new file, mode 100644) ====
# Tests that talk to the real websites.
require File.expand_path('youporn_remote_test', File.dirname(__FILE__))

# ==== test/test_suite.rb (new file, mode 100644) ====
# Tests that run entirely offline.
require File.expand_path('howcast_test', File.dirname(__FILE__))
require File.expand_path('redtube_test', File.dirname(__FILE__))
require File.expand_path('website_test', File.dirname(__FILE__))
require File.expand_path('youporn_test', File.dirname(__FILE__))

# ==== test/website_test.rb (new file, mode 100644) ====
# Tests common to all websites.
require 'test/unit'

# All of the website classes are located in one
# directory, so we can 'require' them automatically.
# The directory is located relative to this file so the tests can be
# started from any working directory.
Dir.glob(File.expand_path('../src/websites/*.rb', File.dirname(__FILE__))).sort.each do |r|
  require r
end

class WebsiteTest < Test::Unit::TestCase

  # No website class may claim strings that are obviously not URLs of
  # its site.
  def test_doesnt_own_misc_urls
    sites = Website.subclasses || []

    # Without this the loop below would pass vacuously if the glob
    # above had loaded nothing.
    assert(!sites.empty?, 'no website classes were loaded')

    sites.each do |w|
      %w[6807 www http].each do |junk|
        assert(!w.owns_url?(junk))
      end
    end
  end

end

# ==== test/youporn_remote_test.rb (new file, mode 100644) ====
# Remote Youporn tests. Actually hit their website
# and attempt to parse the data returned.

require 'test/unit'
require File.expand_path('../src/websites/youporn', File.dirname(__FILE__))

class YoupornRemoteTest < Test::Unit::TestCase

  def test_get_page_data
    yp = Youporn.new

    # We can't rely on the fixture here, because Youporn might
    # change their page layout. Instead, check that we can actually
    # find the FLV URL on this page.
    page_data = yp.send('get_page_data', 'http://www.youporn.com/watch/65778')

    # The downloaded HTML goes to the parser. get_video_url expects a
    # page URL, not page contents, so it is the wrong method here.
    test_result = yp.send('parse_video_url', page_data)
    assert_equal('http://download.youporn.com/download/112911/flv/65778_moaning_mom_fucked_at_night.flv', test_result)
  end

end

# ==== test/youporn_test.rb (new file, mode 100644) ====
# Unit tests for the Youporn class. Basically just checking
# the results of parse_video_url for now.

require 'test/unit'
require File.expand_path('../src/websites/youporn', File.dirname(__FILE__))

class YoupornTest < Test::Unit::TestCase

  # Parse a saved copy of the page instead of hitting the website.
  def test_parse_video_url
    yp = Youporn.new

    fixture = File.expand_path('fixtures/youporn/page_data-65778.html', File.dirname(__FILE__))
    page_data = File.read(fixture)

    test_result = yp.send('parse_video_url', page_data)
    assert_equal('http://download.youporn.com/download/112911/flv/65778_moaning_mom_fucked_at_night.flv', test_result)
  end

end

# --
# 2.44.2