From 1d43361a1d8c6fc3938a2438baa8d8348129b4fd Mon Sep 17 00:00:00 2001 From: mjo Date: Fri, 31 Oct 2008 13:09:08 -0400 Subject: [PATCH] Added the ability to download videos from http://www.yikers.com/. Created tests for the new Yikers class. Added a method to the Website class returning the host portion of its URL. --- src/website.rb | 11 + src/websites/yikers.rb | 101 ++++++ .../video_college_cafeteria_strip_off.html | 327 ++++++++++++++++++ .../video_college_cafeteria_strip_off.xml | 274 +++++++++++++++ test/test_suite.rb | 1 + test/yikers_test.rb | 93 +++++ 6 files changed, 807 insertions(+) create mode 100644 src/websites/yikers.rb create mode 100644 test/fixtures/yikers/video_college_cafeteria_strip_off.html create mode 100644 test/fixtures/yikers/video_college_cafeteria_strip_off.xml create mode 100644 test/yikers_test.rb diff --git a/src/website.rb b/src/website.rb index 96290de..b5a501f 100644 --- a/src/website.rb +++ b/src/website.rb @@ -16,6 +16,10 @@ # http://www.fsf.org/licensing/licenses/gpl.html # +# Necessary in a lot of subclasses; plus, we need it +# to parse the server name out of our URL. +require 'uri' + # This class keeps track of all its subclasses # We use this to loop through every "website" in an # attempt to determine to which site a URL belongs. @@ -37,6 +41,13 @@ class Website @subclasses << subclass end + + def server + # Return the host name parsed from @url; nil if URI.parse finds no host (e.g. 'yikers.com/x.html') + uri = URI.parse(@url) + return uri.host + end + public; diff --git a/src/websites/yikers.rb b/src/websites/yikers.rb new file mode 100644 index 0000000..b602f3a --- /dev/null +++ b/src/websites/yikers.rb @@ -0,0 +1,101 @@ +# +# Copyright Michael Orlitzky +# +# http://michael.orlitzky.com/ +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# http://www.fsf.org/licensing/licenses/gpl.html
+#
+
+require 'src/website'
+
+# Needed to download the page, which is in turn
+# needed because it contains the video URL.
+require 'net/http'
+
+# Website subclass for yikers.com video pages.
+class Yikers < Website
+  # Anchored with \A and \z (not ^ and $) so the whole string must match.
+  VALID_YIKERS_URL_REGEX = /\A(http:\/\/)?(www\.)?yikers\.com\/([[:alnum:]_]+)\.html\z/
+
+  # Returns the match offset (truthy) if url is a Yikers page, else nil.
+  def self.owns_url?(url)
+    return url =~ VALID_YIKERS_URL_REGEX
+  end
+
+  def get_video_url()
+    # First we download the page. Each page contains a flash parameter
+    # called 'xml' which contains the relative path to the video's XML file.
+    # We download their XML file (passing it the video id as a side effect),
+    # and that contains the URL of the video file we want.
+    page_data = get_page_data(@url)
+    xml_path = parse_xml_path(page_data)
+
+    # The XML path has no host name, so prepend ours. server is called with
+    # an implicit receiver so it works whatever visibility Website gives it.
+    xml_url = "http://#{server}#{xml_path}"
+    xml_data = get_page_data(xml_url)
+
+    # parse_video_url returns nil when the XML names no yikers.com .flv.
+    return parse_video_url(xml_data)
+  end
+
+  # Returns the page's base name (minus '.html') with '.flv' appended.
+  def get_video_filename()
+    # Default to whatever comes after the final forward slash
+    # in the main URL.
+    filename = @url.split('/').pop()
+
+    # These page URLs are actually descriptive, so we can use
+    # the file name of the HTML page as our video file name.
+    filename_regex = /\/([[:alnum:]_]+)\.html$/
+    matches = filename_regex.match(@url)
+
+    # Overwrite the default if the regex matched (a match always has group 1).
+    filename = matches[1] unless matches.nil?
+
+    return (filename + '.flv')
+  end
+
+  protected;
+  # Returns the first http://[cdn.]yikers.com/....flv URL in data, or nil.
+  def parse_video_url(data)
+    video_url_regex = /http:\/\/(cdn\.)?yikers\.com\/([[:alnum:]_\/]+)\.flv/
+    matches = video_url_regex.match(data)
+    video_url = matches[0] unless matches.nil?
+
+    return video_url
+  end
+
+  # Returns the value given to addVariable('xml', ...) in data, or nil.
+  def parse_xml_path(data)
+    xml_path_regex = /addVariable\(\'xml\', \'(.*?)\'\)/
+    matches = xml_path_regex.match(data)
+    xml_path = matches[1] unless matches.nil?
+
+    return xml_path
+  end
+
+  # Performs an HTTP GET of url and returns the response body.
+  def get_page_data(url)
+    # NOTE(review): owns_url? also accepts URLs without "http://", which
+    # URI.parse gives a nil host -- confirm callers add the scheme first.
+    uri = URI.parse(url)
+
+    response = Net::HTTP.start(uri.host, uri.port) do |http|
+      # request_uri is the path plus query, and "/" when the path is empty.
+      http.get(uri.request_uri)
+    end
+    return response.body
+  end
+
+end
diff --git a/test/fixtures/yikers/video_college_cafeteria_strip_off.html b/test/fixtures/yikers/video_college_cafeteria_strip_off.html
new file mode 100644
index 0000000..6498862
--- /dev/null
+++ b/test/fixtures/yikers/video_college_cafeteria_strip_off.html
@@ -0,0 +1,327 @@
+ + +College Cafeteria Strip Off + + + + + + + + + + + + + + + + + + + + + + + +
+ SlideShows | Upload | Groups | Live Shows +
+ + + + + +
+ Funny Videos, Pictures, Jokes, and Fun Humor + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
   + +

College Cafeteria Strip Off

+ + + + + + +
+ + +
+ +
+ + +
+ +
<< PreviousTrouble watching? Contact usNext >>
+ + + + + + +
+ +

Similar Media

+ +
Sexy drunk girl c...Espn profanityDrunks take out c...Frisky sea otter
+

+ + + + + +
+ + + +

User Comments

+ + + + + + + +
+ + Posted by: bosshonkey on May 10, 2007 @ 05:03:26 am

+ and this is our future? great....wtf are these 'students' learning? +Wonder if their parents like paying for them to strip in school. xxxxing +retards
+

+

+ + + + + +
+ + Posted by: devin on May 10, 2007 @ 08:41:43 am

+ I want to go to school there.
+

+

+ + + + + +
+ + Posted by: disbelief on May 10, 2007 @ 09:20:19 am

+ i agree... some people need to be hit in the face with a brick.
+

+

+ + + + + +
+ + Posted by: bosshonkey on May 10, 2007 @ 09:49:42 am

+ or two....
+

+

+ + + + + +
+ + Posted by: quick flex on May 10, 2007 @ 03:37:06 pm

+ Yeah, what school is this? I'd also like to enroll.
+

+

+ + + + + +
+ + Posted by: obscura on May 10, 2007 @ 10:15:41 pm

+ Silkeborg Highschool in Denmark
+

+

+ + + + + + +
Post Comment:
+

Want to post a comment? Register for a free account or login to your existing account.

+ +

  + + + +

Today's Top Sites

Macho Video
Hot Video Links
Kontraband
Ownage Videos
Flurl Video Search
Funny Crazy Videos
Hot Videos
MojoFlix
Unique Peek
Street Fight Videos
Free Online Games
FileCabi.net
Kill Some Time
Favorite Vids n Pics
Hot Vids
Play Online Games
Whats Your IQ?
Add Your Link
More Cool Sites

+ + + + +

  + + Register for free | Login

+ + + +

+
+ + + + + + + +
Cool Clips
A cat with two faces.
Best wedding first dance ever.
How to get with two girls.
Student tries to poison teacher.
+
+ +

+ +

Must See LIVE Streaming

+ + +

More Cool Videos

+

+

Trade traffic? Sign up and start sending traffic. We will send 1:1 back via the random rotation plugs above. The more hits you send the more hits you will get back. Sign up if you are interested in trading permanent plug. + +

  
+
+
DMCA | Privacy | Terms of Service | Advertise | Trade Traffic | Contact | Forums | RSS

Celebrity Videos | Video Search Engine | Celebrity Gossip Videos | Celebrity News Videos | Hollywood Gossip Videos | Hollywood Celeb Videos | Election 2008 Videos

NBA Basketball Videos | MLB Baseball Videos | Sexy Celebrity Videos | Humor Comedy Videos

Copyright © 2007 Yikers.com. All Rights Reserved.

+ + + + + + + + + diff --git a/test/fixtures/yikers/video_college_cafeteria_strip_off.xml b/test/fixtures/yikers/video_college_cafeteria_strip_off.xml new file mode 100644 index 0000000..dd4cf6d --- /dev/null +++ b/test/fixtures/yikers/video_college_cafeteria_strip_off.xml @@ -0,0 +1,274 @@ + + + + true + + + + + + + + + true + + bottom + false + false + + + 0 + 0 + + + + false + + true + 10000 + + + 0 + 392 + + + + false + + true + 10000 + + + 28 + 429 + + + + + + + 20 + 45549FE81D8B42E9A688CAF4BD24AA22 + 20 + 45549FE81D8B42E9A688CAF4BD24AA22 + 20 + 45549FE81D8B42E9A688CAF4BD24AA22 + + + /flash/lu_postroll_001.swf + false + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/test/test_suite.rb b/test/test_suite.rb index 26c595d..8987178 100644 --- a/test/test_suite.rb +++ b/test/test_suite.rb @@ -23,6 +23,7 @@ require 'test/veoh_test' require 'test/uri_utilities_test' require 'test/vimeo_test' require 'test/website_test' +require 'test/yikers_test' require 'test/youporn_test' require 'test/youtube_test' diff --git a/test/yikers_test.rb b/test/yikers_test.rb new file mode 100644 index 0000000..1c9548d --- /dev/null +++ b/test/yikers_test.rb @@ -0,0 +1,93 @@ +# +# Copyright Michael Orlitzky +# +# http://michael.orlitzky.com/ +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+#
+# http://www.fsf.org/licensing/licenses/gpl.html
+#
+
+require 'test/unit'
+require 'src/websites/yikers'
+
+# Unit tests for the Yikers website class: URL ownership and page parsing.
+class YikersTest < Test::Unit::TestCase
+
+  # Page URLs are claimed with or without the "http://" and "www." prefixes.
+  def test_owns_yikers_urls
+    owned = [
+      'http://www.yikers.com/video_sexy_drunk_girl_crashes_dirtbike.html',
+      'http://www.yikers.com/video_almost_transformers.html',
+      'http://yikers.com/video_almost_transformers.html',
+      'www.yikers.com/video_noisy_drinking_cat.html',
+      'yikers.com/video_college_cafeteria_strip_off.html'
+    ]
+    owned.each { |url| assert(Yikers.owns_url?(url)) }
+  end
+
+  # Howcast video URLs belong to another site and must be rejected.
+  def test_doesnt_own_howcast_urls
+    foreign = [
+      'http://www.howcast.com/videos/6807-2twr',
+      'www.howcast.com/videos/6807-2dgfdg',
+      'http://howcast.com/videos/6807-cse',
+      'howcast.com/videos/6807-asdgasd'
+    ]
+    foreign.each { |url| assert(!Yikers.owns_url?(url)) }
+  end
+
+  # Redtube URLs must be rejected as well.
+  def test_doesnt_own_redtube_urls
+    foreign = [
+      'http://www.redtube.com/6807',
+      'www.redtube.com/6807',
+      'http://redtube.com/6807',
+      'redtube.com/6807'
+    ]
+    foreign.each { |url| assert(!Yikers.owns_url?(url)) }
+  end
+
+  # A URL on another host that is not a video page is rejected too.
+  def test_doesnt_own_misc_urls
+    assert(!Yikers.owns_url?('http://www.howcast.com/abc'))
+  end
+
+  # The saved HTML page yields the relative path of the player's XML file.
+  def test_parse_xml_path
+    yikers = Yikers.new(nil)
+    fixture = File.read('test/fixtures/yikers/video_college_cafeteria_strip_off.html')
+    assert_equal('/flash/play_flash_xml.php?cid=11798',
+                 yikers.send('parse_xml_path', fixture))
+  end
+
+  # The saved player XML yields the absolute URL of the .flv file.
+  def test_parse_video_url
+    yikers = Yikers.new(nil)
+    fixture = File.read('test/fixtures/yikers/video_college_cafeteria_strip_off.xml')
+    assert_equal('http://cdn.yikers.com/flv/flash8/yikers_college_cafeteria_strip_off.flv',
+                 yikers.send('parse_video_url', fixture))
+  end
+
+  # The video file name is the page's base name with '.flv' appended.
+  def test_get_video_filename
+    page_url = 'http://www.yikers.com/video_college_cafeteria_strip_off.html'
+    assert_equal('video_college_cafeteria_strip_off.flv',
+                 Yikers.new(page_url).get_video_filename())
+  end
+
+  # URI splits the XML URL into a path and query that rejoin unchanged.
+  def test_uri_query_works_on_yikers_urls
+    uri = URI.parse('http://www.yikers.com/flash/play_flash_xml.php?cid=11798')
+    assert_equal('/flash/play_flash_xml.php?cid=11798', uri.path + '?' + uri.query)
+  end
+
+end
-- 
2.44.2