From 1b8ef3e1c23144e7ca2f8f6724d123b12ae9081c Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Sun, 15 Jun 2008 23:43:00 -0400 Subject: [PATCH] Added the Infoq site. Added support for infoq.com by adding a new website class. Infoq attempts to obscure their video URLs with Base64 encoding which is easy enough to get around. I implemented this, and a couple of tests for the new class. --- src/websites/infoq.rb | 69 ++ .../infoq/jim-weirich-discusses-rake.html | 964 ++++++++++++++++++ test/infoq_remote_test.rb | 21 + test/infoq_test.rb | 27 + test/remote_test_suite.rb | 1 + test/test_suite.rb | 1 + 6 files changed, 1083 insertions(+) create mode 100644 src/websites/infoq.rb create mode 100644 test/fixtures/infoq/jim-weirich-discusses-rake.html create mode 100644 test/infoq_remote_test.rb create mode 100644 test/infoq_test.rb diff --git a/src/websites/infoq.rb b/src/websites/infoq.rb new file mode 100644 index 0000000..fe05562 --- /dev/null +++ b/src/websites/infoq.rb @@ -0,0 +1,69 @@ +# +# Copyright Michael Orlitzky +# +# http://michael.orlitzky.com/ +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# http://www.fsf.org/licensing/licenses/gpl.html +# + +require 'src/website' +require 'base64' + +class Infoq < Website + + VALID_INFOQ_URL_REGEX = /^(http:\/\/)?(www\.)?infoq\.com\/(.+)$/ + + def self.owns_url?(url) + return url =~ VALID_INFOQ_URL_REGEX + end + + + def get_video_url(url) + page_data = self.get_page_data(url) + video_url = self.parse_video_url(page_data) + return video_url + end + + + protected; + + # Get the FLV file URL from the HTML page for this movie. + # It's encoded in base64 -- no big deal. + def parse_video_url(page_data) + # Get the base64 string. It's stored in a javascript + # variable called "jsclassref". This regex should match + # a javascript variable declaration preceded, separated, + # and/or terminated by zero or more whitespace characters. + base64_regex = /^\s*var\s+jsclassref=\'(.*)\';\s*$/ + matches = base64_regex.match(page_data) + + if (matches.nil? || matches.length < 2) + raise StandardError.new('There were no base64-encoded video URLs found on the page: '); + end + + return Base64.decode64(matches[1]) + end + + + # Just make a normal HTTP "get" request. + def get_page_data(url) + uri = URI.parse(url) + + response = Net::HTTP.start(uri.host, uri.port) do |http| + http.get(uri.path) + end + + return response.body + end + +end diff --git a/test/fixtures/infoq/jim-weirich-discusses-rake.html b/test/fixtures/infoq/jim-weirich-discusses-rake.html new file mode 100644 index 0000000..4672bb0 --- /dev/null +++ b/test/fixtures/infoq/jim-weirich-discusses-rake.html @@ -0,0 +1,964 @@ +​​​​​InfoQ: Jim Weirich Discusses Rake, the Ruby Make Tool + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

InfoQ

+ + + + + + + + + + + + + + + + +
+ + +
+
+ +
+ + + +

Interview

+
+
+
+
+ +
+    Good News: We have re-worked our video infrastructure to provide more reliable service. Please email bugs at infoq.com with any problems. +
+ +

Jim Weirich Discusses Rake, the Ruby Make Tool

+ +

+ Interview with + Jim Weirich + + by + Werner Schuster + + on + Apr 17, 2008 03:12 AM +

+
+
Community
+ +
Ruby
+ +
Topics
+ +
Domain Specific Languages,
+ +
Build systems
+ + +
Tags
+ +
Rake,
+ +
Mocks
+ + +
+ + + + + +
Please install Flash player.
+ + + +
+
+ + + + + + +
+
Bookmark
+
+digg+, +
+
+reddit+, +
+
+del.icio.us+, +
+
+dzone+, +
+
+facebook+ +
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + + +
+

No comments

+

+ + Reply +

+
+ + + + + + + +
+ +
+ +
    + + + + + + + +
+ + + +
+
+
+
+
+ + + +
+ +
+ + + + + + + + + + +
+
+ +
+

Exclusive Content

+ +
+ +
+
+
+ + + + + + + + +
+ + + +
+ + + + + +
+

Avi Bryant on MagLev and GemStone

+ +

+ +

Avi Bryant talks about working on MagLev, a Ruby implementation built by GemStone. Avi explains the reasons for MagLev, the merits of GemStone's distributed OODB features, and more.

+ +
+ +
+ + + + +
+

Using Numbers to Communicate - in the Spirit of Agile

+ +

+ +

Developers and the business use numbers differently, leading to poor communication. Here the "Spirit of Agile" tells a developer the trick: translate non-computational issues into number language.

+ +
+ +
+ + + + +
+

Dealing With the Organizational Challenges of Agile Adoption

+ +

+ +

In this presentation filmed during QCon London 2007, Joseph Pelrine talks about the challenges faced by organizations trying to adopt Agile methods.

+ +
+ +
+ + + + +
+

Domain Driven Design and Development In Practice

+ +

+ +

In this article, Srini Penchikala discusses Domain Driven Design from a practical stand-point. The article looks at architectural guidelines and best practices that can be used in a DDD project.

+ +
+ +
+ + + + +
+

Server Side OSGi

+ +

+ +

Adrian Colyer describes OSGi, OSGi implementations, modularity, versioning, operational control, server-side OSGi, design considerations, using existing libraries, and Spring Dynamic Modules.

+ +
+ +
+ + + + +
+

Agile Architecture Is Not Fragile Architecture

+ +

+ +

In this presentation filmed during QCon 2007, Coplien and Henney describe how to start with enough architecture to ensure long term success of an Agile developed project.

+ +
+ +
+ + + + +
+

David Nuescheler on JCR and REST

+ +

+ +

In this interview, Day CTO and JCR Spec Lead David Nuescheler discusses JCR, the Java Content Repository standard, its connection to REST, and the Sling web framework.

+ +
+ +
+ + + + +
+

InfoQ Case Study: NASDAQ Market Replay

+ +

+ +

In this case study InfoQ reviews the usage of Adobe AIR and Amazon Simple Storage Service (S3) in the NASDAQ Market Replay application.

+ +
+ +
+ + + + + + +
+
+
+
+
+
+ + +
+
+ + + + +
+ + + + + + + + + + + +
+ + + + + + + + +​​​​​ \ No newline at end of file diff --git a/test/infoq_remote_test.rb b/test/infoq_remote_test.rb new file mode 100644 index 0000000..7700d6b --- /dev/null +++ b/test/infoq_remote_test.rb @@ -0,0 +1,21 @@ +# Remote Infoq tests. Actually hit their website +# and attempt to parse the data returned. + +require 'test/unit' +require 'src/websites/infoq' + +class InfoqRemoteTest < Test::Unit::TestCase + + def test_get_page_data + iq = Infoq.new() + + # We can't rely on the fixture here, because Infoq might + # change their page layout. Instead, check that we can actually + # find the base64 regex (containing the FLV URL). + page_data = iq.send('get_page_data', 'http://www.infoq.com/interviews/jim-weirich-discusses-rake') + + test_result = iq.get_video_url(page_data) + assert_equal('http://flv.thruhere.net/interviews/JimWeirich.flv', test_result) + end + +end diff --git a/test/infoq_test.rb b/test/infoq_test.rb new file mode 100644 index 0000000..51d1a3f --- /dev/null +++ b/test/infoq_test.rb @@ -0,0 +1,27 @@ +# Unit tests for the Infoq class. Basically just checking +# the results of parse_video_url for now. + +require 'test/unit' +require 'src/websites/infoq' + +class InfoqTest < Test::Unit::TestCase + + def test_owns_infoq_urls + assert(Infoq.owns_url?('http://www.infoq.com/interviews/jim-weirich-discusses-rake')) + end + + + def test_parse_video_url + iq = Infoq.new() + + page_data = nil + + File.open('test/fixtures/infoq/jim-weirich-discusses-rake.html') do |f| + page_data = f.read + end + + test_result = iq.send('parse_video_url', page_data) + assert_equal('http://flv.thruhere.net/interviews/JimWeirich.flv', test_result) + end + +end diff --git a/test/remote_test_suite.rb b/test/remote_test_suite.rb index 200e795..d4085d9 100644 --- a/test/remote_test_suite.rb +++ b/test/remote_test_suite.rb @@ -1 +1,2 @@ +require 'test/infoq_remote_test' require 'test/youporn_remote_test' diff --git a/test/test_suite.rb b/test/test_suite.rb index 5b01b0e..ddc078e 100644 --- a/test/test_suite.rb +++ b/test/test_suite.rb @@ -1,4 +1,5 @@ require 'test/howcast_test' +require 'test/infoq_test' require 'test/redtube_test' require 'test/website_test' require 'test/youporn_test' -- 2.43.2