src/websites/yikers.rb

   1 #
   2 # Copyright Michael Orlitzky
   3 #
   4 # http://michael.orlitzky.com/
   5 #
   6 # This program is free software: you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation, either version 3 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14 # GNU General Public License for more details.
  15 #
  16 # http://www.fsf.org/licensing/licenses/gpl.html
  17 #
  18
  19 require 'src/website'
  20
  21 # Needed to download the page, which is in turn
  22 # needed because it contains the video URL.
  23 require 'net/http'
  24
  25
  26 class Yikers < Website
  27
  28   VALID_YIKERS_URL_REGEX = /^(http:\/\/)?(www\.)?yikers\.com\/([[:alnum:]_]+)\.html$/
  29
  30   def self.owns_url?(url)
  31     return url =~ VALID_YIKERS_URL_REGEX
  32   end
  33
  34
  35   def get_video_url()
  36     # First we download the page. Each page contains a flash parameter
  37     # called 'xml' which contains the relative path to the video's XML file.
  38     # We download their XML file (passing it the video id as a side effect),
  39     # and that contains the URL of the video file we want.
  40     page_data = get_page_data(@url)
  41     xml_path = self.parse_xml_path(page_data)
  42
  43     # The XML path parameter doesn't contain a host name.
  44     xml_url = "http://#{self.server}#{xml_path}"
  45     xml_data = get_page_data(xml_url)
  46
  47     video_url = parse_video_url(xml_data)
  48
  49     return video_url
  50   end
  51
  52
  53   def get_video_filename()
  54     # Default to whatever comes after the final frontslash
  55     # in the main URL.
  56     filename = @url.split('/').pop()
  57
  58     # These page URLs are actually descriptive, so we can use
  59     # the file name of the HTML page as our video file name.
  60     filename_regex = /\/([[:alnum:]_]+)\.html$/
  61     matches = filename_regex.match(@url)
  62
  63     # Overwrite the default if our regex worked.
  64     filename = matches[1] if not (matches.nil? || matches.length < 1)
  65
  66     return (filename + '.flv')
  67   end
  68
  69   protected;
  70
  71   def parse_video_url(data)
  72     video_url_regex = /http:\/\/(cdn\.)?yikers\.com\/([[:alnum:]_\/]+)\.flv/
  73     matches = video_url_regex.match(data)
  74     video_url = matches[0] if not matches.nil?
  75
  76     return video_url
  77   end
  78
  79
  80   def parse_xml_path(data)
  81     xml_path_regex = /addVariable\(\'xml\', \'(.*?)\'\)/
  82     matches = xml_path_regex.match(data)
  83     xml_path = matches[1] if not (matches.nil? || matches.length < 2)
  84
  85     return xml_path
  86   end
  87
  88
  89   def get_page_data(url)
  90     uri = URI.parse(url)
  91
  92     response = Net::HTTP.start(uri.host, uri.port) do |http|
  93       http_path = uri.path
  94       http_path += ('?' + uri.query) if not uri.query.nil?
  95       http.get(http_path)
  96     end
  97
  98     return response.body
  99   end
 100
 101 end