]> gitweb.michael.orlitzky.com - dead/whatever-dl.git/blob - src/websites/bliptv.rb
Add a missing character to VALID_BLIPTV_REDIR_URL_REGEX.
[dead/whatever-dl.git] / src / websites / bliptv.rb
1 #
2 # Copyright Michael Orlitzky
3 #
4 # http://michael.orlitzky.com/
5 #
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
15 #
16 # http://www.fsf.org/licensing/licenses/gpl.html
17 #
18
19 require 'src/website'
20 require 'cgi'
21
# Website handler for blip.tv.
#
# Recognizes both "file" page URLs (blip.tv/file/<id>) and "play"
# redirect URLs (blip.tv/play/<token>), and extracts the URL of the
# underlying video file from the page source.
#
# NOTE(review): get_page_data and @url are not defined in this file;
# presumably they come from the Website superclass -- confirm there.
class Bliptv < Website

  # A blip.tv file page: optional "http://", optional subdomain, then
  # /file/<digits> followed by anything.
  VALID_BLIPTV_URL_REGEX = /^(http:\/\/)?([[:alnum:]\-]+\.)?blip\.tv\/file\/(\d+)(.*)?$/

  # A blip.tv "play" URL: optional "http://", optional subdomain, then
  # /play/<alphanumerics and underscores>. Requesting one of these is
  # expected to produce an HTTP redirect (see get_redirect_url).
  VALID_BLIPTV_REDIR_URL_REGEX = /^(http:\/\/)?([[:alnum:]\-]+\.)?blip\.tv\/play\/[[:alnum:]_]+$/

  # Does this class know how to handle the given URL?
  #
  # Returns a truthy value (the match offset) if +url+ matches either
  # of the regular expressions above, and nil otherwise.
  def self.owns_url?(url)
    return (url =~ VALID_BLIPTV_URL_REGEX ||
            url =~ VALID_BLIPTV_REDIR_URL_REGEX)
  end


  # Find the URL of the video file belonging to @url.
  #
  # Raises a StandardError if any of the intermediate URLs, or the
  # video URL itself, cannot be determined.
  def get_video_url()
    if @url =~ VALID_BLIPTV_URL_REGEX
      page_data = get_page_data(@url)
    else
      # It's a redirect. The redirect target contains the ID of an RSS
      # page, and the RSS page in turn contains the "real" file page
      # URL. Once we have the real URL, we can proceed as if we had
      # been given that URL in the first place.
      rss_page_url = parse_rss_url(get_redirect_url)
      rss_data = get_page_data(rss_page_url)
      real_page_url = parse_page_url(rss_data)
      page_data = get_page_data(real_page_url)
    end

    return parse_video_url(page_data)
  end


  protected

  # Pull the first blip.tv file page URL out of +data+ (the contents of
  # an RSS page). Raises a StandardError if there isn't one.
  def parse_page_url(data)
    # A simplified VALID_BLIPTV_URL_REGEX.
    page_url_regex = /http:\/\/blip\.tv\/file\/\d+/
    matches = page_url_regex.match(data)

    if matches.nil?
      raise StandardError.new("Couldn't parse the real page URL from the RSS page.")
    end

    return matches[0]
  end

  # Build the RSS page URL from the numeric ID following "/flash/" in
  # +url+ (the redirect target). Raises a StandardError if +url+
  # contains no such ID.
  def parse_rss_url(url)
    rss_id_regex = /\/flash\/(\d+)/
    matches = rss_id_regex.match(url)

    # A successful match always carries its capture group, so a nil
    # check is all that is needed.
    if matches.nil?
      raise StandardError.new("Couldn't parse the video ID from the redirect URL: #{url}")
    end

    return "http://blip.tv/rss/flash/#{matches[1]}"
  end

  # Request @url and return the (CGI-unescaped) value of the Location
  # header in the response. Raises a StandardError if the response has
  # no Location header.
  #
  # NOTE(review): URI and Net::HTTP are not required by this file;
  # presumably src/website loads them -- confirm.
  def get_redirect_url
    # VALID_BLIPTV_REDIR_URL_REGEX accepts URLs without the "http://"
    # prefix, but URI.parse can't find a host in those. Add the scheme
    # when there is none at all.
    has_scheme = (@url =~ /\A[a-z][a-z0-9+.\-]*:\/\//i)
    full_url = has_scheme ? @url : "http://#{@url}"
    uri = URI.parse(full_url)

    response = Net::HTTP.start(uri.host, uri.port) do |http|
      http.get(uri.request_uri, {})
    end

    # The header lookup returns nil when the server didn't redirect
    # us; fail with a useful message rather than inside CGI::unescape.
    location = response['location']
    if location.nil?
      raise StandardError.new("No redirect location was returned for the URL: #{@url}")
    end

    return CGI::unescape(location)
  end


  # Find a video file URL within +page_data+ (the source of a file
  # page). The source formats are preferred, in the order listed
  # below; failing those, the URL passed to setPrimaryMediaUrl() is
  # used. Raises a StandardError if nothing matches.
  def parse_video_url(page_data)
    source_format_regexes = [
      # First, try to find the MOV video. The source videos are
      # usually encoded with MOV.
      /"Quicktime \(\.mov\)", "attribute" : "(.*?\.mov)/i,

      # I've seen some free software videos encoded as OGG/Vorbis, too.
      /"Ogg Theora\/Vorbis \(\.og[gv]\)", "attribute" : "(.*?\.og[gv])/i,

      # If that didn't work, try the WMV format, which is occasionally
      # used for the source as well.
      /"Windows Media \(\.wmv\)", "attribute" : "(.*?\.wmv)/i
    ]

    source_format_regexes.each do |source_regex|
      matches = source_regex.match(page_data)
      return matches[1] unless matches.nil?
    end

    # If none of the source formats are present, just grab the video
    # URL from the Flash variable and be done with it.
    video_url_regex = /setPrimaryMediaUrl\("(.*?\.(flv|mov|wmv|mp4|og[gv]))/i
    matches = video_url_regex.match(page_data)

    if matches.nil?
      raise StandardError.new("Couldn't parse any of the video format URLs.")
    end

    return matches[1]
  end

end