VideoCache
Videocache is no longer in development.

Fixed & added new video sites

by blake on 23 Jun 2009

Hi,

I installed videocache 1.9.1 from the FreeBSD ports. The version I received was unable to cache break.com, MSN Soapbox, YouPorn, RedTube, and Vimeo. I have since fixed these sites and added support for the following to videocache.

*Vimeo mp4 video format.
*Dailymotion OGG format (openvideo.dailymotion.com)
*Last.fm, Viddler.com, LiveLeak.com and Buzznet.com caching.

I also have partial support for videos from Yahoo Video (video.yahoo.com). I need testers for this code. The patch is below.

diff --git videocache/config.py videocache/config.py
index 6f3a51e..6fc4226 100755
--- videocache/config.py
+++ videocache/config.py
@@ -672,6 +672,36 @@ class YumConf(StartupConf):
     max_break_video_size = Option(0)
     min_break_video_size = Option(0)

+    # Last.fm Specific Options
+    enable_lastfm_cache = Option(1)
+    lastfm_cache_dir = Option('lastfm')
+    max_lastfm_video_size = Option(0)
+    min_lastfm_video_size = Option(0)
+
+    # Viddler Specific Options
+    enable_viddler_cache = Option(1)
+    viddler_cache_dir = Option('viddler')
+    max_viddler_video_size = Option(0)
+    min_viddler_video_size = Option(0)
+
+    # LiveLeak Specific Options
+    enable_liveleak_cache = Option(1)
+    liveleak_cache_dir = Option('liveleak')
+    max_liveleak_video_size = Option(0)
+    min_liveleak_video_size = Option(0)
+
+    # Yahoo Video Specific Options
+    enable_yahoo_cache = Option(1)
+    yahoo_cache_dir = Option('yahoo')
+    max_yahoo_video_size = Option(0)
+    min_yahoo_video_size = Option(0)
+
+    # Buzznet.com Video Specific Options
+    enable_buzznet_cache = Option(1)
+    buzznet_cache_dir = Option('buzznet')
+    max_buzznet_video_size = Option(0)
+    min_buzznet_video_size = Option(0)
+
     _reposlist = []

 def readStartupConfig(configfile, root):
diff --git videocache/videocache.py videocache/videocache.py
index e3dea80..407a1bf 100755
--- videocache/videocache.py
+++ videocache/videocache.py
@@ -36,6 +36,7 @@ import sys
 import threading
 import time
 import urlgrabber
+import urllib
 import urllib2
 import urlparse

@@ -84,20 +85,26 @@ def set_globals(type_low):
     return (enable_cache, cache_dir, max_video_size, min_video_size)

 # Website specific options
-(enable_youtube_cache, youtube_cache_dir, max_youtube_video_size, min_youtube_video_size) = set_globals('youtube') 
-(enable_metacafe_cache, metacafe_cache_dir, max_metacafe_video_size, min_metacafe_video_size) = set_globals('metacafe')
+(enable_bliptv_cache, bliptv_cache_dir, max_bliptv_video_size, min_bliptv_video_size) = set_globals('bliptv')
+(enable_break_cache, break_cache_dir, max_break_video_size, min_break_video_size) = set_globals('break')
+(enable_buzznet_cache, buzznet_cache_dir, max_buzznet_video_size, min_buzznet_video_size) = set_globals('buzznet')
 (enable_dailymotion_cache, dailymotion_cache_dir, max_dailymotion_video_size, min_dailymotion_video_size)=set_globals('dailymotion')
 (enable_google_cache, google_cache_dir, max_google_video_size, min_google_video_size) = set_globals('google')
+(enable_lastfm_cache, lastfm_cache_dir, max_lastfm_video_size, min_lastfm_video_size) = set_globals('lastfm')
+(enable_liveleak_cache, liveleak_cache_dir, max_liveleak_video_size, min_liveleak_video_size) = set_globals('liveleak')
+(enable_liveleak_cache, liveleak_cache_dir, max_liveleak_video_size, min_liveleak_video_size) = set_globals('liveleak')
+(enable_metacafe_cache, metacafe_cache_dir, max_metacafe_video_size, min_metacafe_video_size) = set_globals('metacafe')
 (enable_redtube_cache, redtube_cache_dir, max_redtube_video_size, min_redtube_video_size) = set_globals('redtube')
-(enable_xtube_cache, xtube_cache_dir, max_xtube_video_size, min_xtube_video_size) = set_globals('xtube')
-(enable_vimeo_cache, vimeo_cache_dir, max_vimeo_video_size, min_vimeo_video_size) = set_globals('vimeo')
-(enable_wrzuta_cache, wrzuta_cache_dir, max_wrzuta_video_size, min_wrzuta_video_size) = set_globals('wrzuta')
-(enable_youporn_cache, youporn_cache_dir, max_youporn_video_size, min_youporn_video_size) = set_globals('youporn')
 (enable_soapbox_cache, soapbox_cache_dir, max_soapbox_video_size, min_soapbox_video_size) = set_globals('soapbox')
 (enable_tube8_cache, tube8_cache_dir, max_tube8_video_size, min_tube8_video_size) = set_globals('tube8')
 (enable_tvuol_cache, tvuol_cache_dir, max_tvuol_video_size, min_tvuol_video_size) = set_globals('tvuol')
-(enable_bliptv_cache, bliptv_cache_dir, max_bliptv_video_size, min_bliptv_video_size) = set_globals('bliptv')
-(enable_break_cache, break_cache_dir, max_break_video_size, min_break_video_size) = set_globals('break')
+(enable_viddler_cache, viddler_cache_dir, max_viddler_video_size, min_viddler_video_size) = set_globals('viddler')
+(enable_vimeo_cache, vimeo_cache_dir, max_vimeo_video_size, min_vimeo_video_size) = set_globals('vimeo')
+(enable_wrzuta_cache, wrzuta_cache_dir, max_wrzuta_video_size, min_wrzuta_video_size) = set_globals('wrzuta')
+(enable_xtube_cache, xtube_cache_dir, max_xtube_video_size, min_xtube_video_size) = set_globals('xtube')
+(enable_yahoo_cache, yahoo_cache_dir, max_yahoo_video_size, min_yahoo_video_size) = set_globals('yahoo')
+(enable_youporn_cache, youporn_cache_dir, max_youporn_video_size, min_youporn_video_size) = set_globals('youporn')
+(enable_youtube_cache, youtube_cache_dir, max_youtube_video_size, min_youtube_video_size) = set_globals('youtube') 

 class Function_Thread(threading.Thread):
     def __init__(self, fid):
@@ -173,6 +180,9 @@ class VideoIDPool:
             return self.queue[video_id]
         return False

+    def get_queue_size(self):
+        return len(self.queue)
+
     def remove_from_queue(self, video_id):
         """Dequeue a video_id from the download queue."""
         if video_id in self.queue.keys():
@@ -385,7 +395,7 @@ def refine_url(url, arg_drop_list = []):
                 query += arg + '&'
         except:
             continue
-    return (urllib2.splitquery(url)[0] + '?' + query.rstrip('&')).rstrip('?')
+    return (urllib.splitquery(url)[0] + '?' + query.rstrip('&')).rstrip('?')

 def download_from_source(args):
     """This function downloads the file from remote source and caches it."""
@@ -568,6 +578,66 @@ def squid_part():

             # Youtube videos served via cache.googlevideo.com are handled here.
             # This code has been merged with Google.com videos
+
+            # Last.fm caching is handled here
+            if enable_lastfm_cache:
+                if host.find('.last.fm') > -1 and path.find('/flv/') > -1:
+                    type = 'LASTFM'
+                    try:
+                        video_id = path.strip('/').split('/')[-1]
+                    except:
+                        log(format%(pid, client, '-', 'URL_ERROR', type, 'Error in parsing the url ' + new_url))
+                        video_id = None
+                    if video_id is not None:
+                        new_url = submit_video(pid, client, type, url, video_id)
+
+            # Viddler caching is handled here
+            if enable_viddler_cache:
+                if host.find('a.ec.viddler.com') > -1 and path.find('.flv') > -1:
+                    type = 'VIDDLER'
+                    try:
+                        video_id = path.strip('/').split('/')[-1]
+                    except:
+                        log(format%(pid, client, '-', 'URL_ERROR', type, 'Error in parsing the url ' + new_url))
+                        video_id = None
+                    if video_id is not None:
+                        new_url = submit_video(pid, client, type, url, video_id)
+
+            # LiveLeak caching is handled here
+            if enable_liveleak_cache:
+                if re.compile('cdnll-[0-9]\\.liveleak\\.com').search(host) > -1 and path.find('.flv') > -1:
+                    type = 'LIVELEAK'
+                    try:
+                        video_id = path.strip('/').split('/')[-1]
+                    except:
+                        log(format%(pid, client, '-', 'URL_ERROR', type, 'Error in parsing the url ' + new_url))
+                        video_id = None
+                    if video_id is not None:
+                        new_url = submit_video(pid, client, type, url, video_id)
+
+            # Yahoo Video (video.yahoo.com) caching is handled here
+            if enable_yahoo_cache:
+                if re.compile('snfs(.*)\\/(yp|videosearch)\\/').search(path) > -1 and path.find('.flv') > -1:
+                    type = 'YAHOO'
+                    try:
+                        video_id = path.strip('/').split('/')[-1]
+                    except:
+                        log(format%(pid, client, '-', 'URL_ERROR', type, 'Error in parsing the url ' + new_url))
+                        video_id = None
+                    if video_id is not None:
+                        new_url = submit_video(pid, client, type, url, video_id)
+
+            # BuzzNet.com caching is handled here
+            if enable_buzznet_cache:
+                if (host.find('cdn.buzznet.com') > -1 or re.compile('buzznet-[0-9]?[0-9]\\.vo\\.llnwd\\.net').search(host) > -1) and path.find('.flv') > -1:
+                    type = 'BUZZNET'
+                    try:
+                        video_id = path.strip('/').split('/')[-1]
+                    except:
+                        log(format%(pid, client, '-', 'URL_ERROR', type, 'Error in parsing the url ' + new_url))
+                        video_id = None
+                    if video_id is not None:
+                        new_url = submit_video(pid, client, type, url, video_id)

             # Metacafe.com caching is handled here.
             if enable_metacafe_cache:
@@ -583,7 +653,7 @@ def squid_part():

             # Dailymotion.com caching is handled here.
             if enable_dailymotion_cache:
-                if (re.compile('proxy[a-z0-9\\-][a-z0-9][a-z0-9][a-z0-9]?\\.dailymotion\\.com').search(host) or host.find('vid.akm.dailymotion.com') > -1 or host.find('.cdn.dailymotion.com') > -1)  and (path.find('.flv') > -1 or path.find('.on2') > -1):
+                if (re.compile('proxy[a-z0-9\\-][a-z0-9][a-z0-9][a-z0-9]?\\.dailymotion\\.com').search(host) or host.find('vid.akm.dailymotion.com') > -1 or host.find('.cdn.dailymotion.com') > -1)  and re.compile('\\.(flv|on2|ogg)').search(path) > -1:
                     type = 'DAILYMOTION'
                     try:
                         video_id = path.strip('/').split('/')[-1]
@@ -619,7 +689,7 @@ def squid_part():

             # Redtube.com caching is handled here.
             if enable_redtube_cache:
-                if host.find('dl.redtube.com') > -1 and path.find('.flv') > -1:
+                if re.compile('c[0-9a-z][0-9a-z][0-9a-z]\\.redtube\\.com').search(host) > -1 and path.find('_videos_') > -1 and path.find('.flv') > -1:
                     type = 'REDTUBE'
                     try:
                         video_id = path.strip('/').split('/')[-1]
@@ -643,7 +713,7 @@ def squid_part():

             # Vimeo.com caching is handled here.
             if enable_vimeo_cache:
-                if host.find('bitcast.vimeo.com') > -1 and path.find('vimeo/videos/') > -1 and path.find('.flv') > -1:
+                if host.find('bitcast.vimeo.com') > -1 and path.find('vimeo/v') > -1 and (path.find('.flv') > -1 or path.find('.mp4') > -1):
                     type = 'VIMEO'
                     try:
                         video_id = path.strip('/').split('/')[-1]
@@ -672,7 +742,7 @@ def squid_part():

             # Youporn.com audio file caching is handled here.
             if enable_youporn_cache:
-                if host.find('.files.youporn.com') > -1 and path.find('/flv/') > -1 and path.find('.flv') > -1:
+                if host.find('.files.youporn.com') > -1 and path.find('flv/') > -1 and path.find('.flv') > -1:
                     type = 'YOUPORN'
                     try:
                         video_id = path.strip('/').split('/')[-1]
@@ -684,7 +754,7 @@ def squid_part():

             # Soapbox.msn.com audio file caching is handled here.
             if enable_soapbox_cache:
-                if host.find('.msn.com.edgesuite.net') > -1 and path.find('.flv') > -1:
+                if (host.find('msnbcecn.vo.llnwd.net') > -1 or host.find('.video.msn.com') > -1 or host.find('.msn.com.edgesuite.net') > -1) and path.find('.flv') > -1:
                     type = 'SOAPBOX'
                     try:
                         video_id = path.strip('/').split('/')[-1]
@@ -720,7 +790,7 @@ def squid_part():

             # Blip.tv Video file caching is handled here.
             if enable_bliptv_cache:
-                if re.compile('\\.video[a-z0-9]?[a-z0-9]?\\.blip\\.tv').search(host) and (path.find('.flv') > -1 or path.find('.wmv') > -1 or path.find('.mp4') > -1 or path.find('.rm') > -1 or path.find('.ram') > -1 or path.find('.mov') > -1 or path.find('.avi') > -1 or path.find('.m4v') > -1 or path.find('.mp3') > -1) :
+                if re.compile('\\.video[a-z0-9]?[a-z0-9]?\\.blip\\.tv').search(host) and re.compile('\\.(flv|wmv|mp4|rm|ram|mov|avi|m4v|mp3)').search(path) > -1:
                     type = 'BLIPTV'
                     try:
                         video_id = path.strip('/').split('/')[-1]
@@ -732,7 +802,7 @@ def squid_part():

             # Break.com Video file caching is handled here.
             if enable_break_cache:
-                if host.find('video.break.com') > -1 and (path.find('.flv') > -1 or path.find('.mp4')):
+                if re.compile('media[0-9].break.com').search(host) > -1 and (path.find('.flv') > -1 or path.find('.mp4')):
                     type = 'BREAK'
                     try:
                         video_id = path.strip('/').split('/')[-1]

And here are my Squid ACLs.

acl videocache_allow_url url_regex -i [a-z0-9][0-9a-z][0-9a-z]?[0-9a-z]?[0-9a-z]?\.xtube\.com\/(.*)flv
acl videocache_allow_url url_regex -i \.files\.youporn\.com\/(.*)\/(.*)flv\/
acl videocache_allow_url url_regex -i \.last\.fm\/serve\/flv\/(.*)\.flv
acl videocache_allow_url url_regex -i \.mais\.uol\.com\.br\/(.*)\.flv
acl videocache_allow_url url_regex -i \.msn\.com\.edgesuite\.net\/(.*)\.flv
acl videocache_allow_url url_regex -i \.video[a-z0-9]?[a-z0-9]?\.blip\.tv\/(.*)\.(flv|avi|mov|mp3|m4v|mp4|wmv|rm|ram)
acl videocache_allow_url url_regex -i \.video\.msn\.com\/(.*)\.flv
acl videocache_allow_url url_regex -i a\.ec\.viddler\.com\/(.*)\.flv
acl videocache_allow_url url_regex -i bitcast\.vimeo\.com\/vimeo\/v\/
acl videocache_allow_url url_regex -i buzznet-[0-9]?[0-9]\.vo\.llnwd\.net\/(.*)\.flv
acl videocache_allow_url url_regex -i c[a-z0-9][0-9a-z][0-9a-z]\.redtube\.com\/_videos_(.*)flv
acl videocache_allow_url url_regex -i cdn\.buzznet\.com\/(.*)\.flv$
acl videocache_allow_url url_regex -i cdnll-[0-9]\.liveleak\.com\/(.*)\.flv
acl videocache_allow_url url_regex -i media[0-9]\.break\.com\/(.*)\.(flv|mp4)
acl videocache_allow_url url_regex -i media[a-z0-9]?[a-z0-9]?[a-z0-9]?\.tube8\.com\/ mobile[a-z0-9]?[a-z0-9]?[a-z0-9]?\.tube8\.com\/
acl videocache_allow_url url_regex -i msnbcecn\.vo\.llnwd\.net\/(.*)\.flv
acl videocache_allow_url url_regex -i proxy[a-z0-9\-][a-z0-9][a-z0-9][a-z0-9]?\.dailymotion\.com\/
acl videocache_allow_url url_regex -i snfs(.*)\/(yp|videosearch)\/(.*)\.flv
acl videocache_allow_url url_regex -i va\.wrzuta\.pl\/wa[0-9][0-9][0-9][0-9]?
acl videocache_allow_url url_regex -i vid\.akm\.dailymotion\.com\/

I hope this is of use to someone.

1 Answer

by last1ghost1 on 27 Jul 2009

How can I patch my videocache to test it?