# rss_leech.py
#
# Pasted by snakebite on Fri Nov 5 10:03:26 2010 UTC as Python
#
# Download as plain text - View in fullscreen

#!/usr/bin/env python2

import sys, urllib, urllib2, re, os

# Matches image URLs (jpg/gif/png) wrapped in <link> elements of the feed.
_IMAGE_LINK_RE = re.compile(r'<link>([^<]+\.(?:jpg|gif|png))</link>')

# File names longer than this are shortened, keeping their extension.
_MAX_NAME_LEN = 255


def _remove_quietly(path):
	"""Delete *path*, ignoring the error if it cannot be removed."""
	try:
		os.remove(path)
	except OSError:
		pass


def leech_thread(url):
	"""Download every image linked from the feed at *url* into the current directory.

	url -- feed location. Anything starting with 'http' is fetched with
	       urllib2; everything else is opened as a local file path.

	Each ``<link>...</link>`` element whose content ends in .jpg, .gif or .png
	is saved under the last path component of its (unquoted) URL. Files that
	already exist are skipped. A feed that cannot be loaded, or an image that
	cannot be downloaded, is reported on stdout and skipped.

	Returns None. KeyboardInterrupt is re-raised after the partially written
	file has been removed.
	"""
	# Single-argument print(...) produces the same output as the Python 2
	# print statement this script otherwise uses.
	print('Preparing to leech %s' % urllib.unquote(url))
	try:
		if url.startswith('http'):
			page = urllib2.urlopen(url).read()
		else:
			with open(url) as feed:
				page = feed.read()
	except Exception as e:
		# Same policy as for individual images below: report and move on,
		# so one unreadable feed does not abort the remaining ones.
		print('Failed to load page (%s)' % e)
		return

	for link in _IMAGE_LINK_RE.findall(page):
		# Unquote *before* taking the basename, otherwise an encoded slash
		# (%2F) would smuggle directory components such as '../' into the
		# name and let the feed write outside the current directory.
		name = os.path.basename(urllib.unquote(link))
		if len(name) > _MAX_NAME_LEN:
			stem, ext = os.path.splitext(name)
			name = stem[:_MAX_NAME_LEN - len(ext)] + ext
		if os.path.exists(name):
			continue

		print('Downloading %s' % urllib.unquote(link))
		try:
			response = urllib2.urlopen(urllib2.Request(link))
			try:
				data = response.read()
			finally:
				response.close()
			# Images are binary data: write in 'wb' and close the handle
			# deterministically.
			with open(name, 'wb') as out:
				out.write(data)
		except KeyboardInterrupt:
			_remove_quietly(name)
			raise
		except Exception as e:
			# The file may not exist yet if the download itself failed, so
			# the cleanup must tolerate a missing file.
			_remove_quietly(name)
			print('Failed to download %s (%s)' % (link, e))

# Script entry: every command-line argument is a feed (URL or local file
# path) and is processed in the order given.
feed_args = sys.argv[1:]
for feed_arg in feed_args:
	leech_thread(feed_arg)