#!/usr/bin/env python2
import sys, urllib, urllib2, re, os
def _read_and_close(source):
    """Return everything readable from ``source`` and always close it."""
    try:
        return source.read()
    finally:
        source.close()


def _remove_quietly(path):
    """Delete ``path`` if it exists; a missing file is not an error."""
    try:
        os.remove(path)
    except OSError:
        pass


def leech_thread(url):
    """Download every image linked from a thread page into the current directory.

    ``url`` is either a web address (anything starting with ``http``) or the
    path of a local file.  The page text is scanned for ``<link>...</link>``
    elements whose content ends in ``.jpg``, ``.gif`` or ``.png``.  Each match
    is saved under its URL-unquoted basename (shortened to at most 255
    characters, keeping the extension) unless a file with that name already
    exists.

    Problems are reported on stdout instead of aborting the run: a page that
    cannot be loaded is skipped, and so is any single image that fails to
    download.  ``KeyboardInterrupt`` is re-raised after a partially written
    file has been removed.

    Returns None.
    """
    print('Preparing to leech %s' % urllib.unquote(url))
    try:
        if url.startswith('http'):
            page = _read_and_close(urllib2.urlopen(url))
        else:
            page = _read_and_close(open(url))
    except Exception:
        # Report and move on so that one bad page does not stop the remaining
        # command-line arguments.  ``Exception`` (not a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate.
        print('Failed to load page')
        return

    link_pattern = re.compile(r'<link>([^<]+\.(?:jpg|gif|png))</link>')
    for image_url in link_pattern.findall(page):
        filename = urllib.unquote(os.path.basename(image_url))
        if len(filename) > 255:
            # Shorten the stem but keep the extension.
            stem, ext = os.path.splitext(filename)
            filename = stem[:255 - len(ext)] + ext

        # Never overwrite something that is already on disk.
        if os.access(filename, os.F_OK):
            continue

        print('Downloading %s' % urllib.unquote(image_url))
        try:
            # Fetch first so a failed request never leaves an empty file.
            data = _read_and_close(urllib2.urlopen(urllib2.Request(image_url)))
            # Binary mode: image bytes must be written without newline
            # translation.
            with open(filename, 'wb') as out:
                out.write(data)
        except KeyboardInterrupt:
            _remove_quietly(filename)
            raise
        except Exception as e:
            _remove_quietly(filename)
            print('Failed to download %s (%s)' % (image_url, str(e)))
# Script entry: treat each command-line argument as a thread page (URL or
# local file path) and leech them one after another, in the order given.
thread_args = sys.argv[1:]
for thread_arg in thread_args:
    leech_thread(thread_arg)
|