#!/usr/bin/env python3
"""Download every JPG image linked from a web page.

Usage:
    python webparser.py <url>

Fetches the page at <url>, extracts the href of every <a> tag whose
target contains 'jpg', creates a directory named after the last path
component of the URL, and saves each image into it.

Author : flo
date   : 2016.07.18 (ported to Python 3; duplicate platform branches
         collapsed; None-href crash fixed)
website: http://www.blab.com/blab/blab/blab
"""

import os
import sys
import urllib.request
from io import BytesIO

try:
    import requests
except ImportError:
    print("yoyoyo, Missing Plugin, download requests dude.")
    sys.exit(1)

try:
    from bs4 import BeautifulSoup
except ImportError:
    print("yoyoyo, Missing Plugin, download BeautifulSoup version 4 dude.")
    sys.exit(1)

try:
    # Guarded like the other third-party deps for a friendly message
    # instead of a bare traceback.
    from PIL import Image
except ImportError:
    print("yoyoyo, Missing Plugin, download Pillow dude.")
    sys.exit(1)


def getdata(argv):
    """Fetch the page, parse out image links, and download them.

    argv -- URL string; when empty/falsy (the historical call path from
            main()), falls back to sys.argv[1].

    Side effects: writes the page to a temp file in the cwd, removes it,
    creates a directory named after the page, and chdirs into it before
    downloading. Exits with status 1 on missing URL or failed fetch.
    """
    url = argv or (sys.argv[1] if len(sys.argv) > 1 else None)
    if not url:
        print("Usage: %s <url>" % sys.argv[0])
        sys.exit(1)

    fname = url.split('/')[-1]
    # Save the raw HTML next to us (kept from the original design);
    # urlretrieve raises on network-level failure.
    urllib.request.urlretrieve(url, fname)
    fpath = os.path.join(os.getcwd(), fname)

    if not os.path.isfile(fpath):
        print("Something wrong with your internet or their website, "
              "file not downloaded: " + url + "\n")
        print("Run this again, maybe.")
        sys.exit(1)

    # NOTE(review): the original had byte-identical win32/else branches;
    # collapsed into this single path.
    with open(fpath, 'r') as f:
        data = f.read()
    tmplist = parsedata(data)

    os.remove(fpath)          # temp HTML no longer needed
    os.mkdir(fname)           # one directory per scraped page
    os.chdir(fname)
    download(tmplist)


def parsedata(d):
    """Return the list of jpg hrefs found in HTML string *d*.

    Anchors without an href are skipped (a.get('href') returns None),
    and any query string ('?...') is stripped from each link.
    """
    soup = BeautifulSoup(d, "lxml")
    hrefs = [a.get('href') for a in soup.find_all('a')]
    # Filter out None before the substring test -- 'jpg' in None raises.
    jpg_links = [h for h in hrefs if h and 'jpg' in h]
    # Splitting the ? from the url, keeping only the bare image path.
    return [link.split('?')[0] for link in jpg_links]


def download(ilist):
    """Fetch each image URL in *ilist* and save it into the cwd.

    Prints a progress dot per image; the local filename is the last
    path component of the URL.
    """
    for img in ilist:
        iname = img.split('/')[-1]
        r = requests.get(img)
        sys.stdout.write('.')
        # Round-trip through PIL so a corrupt download fails loudly
        # instead of leaving a truncated file behind.
        i = Image.open(BytesIO(r.content))
        i.save(iname)
        sys.stdout.flush()
    print("\nDone")


def main():
    """Entry point: delegate to getdata, which reads sys.argv."""
    getdata("")


if __name__ == "__main__":
    main()