#!/usr/bin/env python
"""
Author  : flo
Date    : 2016.07.27
Purpose : Example of parsing the links out of an HTML page and downloading
          the JPEG images they point to.
Usage   : python webparserms.py <website>
Comments: Works best on directory-style URLs that do not end in an HTML
          file, e.g. http://www.blab.com/blab/blab/blab/blab/

The same source is written to run on Python 2 and Python 3.
"""
import os
import sys
from io import BytesIO

try:  # Python 3
    from urllib.parse import urljoin, urlparse
except ImportError:  # Python 2
    from urlparse import urljoin, urlparse

# Third-party plugins are recorded as missing instead of killing the
# interpreter at import time; _require_plugins() reports them when the
# script actually needs them.
_MISSING_PLUGINS = []

try:
    import requests
except ImportError:
    requests = None
    _MISSING_PLUGINS.append(
        "yoyoyo, Missing Plugin, download requests dude.")

try:
    from bs4 import BeautifulSoup, FeatureNotFound
except ImportError:
    BeautifulSoup = None
    FeatureNotFound = None
    _MISSING_PLUGINS.append(
        "yoyoyo, Missing Plugin, download BeautifulSoup version 4 dude.")

try:
    from PIL import Image
except ImportError:
    Image = None
    _MISSING_PLUGINS.append(
        "yoyoyo, Missing Plugin, download Pillow (PIL) dude.")


def _require_plugins():
    """Print every missing third-party plugin and exit with status 1."""
    if _MISSING_PLUGINS:
        for message in _MISSING_PLUGINS:
            print(message)
        sys.exit(1)


def _target_dir(url):
    """Return the directory name used to store the images of *url*.

    This is the last non-empty segment of the URL path.  When the path has
    no segment at all (e.g. ``http://host/``) the host name is used so the
    caller never tries to create a directory with an empty name.
    """
    parsed = urlparse(url)
    segments = [seg for seg in parsed.path.split('/') if seg]
    if segments:
        return segments[-1]
    return parsed.hostname or "images"


def _jpg_links(hrefs, url):
    """Return absolute links for every href in *hrefs* that contains 'jpg'.

    ``None``/empty hrefs (anchors without an ``href`` attribute) are
    skipped.  Links are resolved against *url* with ``urljoin`` so that
    relative, root-relative and already-absolute hrefs all come out right.
    """
    return [urljoin(url, href) for href in hrefs if href and 'jpg' in href]


def _image_name(link):
    """Return the file name of *link*: the last path segment, no query."""
    return urlparse(link).path.split('/')[-1]


def getdata():
    """Fetch the page named by ``sys.argv[1]`` and download its JPEG links.

    The images are saved in a directory named after the URL (see
    ``_target_dir``), which is created if needed and made the current
    working directory.

    Exits with status 1 when a plugin is missing, when no URL was given on
    the command line, or when the server response is not OK.
    """
    _require_plugins()
    if len(sys.argv) < 2:
        print("Usage: python webparserms.py <website>")
        sys.exit(1)
    url = sys.argv[1]

    # The context manager closes the session even if the request raises.
    with requests.Session() as sess:
        data = sess.get(url, verify=True)

    if not data.ok:
        print("Something wrong with the website: " + url + "\n")
        print("Run this again, maybe.")
        sys.exit(1)

    tmplist = parsedata(data.text, url)
    mdir = _target_dir(url)
    # Re-running against the same URL must not fail on the existing folder.
    if not os.path.isdir(mdir):
        os.mkdir(mdir)
    os.chdir(mdir)
    download(tmplist)


def parsedata(data, url):
    """Return the absolute JPEG links found in the HTML text *data*.

    Every ``<a>`` tag is inspected; hrefs containing 'jpg' are resolved
    against *url*.  The lxml parser is preferred and the parser bundled
    with Python is used when lxml is not installed.
    """
    _require_plugins()
    try:
        soup = BeautifulSoup(data, "lxml")
    except FeatureNotFound:
        # lxml is an optional extra; fall back to the built-in parser.
        soup = BeautifulSoup(data, "html.parser")
    hrefs = [a.get('href') for a in soup.find_all('a')]
    return _jpg_links(hrefs, url)


def download(ilist):
    """Download every link in *ilist* and save it in the current directory.

    Each image is decoded with PIL and saved under the last path segment
    of its link.  A dot is written to stdout per saved image.  A link that
    returns a non-OK response, or whose content cannot be decoded or
    saved, is reported on stderr and skipped so the remaining images are
    still downloaded.
    """
    _require_plugins()
    for img in ilist:
        iname = _image_name(img)
        r = requests.get(img)
        if not r.ok:
            sys.stderr.write(
                "\nSkipping " + img + ": HTTP " + str(r.status_code) + "\n")
            continue
        try:
            Image.open(BytesIO(r.content)).save(iname)
        except (IOError, KeyError, ValueError) as err:
            # IOError: content is not a readable image / cannot be written.
            # KeyError, ValueError: PIL cannot map the name to a format.
            sys.stderr.write("\nSkipping " + img + ": " + str(err) + "\n")
            continue
        sys.stdout.write('.')
        sys.stdout.flush()
    print("\nDone")


def main():
    """Script entry point: run the download for the URL given in argv."""
    getdata()


if __name__ == "__main__":
    main()