# -*- coding:utf8 -*-
"""Download every JPEG of one beautylegmm.com gallery into ``dbmeizi/``.

Written for Python 2 (it relies on ``urllib2`` and ``urllib.urlretrieve``).
Gallery pages are fetched one after another and each image that is found is
downloaded on its own thread.  Running the module starts the crawl at page 1.
"""
import os
import sys
import thread
import threading
import urllib
import urllib2

from bs4 import BeautifulSoup

# Prefix prepended to the ``src`` of every image to build its download URL.
_SITE_ROOT = 'http://www.beautylegmm.com'
# Template of a gallery page URL; ``%s`` receives the page number.
_GALLERY_URL = 'http://www.beautylegmm.com/Tiara/beautyleg-936.html?page=%s'
# Directory that receives the numbered image files.
_OUTPUT_DIR = 'dbmeizi'
# A page with fewer <img> tags than this is treated as "past the last page".
_MIN_IMAGES_PER_PAGE = 5


class downloader(threading.Thread):
    """Thread that downloads a single URL to a single local file.

    :param url: address of the resource to fetch.
    :param name: local path the resource is written to.
    """

    def __init__(self, url, name):
        threading.Thread.__init__(self)
        self.url = url
        # NOTE: ``name`` is also threading.Thread's name attribute, so the
        # thread ends up named after the file it writes.
        self.name = name

    def run(self):
        """Fetch ``self.url`` and store it at ``self.name``."""
        print('downling from %s' % self.url)
        urllib.urlretrieve(self.url, self.name)


# Every download thread created so far, in creation order.
threads = []


def page_loop(page=1):
    """Crawl the gallery from *page* onwards and download all its JPEGs.

    For each page, every ``<img>`` whose ``src`` contains ``'jpg'`` is
    downloaded on a new :class:`downloader` thread to
    ``dbmeizi/<counter><last four characters of the URL>``.  The counter is
    the module-level ``x``, which is incremented for each image.

    :param page: page number to start from (an int or a numeric string).

    This function does not return: once a page contains fewer than five
    ``<img>`` tags it waits for the running downloads, prints the completion
    message and calls ``sys.exit(0)``.
    """
    global x

    # urlretrieve cannot create missing directories, so make sure the target
    # directory exists before any download thread is started.
    if not os.path.isdir(_OUTPUT_DIR):
        os.makedirs(_OUTPUT_DIR)

    # Iterate instead of recursing once per page, so the number of pages is
    # not limited by the interpreter's recursion depth.
    while True:
        response = urllib2.urlopen(_GALLERY_URL % page)
        try:
            soup = BeautifulSoup(response)
        finally:
            response.close()
        my_girl = soup.find_all('img')

        # End-of-gallery detection (crude): a page with only a handful of
        # images is assumed to be beyond the last real page.
        if len(my_girl) < _MIN_IMAGES_PER_PAGE:
            # Wait for the downloads so the completion message is accurate.
            for worker in threads:
                worker.join()
            print('已经全部抓取完毕')
            sys.exit(0)

        print('开始抓取')
        for girl in my_girl:
            link = girl.get('src')
            # <img> tags without a src attribute yield None; skip them
            # instead of failing on the substring test.
            if not link or 'jpg' not in link:
                continue
            flink = _SITE_ROOT + link
            print(flink)
            path = _OUTPUT_DIR + '/' + str(x) + flink[-4:]
            x = x + 1
            t = downloader(flink, path)
            threads.append(t)
            t.start()

        page = int(page) + 1
        print('开始抓取下一页')
        print('the %s page' % page)


# Running counter used to number the saved image files.
x = 1
page_loop()