1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # @Date : 2017-08-24 10:17:28
4 # @Author : EnderZhou (zptxwd@gmail.com)
5 # @Link : http://www.cnblogs.com/enderzhou/
6 # @Version : $Id$
7
8 import requests
9 from bs4 import BeautifulSoup as bs
10 import threading
11 import Queue
12 import urllib
13
class jiandan_ooxx(threading.Thread):
    """Worker thread that scrapes jandan.net ooxx pages and downloads images.

    Each worker repeatedly pulls a listing-page URL from the shared queue
    and saves every image found on that page into the current directory.
    """

    def __init__(self, queue):
        threading.Thread.__init__(self)
        self._queue = queue  # shared Queue.Queue of page URLs to scrape

    def run(self):
        # empty()/get_nowait() is racy when several workers drain the same
        # queue: another thread may take the last item between the check and
        # the get. Catch Queue.Empty instead of trusting empty() alone.
        while not self._queue.empty():
            try:
                url = self._queue.get_nowait()
            except Queue.Empty:
                break
            self.spider(url)

    def spider(self, url):
        """Fetch one listing page and download every image it references.

        url -- absolute URL of a jandan.net ooxx listing page.
        """
        r = requests.get(url=url)
        soup = bs(r.content, 'html.parser')
        imges = soup.find_all(name='img', attrs={})
        lists = []
        for i in imges:
            tag = str(i)
            if 'border' in tag:
                # Site-decoration images carry a border attribute; skip them.
                continue
            elif 'onload' in tag:
                # Lazy-loaded images keep the real address in org_src.
                src = i['org_src']
            else:
                src = i['src']
            lists.append(src)
            print(src)
            # src is protocol-relative ("//..."); prepend a scheme.
            img = 'http:' + src
            # Derive the local filename from the last path component.
            name = img.split('/')[-1]
            # Download inside the loop so every image on the page is saved,
            # not just the one processed last.
            urllib.urlretrieve(img, filename=name)
42
def main(number, pages=10):
    """Crawl the newest `pages` listing pages with a pool of worker threads.

    number -- the newest (highest) page number currently on the site.
    pages  -- how many of the most recent pages to crawl (default 10).
              Pass pages=number to crawl every page on the site.
    """
    url = 'http://jandan.net/ooxx/page-'
    queue = Queue.Queue()

    # Enqueue page numbers newest-first. The original stop of number-11
    # actually yielded 11 pages despite the 10-page comment; number-pages
    # matches the documented default. max(..., 0) keeps page indices
    # positive when number is small.
    for i in xrange(number, max(number - pages, 0), -1):
        queue.put(url + str(i))

    thread_count = 10
    threads = [jiandan_ooxx(queue) for _ in range(thread_count)]

    for t in threads:
        t.start()
    for t in threads:
        t.join()
61
if __name__ == '__main__':
    # Fetch the front page to discover the newest page number, then crawl.
    response = requests.get('http://jandan.net/ooxx')
    page_soup = bs(response.content, 'html.parser')
    spans = page_soup.find_all(name='span', attrs={'class': 'current-comment-page'})
    # The span text looks like "[123]"; strip the surrounding brackets.
    latest = int(spans[1].string[1:-1])
    main(latest)