# 多线程爬虫
# map 函数的使用:
#   from multiprocessing.dummy import Pool
#   pool = Pool(4)
#   results = pool.map(爬取函数, 网址列表)
# 实例演示:
import time
from multiprocessing.dummy import Pool as ThreadPool

import requests
10
def getsource(url, timeout=10):
    """Download ``url`` with an HTTP GET and return the page source as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            limit, a stalled connection would block the calling (worker)
            thread indefinitely.

    Returns:
        The decoded response body (``requests.Response.text``), so that
        ``pool.map(getsource, urls)`` collects the downloaded pages instead
        of a list of ``None``.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            timeout expires.
    """
    html = requests.get(url, timeout=timeout)
    return html.text
13
# Page URLs of the forum thread to crawl: pn=1 through pn=20.
urls = [
    'http://tieba.baidu.com/p/3522395718?pn=' + str(page)
    for page in range(1, 21)
]
19
# Baseline: fetch every page one after another on the main thread.
# perf_counter() is a monotonic clock meant for measuring intervals, so the
# result cannot be distorted by system clock adjustments as time.time() can.
time1 = time.perf_counter()

for i in urls:
    print(i)
    getsource(i)
time2 = time.perf_counter()
print('单线程耗时:' + str(time2 - time1))
27
# Same workload, this time spread over a pool of 4 worker threads.
pool = ThreadPool(4)
time3 = time.perf_counter()
try:
    # map() blocks until every URL has been processed and returns the
    # per-URL results in the same order as ``urls``.
    results = pool.map(getsource, urls)
finally:
    # Always shut the workers down, even when a fetch raised, so the
    # threads are not left behind.
    pool.close()
    pool.join()
time4 = time.perf_counter()
print('并行耗时:' + str(time4 - time3))
36
# 输出结果(示例,实际耗时随网络状况变化):
# 单线程耗时:20.18715476989746
# 并行耗时:5.100291728973389