利用线程池实现爬虫


from multiprocessing.dummy import Pool
from lxml import etree
import requests
# Pages 1 through 3 of the same endpoint; only the ``page`` query value differs.
urls = [
    'http://wz.sun0769.com/political/index/politicsNewest?id=1&page={}'.format(page)
    for page in range(1, 4)
]


def _first_text(node, path):
    """Return the first result of evaluating *path* on *node*, or '' if none."""
    matches = node.xpath(path)
    return matches[0] if matches else ''


def get_request(url):
    """Download one page and print four text fields for each list entry.

    The page at *url* is fetched, parsed as HTML, and every ``<li>`` under
    ``/html/body/div[2]/div[3]/ul[2]`` is visited.  For each entry the first
    text node found inside spans 1, 2, 3 and 5 is printed on one line.

    Args:
        url: Address of the page to fetch.

    Returns:
        None.  Output is written to stdout with ``print``.

    Raises:
        requests.exceptions.RequestException: if the request fails or does
            not complete within the timeout.
    """
    # Without a timeout a stalled server would block this worker forever.
    response = requests.get(url=url, timeout=10)
    tree = etree.HTML(response.text)
    # NOTE(review): guard for a parse that yields no root element; confirm
    # against the installed lxml version whether this can actually be None.
    if tree is None:
        return

    for li in tree.xpath('/html/body/div[2]/div[3]/ul[2]/li'):
        # Presumably number / status / subject / timestamp, judging by the
        # names the values were originally bound to -- verify against the page.
        # A span without text yields '' instead of raising IndexError, so one
        # malformed row does not abort the whole page.
        number = _first_text(li, './span[1]//text()')
        status = _first_text(li, './span[2]//text()')
        subject = _first_text(li, './span[3]//text()')
        posted_at = _first_text(li, './span[5]//text()')
        print(number, status, subject, posted_at)


# Thread pool with three workers (multiprocessing.dummy's Pool is thread-backed).
pool = Pool(3)
try:
    # Argument 1: the callback function; argument 2: an iterable (here a list).
    # Each element of ``urls`` is handed to ``get_request`` on a worker thread;
    # the call returns once every URL has been processed.
    pool.map(get_request, urls)
finally:
    # Release the worker threads even if a callback raised.
    pool.close()
    pool.join()
# 作用:将 urls 中的每一个元素依次作为参数传递给回调函数,
# 回调函数会在线程池中异步地对这些元素执行相应的操作
posted @ 2020-05-30 15:28  大魔头的取经故事  阅读(193)  评论(0)  编辑  收藏  举报