from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
import requests
from lxml import etree
import csv
# Shared output file and CSV writer; one_page() writes its rows through
# `csvwrite`.
# newline='' lets the csv module control line endings itself. Without it,
# platforms that translate '\n' on write (e.g. Windows) turn the writer's
# '\r\n' into '\r\r\n', which shows up as a blank line after every record.
f = open('newdata5.csv', mode='w', encoding='utf-8', newline='')
csvwrite = csv.writer(f)
def one_page(url):
    """Download one listing page and append its table rows to the shared CSV.

    Fetches ``url``, locates the table at a fixed absolute XPath, skips the
    first ``<tr>`` (presumably the header row -- confirm against the page)
    and writes the direct text of each remaining row's ``<td>`` cells as one
    CSV record through the module-level ``csvwrite``. Every row is also
    echoed to stdout, followed by a completion message for the page.

    Args:
        url: Address of the page to scrape.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        IndexError: If the page does not contain the expected table.
    """
    # A timeout keeps a stalled connection from blocking the calling thread
    # forever; requests applies no timeout by default.
    response = requests.get(url, timeout=30)
    # Fail on 4xx/5xx here rather than trying to parse an error page.
    response.raise_for_status()
    # Decode the body as UTF-8 regardless of what the response headers claim.
    response.encoding = 'utf-8'
    html = etree.HTML(response.text)

    tables = html.xpath('/html/body/div[2]/div[4]/div[1]/table')
    if not tables:
        # Same exception type as indexing an empty result, but the message
        # now says which page was affected.
        raise IndexError('no data table found at {}'.format(url))

    # NOTE: csvwrite is a module-level writer, so rows written by concurrent
    # calls for different pages can end up interleaved in the output file.
    for tr in tables[0].xpath('./tr')[1:]:
        tu = tr.xpath('./td/text()')
        csvwrite.writerow(tu)
        print(tu)
    print(url, '保存完毕')
if __name__ == '__main__':
    # Crawl list pages 0-49 concurrently and report the real outcome of each
    # page once its worker has finished.
    page_count = 50  # how many pages to crawl
    try:
        with ThreadPoolExecutor(50) as t:  # how many worker threads
            futures = [
                t.submit(one_page, "http://www.xinfadi.com.cn/marketanalysis/0/list/{}.shtml".format(i))
                for i in range(page_count)
            ]
            for i, future in enumerate(futures):
                try:
                    # result() blocks until the page is done and re-raises
                    # any exception from the worker, which would otherwise
                    # be silently discarded with the future.
                    future.result()
                except Exception as exc:
                    # Top-level boundary: report the failure and keep going
                    # so one bad page does not abort the remaining ones.
                    print(str(i)+"条爬取失败:", exc)
                else:
                    print(str(i)+"条爬取结束.........")
    finally:
        # The executor has shut down by now, so no worker is still writing;
        # closing flushes buffered rows to disk.
        f.close()