import csv
import re
import threading
from concurrent.futures import ThreadPoolExecutor

import requests
from lxml import etree
# Serialises writes to the shared csv writer: main() submits this function to
# a thread pool, and csv writer objects are not documented as thread-safe.
_WRITER_LOCK = threading.Lock()

# Matches the backslash and forward-slash characters removed from every cell.
_SLASH_RE = re.compile(r'\\|/')

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block a worker thread indefinitely.
_REQUEST_TIMEOUT = 30


def getOnePageData(url, writer):
    """Fetch one listing page and write its table rows to *writer*.

    Every ``<tr>`` of the page's table except the first one is turned into
    a CSV row made of all text nodes found inside it, with backslash and
    slash characters removed. Each written row is also printed.

    Args:
        url: Address of the page to download.
        writer: A ``csv.writer``-like object exposing ``writerow``; it may be
            shared with other threads running this function.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an HTTP error status.
    """
    resp = requests.get(url, timeout=_REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of parsing an error page as data.
    resp.raise_for_status()
    html = etree.HTML(resp.text)
    # [1:] skips the first row of the table (presumably the header row).
    trs = html.xpath('/html/body/div[2]/div[4]/div[1]/table//tr')[1:]
    rows = [
        [_SLASH_RE.sub('', item) for item in tr.xpath('.//text()')]
        for tr in trs
    ]
    # Parse outside the lock, write inside it, so that concurrent workers
    # cannot interleave their output and the lock is held only briefly.
    with _WRITER_LOCK:
        for row in rows:
            writer.writerow(row)
            print(row)
def main():
    """Scrape listing pages 1 through 10 concurrently into ``data.csv``.

    The output file is opened once and its csv writer is shared by all
    worker threads. The file is closed only after every worker has finished.
    Pages whose download or parsing raised an exception are reported on
    standard output instead of being silently dropped.
    """
    # Explicit encoding: relying on the locale default makes the output
    # platform-dependent and can raise UnicodeEncodeError on non-ASCII text.
    with open('data.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        futures = {}
        # Leaving the executor's ``with`` block waits for all submitted
        # tasks, so the file is still open while the workers write to it.
        with ThreadPoolExecutor(10) as t:
            for i in range(10):
                url = "http://xinfadi.com.cn/marketanalysis/0/list/{0}.shtml".format(i+1)
                futures[t.submit(getOnePageData, url, writer)] = url

    # Exceptions raised inside a worker are stored on its future; surface
    # them here so a failed page does not go unnoticed.
    for future, url in futures.items():
        error = future.exception()
        if error is not None:
            print("failed to scrape {0}: {1!r}".format(url, error))
# Start the scraper only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()