from urllib.parse import urljoin

import openpyxl
import requests
from bs4 import BeautifulSoup
# Scrape the product listing pages of the supplier site, follow every product
# link, and dump the markup of each "swiper-wrapper" block found on the
# product page into an Excel sheet (one row per block, one cell per "<"
# delimited fragment).

# Browser-like User-Agent sent with every request (listing AND detail pages).
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'}

# Listing pages requested are product-list-0.html .. product-list-19.html.
# NOTE(review): the index starts at 0 as in the original script; confirm
# whether the site's first listing page is 0 or 1.
PAGE_COUNT = 20
# Seconds to wait on any single HTTP request before giving up.
REQUEST_TIMEOUT = 120
OUTPUT_FILE = 'product.xlsx'


def page_url(page):
    """Return the URL of listing page number *page*."""
    return "https://promiseedental.en.made-in-china.com/product-list-" + str(page) + ".html"


def resolve_link(base_url, href):
    """Turn *href* into an absolute URL, resolving it against *base_url*.

    Relative ("/product/x.html") and protocol-relative ("//host/x.html")
    links cannot be fetched as-is, so they are joined with the page they
    were found on. Absolute links are returned unchanged.
    """
    return urljoin(base_url, href.strip())


def split_markup(markup):
    """Split an HTML string on "<" into the fragments written as row cells."""
    return markup.split("<")


def fetch_soup(session, url):
    """Download *url* and return it parsed with BeautifulSoup.

    Raises:
        requests.RequestException: on connection errors, timeouts, or an
            HTTP error status.
    """
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, "html.parser")


def scrape_product(session, sheet, url):
    """Append one row to *sheet* per "swiper-wrapper" div on product page *url*."""
    product_soup = fetch_soup(session, url)
    for wrapper in product_soup.find_all('div', class_="swiper-wrapper"):
        sheet.append(split_markup(str(wrapper)))
        print("ok")


def scrape_listing_page(session, sheet, page):
    """Scrape every product linked from listing page number *page*.

    A failure on one product is reported and skipped so the remaining
    products on the page are still processed.
    """
    listing_url = page_url(page)
    listing_soup = fetch_soup(session, listing_url)
    # Each "prod-image" div wraps the link to one product's detail page.
    for tile in listing_soup.find_all('div', class_="prod-image"):
        try:
            anchor = tile.find("a")
            if anchor is None or not anchor.get("href"):
                print('---->', 'no product link in tile on ' + listing_url)
                continue
            scrape_product(session, sheet, resolve_link(listing_url, anchor["href"]))
        except Exception as e:
            # Deliberate best-effort boundary: log and move on to the next product.
            print('---->', e)


def main():
    """Run the scrape over all listing pages and save the workbook.

    The workbook is saved even if the run is interrupted, so rows that were
    already collected are not lost.
    """
    wb = openpyxl.Workbook()
    sheet = wb.active
    sheet.title = 'product1'
    try:
        # One session reuses the connection and applies the headers to
        # every request, including the product detail pages.
        with requests.Session() as session:
            session.headers.update(headers)
            for page in range(PAGE_COUNT):
                try:
                    scrape_listing_page(session, sheet, page)
                except requests.RequestException as e:
                    # A listing page that cannot be fetched must not abort the run.
                    print('---->', e)
    finally:
        wb.save(OUTPUT_FILE)


if __name__ == "__main__":
    main()