爬取京东手机信息:品牌、型号、总评数量、好评数量、中评数量、差评数量。并保存为CSV格式文件
只有首页的数据
"""Scrape the first page of JD.com phone search results.

For each product card: name, SKU id, comment-API URL, and comment counts
(total / good / general / poor). Rows are printed and saved to write.csv.
"""
import requests
from bs4 import BeautifulSoup
import csv
import codecs
# import pandas as pd

# Pretend to be a desktop browser; JD rejects obvious bot user agents.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36',
}
# First results page for the keyword "手机" (mobile phone), URL-encoded.
url = "https://search.jd.com/Search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&wq=%E6%89%8B%E6%9C%BA&pvid=5282edb11fad4ce1ad0214c0d89a0031"

html = requests.get(url=url, headers=header)
soup = BeautifulSoup(html.content, 'html.parser')
items = soup.select('li.gl-item')  # one <li> per product card
# print(items)

results = []
for item in items:
    # 'sku' rather than 'id' — avoid shadowing the builtin.
    sku = item.find('div', class_='p-focus').find('a')['data-sku']
    name = item.find("div", class_="p-name p-name-type-2").find("em").text
    commit_url = ("https://sclub.jd.com/comment/productPageComments.action?productId="
                  + str(sku) + "&sortType=5&score=0&page=0&pageSize=10")
    # Fetch the comment JSON ONCE per product. The original issued four
    # identical HTTP requests, one for each field of the same summary object.
    summary = requests.get(commit_url).json()["productCommentSummary"]
    commit_count = summary["commentCountStr"]
    good_commit = summary["goodCountStr"]
    general_commit = summary["generalCountStr"]
    poor_commit = summary["poorCountStr"]
    results.append([name, sku, commit_url, commit_count,
                    good_commit, general_commit, poor_commit])
# print(results)

for result in results:
    print(result)

# mode 'w' overwrites any existing file; newline='' keeps csv from
# emitting blank rows on Windows (per the csv module docs).
with open('write.csv', 'w', newline='', encoding='utf8') as csv_file:
    csv_writer = csv.writer(csv_file)
    for row in results:  # 'row' rather than shadowing the builtin 'list'
        csv_writer.writerow(row)
# No explicit close(): the with-statement already closed the file.
浙公网安备 33010602011771号