import csv
import urllib.request
import json
import time
import xlwt
# ======> Crawl JD product comments into a CSV file <======#
#
# Run as a script: asks for the last page number to crawl, downloads each
# page of comments from the JD comment endpoint, and appends one CSV row
# per comment to CSV_PATH.

# JSONP endpoint; `{}` is replaced by the zero-based page index.
COMMENTS_URL = 'https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98&productId=100038004389&score=0&sortType=5&page={}&pageSize=10&isShadowSku=0&fold=1'

# NOTE(review): the product id in the Referer differs from the productId in
# COMMENTS_URL -- kept exactly as in the original script; confirm intent.
REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36',
    'Referer': 'https://item.jd.com/100026796992.html'
}

CSV_PATH = 'comments_jd2.csv'
JSON_PATH = '苹果.json'
PAGE_DELAY_SECONDS = 4

# Membership label for each known `plusAvailable` code (keys are the code as
# text); any other code falls back to DEFAULT_MEMBER_LEVEL.
MEMBER_LEVEL_NAMES = {
    "201": "PLUS会员",
    "203": "金牌会员",
    "103": "普通用户",
    "0": "无价值用户",
}
DEFAULT_MEMBER_LEVEL = "银牌会员"

# `userClient` code that the script labels as a mobile client.
MOBILE_CLIENT_CODE = "4"


def strip_jsonp(payload):
    """Return the JSON text wrapped inside a JSONP response.

    Accepts either ``callback({...});`` or a bare JSON document.

    Raises:
        ValueError: if the payload is neither bare JSON nor a ``name(...)``
            wrapper.
    """
    text = payload.strip()
    if text.startswith(('{', '[')):
        return text
    # Cut between the first '(' and the last ')'. str.strip(chars) would
    # treat its argument as a set of characters, which only works by luck.
    start = text.find('(')
    end = text.rfind(')')
    if start == -1 or end <= start:
        raise ValueError('unexpected JSONP payload: %r' % text[:80])
    return text[start + 1:end]


def member_level_name(code):
    """Map a ``plusAvailable`` code (int or str) to its membership label."""
    # str() makes the lookup work whether the API sends 201 or "201".
    return MEMBER_LEVEL_NAMES.get(str(code), DEFAULT_MEMBER_LEVEL)


def client_kind(user_client):
    """Return "移动端" for the mobile client code (int or str), else "PC端"."""
    return "移动端" if str(user_client) == MOBILE_CLIENT_CODE else "PC端"


def comment_row(comment):
    """Build the 22-column CSV row for one comment dict.

    Raises:
        KeyError: if the comment lacks any of the fields read below.
    """
    return (
        comment['id'],
        comment['guid'],
        comment['content'],          # comment text
        comment['creationTime'],     # comment time
        comment['isTop'],
        comment['referenceTime'],
        comment['firstCategory'],
        comment['secondCategory'],
        comment['thirdCategory'],
        comment['replyCount'],
        comment['score'],
        comment['nickname'],         # commenter
        comment['userClient'],
        comment['productColor'],
        comment['productSize'],
        member_level_name(comment['plusAvailable']),  # membership level
        comment['plusAvailable'],
        comment['productSales'],
        "京东客户端",                 # constant client label
        client_kind(comment['userClient']),           # mobile / PC
        comment['days'],
        comment['afterDays'],
    )


def fetch_page(page_index):
    """Download one page of comments and return its ``comments`` list."""
    request = urllib.request.Request(
        url=COMMENTS_URL.format(page_index), headers=REQUEST_HEADERS)
    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(request) as response:
        raw = response.read().decode('gbk')
    return json.loads(strip_jsonp(raw))['comments']


def main():
    """Prompt for the end page and append every crawled comment to the CSV.

    Pages are reported 1-based in the progress messages, so an end page of
    N crawls page indexes 0..N-1 (the original crawled one page too many).
    """
    end_page = int(input('请输入爬取的结束页码:'))

    # The original opened this file in append mode on every page and never
    # wrote to it; keep creating it, but without leaking file handles.
    with open(JSON_PATH, 'a', encoding='gbk'):
        pass

    # newline='' is required by the csv module; without it Windows output
    # gets a blank line after every row.
    with open(CSV_PATH, 'a', encoding='utf8', newline='') as csv_file:
        writer = csv.writer(csv_file)
        for page_index in range(end_page):
            print('第%s页开始爬取------' % (page_index + 1))
            for comment in fetch_page(page_index):
                writer.writerow(comment_row(comment))
            # Persist each finished page so a later failure loses nothing.
            csv_file.flush()
            print('第%s页完成----------' % (page_index + 1))
            # Pause between pages to avoid hammering the server.
            time.sleep(PAGE_DELAY_SECONDS)


if __name__ == '__main__':
    main()