修炼爱情评论分析
前言
最近听林俊杰的歌听得比较入迷,然后看大家在歌底下的评论,代入感十分强,眼泪已经流下来了... 😭
于是就想着把评论都弄下来,然后分析一波,看看大家伙听这首歌的时候都是啥感情🤔
🆗,开干!!!

获取修炼爱情的评论数据
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import json
import time
import datetime
import random
import csv
def open_page():
    """Start Chrome, install the saved login cookies and open the song page.

    The cookies are read as a JSON list from ``cookie.txt`` in the current
    working directory.

    Returns:
        The selenium Chrome driver, already navigated to the song page.

    Raises:
        Any exception raised while loading cookies or navigating; the
        browser is closed before the exception propagates.
    """
    url = "https://music.163.com/#/song?id=400875269"
    driver = webdriver.Chrome()
    try:
        # First load (original note: obtains the initial cookies), which are
        # then cleared so that only the saved login cookies remain.
        driver.get(url)
        driver.delete_all_cookies()
        # NOTE(review): the 'ANSI' codec name appears to be a Windows-only
        # alias -- confirm before running this on another platform.
        with open("cookie.txt", 'r', encoding='ANSI') as f:
            cookie_list = json.load(f)
        for cookie in cookie_list:
            # selenium rejects some exported 'sameSite' values with
            # InvalidArgumentException; not every cookie carries the key,
            # so a default avoids a KeyError.
            cookie.pop("sameSite", None)
            driver.add_cookie(cookie)
        # Reload so the page is rendered with the login cookies applied.
        driver.get(url)
    except Exception:
        # Do not leave an orphaned browser process behind on failure.
        driver.quit()
        raise
    return driver
def _format_cn_date(day):
    """Render a ``datetime.date`` as ``YYYY年MM月DD日``.

    Plain string formatting is used instead of ``strftime`` because non-ASCII
    characters in a strftime format string can fail under some locales.
    """
    return "{0}年{1:02d}月{2:02d}日".format(day.year, day.month, day.day)


def deal_comment(l, year=None):
    """Normalise one scraped comment row in place.

    Args:
        l: List of text tokens. Every token except the last two belongs to
            the "nickname:comment" text (split on whitespace by the caller);
            the last two are the timestamp and the like/reply label.
        year: Year to prepend to timestamps that carry no year
            ("M月D日 HH:MM"). Defaults to the current year.

    Returns:
        The same list object, rewritten as
        ``[date, like_count, nickname, comment]``.
    """
    # Re-join the whitespace-split tokens, then split nickname from comment
    # on the FIRST full-width colon only, so that colons inside the comment
    # text itself are preserved.
    merged = ",".join(l[:-2])
    nickname, _, text = merged.partition(":")
    del l[:-2]
    l.append(nickname)
    l.append(text)

    # The like label is sliced by position: the first character and the last
    # four are dropped; an empty remainder means no likes.
    likes = l[1][1:-4]
    l[1] = likes if likes else '0'

    stamp = l[0]
    today = datetime.date.today()
    if len(stamp) == 5:
        # Five characters is treated as a time of day ("HH:MM"): today.
        l[0] = _format_cn_date(today)
    elif len(stamp) == 7:
        # Seven characters is treated as a yesterday stamp.
        l[0] = _format_cn_date(today - datetime.timedelta(days=1))
    elif stamp[1:2] == '月' or stamp[2:3] == '月':
        # "M月D日 HH:MM" / "MM月D日 HH:MM": drop the trailing six characters
        # (the time) and prepend the year. Slices are used instead of
        # indexing so that very short stamps do not raise IndexError.
        prefix_year = today.year if year is None else year
        l[0] = "{0}年".format(prefix_year) + stamp[:-6]
    # Any other timestamp shape is passed through unchanged.
    return l
def get_comments_data(driver, max_pages=100):
    """Scrape comment rows from the song page, one comment page at a time.

    Args:
        driver: Selenium driver already showing the song page.
        max_pages: Maximum number of comment pages to visit (default 100).

    Returns:
        A list of rows as produced by ``deal_comment``. If an element does
        not appear within the wait timeout, scraping stops and the rows
        collected so far are returned.
    """
    # selenium is already a dependency of this file; imported locally so the
    # timeout handling below is self-contained.
    from selenium.common.exceptions import TimeoutException

    wait = WebDriverWait(driver, 5)
    driver.switch_to.frame("contentFrame")
    comment_list = []
    for page in range(max_pages):
        print("正在爬取第{0}页评论...".format(page+1))
        first_page = page == 0

        # Scroll down slowly in small random steps (original note:
        # 4466px / 2685px); the first page gets more steps.
        scroll_steps = 45 if first_page else 30
        for _ in range(scroll_steps):
            driver.execute_script('window.scrollBy(0,{0})'.format(random.randint(100, 150)))
            time.sleep(random.uniform(0.1, 0.3))

        # Rows 1..35 are read on the first page and 1..20 afterwards.
        # presumably the first page also lists hot comments -- TODO confirm
        row_limit = 36 if first_page else 21
        try:
            for row in range(1, row_limit):
                Name_comment = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@class="m-cmmt"]/div[2]/div[{0}]/div[2]/div[1]'.format(row)))).text.split()
                Date = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@class="m-cmmt"]/div[2]/div[{0}]/div[2]/div[last()]'.format(row)))).text.split("\n")
                comment_list.append(deal_comment(Name_comment + Date))
            next_btn = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@class="m-cmmt"]/div[3]/div/a[last()]')))
            # NOTE(review): if the next button is inert on the final page,
            # the same page may be scraped again -- verify against the site.
            next_btn.click()
        except TimeoutException:
            # A missing row or button usually means there is nothing more to
            # read; keep what was collected instead of losing everything.
            print("第{0}页元素等待超时,停止爬取,保留已获取的评论".format(page+1))
            break
        # Scroll back to the top before processing the next page.
        driver.execute_script("document.documentElement.scrollTop=0")
    return comment_list
def save_to_csv(comment_list, filename='修炼爱情评论.csv'):
    """Write the scraped comment rows to a CSV file with a header row.

    Args:
        comment_list: Iterable of rows, each
            ``[date, like_count, nickname, comment]``.
        filename: Destination path. Defaults to the original hard-coded
            file name in the current working directory.
    """
    # utf-8-sig writes a BOM at the start of the file; newline='' lets the
    # csv module control line endings itself.
    with open(filename, 'w', newline='', encoding='utf-8-sig') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(["评论日期", "点赞数", "昵称", "评论"])
        csv_writer.writerows(comment_list)
if __name__ == '__main__':
    # Scrape the comments and store them as CSV. The browser is always
    # closed, even when scraping or saving fails part-way through.
    driver = open_page()
    try:
        comment_list = get_comments_data(driver)
        save_to_csv(comment_list)
        print("评论爬取以及存储完成!!!")
    finally:
        driver.quit()
调用百度接口对评论进行情感分析
import re
import requests
import json
import csv
import time
# client_id = '你的key'
# client_secret = '你的secret key'
# host = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={0}&client_secret={1}'.format(client_id, client_secret)
# response = requests.get(host)
# if response:
# print(response.json()) #返回的json数据中有access_token 数据
def get_emotion(data, token='你获取的token'):
    """Classify the sentiment of one text via the Baidu sentiment API.

    Args:
        data: The text to analyse (a string).
        token: Access token for the API. Defaults to the placeholder value
            that must be replaced with a real token.

    Returns:
        1 if the positive probability is higher, 0 if the negative
        probability is higher, 2 if both are equal, or the string "分析失败"
        when the request fails or the response carries no ``items`` entry.
    """
    url = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/sentiment_classify?charset=UTF-8&access_token={}'.format(token)
    payload = json.dumps({'text': data})
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        res = requests.post(url, data=payload, timeout=10)
        json_data = json.loads(res.text)
    except (requests.RequestException, ValueError):
        # Network failure or a non-JSON body: report it the same way as any
        # other failed analysis.
        return "分析失败"

    # Look for the 'items' key in the parsed response rather than searching
    # the raw text, which could match the word anywhere in the body.
    items = json_data.get('items') if isinstance(json_data, dict) else None
    if not items:
        return "分析失败"

    negative = items[0]['negative_prob']
    positive = items[0]['positive_prob']
    if positive > negative:
        return 1
    if positive == negative:
        return 2
    return 0
if __name__ == "__main__":
    # Read the comment column (4th field) of the scraped CSV, skipping the
    # header row and any row too short to contain a comment.
    with open("F:\\python_work\\修炼爱情评论.csv", 'r', newline='', encoding='utf-8-sig') as f:
        reader = csv.reader(f)
        next(reader, None)
        comment_list = [row[3] for row in reader if len(row) > 3]

    with open("F:\\python_work\\修炼爱情评论情感值.csv", 'w', newline='', encoding="utf-8") as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(["评论情感分析(1为正向情绪,0为负面情绪)"])
        for index, text in enumerate(comment_list, 1):
            print("正在解析第{0}条评论".format(index))
            # Each result goes out as a one-cell row, written immediately so
            # that an interrupted run keeps what it has. Passing bare strings
            # to writerows() would split a value such as "分析失败" into one
            # cell per character.
            csv_writer.writerow([str(get_emotion(text))])
            # Pause between API calls.
            time.sleep(1.1)

将数据可视化
写在最后
看到饼图上的积极消极评论占比,是积极赢了,看来大家伙都是积极向上的好青年嘛!😍
然后看到最后的词云上的几个大字,拼起来就是‘我喜欢你’🤡
卧槽,小丑竟然是我自己
我也想要甜甜的恋爱啊啊啊啊啊啊啊啊


我也想要甜甜的恋爱啊啊啊啊啊
浙公网安备 33010602011771号