修炼爱情评论分析

前言

最近听林俊杰的歌听得比较入迷,然后看他们在歌底下的评论,代入感十分的强,眼泪已经流下来了... 😭

于是就想着把评论都弄下来,然后分析一波,看看大家伙听这首歌的时候都是啥感情🤔

 

 

         🆗,开干!!!

获取修炼爱情的评论数据

from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import json
import time
import datetime
import random
import csv


def open_page():
    """Open the song page in Chrome using a previously saved login session.

    Cookies are read from ``cookie.txt`` (a JSON list of cookie dicts),
    installed into a fresh Chrome session, and the page is then reloaded so
    that it is rendered with those cookies.

    Returns:
        The Chrome WebDriver, positioned on the song page.

    Raises:
        OSError / ValueError: if ``cookie.txt`` cannot be read or parsed.
    """
    url = "https://music.163.com/#/song?id=400875269"

    # Load the cookies before starting the browser, so a missing or malformed
    # cookie file cannot leave an orphaned Chrome process behind.
    # NOTE(review): 'ANSI' is a Windows-only codec alias; on other platforms
    # open() is expected to raise LookupError — confirm the target platform.
    with open("cookie.txt", 'r', encoding='ANSI') as f:
        cookie_list = json.load(f)

    driver = webdriver.Chrome()
    try:
        driver.get(url)  # first visit, to obtain the initial cookies
        driver.delete_all_cookies()
        for cookie in cookie_list:
            # Works around selenium.common.exceptions.InvalidArgumentException:
            # "invalid argument: invalid 'sameSite'". The default keeps
            # cookies that have no such key from raising KeyError.
            cookie.pop("sameSite", None)
            driver.add_cookie(cookie)
        driver.get(url)
    except Exception:
        # Do not leak the browser if the session could not be set up.
        driver.quit()
        raise
    return driver


def deal_comment(l, default_year="2021"):
    """Normalize one scraped comment into ``[date, likes, nickname, comment]``.

    Args:
        l: Raw fields of one comment: the whitespace-split pieces of the
            "nickname:comment" text, followed by two trailing items, the
            time text and the like-count text. The list is rewritten in place.
        default_year: Year put in front of dates whose second or third
            character is '月', i.e. dates that carry no year of their own.

    Returns:
        The same list object ``l``, holding the date, the like count, the
        nickname and the comment text, in that order.
    """
    # Pieces that were split on whitespace are rejoined with commas.
    joined = ",".join(l[:-2])
    # Split on the first colon only, so colons inside the comment survive.
    nickname, _, content = joined.partition(":")
    del l[:-2]
    l.append(nickname)
    l.append(content)

    # Like count: drop the first character and the last four; an empty
    # remainder means no likes.
    # NOTE(review): presumably strips "(" and a trailing ")|回复" — confirm.
    likes = l[1][1:-4]
    l[1] = likes if likes else '0'

    stamp = l[0]
    if len(stamp) == 5:
        # A 5-character time text is treated as posted today
        # (presumably "HH:MM" — confirm against the page).
        l[0] = time.strftime("%Y年%m月%d日", time.localtime())
    elif len(stamp) == 7:
        # A 7-character time text is treated as posted yesterday.
        yesterday = datetime.date.today() - datetime.timedelta(days=1)
        l[0] = yesterday.strftime("%Y年%m月%d日")
    elif '月' in stamp[1:3]:
        # "M月..." / "MM月..." has no year: prepend one and drop the last six
        # characters (presumably a trailing " HH:MM" — confirm). Slicing
        # instead of indexing keeps very short texts from raising IndexError.
        l[0] = "{0}年".format(default_year) + stamp[:-6]

    return l


def get_comments_data(driver, pages=100):
    """Scrape comments page by page from the song page.

    Switches the driver into the ``contentFrame`` frame and, for each page,
    scrolls down slowly, reads every comment entry, and then clicks the last
    link of the pager to advance.

    Args:
        driver: WebDriver already showing the song page.
        pages: Number of comment pages to read.

    Returns:
        A list with one ``deal_comment`` result per scraped comment.
    """
    wait = WebDriverWait(driver, 5)
    driver.switch_to.frame("contentFrame")
    comment_list = []
    for page in range(pages):
        print("正在爬取第{0}页评论...".format(page + 1))
        first_page = page == 0

        # Scroll down slowly in small random steps. The first page needs more
        # steps than the later ones (original note: 4466px vs 2685px).
        scroll_steps = 45 if first_page else 30
        for _ in range(scroll_steps):
            driver.execute_script('window.scrollBy(0,{0})'.format(random.randint(100, 150)))
            time.sleep(random.uniform(0.1, 0.3))

        # 35 entries are read from the first page and 20 from every later one.
        entry_count = 35 if first_page else 20
        for idx in range(1, entry_count + 1):
            name_comment = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@class="m-cmmt"]/div[2]/div[{0}]/div[2]/div[1]'.format(idx)))).text.split()
            date = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@class="m-cmmt"]/div[2]/div[{0}]/div[2]/div[last()]'.format(idx)))).text.split("\n")
            comment_list.append(deal_comment(name_comment + date))

        if page == pages - 1:
            break  # nothing left to read, so do not advance past the last page
        next_btn = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@class="m-cmmt"]/div[3]/div/a[last()]')))
        next_btn.click()
        driver.execute_script("document.documentElement.scrollTop=0")  # back to the top
    return comment_list


def save_to_csv(comment_list):
    """Write the scraped comments to ``修炼爱情评论.csv`` in the working directory.

    The file is UTF-8 with a BOM and starts with a header row, followed by
    one row per item of ``comment_list``.

    Args:
        comment_list: Iterable of rows, each an iterable of field values.
    """
    header = ["评论日期", "点赞数", "昵称", "评论"]
    with open('修炼爱情评论.csv', 'w', newline='', encoding='utf-8-sig') as out_file:
        writer = csv.writer(out_file)
        for record in [header, *comment_list]:
            writer.writerow(record)


if __name__ == '__main__':
    # Scrape the comments and store them as CSV.
    driver = open_page()
    try:
        comment_list = get_comments_data(driver)
        save_to_csv(comment_list)
        print("评论爬取以及存储完成!!!")
    finally:
        # Always close the browser, even when scraping or saving fails.
        driver.quit()

调用百度接口对评论进行情感分析

import re
import requests
import json
import csv
import time
# client_id = '你的key'
# client_secret = '你的secret key'
# host = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={0}&client_secret={1}'.format(client_id, client_secret)
# response = requests.get(host)
# if response:
#     print(response.json())  #返回的json数据中有access_token 数据


def _sentiment_label(json_data):
    """Turn a parsed API response into 1, 2, 0, or "分析失败" if it has no result."""
    if not isinstance(json_data, dict):
        return "分析失败"
    items = json_data.get('items')
    if not items:  # key missing, or present but empty
        return "分析失败"
    negative = items[0]['negative_prob']  # negative probability
    positive = items[0]['positive_prob']  # positive probability
    if positive > negative:
        return 1
    if positive == negative:
        return 2
    return 0


def get_emotion(data, token='你获取的token'):
    """Classify the sentiment of a text via the Baidu sentiment_classify API.

    Args:
        data: The text to analyse (a string).
        token: Access token appended to the request URL.

    Returns:
        1 if the positive probability is higher, 0 if the negative one is
        higher, 2 if both are equal, or the string "分析失败" when the request
        fails or the response carries no ``items`` result.
    """
    url = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/sentiment_classify?charset=UTF-8&access_token={}'.format(token)
    payload = json.dumps({'text': data})
    try:
        # A timeout keeps one stalled request from hanging a whole batch run.
        res = requests.post(url, data=payload, timeout=10)
        # Parse once and inspect the keys, rather than searching the raw text
        # for the substring "items".
        json_data = json.loads(res.text)
    except (requests.RequestException, ValueError):
        # Transport errors and non-JSON bodies are reported through the
        # function's existing failure value.
        return "分析失败"
    return _sentiment_label(json_data)


if __name__ == "__main__":
    # Read the fourth column (the comment text) of every row, header included.
    with open("F:\\python_work\\修炼爱情评论.csv", 'r', encoding='utf-8-sig') as f:
        reader = csv.reader(f)
        comment_list = [row[3] for row in reader]

    with open("F:\\python_work\\修炼爱情评论情感值.csv", 'w', newline='', encoding="utf-8") as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(["评论情感分析(1为正向情绪,0为负面情绪)"])
        # comment_list[0] is the header cell, so analysis starts at index 1.
        for index, comment in enumerate(comment_list[1:], start=1):
            print("正在解析第{0}条评论".format(index))
            # Each result is written as a one-field row. Passing bare strings
            # to writerows() would split a multi-character result such as
            # "分析失败" into one column per character. Writing as we go also
            # keeps finished results if a later request crashes.
            csv_writer.writerow([str(get_emotion(comment))])
            time.sleep(1.1)  # pause between API calls

将数据可视化

写在最后 

看到饼图上的积极消极评论占比,是积极赢了,看大家伙都是积极向上的好青年嘛!😍

然后看到最后的词云上的几个大字,拼起来就是‘我喜欢你’🤡

卧槽,小丑竟然是我自己

我也想要甜甜的恋爱啊啊啊啊啊啊啊啊

 

posted @ 2021-06-18 20:43  Violet_eg  阅读(150)  评论(0)    收藏  举报
Live2D