04 2023 档案

摘要:
# -*- coding: utf-8 -*-
# 代码12-1 评论去重的代码
import pandas as pd
import re
import jieba.posseg as psg
import numpy as np
# 去重,去除完全重复的数据
reviews = pd.read_ …
阅读全文
posted @ 2023-04-21 10:52 昌king 阅读(38) 评论(0) 推荐(0)
摘要:
def count107(i):  # 自定义统计函数
    j = i[['fullURL']][i['fullURLId'].str.contains('107')].copy()  # 找出类别包含107的网址
    j['type'] = None  # 添加空列
    j['type'][j['fullURL']. …
阅读全文
posted @ 2023-04-05 11:23 昌king 阅读(41) 评论(0) 推荐(0)