#Author:Mini
#!/usr/bin/env python
import jieba
import numpy as n
import pymysql
# Open the MySQL connection used later to store the computed scores.
# NOTE(review): the database credentials are hard-coded in source; consider
# loading them from configuration or the environment instead.
conn = pymysql.connect(
    host="127.0.0.1",
    user="root",
    passwd="wangmianny111",
    db="galaxy_macau_ad",
    charset='utf8',
)

# Load three user dictionaries into jieba: the project vocabulary and the
# positive / negative sentiment word lists.
for _userdict_path in (
    "C:/Users/Administrator/Desktop/tripadvisor_gm/tripadvisor_code_python/galaxy_macau_dict.txt",
    "C:/Users/Administrator/Desktop/tripadvisor_gm/tripadvisor_code_python/chinese_sentiment_score/positive_dic.txt",
    "C:/Users/Administrator/Desktop/tripadvisor_gm/tripadvisor_code_python/chinese_sentiment_score/negative_dic.txt",
):
    jieba.load_userdict(_userdict_path)
def open_dict(Dict='mini', path=r'/Users/apple888/PycharmProjects/Textming/Sent_Dict/Hownet/', *, tune_freq=False):
    """Load a comma-separated word list and return it as a lookup dict.

    The file ``path + Dict + '.txt'`` is read as UTF-8. Each line is split on
    ``,``; the first field of a line is skipped and every remaining field is
    stored, mapped to itself.

    Args:
        Dict: Base name of the dictionary file (without the ``.txt`` suffix).
        path: Directory prefix; it is concatenated directly with the file
            name, so it must end with a path separator.
        tune_freq: When true, call ``jieba.suggest_freq(word, tune=True)`` for
            every loaded word. Defaults to ``False`` because the original
            tuning loop iterated the already-exhausted file handle and
            therefore never executed; the default keeps that behaviour.

    Returns:
        dict mapping each loaded word to itself.
    """
    file_path = path + '%s.txt' % Dict
    words = {}
    # Context manager guarantees the handle is closed (it used to leak).
    with open(file_path, 'r', encoding='utf-8') as dictionary:
        for line in dictionary:
            fields = line.strip().split(",")
            # Field 0 is deliberately skipped, as in the original loop.
            for entry in fields[1:]:
                words[entry] = entry
    if tune_freq:
        try:
            for entry in words:
                jieba.suggest_freq(entry, tune=True)  # change the frequency
        except MemoryError:
            print("memery run out!")
    return words
def sentiment_score_list(dataset):
    """Split *dataset* into clauses, print the clause list and return it.

    The text is first split on the full stop '。' and each piece is then
    split on ','.

    NOTE: a later ``def sentiment_score_list`` in this file rebinds the name,
    so this version is not the one in effect once the module has loaded.
    """
    clauses = [
        clause
        for sentence in dataset.split('。')
        for clause in sentence.split(',')
    ]
    print(clauses)
    return clauses
def judgeodd(num):
    """Return the string 'even' when *num* is divisible by 2, else 'odd'."""
    return 'odd' if num % 2 else 'even'
# Load the negation, positive and negative word lists from the shared
# sentiment-dictionary directory (loaded in that order).
_SENTIMENT_DICT_DIR = r'C:/Users/Administrator/Desktop/tripadvisor_gm/tripadvisor_code_python/chinese_sentiment_score/'
deny_word, posdict, negdict = (
    open_dict(Dict=_dict_name, path=_SENTIMENT_DICT_DIR)
    for _dict_name in ('deny', 'positive', 'negative')
)
"""
degree_word = open_dict(Dict = '程度级别词语', path= r'C:/Users/Administrator/Desktop/Textming/')
mostdict = degree_word[degree_word.index('extreme')+1 : degree_word.index('very')]#权重4,即在情感词前乘以4
verydict = degree_word[degree_word.index('very')+1 : degree_word.index('more')]#权重3
moredict = degree_word[degree_word.index('more')+1 : degree_word.index('ish')]#权重2
ishdict = degree_word[degree_word.index('ish')+1 : degree_word.index('last')]#权重0.5
"""
# Build the synonym table: on each comma-separated line of synonyms.txt the
# first word is the canonical form, and every following word maps to it.
combine_dict = {}
# Context manager so the file handle is closed once the table is built
# (the original opened the file inline in the ``for`` and never closed it).
with open("C:/Users/Administrator/Desktop/tripadvisor_gm/tripadvisor_code_python/chinese_sentiment_score/synonyms.txt", "r", encoding='utf-8') as _synonyms_file:
    for line in _synonyms_file:
        seperate_word = line.strip().split(",")
        # NOTE(review): suggest_freq receives the whole list of words here,
        # not each word on its own -- confirm this is the intended tuning.
        jieba.suggest_freq(seperate_word, tune=True)  # change the frequency
        for _synonym in seperate_word[1:]:
            combine_dict[_synonym] = seperate_word[0]
print("loading dic and changing freq finished!")
# Compute a sentiment score for one comment string.
#
# The comment is printed, split into clauses on '。' and then ',', and each
# clause is segmented with jieba.lcut. Words found in posdict / negdict add
# points to running totals; the number of deny_word entries found in
# segtmp[a:i] decides which point value is used. The returned value is
# (positive total + negative total) / number of sentiment words, rounded to
# one decimal place, or 0 when no sentiment word was counted.
#
# NOTE(review): this copy of the function has lost its indentation, so the
# nesting of the loops and of the `else` branches below cannot be read from
# the text. The comments describe individual statements only.
def sentiment_score_list(dataset):
print(dataset)
seg_sentence = []
# First split on the Chinese full stop, then (below) on ','.
seg_sentence1 = dataset.split('。')
#print(seg_sentence1)
count1 = []
# count2 is never used by any live statement in this function.
count2 = []
for item in seg_sentence1:
seg_sentence2=item.split(',')
#print (seg_sentence2)
seg_sentence+=seg_sentence2
print(seg_sentence)
#print (len(seg_sentence))
poscount_service1 = 0 # value of the positive word currently being scored
#sinsitive_count_service = 0
poscount_service2 = 0 # running positive total, after deny words are considered
negcount_service1 = 0 # value of the negative word currently being scored
negcount_service2 = 0 # running negative total, after deny words are considered
score_service = 0 # final score; stays 0 when no sentiment word is counted
s = 0 # number of sentiment words counted so far
for sen in seg_sentence: # traverse each clause of the comment
segtmp = jieba.lcut(sen, cut_all=False) # cut the clause, returns a list of words
i = 0 # position of the word being scanned
a = 0 # start of the slice scanned for deny words (one past the last sentiment word)
#print(segtmp)
for word in segtmp:
# "demond_show" is the topic token this scorer looks for.
if word =="demond_show":
print ("the customer is talking about "+word)
for word in segtmp:
print (word)
if word in posdict: # if it is a positive word
print("this customer's attitude is positive!")
poscount_service1 = 5
s+=1
c = 0 # number of deny words found in segtmp[a:i]
for w in segtmp[a:i]: # scan the words before the sentiment word
if w in deny_word:
c += 1
if judgeodd(c) == 'odd': # odd number of deny words: add 1 instead of 5
poscount_service1 = 1
poscount_service2 += poscount_service1
poscount_service1 = 0
else:
poscount_service2 = poscount_service1 + poscount_service2
poscount_service1 = 0
a = i + 1 # move the slice start past this sentiment word
print(poscount_service2)
elif word in negdict: # negative sentiment analysis, same procedure as above
negcount_service1 = 1
s+=1
d = 0 # number of deny words found in segtmp[a:i]
for w in segtmp[a:i]:
if w in deny_word:
d += 1
if judgeodd(d) == 'odd': # odd number of deny words: add 5 instead of 1
negcount_service1=5
negcount_service2 += negcount_service1
negcount_service1 = 0
#negcount3 = negcount + negcount2 + negcount3
else:
negcount_service2 += negcount_service1
negcount_service1 = 0
a = i + 1
else:
pass
i += 1 # advance the position of the scanned word
else:
print("not talking about this certain topic!")
print("s"+str(s))
# Avoid dividing by zero: the score is only computed when s is non-zero.
if s==0:
pass
else:
score_service = (poscount_service2 + negcount_service2)/s
# Round to one decimal place via string formatting.
score_service = float('%.1f' % score_service)
count1.append(score_service)
# sql = "UPDATE tripadvisor_chinese SET service = '"+score_service+"' WHERE ID = '"+ID+"' ;"
#conn.query(sql)
#conn.commit()
#count2.append(count1)
#count1 = []
print (count1)
return score_service
def sentiment_score(senti_score_list):
    """Store a computed score in the ``demond_show`` column.

    Reads the module-level names ``ID`` (used only in the log line),
    ``index`` (matched against ``customer_num``) and ``conn`` (the open
    database connection). A score equal to 0 is logged but not written.

    Args:
        senti_score_list: The numeric score to store.

    Returns:
        None.
    """
    print(ID+":senti_score_list:"+str(senti_score_list))
    if senti_score_list == 0:
        return
    # Parameterised statement instead of string concatenation; both values
    # are still sent as strings, as in the original quoted literals.
    sql = "UPDATE tripadvisor_chinese SET demond_show = %s WHERE customer_num = %s ;"
    with conn.cursor() as cursor:
        cursor.execute(sql, (str(senti_score_list), str(index)))
    conn.commit()
    print("sucess!" )
"""
test1='兔子一号 我中意澳门银河,尤其喜欢银河酒店的房间还有服务,服务特别周到,服务特别好。'
test2='兔子二号 澳门银河的服务一点也不好,很差劲。'
test3='兔子三号 服务不能说不好,也不是很差。' """
"""data_combine=""
for chinese_data in open("C:/Users/Administrator/Desktop/tripadvisor_gm/tripadvisor_code_python/chinese_sentiment_score/tripadvisor_chinese.txt", "r",encoding='utf-8'):
chinese_comment= chinese_data.strip().split("\n")
print(chinese_comment)
#data=[test1,test2,test3]
#data_combine=""
for comment in chinese_comment:
print(comment)
combine_sentence = ""
words_1 = jieba.cut(comment)
for word in words_1:
#print(word)
if word in combine_dict:
word = combine_dict[word]
combine_sentence += word
else:
combine_sentence += word
print(combine_sentence)
data_combine += combine_sentence+"\n"
print(data_combine)
f_combine = open("C:/Users/Administrator/Desktop/tripadvisor_gm/tripadvisor_code_python/chinese_sentiment_score/combine_chinese.txt", "a", encoding="utf_8")
f_combine.write(data_combine)
print (data_combine)"""
# Score every comment in combine_chinese.txt and store the result.
# `index` is the running counter that sentiment_score() matches against
# customer_num; `ID` is the first tab-separated field of the comment line
# with double quotes removed. Both are module-level names read by
# sentiment_score().
# NOTE(review): `index` is advanced once per non-empty item -- confirm this
# matches the customer_num numbering in the table.
index = 1
# Context manager so the input file is closed when the run finishes.
with open("C:/Users/Administrator/Desktop/tripadvisor_gm/tripadvisor_code_python/chinese_sentiment_score/combine_chinese.txt", "r", encoding='utf-8') as _comments_file:
    for combine_data in _comments_file:
        seperate_sentice = combine_data.split("\n")
        print(seperate_sentice)
        for item in seperate_sentice:
            if item == "":
                continue
            ID_list = item.strip().split('\t')
            ID = ID_list[0].replace('"', '')
            print("ID"+ID)
            # Score and store once; the original repeated the whole
            # computation and the database update inside a print() call.
            service_score = sentiment_score(sentiment_score_list(item))
            print(service_score)
            print("index:"+str(index))
            index += 1
# The original trailing statement `float('%.1f' % score_service)` was
# removed: score_service is not defined at module level, so it raised
# NameError, and its result was discarded anyway.