Sentiment analysis

# Author: Mini
#!/usr/bin/env python
import jieba
import numpy as n
import pymysql
# Open the MySQL connection that sentiment_score() later uses to store scores.
# NOTE(review): host, user and password are hard-coded here; consider moving
# them to configuration or environment variables.
conn = pymysql.connect(host="127.0.0.1", user="root", passwd="wangmianny111", db="galaxy_macau_ad",charset='utf8')

# Register three user dictionaries with jieba before any segmentation happens
# (judging by the file names: a project vocabulary plus positive and negative
# sentiment word lists). The absolute Windows paths are machine-specific.
jieba.load_userdict("C:/Users/Administrator/Desktop/tripadvisor_gm/tripadvisor_code_python/galaxy_macau_dict.txt")
jieba.load_userdict("C:/Users/Administrator/Desktop/tripadvisor_gm/tripadvisor_code_python/chinese_sentiment_score/positive_dic.txt")
jieba.load_userdict("C:/Users/Administrator/Desktop/tripadvisor_gm/tripadvisor_code_python/chinese_sentiment_score/negative_dic.txt")

def open_dict(Dict = 'mini', path=r'/Users/apple888/PycharmProjects/Textming/Sent_Dict/Hownet/'):
    """Load a comma-separated word file into a membership lookup table.

    The file ``path + Dict + '.txt'`` is read as UTF-8. Each line is stripped
    and split on ``,``; every field except the first is stored as a key that
    maps to itself.

    Args:
        Dict: Base name of the dictionary file, without the ``.txt`` suffix.
        path: Directory prefix. It is concatenated as-is, so it must end with
            a path separator.

    Returns:
        A ``dict`` mapping each collected word to itself, intended for
        ``word in table`` checks.

    Raises:
        OSError: If the file cannot be opened.
    """
    file_path = path + '%s.txt' % Dict
    words = {}
    # The context manager closes the handle even if reading fails.
    with open(file_path, 'r', encoding='utf-8') as dictionary:
        for line in dictionary:
            fields = line.strip().split(",")
            # NOTE(review): the first field of every line is skipped, so a
            # file with one word per line yields an empty table. Presumably
            # column 0 is a head/label column -- confirm against the files.
            for word in fields[1:]:
                words[word] = word
    # No jieba frequency tuning is done here: the earlier second pass iterated
    # the already-exhausted file handle and therefore never executed.
    return words


def sentiment_score_list(dataset):
    """Split a review into clauses and return them as a flat list.

    The text is cut on the Chinese full stop '。' first and each piece is then
    cut on the Chinese comma '，'. The resulting list is printed and returned.

    Note: a later definition with the same name in this file replaces this
    one at import time.
    """
    clauses = [
        clause
        for sentence in dataset.split('。')
        for clause in sentence.split('，')
    ]
    print(clauses)
    return clauses



def judgeodd(num):
    """Return the string 'even' if ``num`` is divisible by 2, else 'odd'."""
    return 'even' if num % 2 == 0 else 'odd'

# Lookup tables used by the scorer: negation words, positive words and
# negative words, each loaded from <name>.txt in the sentiment directory.
deny_word = open_dict(Dict = 'deny', path= r'C:/Users/Administrator/Desktop/tripadvisor_gm/tripadvisor_code_python/chinese_sentiment_score/')
posdict = open_dict(Dict = 'positive', path= r'C:/Users/Administrator/Desktop/tripadvisor_gm/tripadvisor_code_python/chinese_sentiment_score/')
negdict = open_dict(Dict = 'negative', path= r'C:/Users/Administrator/Desktop/tripadvisor_gm/tripadvisor_code_python/chinese_sentiment_score/')
# The string literal below is disabled degree-word weighting code; it is a
# bare expression and is never executed.
"""
degree_word = open_dict(Dict = '程度级别词语', path= r'C:/Users/Administrator/Desktop/Textming/')
mostdict = degree_word[degree_word.index('extreme')+1 : degree_word.index('very')]#权重4，即在情感词前乘以4
verydict = degree_word[degree_word.index('very')+1 : degree_word.index('more')]#权重3
moredict = degree_word[degree_word.index('more')+1 : degree_word.index('ish')]#权重2
ishdict = degree_word[degree_word.index('ish')+1 : degree_word.index('last')]#权重0.5
"""
# Build the synonym table: on every line of synonyms.txt the first field is
# the canonical word and each remaining field is mapped to it.
combine_dict = {}
with open("C:/Users/Administrator/Desktop/tripadvisor_gm/tripadvisor_code_python/chinese_sentiment_score/synonyms.txt", "r",encoding='utf-8') as synonyms_file:
    for line in synonyms_file:
        seperate_word = line.strip().split(",")
        # The whole field list is handed to jieba, exactly as before.
        # NOTE(review): given a list, suggest_freq treats the items as the
        # pieces a joined word should be split into -- confirm this is the
        # intended tuning rather than one call per word.
        jieba.suggest_freq(seperate_word, tune=True)  # change the frequency
        # Register the synonyms of *every* line, not only the last one read.
        for synonym in seperate_word[1:]:
            combine_dict[synonym] = seperate_word[0]
print("loading dic and changing freq finished!")

def _is_negated(preceding_words):
    """Return True when an odd number of negation words precede a sentiment word."""
    negations = sum(1 for w in preceding_words if w in deny_word)
    return judgeodd(negations) == 'odd'


def sentiment_score_list(dataset, topic="demond_show"):
    """Score the sentiment a review expresses about one topic.

    The review is split into clauses on '。' and '，' and each clause is
    segmented with jieba. Only clauses whose tokens include ``topic`` are
    scored. In such a clause every token found in ``posdict`` or ``negdict``
    counts as one sentiment word:

    * positive word: 5 points, or 1 point when negated;
    * negative word: 1 point, or 5 points when negated.

    A sentiment word is negated when an odd number of ``deny_word`` tokens
    lies between the previous sentiment word of the clause (or the clause
    start) and the word itself, so a double negation cancels out.

    Args:
        dataset: The review text.
        topic: Token that marks a clause as relevant. Defaults to
            ``"demond_show"``, the value this script has always used.

    Returns:
        The mean points per sentiment word, rounded to one decimal place, or
        ``0`` when no sentiment word was found in any relevant clause.
    """
    print(dataset)

    seg_sentence = []
    for sentence in dataset.split('。'):
        seg_sentence += sentence.split('，')
    print(seg_sentence)

    pos_total = 0  # points collected from positive words
    neg_total = 0  # points collected from negative words
    s = 0  # number of sentiment words seen
    for sen in seg_sentence:  # traverse each clause of the comment
        segtmp = jieba.lcut(sen, cut_all=False)  # cut the clause into words
        if topic not in segtmp:
            print("not talking about this certain topic!")
            continue
        print("the customer is talking about " + topic)

        a = 0  # index just past the previous sentiment word in this clause
        for i, word in enumerate(segtmp):
            print(word)
            if word in posdict:
                print("this customer's attitude is positive!")
                pos_total += 1 if _is_negated(segtmp[a:i]) else 5
                print(pos_total)
            elif word in negdict:
                neg_total += 5 if _is_negated(segtmp[a:i]) else 1
            else:
                continue
            s += 1
            a = i + 1  # the next negation window starts after this word

    print("s" + str(s))
    count1 = []
    score_service = 0
    if s != 0:
        score_service = float('%.1f' % ((pos_total + neg_total) / s))
        count1.append(score_service)
    print(count1)
    return score_service
def sentiment_score(senti_score_list):
    """Store a review's topic score in the ``tripadvisor_chinese`` table.

    Relies on three module-level names: ``conn`` (the open pymysql
    connection), ``index`` (used as the row's ``customer_num``) and ``ID``
    (used only in the log line).

    Args:
        senti_score_list: The numeric score to store. A score of ``0`` means
            "nothing found" and is not written.

    Returns:
        None.
    """
    print(ID+":senti_score_list:"+str(senti_score_list))
    if senti_score_list != 0:
        # Parameterised query: the driver quotes the values instead of the
        # statement being assembled by string concatenation.
        sql = "UPDATE tripadvisor_chinese SET demond_show = %s WHERE customer_num = %s ;"
        with conn.cursor() as cursor:
            cursor.execute(sql, (str(senti_score_list), str(index)))
        conn.commit()
    # Printed whether or not a row was written, as before.
    print("sucess!" )
"""
test1='兔子一号 我中意澳门银河，尤其喜欢银河酒店的房间还有服务，服务特别周到，服务特别好。'
test2='兔子二号 澳门银河的服务一点也不好，很差劲。'
test3='兔子三号 服务不能说不好，也不是很差。' """
"""data_combine=""
for chinese_data in open("C:/Users/Administrator/Desktop/tripadvisor_gm/tripadvisor_code_python/chinese_sentiment_score/tripadvisor_chinese.txt", "r",encoding='utf-8'):
 chinese_comment= chinese_data.strip().split("\n")
 print(chinese_comment)
#data=[test1,test2,test3]
 #data_combine=""

 for comment in chinese_comment:
  print(comment)
  combine_sentence = ""
  words_1 = jieba.cut(comment)
  for word in words_1:
    #print(word)
    if word in combine_dict:
        word = combine_dict[word]
        combine_sentence += word
    else:
        combine_sentence += word
  print(combine_sentence)
  data_combine += combine_sentence+"\n"
  print(data_combine)
f_combine = open("C:/Users/Administrator/Desktop/tripadvisor_gm/tripadvisor_code_python/chinese_sentiment_score/combine_chinese.txt", "a", encoding="utf_8")
f_combine.write(data_combine)
print (data_combine)"""
# Score every review in the synonym-normalised corpus and store the result.
# ``index`` and ``ID`` are deliberately module-level: sentiment_score() reads
# them when it logs and when it selects the row to update.
index=1
with open("C:/Users/Administrator/Desktop/tripadvisor_gm/tripadvisor_code_python/chinese_sentiment_score/combine_chinese.txt", "r",encoding='utf-8') as combine_file:
    for combine_data in combine_file:
        seperate_sentice = combine_data.split("\n")
        print(seperate_sentice)
        for item in seperate_sentice:
            if item=="":
                continue
            # The first tab-separated field is the review ID, possibly quoted.
            ID_list = item.strip().split('\t')
            ID = ID_list[0].replace('"','')
            print("ID"+ID)
            # Score and store each review exactly once; a second call would
            # repeat the segmentation and the database update.
            sentiment_score(sentiment_score_list(item))
            print("index:"+str(index))
            index+=1

posted @ 2018-01-23 15:59 兔子的尾巴_Mini 阅读(403) 评论(0) 收藏举报

刷新页面返回顶部

兔子的尾巴_Mini

Sentiment analysis

公告