fasttext的使用,语料格式,调用方法
数据格式:每行一条样本,形如 分词后的句子+\t+__label__+标签(句子与标签之间用制表符\t分隔,标签以__label__为前缀)

fasttext_model.py
from fasttext import FastText
import numpy as np
def get_data_path(by_word=True, train=True):
    """Return the relative path of one of the four classification data files.

    Args:
        by_word: When truthy, pick a ``data_by_word_*`` file; otherwise pick
            a plain ``data_*`` file.
        train: When truthy, pick the ``*_train.txt`` file; otherwise pick the
            ``*_test.txt`` file.

    Returns:
        The path string under ``./classify/`` for the requested combination.
    """
    # Keyed by (by_word, train); both flags are normalised to bool so that any
    # truthy/falsy argument selects the same file as a plain ``if`` would.
    paths = {
        (True, True): "./classify/data_by_word_train.txt",
        (True, False): "./classify/data_by_word_test.txt",
        (False, True): "./classify/data_train.txt",
        (False, False): "./classify/data_test.txt",
    }
    return paths[bool(by_word), bool(train)]
def prepar_model():
    """Train a supervised fastText classifier and save it to disk.

    The training file is the one returned by
    ``get_data_path(by_word=True, train=True)``. The trained model is written
    to ``./fasttext_model/classify_by_word_100_20_2.model``; the ``100_20_2``
    suffix matches the dim / epoch / wordNgrams values used below.
    """
    hyper_params = {"dim": 100, "epoch": 20, "wordNgrams": 2}
    train_file = get_data_path(by_word=True, train=True)
    classifier = FastText.train_supervised(train_file, **hyper_params)
    classifier.save_model("./fasttext_model/classify_by_word_100_20_2.model")
def ceshi_model():
    """Evaluate the saved classifier on the test file and print its accuracy.

    Loads the model from ``./fasttext_model/classify_by_word_100_20_2.model``,
    reads the file returned by ``get_data_path(by_word=True, train=False)``,
    and keeps only the lines that split on a tab into exactly two fields
    (sentence, label). It then predicts one label per sentence and prints the
    fraction of predictions equal to the label field. If no line qualifies, a
    short notice is printed instead of an accuracy value.
    """
    model = FastText.load_model("./fasttext_model/classify_by_word_100_20_2.model")
    test_data_path = get_data_path(by_word=True, train=False)
    sentences = []
    labels = []
    # A context manager guarantees the file handle is closed, and iterating
    # the file object avoids materialising every line with readlines().
    with open(test_data_path, encoding="utf-8") as test_file:
        for line in test_file:
            fields = line.strip().split("\t")
            # Skip anything that is not exactly "<sentence>\t<label>".
            if len(fields) == 2:
                sentences.append(fields[0])
                labels.append(fields[1])
    if not sentences:
        # np.mean([]) would return nan and emit a RuntimeWarning; report the
        # empty test set explicitly instead.
        print(f"no valid samples found in {test_data_path}")
        return
    # Element 0 of predict()'s result holds, per sentence, a sequence of
    # predicted labels; only the first label of each is compared.
    predicted = [top_labels[0] for top_labels in model.predict(sentences)[0]]
    acc = np.mean([1 if truth == guess else 0 for truth, guess in zip(labels, predicted)])
    print(acc)
if __name__ == "__main__":
    # Order matters: the model is trained and saved first, because the
    # evaluation step loads that saved model file.
    for step in (prepar_model, ceshi_model):
        step()
    多思考也是一种努力,做出正确的分析和选择,因为我们的时间和精力都有限,所以把时间花在更有价值的地方。

                
            
        
浙公网安备 33010602011771号