向量转libsvm输入格式

 保存向量的文本中,每行是一个向量,元素之间用“,”分隔;前3077行是正例(pos),其后的3077行是反例(neg)。脚本依次为每行打上标签、随机打乱行序、转换为libsvm输入格式,供libsvm分类使用。

 

 1 import numpy as np
 2 f1 = open('/home/xingshuo/files/lmy/Tensorflow-AutoEncoder-master/out.txt', 'r')
 3 lines = f1.readlines()
 4 f1.close()
 5 f2 = open("/home/xingshuo/files/lmy/corpus/libsvm/vector_label.txt",'w+')
 6 n = 1
 7 for li in lines:
 8         if n <= 3077:
 9             f2.write(str(1)+','+li)
10         if n > 3077:
11             f2.write(str(0)+','+li)
12         n += 1
13 f2.close()
14 f3 = open("/home/xingshuo/files/lmy/corpus/libsvm/vector_label.txt",'r')
15 lines_f3 = f3.readlines()
16 txtlist = []
17 for li_f3 in lines_f3:
18     li_f3 = li_f3.strip('\n')
19     txtlist.append(li_f3)
20 data = np.array(txtlist)
21 np.random.shuffle(data)
22 np.savetxt("/home/xingshuo/files/lmy/corpus/libsvm/vector_label_1.txt", data, fmt="%s", delimiter="")
23 f4 = open('/home/xingshuo/files/lmy/corpus/libsvm/vector_label_1.txt', 'r')
24 output = open('/home/xingshuo/files/lmy/corpus/libsvm/libsvm.txt', 'w+')
25 try:
26     line = f4.readline()
27     while line:
28         line = line.strip('\n')
29         index = 0
30         output_line = ''
31         for sub_line in line.split(','):
32             if index == 0:
33                output_line = sub_line
34             if index != 0:
35                the_text = '\t' + str(index) + ':' + sub_line
36                output_line = output_line+the_text
37             index = index + 1
38         output_line = output_line + '\n'
39         output.write(output_line)
40         line = f4.readline()
41 finally:
42     f4.close()

 

posted on 2017-04-18 09:49  Pod32gleo  阅读(538)  评论(0)    收藏  举报

导航