对测试集进行测试,只提供了思路,程序是不能用的

 1 from sklearn.externals import joblib
 2 import pandas as pd
 3 import numpy
 4 from sklearn.preprocessing import OneHotEncoder
 5 #import link_and_train
 6 #拼接测试集,测试集进行one-hot编码
 7 onehot = OneHotEncoder()
 8 addata = pd.read_csv("adFeature.csv")
 9 testdata = pd.read_csv("test1.csv")
10 userdata = pd.read_csv("userFeature.data")
11 data = pd.merge(testdata,userdata)
12 data = pd.merge(data,addata)
13 
14 data.to_csv("predict_data.csv",index=False)
15 
16 userfeature = ["age", "carrier", "consumptionAbility", "ct", "education", "gender", "house", "interest1",
17                    "interest2", "interest3", "interest4", "interest5", "kw1", "kw2", "kw3", "marriageStatus", "os",
18                    "topic1", "topic2", "topic3", "LBS", "appIdAction", "appIdInstall", "campaignId", "creativeId",
19                    "creativeSize", "adCategoryId", "advertiserId", "productId", "productType"]
20     # for index in data[feature] :
21 userdata = []
22 for index in range(len(data["uid"])):
23     feature_li = []
24     for feature in userfeature:
25             # a = data[feature]
26             # print(a[0],type(a[index]),isinstance(a[0],(numpy.int64)))
27 
28         if isinstance(data[feature][index], numpy.int64):
29             feature_li.append(int(data[feature][index]))
30         elif isinstance(data[feature][index], numpy.float64):
31             feature_li.append(0)  # 缺失值用0填充,这是不合理的,有待改进
32         elif isinstance(data[feature][index], numpy.float):
33                 feature_li.append(0)
34         else:
35             trans = data[feature][index].strip().split(" ")
36             trans = map(int, trans)
37             trans = sorted(trans)
38             # print(trans)
39             s = 0
40             for num in trans:
41                 s += num
42             feature_li.append(s)
43         # print(feature_li)
44     userdata.append(feature_li)
45 userdata = numpy.array(userdata)
46 onehot.fit(userdata)
47 print("--------------------------------------------------------------------")
48 print("--------------------------------------------------------------------")
49 test = onehot.transform(userdata)
50 
51 print(test)
52 print(numpy.shape(test))
53 
54 print("开始预测。。。")
55 for one in range(19000) :
56     for xx in range(1,155):
57         model = joblib.load("%d.model"%(xx))
58         result = model.predict(test[one])
59         s1 = 0
60         s2 = 0
61         if result == -1:
62             s1+=1
63         elif result == 1:
64             s2+=1
65     s = 0
66     if s1 > s2 :
67         print(one,"uid:",data["uid"][one],"aid:",data["aid"][one],"result:",s1/114)
68     elif s2>s1 :
69         print(one, "uid:", data["uid"][one], "aid:", data["aid"][one], "result:", s2/114)
70     else:
71         print("impossible")

 

posted @ 2018-04-25 11:31  我想休息  阅读(403)  评论(0)  编辑  收藏  举报