1 from sklearn.externals import joblib
2 import pandas as pd
3 import numpy
4 from sklearn.preprocessing import OneHotEncoder
5 #import link_and_train
# Assemble the prediction set: join the test rows with the user-feature and
# ad-feature tables. The merged frame is one-hot encoded further down.
onehot = OneHotEncoder()

addata = pd.read_csv("adFeature.csv")
testdata = pd.read_csv("test1.csv")
userdata = pd.read_csv("userFeature.data")

# Both merges use the pandas defaults: an inner join on whatever columns the
# two frames have in common (presumably "uid" and then "aid" -- confirm
# against the input files).
data = testdata.merge(userdata).merge(addata)

# Keep a copy of the merged frame on disk for inspection / reuse.
data.to_csv("predict_data.csv", index=False)
15
# Columns of the merged frame that feed the encoder, in the order they become
# columns of the feature matrix. Despite the name, the list holds the ad-side
# columns (campaignId ... productType) as well as the user-side ones.
userfeature = [
    "age", "carrier", "consumptionAbility", "ct", "education", "gender",
    "house",
    "interest1", "interest2", "interest3", "interest4", "interest5",
    "kw1", "kw2", "kw3",
    "marriageStatus", "os",
    "topic1", "topic2", "topic3",
    "LBS", "appIdAction", "appIdInstall",
    "campaignId", "creativeId", "creativeSize",
    "adCategoryId", "advertiserId", "productId", "productType",
]
def _encode_cell(value):
    """Collapse one raw feature cell into a single integer code.

    * integer cell -> the integer itself;
    * float cell   -> 0;
    * string cell  -> the sum of its whitespace-separated integer tokens
      (e.g. "3 10 7" -> 20).

    Raises ValueError if a string cell contains a non-integer token.
    """
    # Accept any integer flavour, not only numpy.int64: iterating a pandas
    # Series yields plain Python ints, and other platforms may use int32.
    if isinstance(value, (int, numpy.integer)):
        return int(value)
    # The original code tested ``numpy.float`` here; that alias was removed in
    # NumPy 1.24 and raised AttributeError for every string / missing cell.
    # NOTE(review): every float becomes 0 -- NaN (missing) but also any real
    # value in a column that pandas up-cast to float because it contains NaN.
    # Filling with 0 is not sound and should be improved; the behaviour is
    # kept as-is because the models were presumably trained on the same
    # encoding -- confirm against the training script.
    if isinstance(value, (float, numpy.floating)):
        return 0
    # Multi-valued cell: split() (no argument) tolerates repeated spaces and
    # surrounding whitespace. Sorting before summing was a no-op, so it is gone.
    return sum(int(token) for token in value.split())


# Encode column by column: one Series lookup per feature instead of one
# chained ``data[feature][index]`` lookup per cell.
encoded_columns = [
    [_encode_cell(value) for value in data[feature]]
    for feature in userfeature
]

# Transpose to the (n_rows, n_features) layout the encoder expects; the
# variable name ``userdata`` is reused because later code reads it.
userdata = numpy.ascontiguousarray(numpy.array(encoded_columns).T)
# Learn the category vocabulary of every column, then expand the integer
# matrix into its one-hot representation.
# NOTE(review): the encoder is fitted on the prediction data itself. If the
# models were trained with an encoder fitted on different data, the resulting
# columns may not line up with what the models expect -- confirm against the
# training script.
banner = "--------------------------------------------------------------------"
onehot.fit(userdata)
print(banner)
print(banner)
test = onehot.transform(userdata)

# Show the encoded matrix and its (rows, columns) shape as a sanity check.
print(test)
print(test.shape)
53
print("开始预测。。。")

# Ensemble settings: the models are read from "1.model" ... "154.model" in the
# working directory.
MODEL_IDS = range(1, 155)
# Upper bound on the number of rows scored. The original loop hard-coded 19000
# and failed when fewer rows were available; the bound is now clamped to the
# real row count.
MAX_ROWS = 19000

n_rows = min(MAX_ROWS, test.shape[0])
n_models = len(MODEL_IDS)

# Per-row vote tallies for the two labels the models emit (-1 and 1).
negative_votes = numpy.zeros(n_rows, dtype=int)
positive_votes = numpy.zeros(n_rows, dtype=int)

if n_rows:
    batch = test[:n_rows]
    # Load each model exactly once and let it score every row in one call.
    # The original re-loaded all models from disk for every single row and,
    # worse, reset the vote counters for every model, so only the last model
    # ever influenced the printed result.
    for model_id in MODEL_IDS:
        model = joblib.load("%d.model" % model_id)
        predictions = numpy.asarray(model.predict(batch)).ravel()
        negative_votes += predictions == -1
        positive_votes += predictions == 1

uids = data["uid"]
aids = data["aid"]
for one in range(n_rows):
    s1 = int(negative_votes[one])
    s2 = int(positive_votes[one])
    # Report the share of the ensemble that backed the winning label. The
    # original divided by a literal 114 although 154 models vote, which would
    # push the ratio above 1 once votes are accumulated correctly.
    # NOTE(review): confirm that 114 was not an intentional normaliser.
    if s1 > s2:
        print(one, "uid:", uids[one], "aid:", aids[one], "result:", s1 / n_models)
    elif s2 > s1:
        print(one, "uid:", uids[one], "aid:", aids[one], "result:", s2 / n_models)
    else:
        # A tie; reachable with an even number of models despite the message,
        # which is kept unchanged for anything that parses this output.
        print("impossible")