探索核函数的优势和缺陷 (Exploring the strengths and weaknesses of SVM kernel functions)

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np
from time import time
import datetime
# Load the breast-cancer dataset and take a first look at the raw
# feature space using just the first two features.
cancer = load_breast_cancer()
x, y = cancer.data, cancer.target
plt.scatter(x[:, 0], x[:, 1], c=y)
plt.show()


from sklearn.decomposition import PCA

# Project the 30-dimensional data onto its first two principal
# components for a more faithful 2-D view of the class structure.
pca_2d = PCA(n_components=2)
x_dr = pca_2d.fit_transform(x)
plt.scatter(x_dr[:, 0], x_dr[:, 1], c=y)
plt.show()


from mpl_toolkits import mplot3d

# Radial height for the 3-D visualisation: r[i] = exp(-||x_i||^2).
# NOTE(review): the original comment said r.shape is (300,), but this
# dataset has 569 rows (see the describe() output below), so r.shape
# is (569,).
# NOTE(review): the raw features are large (means up to ~880), so
# (x**2).sum(1) is huge and exp(-...) underflows to ~0 for every
# sample — this snippet presumably came from a smaller toy dataset;
# verify before relying on r.
r = np.exp(-(x**2).sum(1))  # r.shape == (569,)
rlim = np.linspace(min(r), max(r), 500)


# Helper that draws the data in 3-D, lifting each point to height r.
# elev: up/down rotation angle (degrees)
# azim: horizontal rotation angle (degrees)
def plot_3D(elev=45, azim=30, x=x, y=y, r=r) -> None:
    """Scatter-plot (x[:, 0], x[:, 1], r) in 3-D, coloured by label y.

    Parameters
    ----------
    elev, azim : viewing angles forwarded to ``Axes3D.view_init``.
    x, y, r    : data, labels and point heights.  They default to the
                 module-level arrays, so the original call ``plot_3D()``
                 still works.  (Fix: the original read the global ``r``
                 directly while taking ``x`` and ``y`` as parameters —
                 ``r`` is now an explicit, backward-compatible parameter.)
    """
    ax = plt.subplot(projection="3d")
    ax.scatter3D(x[:, 0], x[:, 1], r, c=y, s=50, cmap="rainbow")
    ax.view_init(elev=elev, azim=azim)
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_zlabel("r")
    plt.show()


plot_3D()


x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=420
)

# Compare the four built-in kernels on the *unscaled* data and time them.
kernel = ["linear", "poly", "rbf", "sigmoid"]
time0 = time()
for i in kernel:
    clf = SVC(kernel=i, gamma="auto", degree=1, cache_size=5000).fit(x_train, y_train)
    print("The accuracy under kernel %s is %f" % (i, clf.score(x_test, y_test)))
    # BUG FIX: the original printed the elapsed time via
    # datetime.fromtimestamp(time() - time0), which treats a *duration*
    # as an epoch timestamp and converts it to local time — on any
    # timezone whose UTC offset is not a whole number of hours
    # (e.g. UTC+5:30) the printed minutes are wrong.  Format the
    # duration directly instead (same MM:SS:microseconds layout).
    elapsed = time() - time0
    mins, secs = divmod(elapsed, 60)
    print("%02d:%02d:%06d" % (mins, int(secs), round((secs % 1) * 1_000_000)))
The accuracy under kernel linear is 0.929825
00:00:278035
The accuracy under kernel poly is 0.923977
00:00:311496
The accuracy under kernel rbf is 0.596491
00:00:329552
The accuracy under kernel sigmoid is 0.596491
00:00:336953
import pandas as pd

# Inspect the per-feature distributions; the wildly different means and
# standard deviations show the features need standardising.
data = pd.DataFrame(x)
quantiles = [0.1, 0.15, 0.25, 0.50, 0.75, 0.90, 0.99]
data.describe(quantiles).T
count mean std min 10% 15% 25% 50% 75% 90% 99% max
0 569.0 14.127292 3.524049 6.981000 10.260000 11.040000 11.700000 13.370000 15.780000 19.530000 24.371600 28.11000
1 569.0 19.289649 4.301036 9.710000 14.078000 14.934000 16.170000 18.840000 21.800000 24.992000 30.652000 39.28000
2 569.0 91.969033 24.298981 43.790000 65.830000 70.670000 75.170000 86.240000 104.100000 129.100000 165.724000 188.50000
3 569.0 654.889104 351.914129 143.500000 321.600000 371.180000 420.300000 551.100000 782.700000 1177.400000 1786.600000 2501.00000
4 569.0 0.096360 0.014064 0.052630 0.079654 0.081948 0.086370 0.095870 0.105300 0.114820 0.132888 0.16340
5 569.0 0.104341 0.052813 0.019380 0.049700 0.053620 0.064920 0.092630 0.130400 0.175460 0.277192 0.34540
6 569.0 0.088799 0.079720 0.000000 0.013686 0.019932 0.029560 0.061540 0.130700 0.203040 0.351688 0.42680
7 569.0 0.048919 0.038803 0.000000 0.011158 0.014366 0.020310 0.033500 0.074000 0.100420 0.164208 0.20120
8 569.0 0.181162 0.027414 0.106000 0.149580 0.154620 0.161900 0.179200 0.195700 0.214940 0.259564 0.30400
9 569.0 0.062798 0.007060 0.049960 0.055338 0.056302 0.057700 0.061540 0.066120 0.072266 0.085438 0.09744
10 569.0 0.405172 0.277313 0.111500 0.183080 0.202840 0.232400 0.324200 0.478900 0.748880 1.291320 2.87300
11 569.0 1.216853 0.551648 0.360200 0.640400 0.710700 0.833900 1.108000 1.474000 1.909400 2.915440 4.88500
12 569.0 2.866059 2.021855 0.757000 1.280200 1.435400 1.606000 2.287000 3.357000 5.123200 9.690040 21.98000
13 569.0 40.337079 45.491006 6.802000 13.160000 14.938000 17.850000 24.530000 45.190000 91.314000 177.684000 542.20000
14 569.0 0.007041 0.003003 0.001713 0.004224 0.004539 0.005169 0.006380 0.008146 0.010410 0.017258 0.03113
15 569.0 0.025478 0.017908 0.002252 0.009169 0.010860 0.013080 0.020450 0.032450 0.047602 0.089872 0.13540
16 569.0 0.031894 0.030186 0.000000 0.007726 0.010734 0.015090 0.025890 0.042050 0.058520 0.122292 0.39600
17 569.0 0.011796 0.006170 0.000000 0.005493 0.006325 0.007638 0.010930 0.014710 0.018688 0.031194 0.05279
18 569.0 0.020542 0.008266 0.007882 0.013012 0.013746 0.015160 0.018730 0.023480 0.030120 0.052208 0.07895
19 569.0 0.003795 0.002646 0.000895 0.001710 0.001892 0.002248 0.003187 0.004558 0.006185 0.012650 0.02984
20 569.0 16.269190 4.833242 7.930000 11.234000 11.996000 13.010000 14.970000 18.790000 23.682000 30.762800 36.04000
21 569.0 25.677223 6.146258 12.020000 17.800000 19.252000 21.080000 25.410000 29.720000 33.646000 41.802400 49.54000
22 569.0 107.261213 33.602542 50.410000 72.178000 77.984000 84.110000 97.660000 125.400000 157.740000 208.304000 251.20000
23 569.0 880.583128 569.356993 185.200000 384.720000 440.080000 515.300000 686.500000 1084.000000 1673.000000 2918.160000 4254.00000
24 569.0 0.132369 0.022832 0.071170 0.102960 0.108620 0.116600 0.131300 0.146000 0.161480 0.188908 0.22260
25 569.0 0.254265 0.157336 0.027290 0.093676 0.108820 0.147200 0.211900 0.339100 0.447840 0.778644 1.05800
26 569.0 0.272188 0.208624 0.000000 0.045652 0.071612 0.114500 0.226700 0.382900 0.571320 0.902380 1.25200
27 569.0 0.114606 0.065732 0.000000 0.038460 0.050026 0.064930 0.099930 0.161400 0.208940 0.269216 0.29100
28 569.0 0.290076 0.061867 0.156500 0.226120 0.235100 0.250400 0.282200 0.317900 0.360080 0.486908 0.66380
29 569.0 0.083946 0.018061 0.055040 0.065792 0.067848 0.071460 0.080040 0.092080 0.106320 0.140628 0.20750
from sklearn.preprocessing import StandardScaler

# Standardise every feature to zero mean and unit variance.
scaler = StandardScaler()
x = scaler.fit_transform(x)
count mean std min 10% 15% 25% 50% 75% 90% 99% max
0 569.0 -2.497514e-17 1.00088 -2.029648 -1.098366 -0.876835 -0.689385 -0.215082 0.469393 1.534446 2.909529 3.971288
1 569.0 -2.497514e-17 1.00088 -2.229249 -1.212786 -1.013589 -0.725963 -0.104636 0.584176 1.326975 2.644095 4.651889
2 569.0 0.000000e+00 1.00088 -1.984504 -1.076672 -0.877311 -0.691956 -0.235980 0.499677 1.529432 3.037982 3.976130
3 569.0 3.746271e-17 1.00088 -1.454443 -0.947908 -0.806898 -0.667195 -0.295187 0.363507 1.486075 3.218702 5.250529
4 569.0 -6.243785e-17 1.00088 -3.112085 -1.188910 -1.025656 -0.710963 -0.034891 0.636199 1.313694 2.599511 4.770911
5 569.0 -1.248757e-17 1.00088 -1.610136 -1.035527 -0.961238 -0.747086 -0.221940 0.493857 1.347811 3.275782 4.568425
6 569.0 1.373633e-16 1.00088 -1.114873 -0.943046 -0.864627 -0.743748 -0.342240 0.526062 1.434288 3.300560 4.243589
7 569.0 6.243785e-17 1.00088 -1.261820 -0.974010 -0.891263 -0.737944 -0.397721 0.646935 1.328412 2.973759 3.927930
8 569.0 1.248757e-16 1.00088 -2.744117 -1.153036 -0.969028 -0.703240 -0.071627 0.530779 1.233221 2.862418 4.484751
9 569.0 -3.121893e-17 1.00088 -1.819865 -1.057477 -0.920820 -0.722639 -0.178279 0.470983 1.342243 3.209454 4.910919
10 569.0 -8.741299e-17 1.00088 -1.059924 -0.801577 -0.730259 -0.623571 -0.292245 0.266100 1.240514 3.198294 8.906909
11 569.0 -2.185325e-17 1.00088 -1.554264 -1.045885 -0.918336 -0.694809 -0.197498 0.466552 1.256518 3.081820 6.655279
12 569.0 7.492542e-17 1.00088 -1.044049 -0.785049 -0.708220 -0.623768 -0.286652 0.243031 1.117354 3.378079 9.461986
13 569.0 5.619407e-17 1.00088 -0.737829 -0.597942 -0.558823 -0.494754 -0.347783 0.106773 1.121579 3.021867 11.041842
14 569.0 1.404852e-17 1.00088 -1.776065 -0.939031 -0.834027 -0.624018 -0.220335 0.368355 1.123053 3.405812 8.029999
15 569.0 4.370650e-17 1.00088 -1.298098 -0.911510 -0.817001 -0.692926 -0.281020 0.389654 1.236492 3.598943 6.143482
16 569.0 6.243785e-17 1.00088 -1.057501 -0.801336 -0.701593 -0.557161 -0.199065 0.336752 0.882848 2.997338 12.072680
17 569.0 -1.873136e-17 1.00088 -1.913447 -1.022462 -0.887439 -0.674490 -0.140496 0.472657 1.117927 3.146456 6.649601
18 569.0 -2.497514e-17 1.00088 -1.532890 -0.911757 -0.822886 -0.651681 -0.219430 0.355692 1.159654 3.834036 7.071917
19 569.0 3.121893e-17 1.00088 -1.096968 -0.788466 -0.719776 -0.585118 -0.229940 0.288642 0.904208 3.349301 9.851593
20 569.0 4.995028e-17 1.00088 -1.726901 -1.042700 -0.884903 -0.674921 -0.269040 0.522016 1.535063 3.001373 4.094189
21 569.0 -3.746271e-17 1.00088 -2.223994 -1.282757 -1.046308 -0.748629 -0.043516 0.658341 1.297666 2.625885 3.885905
22 569.0 -2.497514e-17 1.00088 -1.693361 -1.044983 -0.872046 -0.689578 -0.285980 0.540279 1.503553 3.009644 4.287337
23 569.0 1.123881e-16 1.00088 -1.222423 -0.871684 -0.774366 -0.642136 -0.341181 0.357589 1.393000 3.581882 5.930172
24 569.0 9.990056e-17 1.00088 -2.682695 -1.289152 -1.041041 -0.691230 -0.046843 0.597545 1.276124 2.478455 3.955374
25 569.0 6.243785e-17 1.00088 -1.443878 -1.021571 -0.925234 -0.681083 -0.269501 0.539669 1.231407 3.335783 5.112877
26 569.0 -4.995028e-17 1.00088 -1.305831 -1.086814 -0.962270 -0.756514 -0.218232 0.531141 1.435090 3.023359 4.700669
27 569.0 -1.248757e-17 1.00088 -1.745063 -1.159448 -0.983337 -0.756400 -0.223469 0.712510 1.436382 2.354181 2.685877
28 569.0 -1.123881e-16 1.00088 -2.160960 -1.034661 -0.889384 -0.641864 -0.127409 0.450138 1.132518 3.184317 6.046041
29 569.0 -7.492542e-17 1.00088 -1.601839 -1.006009 -0.892074 -0.691912 -0.216444 0.450762 1.239884 3.141089 6.846856
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=420
)

# Re-run the kernel comparison on the *standardised* data and time it.
kernel = ["linear", "poly", "rbf", "sigmoid"]
time0 = time()
for i in kernel:
    clf = SVC(kernel=i, gamma="auto", degree=1, cache_size=5000).fit(x_train, y_train)
    print("The accuracy under kernel %s is %f" % (i, clf.score(x_test, y_test)))
    # BUG FIX: datetime.fromtimestamp(elapsed) interprets a duration as
    # an epoch timestamp in *local* time, so the printed minutes are
    # timezone-dependent.  Format the elapsed duration directly
    # (same MM:SS:microseconds layout).
    elapsed = time() - time0
    mins, secs = divmod(elapsed, 60)
    print("%02d:%02d:%06d" % (mins, int(secs), round((secs % 1) * 1_000_000)))
The accuracy under kernel linear is 0.976608
00:00:003820
The accuracy under kernel poly is 0.964912
00:00:007333
The accuracy under kernel rbf is 0.970760
00:00:013197
The accuracy under kernel sigmoid is 0.953216
00:00:019282
# Tune gamma for the RBF kernel over a log-spaced grid.
gamma_range = np.logspace(-10, 1, 50)
score = []
for g in gamma_range:
    model = SVC(kernel="rbf", gamma=g, degree=1, cache_size=5000)
    score.append(model.fit(x_train, y_train).score(x_test, y_test))
best = max(score)
print(best, gamma_range[score.index(best)])
plt.plot(gamma_range, score)
plt.show()
0.9766081871345029 0.012067926406393264

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV

# Jointly tune gamma and coef0 for the degree-1 polynomial kernel using
# stratified shuffle-split cross-validation over the whole dataset.
gamma_range = np.logspace(-10, 1, 20)
coef0_range = np.linspace(0, 5, 10)

param = dict(gamma=gamma_range, coef0=coef0_range)

cv = StratifiedShuffleSplit(n_splits=5, test_size=0.3, random_state=420)

grid = GridSearchCV(
    SVC(kernel="poly", cache_size=5000, degree=1), param_grid=param, cv=cv
)

grid.fit(x, y)

print(
    "The best parameters are %s with a score of %0.5f"
    % (grid.best_params_, grid.best_score_)
)
# BUG FIX: datetime.fromtimestamp(elapsed) converts a duration to local
# wall-clock time, making the printed minutes timezone-dependent.
# Format the elapsed duration directly (same MM:SS:microseconds layout).
elapsed = time() - time0
mins, secs = divmod(elapsed, 60)
print("%02d:%02d:%06d" % (mins, int(secs), round((secs % 1) * 1_000_000)))
The best parameters are {'coef0': 0.0, 'gamma': 0.18329807108324375} with a score of 0.96959
41:01:513573

重要参数 C (the important regularisation parameter C)

# Coarse scan of the regularisation strength C for the linear kernel
# over np.linspace(0.01, 30, 50).
C_range = np.linspace(0.01, 30, 50)
score = []
for penalty in C_range:
    model = SVC(kernel="linear", C=penalty, cache_size=5000)
    score.append(model.fit(x_train, y_train).score(x_test, y_test))
best = max(score)
print(best, C_range[score.index(best)])
plt.plot(C_range, score)
plt.show()
0.9766081871345029 1.2340816326530613

# Same coarse C scan, now for the RBF kernel,
# over np.linspace(0.01, 30, 50).
C_range = np.linspace(0.01, 30, 50)
score = []
for penalty in C_range:
    model = SVC(kernel="rbf", C=penalty, cache_size=5000)
    score.append(model.fit(x_train, y_train).score(x_test, y_test))
best = max(score)
print(best, C_range[score.index(best)])
plt.plot(C_range, score)
plt.show()
0.9883040935672515 25.103673469387758

# "Further refinement" pass for C with the RBF kernel.
# NOTE(review): despite the original "refine" comment, this range is
# identical to the previous scan — it was presumably meant to be
# narrowed around the previous optimum (~25); confirm against the
# source notebook before trusting this step.
C_range = np.linspace(0.01, 30, 50)
score = []
for c in C_range:
    clf = SVC(kernel="rbf", C=c, cache_size=5000).fit(x_train, y_train)
    score.append(clf.score(x_test, y_test))
print(max(score), C_range[score.index(max(score))])
plt.plot(C_range, score)
plt.show()
# BUG FIX: datetime.fromtimestamp(elapsed) renders a duration as local
# wall-clock time (timezone-dependent).  Format the duration directly
# (same MM:SS:microseconds layout).
elapsed = time() - time0
mins, secs = divmod(elapsed, 60)
print("%02d:%02d:%06d" % (mins, int(secs), round((secs % 1) * 1_000_000)))
0.9883040935672515 25.026315789473685

10:56:038874
# Cross-validate C in a narrow window with 10-fold CV on the full data.
from sklearn.model_selection import cross_val_score

C_range = np.linspace(3, 4, 50)
score = []
for c in C_range:
    clf = SVC(kernel="rbf", C=c, cache_size=5000)
    score.append(cross_val_score(clf, x, y, cv=10).mean())
print(max(score), C_range[score.index(max(score))])
plt.plot(C_range, score)
plt.show()
# BUG FIX: datetime.fromtimestamp(elapsed) renders a duration as local
# wall-clock time (timezone-dependent).  Format the duration directly
# (same MM:SS:microseconds layout).
elapsed = time() - time0
mins, secs = divmod(elapsed, 60)
print("%02d:%02d:%06d" % (mins, int(secs), round((secs % 1) * 1_000_000)))
0.982456140350877 3.0

24:23:139221
# Fit the final model with the chosen hyper-parameters and report its
# test accuracy and full parameter set.
clf = SVC(kernel="rbf", C=3, gamma="auto", cache_size=5000)
clf.fit(x_train, y_train)
clf.score(x_test, y_test)
clf.get_params()
{'C': 3,
 'break_ties': False,
 'cache_size': 5000,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'auto',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}
posted @ 2023-04-25 16:38  ThankCAT  阅读(22)  评论(0编辑  收藏  举报