探索核函数的优势和缺陷 (Exploring the strengths and weaknesses of SVM kernel functions)

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np
from time import time
import datetime
# Load the breast-cancer dataset and take a first look at the raw
# feature space using just the first two features.
cancer = load_breast_cancer()
x, y = cancer.data, cancer.target
plt.scatter(x[:, 0], x[:, 1], c=y)
plt.show()


from sklearn.decomposition import PCA

# Project the 30-dimensional data onto its first two principal
# components for a more faithful 2-D view of the class structure.
pca_2d = PCA(n_components=2)
x_dr = pca_2d.fit_transform(x)
plt.scatter(x_dr[:, 0], x_dr[:, 1], c=y)
plt.show()


from mpl_toolkits import mplot3d

# Radial height for the 3-D visualisation: r[i] = exp(-||x_i||^2).
# NOTE(review): the original comment said r.shape is (300,), but this
# dataset has 569 rows (see the describe() output below), so r.shape
# is (569,).
# NOTE(review): the raw features are large (means up to ~880), so
# (x**2).sum(1) is huge and exp(-...) underflows to ~0 for every
# sample — this snippet presumably came from a smaller toy dataset;
# verify before relying on r.
r = np.exp(-(x**2).sum(1))  # r.shape == (569,)
rlim = np.linspace(min(r), max(r), 500)


# Helper that draws the data in 3-D, lifting each point to height r.
# elev: up/down rotation angle (degrees)
# azim: horizontal rotation angle (degrees)
def plot_3D(elev=45, azim=30, x=x, y=y, r=r) -> None:
    """Scatter-plot (x[:, 0], x[:, 1], r) in 3-D, coloured by label y.

    Parameters
    ----------
    elev, azim : viewing angles forwarded to ``Axes3D.view_init``.
    x, y, r    : data, labels and point heights.  They default to the
                 module-level arrays, so the original call ``plot_3D()``
                 still works.  (Fix: the original read the global ``r``
                 directly while taking ``x`` and ``y`` as parameters —
                 ``r`` is now an explicit, backward-compatible parameter.)
    """
    ax = plt.subplot(projection="3d")
    ax.scatter3D(x[:, 0], x[:, 1], r, c=y, s=50, cmap="rainbow")
    ax.view_init(elev=elev, azim=azim)
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_zlabel("r")
    plt.show()


plot_3D()


x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=420
)

# Compare the four built-in kernels on the *unscaled* data and time them.
kernel = ["linear", "poly", "rbf", "sigmoid"]
time0 = time()
for i in kernel:
    clf = SVC(kernel=i, gamma="auto", degree=1, cache_size=5000).fit(x_train, y_train)
    print("The accuracy under kernel %s is %f" % (i, clf.score(x_test, y_test)))
    # BUG FIX: the original printed the elapsed time via
    # datetime.fromtimestamp(time() - time0), which treats a *duration*
    # as an epoch timestamp and converts it to local time — on any
    # timezone whose UTC offset is not a whole number of hours
    # (e.g. UTC+5:30) the printed minutes are wrong.  Format the
    # duration directly instead (same MM:SS:microseconds layout).
    elapsed = time() - time0
    mins, secs = divmod(elapsed, 60)
    print("%02d:%02d:%06d" % (mins, int(secs), round((secs % 1) * 1_000_000)))
The accuracy under kernel linear is 0.929825
00:00:278035
The accuracy under kernel poly is 0.923977
00:00:311496
The accuracy under kernel rbf is 0.596491
00:00:329552
The accuracy under kernel sigmoid is 0.596491
00:00:336953
import pandas as pd

# Inspect the per-feature distributions; the wildly different means and
# standard deviations show the features need standardising.
data = pd.DataFrame(x)
quantiles = [0.1, 0.15, 0.25, 0.50, 0.75, 0.90, 0.99]
data.describe(quantiles).T
count mean std min 10% 15% 25% 50% 75% 90% 99% max
0 569.0 14.127292 3.524049 6.981000 10.260000 11.040000 11.700000 13.370000 15.780000 19.530000 24.371600 28.11000
1 569.0 19.289649 4.301036 9.710000 14.078000 14.934000 16.170000 18.840000 21.800000 24.992000 30.652000 39.28000
2 569.0 91.969033 24.298981 43.790000 65.830000 70.670000 75.170000 86.240000 104.100000 129.100000 165.724000 188.50000
3 569.0 654.889104 351.914129 143.500000 321.600000 371.180000 420.300000 551.100000 782.700000 1177.400000 1786.600000 2501.00000
4 569.0 0.096360 0.014064 0.052630 0.079654 0.081948 0.086370 0.095870 0.105300 0.114820 0.132888 0.16340
5 569.0 0.104341 0.052813 0.019380 0.049700 0.053620 0.064920 0.092630 0.130400 0.175460 0.277192 0.34540
6 569.0 0.088799 0.079720 0.000000 0.013686 0.019932 0.029560 0.061540 0.130700 0.203040 0.351688 0.42680
7 569.0 0.048919 0.038803 0.000000 0.011158 0.014366 0.020310 0.033500 0.074000 0.100420 0.164208 0.20120
8 569.0 0.181162 0.027414 0.106000 0.149580 0.154620 0.161900 0.179200 0.195700 0.214940 0.259564 0.30400
9 569.0 0.062798 0.007060 0.049960 0.055338 0.056302 0.057700 0.061540 0.066120 0.072266 0.085438 0.09744
10 569.0 0.405172 0.277313 0.111500 0.183080 0.202840 0.232400 0.324200 0.478900 0.748880 1.291320 2.87300
11 569.0 1.216853 0.551648 0.360200 0.640400 0.710700 0.833900 1.108000 1.474000 1.909400 2.915440 4.88500
12 569.0 2.866059 2.021855 0.757000 1.280200 1.435400 1.606000 2.287000 3.357000 5.123200 9.690040 21.98000
13 569.0 40.337079 45.491006 6.802000 13.160000 14.938000 17.850000 24.530000 45.190000 91.314000 177.684000 542.20000
14 569.0 0.007041 0.003003 0.001713 0.004224 0.004539 0.005169 0.006380 0.008146 0.010410 0.017258 0.03113
15 569.0 0.025478 0.017908 0.002252 0.009169 0.010860 0.013080 0.020450 0.032450 0.047602 0.089872 0.13540
16 569.0 0.031894 0.030186 0.000000 0.007726 0.010734 0.015090 0.025890 0.042050 0.058520 0.122292 0.39600
17 569.0 0.011796 0.006170 0.000000 0.005493 0.006325 0.007638 0.010930 0.014710 0.018688 0.031194 0.05279
18 569.0 0.020542 0.008266 0.007882 0.013012 0.013746 0.015160 0.018730 0.023480 0.030120 0.052208 0.07895
19 569.0 0.003795 0.002646 0.000895 0.001710 0.001892 0.002248 0.003187 0.004558 0.006185 0.012650 0.02984
20 569.0 16.269190 4.833242 7.930000 11.234000 11.996000 13.010000 14.970000 18.790000 23.682000 30.762800 36.04000
21 569.0 25.677223 6.146258 12.020000 17.800000 19.252000 21.080000 25.410000 29.720000 33.646000 41.802400 49.54000
22 569.0 107.261213 33.602542 50.410000 72.178000 77.984000 84.110000 97.660000 125.400000 157.740000 208.304000 251.20000
23 569.0 880.583128 569.356993 185.200000 384.720000 440.080000 515.300000 686.500000 1084.000000 1673.000000 2918.160000 4254.00000
24 569.0 0.132369 0.022832 0.071170 0.102960 0.108620 0.116600 0.131300 0.146000 0.161480 0.188908 0.22260
25 569.0 0.254265 0.157336 0.027290 0.093676 0.108820 0.147200 0.211900 0.339100 0.447840 0.778644 1.05800
26 569.0 0.272188 0.208624 0.000000 0.045652 0.071612 0.114500 0.226700 0.382900 0.571320 0.902380 1.25200
27 569.0 0.114606 0.065732 0.000000 0.038460 0.050026 0.064930 0.099930 0.161400 0.208940 0.269216 0.29100
28 569.0 0.290076 0.061867 0.156500 0.226120 0.235100 0.250400 0.282200 0.317900 0.360080 0.486908 0.66380
29 569.0 0.083946 0.018061 0.055040 0.065792 0.067848 0.071460 0.080040 0.092080 0.106320 0.140628 0.20750
from sklearn.preprocessing import StandardScaler

# Standardise every feature to zero mean and unit variance.
scaler = StandardScaler()
x = scaler.fit_transform(x)
count mean std min 10% 15% 25% 50% 75% 90% 99% max
0 569.0 -2.497514e-17 1.00088 -2.029648 -1.098366 -0.876835 -0.689385 -0.215082 0.469393 1.534446 2.909529 3.971288
1 569.0 -2.497514e-17 1.00088 -2.229249 -1.212786 -1.013589 -0.725963 -0.104636 0.584176 1.326975 2.644095 4.651889
2 569.0 0.000000e+00 1.00088 -1.984504 -1.076672 -0.877311 -0.691956 -0.235980 0.499677 1.529432 3.037982 3.976130
3 569.0 3.746271e-17 1.00088 -1.454443 -0.947908 -0.806898 -0.667195 -0.295187 0.363507 1.486075 3.218702 5.250529
4 569.0 -6.243785e-17 1.00088 -3.112085 -1.188910 -1.025656 -0.710963 -0.034891 0.636199 1.313694 2.599511 4.770911
5 569.0 -1.248757e-17 1.00088 -1.610136 -1.035527 -0.961238 -0.747086 -0.221940 0.493857 1.347811 3.275782 4.568425
6 569.0 1.373633e-16 1.00088 -1.114873 -0.943046 -0.864627 -0.743748 -0.342240 0.526062 1.434288 3.300560 4.243589
7 569.0 6.243785e-17 1.00088 -1.261820 -0.974010 -0.891263 -0.737944 -0.397721 0.646935 1.328412 2.973759 3.927930
8 569.0 1.248757e-16 1.00088 -2.744117 -1.153036 -0.969028 -0.703240 -0.071627 0.530779 1.233221 2.862418 4.484751
9 569.0 -3.121893e-17 1.00088 -1.819865 -1.057477 -0.920820 -0.722639 -0.178279 0.470983 1.342243 3.209454 4.910919
10 569.0 -8.741299e-17 1.00088 -1.059924 -0.801577 -0.730259 -0.623571 -0.292245 0.266100 1.240514 3.198294 8.906909
11 569.0 -2.185325e-17 1.00088 -1.554264 -1.045885 -0.918336 -0.694809 -0.197498 0.466552 1.256518 3.081820 6.655279
12 569.0 7.492542e-17 1.00088 -1.044049 -0.785049 -0.708220 -0.623768 -0.286652 0.243031 1.117354 3.378079 9.461986
13 569.0 5.619407e-17 1.00088 -0.737829 -0.597942 -0.558823 -0.494754 -0.347783 0.106773 1.121579 3.021867 11.041842
14 569.0 1.404852e-17 1.00088 -1.776065 -0.939031 -0.834027 -0.624018 -0.220335 0.368355 1.123053 3.405812 8.029999
15 569.0 4.370650e-17 1.00088 -1.298098 -0.911510 -0.817001 -0.692926 -0.281020 0.389654 1.236492 3.598943 6.143482
16 569.0 6.243785e-17 1.00088 -1.057501 -0.801336 -0.701593 -0.557161 -0.199065 0.336752 0.882848 2.997338 12.072680
17 569.0 -1.873136e-17 1.00088 -1.913447 -1.022462 -0.887439 -0.674490 -0.140496 0.472657 1.117927 3.146456 6.649601
18 569.0 -2.497514e-17 1.00088 -1.532890 -0.911757 -0.822886 -0.651681 -0.219430 0.355692 1.159654 3.834036 7.071917
19 569.0 3.121893e-17 1.00088 -1.096968 -0.788466 -0.719776 -0.585118 -0.229940 0.288642 0.904208 3.349301 9.851593
20 569.0 4.995028e-17 1.00088 -1.726901 -1.042700 -0.884903 -0.674921 -0.269040 0.522016 1.535063 3.001373 4.094189
21 569.0 -3.746271e-17 1.00088 -2.223994 -1.282757 -1.046308 -0.748629 -0.043516 0.658341 1.297666 2.625885 3.885905
22 569.0 -2.497514e-17 1.00088 -1.693361 -1.044983 -0.872046 -0.689578 -0.285980 0.540279 1.503553 3.009644 4.287337
23 569.0 1.123881e-16 1.00088 -1.222423 -0.871684 -0.774366 -0.642136 -0.341181 0.357589 1.393000 3.581882 5.930172
24 569.0 9.990056e-17 1.00088 -2.682695 -1.289152 -1.041041 -0.691230 -0.046843 0.597545 1.276124 2.478455 3.955374
25 569.0 6.243785e-17 1.00088 -1.443878 -1.021571 -0.925234 -0.681083 -0.269501 0.539669 1.231407 3.335783 5.112877
26 569.0 -4.995028e-17 1.00088 -1.305831 -1.086814 -0.962270 -0.756514 -0.218232 0.531141 1.435090 3.023359 4.700669
27 569.0 -1.248757e-17 1.00088 -1.745063 -1.159448 -0.983337 -0.756400 -0.223469 0.712510 1.436382 2.354181 2.685877
28 569.0 -1.123881e-16 1.00088 -2.160960 -1.034661 -0.889384 -0.641864 -0.127409 0.450138 1.132518 3.184317 6.046041
29 569.0 -7.492542e-17 1.00088 -1.601839 -1.006009 -0.892074 -0.691912 -0.216444 0.450762 1.239884 3.141089 6.846856
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=420
)

# Re-run the kernel comparison on the *standardised* data and time it.
kernel = ["linear", "poly", "rbf", "sigmoid"]
time0 = time()
for i in kernel:
    clf = SVC(kernel=i, gamma="auto", degree=1, cache_size=5000).fit(x_train, y_train)
    print("The accuracy under kernel %s is %f" % (i, clf.score(x_test, y_test)))
    # BUG FIX: datetime.fromtimestamp(elapsed) interprets a duration as
    # an epoch timestamp in *local* time, so the printed minutes are
    # timezone-dependent.  Format the elapsed duration directly
    # (same MM:SS:microseconds layout).
    elapsed = time() - time0
    mins, secs = divmod(elapsed, 60)
    print("%02d:%02d:%06d" % (mins, int(secs), round((secs % 1) * 1_000_000)))
The accuracy under kernel linear is 0.976608
00:00:003820
The accuracy under kernel poly is 0.964912
00:00:007333
The accuracy under kernel rbf is 0.970760
00:00:013197
The accuracy under kernel sigmoid is 0.953216
00:00:019282
# Tune gamma for the RBF kernel over a log-spaced grid.
gamma_range = np.logspace(-10, 1, 50)
score = []
for g in gamma_range:
    model = SVC(kernel="rbf", gamma=g, degree=1, cache_size=5000)
    score.append(model.fit(x_train, y_train).score(x_test, y_test))
best = max(score)
print(best, gamma_range[score.index(best)])
plt.plot(gamma_range, score)
plt.show()
0.9766081871345029 0.012067926406393264

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV

# Jointly tune gamma and coef0 for the degree-1 polynomial kernel using
# stratified shuffle-split cross-validation over the whole dataset.
gamma_range = np.logspace(-10, 1, 20)
coef0_range = np.linspace(0, 5, 10)

param = dict(gamma=gamma_range, coef0=coef0_range)

cv = StratifiedShuffleSplit(n_splits=5, test_size=0.3, random_state=420)

grid = GridSearchCV(
    SVC(kernel="poly", cache_size=5000, degree=1), param_grid=param, cv=cv
)

grid.fit(x, y)

print(
    "The best parameters are %s with a score of %0.5f"
    % (grid.best_params_, grid.best_score_)
)
# BUG FIX: datetime.fromtimestamp(elapsed) converts a duration to local
# wall-clock time, making the printed minutes timezone-dependent.
# Format the elapsed duration directly (same MM:SS:microseconds layout).
elapsed = time() - time0
mins, secs = divmod(elapsed, 60)
print("%02d:%02d:%06d" % (mins, int(secs), round((secs % 1) * 1_000_000)))
The best parameters are {'coef0': 0.0, 'gamma': 0.18329807108324375} with a score of 0.96959
41:01:513573

重要参数 C (the important regularisation parameter C)

# Coarse scan of the regularisation strength C for the linear kernel
# over np.linspace(0.01, 30, 50).
C_range = np.linspace(0.01, 30, 50)
score = []
for penalty in C_range:
    model = SVC(kernel="linear", C=penalty, cache_size=5000)
    score.append(model.fit(x_train, y_train).score(x_test, y_test))
best = max(score)
print(best, C_range[score.index(best)])
plt.plot(C_range, score)
plt.show()
0.9766081871345029 1.2340816326530613

# Same coarse C scan, now for the RBF kernel,
# over np.linspace(0.01, 30, 50).
C_range = np.linspace(0.01, 30, 50)
score = []
for penalty in C_range:
    model = SVC(kernel="rbf", C=penalty, cache_size=5000)
    score.append(model.fit(x_train, y_train).score(x_test, y_test))
best = max(score)
print(best, C_range[score.index(best)])
plt.plot(C_range, score)
plt.show()
0.9883040935672515 25.103673469387758

# "Further refinement" pass for C with the RBF kernel.
# NOTE(review): despite the original "refine" comment, this range is
# identical to the previous scan — it was presumably meant to be
# narrowed around the previous optimum (~25); confirm against the
# source notebook before trusting this step.
C_range = np.linspace(0.01, 30, 50)
score = []
for c in C_range:
    clf = SVC(kernel="rbf", C=c, cache_size=5000).fit(x_train, y_train)
    score.append(clf.score(x_test, y_test))
print(max(score), C_range[score.index(max(score))])
plt.plot(C_range, score)
plt.show()
# BUG FIX: datetime.fromtimestamp(elapsed) renders a duration as local
# wall-clock time (timezone-dependent).  Format the duration directly
# (same MM:SS:microseconds layout).
elapsed = time() - time0
mins, secs = divmod(elapsed, 60)
print("%02d:%02d:%06d" % (mins, int(secs), round((secs % 1) * 1_000_000)))
0.9883040935672515 25.026315789473685

10:56:038874
# Cross-validate C in a narrow window with 10-fold CV on the full data.
from sklearn.model_selection import cross_val_score

C_range = np.linspace(3, 4, 50)
score = []
for c in C_range:
    clf = SVC(kernel="rbf", C=c, cache_size=5000)
    score.append(cross_val_score(clf, x, y, cv=10).mean())
print(max(score), C_range[score.index(max(score))])
plt.plot(C_range, score)
plt.show()
# BUG FIX: datetime.fromtimestamp(elapsed) renders a duration as local
# wall-clock time (timezone-dependent).  Format the duration directly
# (same MM:SS:microseconds layout).
elapsed = time() - time0
mins, secs = divmod(elapsed, 60)
print("%02d:%02d:%06d" % (mins, int(secs), round((secs % 1) * 1_000_000)))
0.982456140350877 3.0

24:23:139221
# Fit the final model with the chosen hyper-parameters and report its
# test accuracy and full parameter set.
clf = SVC(kernel="rbf", C=3, gamma="auto", cache_size=5000)
clf.fit(x_train, y_train)
clf.score(x_test, y_test)
clf.get_params()
{'C': 3,
 'break_ties': False,
 'cache_size': 5000,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'auto',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}
posted @ 2023-04-25 16:38  ThankCAT  阅读(22)  评论(0编辑  收藏  举报