跟着Leo机器学习实战:sklearn之clustering函数导图(干货分享)
一个很有趣的个人博客,不信你来撩 fangzengye.com
整篇函数导图(干货分享)

类包
class sklearn.cluster.KMeans(n_clusters=8, init='k-means++', n_init=10, max_iter=300, tol=0.0001, precompute_distances='auto', verbose=0, random_state=None, copy_x=True, n_jobs=None, algorithm='auto')
KMeans
from sklearn.cluster import KMeans
import numpy as np
X = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])
kmeans = KMeans(n_clusters=2, random_state=0).fit(X)
kmeans.labels_
kmeans.predict([[0, 0], [12, 3]])
kmeans.cluster_centers_
源地址
https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html#sklearn.cluster.KMeans
2.3.3. Affinity Propagation
类包
class sklearn.cluster.AffinityPropagation(damping=0.5, max_iter=200, convergence_iter=15, copy=True, preference=None, affinity='euclidean', verbose=False)
例子
from sklearn.cluster import AffinityPropagation
import numpy as np
X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [4, 4], [4, 0]])
clustering = AffinityPropagation().fit(X)
clustering
clustering.labels_
clustering.predict([[0, 0], [4, 4]])
clustering.cluster_centers_
源地址
https://scikit-learn.org/stable/modules/generated/sklearn.cluster.AffinityPropagation.html#sklearn.cluster.AffinityPropagation
2.3.4. Mean Shift
类包
class sklearn.cluster.MeanShift(bandwidth=None, seeds=None, bin_seeding=False, min_bin_freq=1, cluster_all=True, n_jobs=None, max_iter=300)
例子
from sklearn.cluster import MeanShift
import numpy as np
X = np.array([[1, 1], [2, 1], [1, 0], [4, 7], [3, 5], [3, 6]])
clustering = MeanShift(bandwidth=2).fit(X)
clustering.labels_
clustering.predict([[0, 0], [5, 5]])
clustering
源地址
https://scikit-learn.org/stable/modules/generated/sklearn.cluster.MeanShift.html#sklearn.cluster.MeanShift
2.3.5. Spectral clustering
类包
class sklearn.cluster.SpectralClustering(n_clusters=8, eigen_solver=None, n_components=None, random_state=None, n_init=10, gamma=1.0, affinity='rbf', n_neighbors=10, eigen_tol=0.0, assign_labels='kmeans', degree=3, coef0=1, kernel_params=None, n_jobs=None)
例子
from sklearn.cluster import SpectralClustering
import numpy as np
X = np.array([[1, 1], [2, 1], [1, 0], [4, 7], [3, 5], [3, 6]])
clustering = SpectralClustering(n_clusters=2, assign_labels="discretize", random_state=0).fit(X)
clustering.labels_
clustering
源地址
https://scikit-learn.org/stable/modules/generated/sklearn.cluster.SpectralClustering.html#sklearn.cluster.SpectralClustering
2.3.6. Hierarchical clustering
类包
class sklearn.cluster.AgglomerativeClustering(n_clusters=2, affinity='euclidean', memory=None, connectivity=None, compute_full_tree='auto', linkage='ward', distance_threshold=None)
例子
from sklearn.cluster import AgglomerativeClustering
import numpy as np
X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [4, 4], [4, 0]])
clustering = AgglomerativeClustering().fit(X)
clustering
clustering.labels_
源地址
https://scikit-learn.org/stable/modules/generated/sklearn.cluster.AgglomerativeClustering.html#sklearn.cluster.AgglomerativeClustering
2.3.7. DBSCAN
类包
class sklearn.cluster.DBSCAN(eps=0.5, min_samples=5, metric='euclidean', metric_params=None, algorithm='auto', leaf_size=30, p=None, n_jobs=None)
例子
from sklearn.cluster import DBSCAN
import numpy as np
X = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]])
clustering = DBSCAN(eps=3, min_samples=2).fit(X)
clustering.labels_
clustering
源地址
https://scikit-learn.org/stable/modules/generated/sklearn.cluster.DBSCAN.html#sklearn.cluster.DBSCAN
2.3.8. OPTICS
类包
class sklearn.cluster.OPTICS(min_samples=5, max_eps=inf, metric='minkowski', p=2, metric_params=None, cluster_method='xi', eps=None, xi=0.05, predecessor_correction=True, min_cluster_size=None, algorithm='auto', leaf_size=30, n_jobs=None)
例子
from sklearn.cluster import OPTICS
import numpy as np
X = np.array([[1, 2], [2, 5], [3, 6],
[8, 7], [8, 8], [7, 3]])
clustering = OPTICS(min_samples=2).fit(X)
clustering.labels_
源地址
https://scikit-learn.org/stable/modules/generated/sklearn.cluster.OPTICS.html#sklearn.cluster.OPTICS
2.3.9. Birch
类包
class sklearn.cluster.Birch(threshold=0.5, branching_factor=50, n_clusters=3, compute_labels=True, copy=True)
例子
from sklearn.cluster import Birch
X = [[0, 1], [0.3, 1], [-0.3, 1], [0, -1], [0.3, -1], [-0.3, -1]]
brc = Birch(n_clusters=None)
brc.fit(X)
brc.predict(X)
源地址
https://scikit-learn.org/stable/modules/generated/sklearn.cluster.Birch.html#sklearn.cluster.Birch
2.3.10. Clustering performance evaluation
2.3.10.1. Adjusted Rand index
from sklearn import metrics
labels_true = [0, 0, 0, 1, 1, 1]
labels_pred = [0, 0, 1, 1, 2, 2]
metrics.adjusted_rand_score(labels_true, labels_pred)
2.3.10.2. Mutual Information based scores
from sklearn import metrics
labels_true = [0, 0, 0, 1, 1, 1]
labels_pred = [0, 0, 1, 1, 2, 2]
metrics.adjusted_mutual_info_score(labels_true, labels_pred)
normalized_mutual_info_score
metrics.normalized_mutual_info_score(labels_true, labels_pred)
mutual_info_score
metrics.mutual_info_score(labels_true, labels_pred)
2.3.10.3. Homogeneity, completeness and V-measure
from sklearn import metrics
labels_true = [0, 0, 0, 1, 1, 1]
labels_pred = [0, 0, 1, 1, 2, 2]
metrics.homogeneity_score(labels_true, labels_pred)
metrics.completeness_score(labels_true, labels_pred)
metrics.v_measure_score(labels_true, labels_pred)
metrics.homogeneity_completeness_v_measure(labels_true, labels_pred)
2.3.10.4. Fowlkes-Mallows scores
from sklearn import metrics
labels_true = [0, 0, 0, 1, 1, 1]
labels_pred = [0, 0, 1, 1, 2, 2]
metrics.fowlkes_mallows_score(labels_true, labels_pred)
2.3.10.5. Silhouette Coefficient
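from sklearn import metrics
from sklearn import datasets
X, y = datasets.load_iris(return_X_y=True)
import numpy as np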
from sklearn.cluster import KMeans
kmeans_model = KMeans(n_clusters=3, random_state=1).fit(X)
labels = kmeans_model.labels_
metrics.silhouette_score(X, labels, metric='euclidean')
2.3.10.6. Calinski-Harabasz Index
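from sklearn import metrics
from sklearn import datasets
X, y = datasets.load_iris(return_X_y=True)
import numpy as np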
from sklearn.cluster import KMeans
kmeans_model = KMeans(n_clusters=3, random_state=1).fit(X)
labels = kmeans_model.labels_
metrics.calinski_harabasz_score(X, labels)
2.3.10.7. Davies-Bouldin Index
from sklearn import datasets
iris = datasets.load_iris()
X = iris.data
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score
kmeans = KMeans(n_clusters=3, random_state=1).fit(X)
labels = kmeans.labels_
davies_bouldin_score(X, labels)
2.3.10.8. Contingency Matrix
from sklearn.metrics.cluster import contingency_matrix
x = ["a", "a", "a", "b", "b", "b"]
y = [0, 0, 1, 1, 2, 2]
contingency_matrix(x, y)
整篇源地址
https://scikit-learn.org/stable/modules/clustering.html#clustering
浙公网安备 33010602011771号