pyspark 高斯混合模型

from pyspark.ml.clustering import GaussianMixture
from pyspark.sql import SparkSession
spark= SparkSession\
                .builder \
                .appName("dataFrame") \
                .getOrCreate()
# loads data
dataset = spark.read.format("libsvm").load("/home/luogan/lg/softinstall/spark-2.3.0-bin-hadoop2.7/data/mllib/sample_kmeans_data.txt")

gmm = GaussianMixture().setK(2).setSeed(538009335)
model = gmm.fit(dataset)

print("Gaussians shown as a DataFrame: ")
model.gaussiansDF.show(truncate=False)
posted @ 2022-08-19 22:57  luoganttcc  阅读(18)  评论(0)    收藏  举报