// 2025/1/24

 

// End-to-end Spark ML example: load the adult CSV, assemble numeric columns
// into a feature vector, fit a logistic regression on a 70/30 split, and
// preview predictions on the held-out part.
//
// Expects a SparkSession named `spark` in scope (as in spark-shell), and
// VectorAssembler (org.apache.spark.ml.feature) and LogisticRegression
// (org.apache.spark.ml.classification) to be imported.

// Data import
// inferSchema is required here: without it the CSV reader types every column
// as a string, and VectorAssembler rejects string input columns.
val data = spark.read
  .option("header", "true")
  .option("inferSchema", "true")
  .csv("data/adult.csv")

// Data preprocessing: pack the numeric columns into a single "features" vector.
val assembler = new VectorAssembler()
  .setInputCols(Array("age", "fnlwgt", "education-num", "capital-gain", "capital-loss", "hours-per-week"))
  .setOutputCol("features")

val dataWithFeatures = assembler.transform(data)

// Model training
// NOTE(review): assumes the CSV already contains a numeric column named
// "label" — confirm against the file's header; a string target would need
// to be indexed first.
val lr = new LogisticRegression().setLabelCol("label").setFeaturesCol("features")
// Fixed seed so the train/test split (and therefore the results) is reproducible.
val Array(trainingData, testData) = dataWithFeatures.randomSplit(Array(0.7, 0.3), 42L)
val model = lr.fit(trainingData)

// Prediction
val predictions = model.transform(testData)
predictions.select("features", "label", "prediction").show(5)

 

// Blog footer: posted @ 2025-01-24 19:14 by 为20岁努力 (views: 7, comments: 0)