System.setProperty("hadoop.home.dir", "C:\\hadoop-2.7.2");
val spark = SparkSession.builder().config(new SparkConf().setAppName("LR").setMaster("local[*]")).config("spark.sql.warehouse.dir", "file:///").getOrCreate()
val sc = spark.sparkContext
val rdd = sc.textFile("C:\\Users\\Daxin\\Documents\\GitHub\\OptimizedRF\\sql_data\\LRDATA")
val schemaString = "label features"
// val fields = schemaString.split(" ").map(StructField(_, StringType, true))
// org.apache.spark.ml.linalg.SQLDataTypes.VectorType替换org.apache.spark.ml.linalg.VectorUDT(一个spark包私有的类型)
val fields = Array(StructField("label", DoubleType, true), StructField("features", org.apache.spark.ml.linalg.SQLDataTypes.VectorType, true))
val rowRdd = rdd.map {
x =>
Row(x.split(",")(1).toDouble, Vectors.dense(Array[Double](x.split(",")(0).toDouble)))
}
val schema = StructType(fields)
val Array(train, test) = spark.createDataFrame(rowRdd, schema).randomSplit(Array[Double](0.6, 0.4))
val lr = new LinearRegression()
.setMaxIter(100)
.setRegParam(0.3)
.setElasticNetParam(0.8) //.setTol(0.01) // 收敛阈值
val lrModel = lr.fit(train)
println(lrModel.transform(test).columns.toBuffer)
lrModel.transform(test).select("label", "prediction").show()
println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")