Spark PolynomialExpansion 多项式扩展

1、概念

特征升维:将原始特征向量扩展为其各分量的多项式组合(如平方项、交叉项),从而把特征映射到更高维的空间。

2、code

package com.home.spark.ml

import org.apache.spark.SparkConf
import org.apache.spark.ml.feature.PolynomialExpansion
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.SparkSession

/**
  * Example of Spark ML's `PolynomialExpansion` feature transformer.
  *
  * Polynomial expansion maps features into a polynomial space made up of the
  * n-degree combinations of the original dimensions. With degree 2, a vector
  * (x, y) becomes (x, x * x, y, x * y, y * y). This example expands
  * two-dimensional vectors into the degree-3 polynomial space and prints the
  * input and expanded columns.
  **/
object Ex_PolynomialExpansion {

  /**
    * Builds a small in-memory DataFrame of dense vectors, applies a degree-3
    * polynomial expansion and prints the result to stdout.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // Local two-thread master so the example runs without a cluster.
    val conf: SparkConf = new SparkConf(true).setMaster("local[2]").setAppName("spark ml")
    val spark = SparkSession.builder().config(conf).getOrCreate()

    // try/finally guarantees the session is stopped even if building the
    // DataFrame or running the transformer throws.
    try {
      val data = Array(
        Vectors.dense(2.0, 1.0),
        Vectors.dense(0.0, 0.0),
        Vectors.dense(2.0, 3.0),
        Vectors.dense(3.0, -1.0)
      )
      // Wrap each vector in a Tuple1 so it becomes a single-column row.
      val df = spark.createDataFrame(data.map(Tuple1.apply)).toDF("features")

      val polynomialExpansion = new PolynomialExpansion()
        .setInputCol("features")
        .setOutputCol("polyFeatures")
        // setDegree sets the highest total degree of the generated terms.
        // Every output entry is a product (monomial) of the inputs; for
        // (x, y) at degree 3 the output order is:
        //   x, x^2, x^3, y, x*y, x^2*y, y^2, x*y^2, y^3
        .setDegree(3)
      val result = polynomialExpansion.transform(df)

      // truncate = false prints the full vectors instead of cutting them off.
      result.show(truncate = false)
    } finally {
      spark.stop()
    }
  }
}


+----------+------------------------------------------+
|features  |polyFeatures                              |
+----------+------------------------------------------+
|[2.0,1.0] |[2.0,4.0,8.0,1.0,2.0,4.0,1.0,2.0,1.0]     |
|[0.0,0.0] |[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]     |
|[2.0,3.0] |[2.0,4.0,8.0,3.0,6.0,12.0,9.0,18.0,27.0]  |
|[3.0,-1.0]|[3.0,9.0,27.0,-1.0,-3.0,-9.0,1.0,3.0,-1.0]|
+----------+------------------------------------------+
posted @ 2020-01-16 10:24  我是属车的  阅读(473)  评论(0)  编辑  收藏  举报