[root@centos00 ~]# cd hadoop-2.6.0-cdh5.14.2/
[root@centos00 hadoop-2.6.0-cdh5.14.2]# sbin/hadoop-daemon.sh start namenode
[root@centos00 hadoop-2.6.0-cdh5.14.2]# sbin/hadoop-daemon.sh start datanode
[root@centos00 hadoop-2.6.0-cdh5.14.2]# sbin/yarn-daemon.sh start resourcemanager
[root@centos00 ~]# cd /opt/cdh5.14.2/hive-1.1.0-cdh5.14.2/
[root@centos00 hive-1.1.0-cdh5.14.2]# bin/hive --service metastore &
[root@centos00 ~]# cd /opt/cdh5.14.2/spark-2.2.1-cdh5.14.2/
[root@centos00 spark-2.2.1-cdh5.14.2]# sbin/start-master.sh
[root@centos00 spark-2.2.1-cdh5.14.2]# sbin/start-slaves.sh
scala> val df = Seq((1,"Jack",50),(1,"Tony",100),(1,"Alex",125),(2,"Jack",75),(2,"Tony",150),(2,"Alex",175)).toDF("id","name","salary")
df: org.apache.spark.sql.DataFrame = [id: int, name: string ... 1 more field]
scala> df.show(false)
+---+----+------+
|id |name|salary|
+---+----+------+
|1 |Jack|50 |
|1 |Tony|100 |
|1 |Alex|125 |
|2 |Jack|75 |
|2 |Tony|150 |
|2 |Alex|175 |
+---+----+------+
scala> val df2 = df.groupBy("id").pivot("name").max("salary")
df2: org.apache.spark.sql.DataFrame = [id: int, Alex: int ... 2 more fields]
scala> df2.show(false)
+---+----+----+----+
|id |Alex|Jack|Tony|
+---+----+----+----+
|1 |125 |50 |100 |
|2 |175 |75 |150 |
+---+----+----+----+