[root@centos00 hadoop-2.6.0-cdh5.14.2]$ sbin/hadoop-daemon.sh start namenode
[root@centos00 hadoop-2.6.0-cdh5.14.2]$ sbin/hadoop-daemon.sh start datanode
[root@centos00 hadoop-2.6.0-cdh5.14.2]$ sbin/yarn-daemon.sh start resourcemanager
[root@centos00 ~]$ cd /opt/cdh5.14.2/hive-1.1.0-cdh5.14.2/
[root@centos00 hive-1.1.0-cdh5.14.2]$ bin/hive --service metastore &
[root@centos00 ~]$ cd /opt/cdh5.14.2/spark-2.2.1-cdh5.14.2/
[root@centos00 spark-2.2.1-cdh5.14.2]$ sbin/start-master.sh
[root@centos00 spark-2.2.1-cdh5.14.2]$ sbin/start-slaves.sh
[root@centos00 ~]$ service kudu-master start
[root@centos00 ~]$ service kudu-tserver start
[root@centos00 spark-2.2.1-cdh5.14.2]$ bin/spark-shell --master local[2] --packages org.apache.kudu:kudu-spark2_2.11:1.8.0
Ivy Default Cache set to: /home/root/.ivy2/cache
The jars for the packages stored in: /home/root/.ivy2/jars
:: loading settings :: url = jar:file:/opt/cdh5.14.2/spark-2.2.1-cdh5.14.2/jars/ivy-2.4.0.jar!/org/apache/ivy/core/settings/ivysettings.xml
org.apache.kudu#kudu-spark2_2.11 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent;1.0
confs: [default]
found org.apache.kudu#kudu-spark2_2.11;1.8.0 in central
:: resolution report :: resolve 204ms :: artifacts dl 14ms
:: modules in use:
org.apache.kudu#kudu-spark2_2.11;1.8.0 from central in [default]
        ---------------------------------------------------------------------
        |                  |            modules            ||   artifacts   |
        |       conf       | number| search|dwnlded|evicted|| number|dwnlded|
        ---------------------------------------------------------------------
        |      default     |   1   |   0   |   0   |   0   ||   1   |   0   |
        ---------------------------------------------------------------------
:: retrieving :: org.apache.spark#spark-submit-parent
confs: [default]
0 artifacts copied, 1 already retrieved (0kB/6ms)
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
20/09/09 18:59:25 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Spark context Web UI available at http://192.168.208.3:4040
Spark context available as 'sc' (master = local[2], app id = local-1599645566105).
Spark session available as 'spark'.
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/ '_/
   /___/ .__/\_,_/_/ /_/\_\   version 2.2.1
      /_/
Using Scala version 2.11.8 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_112)
Type in expressions to have them evaluated.
Type :help for more information.
===================================================== Read =====================================================
scala> import org.apache.kudu.spark.kudu._
import org.apache.kudu.spark.kudu._
scala> val df = spark.read.options(Map("kudu.master" -> "centos00:7051", "kudu.table" -> "my_kudu_table")).kudu
df: org.apache.spark.sql.DataFrame = [id: bigint, name: string ... 1 more field]
scala> df.show(3,false)
+---+-------+------+
|id |name |salary|
+---+-------+------+
|9 |Richard|1800 |
|2 |Tony |1000 |
|3 |David |2000 |
+---+-------+------+
only showing top 3 rows
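// The .kudu at the end of the read comes from the org.apache.kudu.spark.kudu._
// implicits; a long-form equivalent (a sketch of what the implicit expands to):
scala> val df2 = spark.read.options(Map("kudu.master" -> "centos00:7051", "kudu.table" -> "my_kudu_table")).format("org.apache.kudu.spark.kudu").load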
scala> df.select("id","name").show(3,false)
+---+-------+
|id |name |
+---+-------+
|9 |Richard|
|2 |Tony |
|3 |David |
+---+-------+
only showing top 3 rows
scala> df.select("id","name","salary").filter("salary < 1500")show(3,false)
+---+----+------+
|id |name|salary|
+---+----+------+
|2 |Tony|1000 |
|6 |Alex|1400 |
+---+----+------+
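// The connector pushes both the column projection and the salary < 1500
// predicate down to Kudu, so the filtering happens server-side rather than in Spark.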
// Create a temporary table (registerTempTable is deprecated in Spark 2.x; prefer createOrReplaceTempView)
scala> df.registerTempTable("t")
scala> spark.sql("select id, name, salary from t where id = 1").show(false)
+---+-------+------+
|id |name |salary|
+---+-------+------+
|1 |Phoenix|2000 |
+---+-------+------+
// Create a temporary view
scala> df.createOrReplaceTempView("v")
scala> spark.sql("select id, name, salary from v where id = 1").show(false)
+---+-------+------+
|id |name |salary|
+---+-------+------+
|1 |Phoenix|2000 |
+---+-------+------+
===================================================== insertRows =====================================================
// Create a KuduContext object
scala> import org.apache.kudu.spark.kudu.KuduContext
import org.apache.kudu.spark.kudu.KuduContext
scala> val kc = new KuduContext("centos00:7051",spark.sparkContext)
kc: org.apache.kudu.spark.kudu.KuduContext = org.apache.kudu.spark.kudu.KuduContext@70f5f57d
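// Optional sanity check: KuduContext.tableExists reports whether the target
// table is already there (false at this point, since mykudu is created below).
scala> kc.tableExists("mykudu")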
scala> df.schema
res0: org.apache.spark.sql.types.StructType = StructType(StructField(id,LongType,false), StructField(name,StringType,true), StructField(salary,StringType,true))
scala> df.schema.printTreeString
root
|-- id: long (nullable = false)
|-- name: string (nullable = true)
|-- salary: string (nullable = true)
scala> import org.apache.kudu.client._
import org.apache.kudu.client._
scala> import collection.JavaConverters._
import collection.JavaConverters._
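// Create the target table from df's schema: "id" as the primary key, 3 hash
// partitions on id, and a single replica (fine for this one-node setup;
// production clusters typically keep the default of 3 replicas).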
scala> kc.createTable("mykudu", df.schema, Seq("id"), new CreateTableOptions().setNumReplicas(1).addHashPartitions(List("id").asJava,3))
res1: org.apache.kudu.client.KuduTable = org.apache.kudu.client.KuduTable@ed6d97f
scala> val tmp = df.filter("id < 6")
tmp: org.apache.spark.sql.Dataset[org.apache.spark.sql.Row] = [id: bigint, name: string ... 1 more field]
scala> tmp.show
+---+-------+------+
| id| name|salary|
+---+-------+------+
| 2| Tony| 1000|
| 3| David| 2000|
| 1|Phoenix| 2000|
| 5| Jimy| 1900|
| 4| Mike| 1500|
+---+-------+------+
scala> kc.insertRows(tmp, "mykudu")
scala> val m = spark.read.options(Map("kudu.master" -> "centos00:7051", "kudu.table" -> "mykudu")).kudu
m: org.apache.spark.sql.DataFrame = [id: bigint, name: string ... 1 more field]
scala> m.show(false)
+---+-------+------+
|id |name |salary|
+---+-------+------+
|1 |Phoenix|2000 |
|5 |Jimy |1900 |
|2 |Tony |1000 |
|3 |David |2000 |
|4 |Mike |1500 |
+---+-------+------+
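// The same append can also be written through the DataFrameWriter API
// (a sketch; the Kudu data source supports only the "append" save mode):
scala> tmp.write.options(Map("kudu.master" -> "centos00:7051", "kudu.table" -> "mykudu")).mode("append").format("org.apache.kudu.spark.kudu").save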
===================================================== deleteRows =====================================================
scala> df.show(false)
+---+-------+------+
|id |name |salary|
+---+-------+------+
|9 |Richard|1800 |
|2 |Tony |1000 |
|3 |David |2000 |
|10 |Phoniex|null |
|1 |Phoenix|2000 |
|8 |Kevin |8000 |
|5 |Jimy |1900 |
|6 |Alex |1400 |
|7 |Bob |1600 |
|4 |Mike |1500 |
+---+-------+------+
scala> tmp.show(false)
+---+-------+------+
|id |name |salary|
+---+-------+------+
|2 |Tony |1000 |
|3 |David |2000 |
|1 |Phoenix|2000 |
|5 |Jimy |1900 |
|4 |Mike |1500 |
+---+-------+------+
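// deleteRows matches rows by primary key, so passing just the key column is
// enough; the target here is the original my_kudu_table. Because df re-scans
// Kudu on every action, the next df.show reflects the deletes immediately.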
scala> kc.deleteRows(tmp.select("id"), "my_kudu_table")
scala> df.show
+---+-------+------+
| id| name|salary|
+---+-------+------+
| 9|Richard| 1800|
| 10|Phoniex| null|
| 8| Kevin| 8000|
| 6| Alex| 1400|
| 7| Bob| 1600|
+---+-------+------+
===================================================== upsertRows =====================================================
scala> df.orderBy("id").show(false)
+---+-------+------+
|id |name |salary|
+---+-------+------+
|6 |Alex |1400 |
|7 |Bob |1600 |
|8 |Kevin |8000 |
|9 |Richard|1800 |
|10 |Phoniex|null |
+---+-------+------+
scala> df.filter("8 < id and id < 10").show
+---+-------+------+
| id| name|salary|
+---+-------+------+
| 9|Richard| 1800|
+---+-------+------+
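// upsertRows inserts a row when its key is absent and updates it otherwise,
// so the call is safe whether or not id 9 already exists in mykudu.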
scala> kc.upsertRows(df.filter("8 < id and id < 10"), "mykudu")
scala> m.show(false)
+---+-------+------+
|id |name |salary|
+---+-------+------+
|9 |Richard|1800 |
+---+-------+------+
===================================================== updateRows =====================================================
scala> val t = spark.read.options(Map("kudu.master" -> "centos00:7051", "kudu.table" -> "kudutable")).kudu
t: org.apache.spark.sql.DataFrame = [id: string, name: string ... 1 more field]
scala> t.show(false)
+---+------+------+
|id |name |salary|
+---+------+------+
|1 |Jordon|2500 |
+---+------+------+
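// Build a one-row DataFrame carrying the new values for id 1 (toDF works here
// because spark-shell imports spark.implicits._ automatically).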
scala> val d = sc.makeRDD(Seq(("1", "Lincoln", "3000"))).toDF("id", "name", "salary")
d: org.apache.spark.sql.DataFrame = [id: string, name: string ... 1 more field]
scala> d.show(false)
+---+-------+------+
|id |name |salary|
+---+-------+------+
|1 |Lincoln|3000 |
+---+-------+------+
scala> kc.updateRows(d, "kudutable")
scala> t.show(false)
+---+-------+------+
|id |name |salary|
+---+-------+------+
|1 |Lincoln|3000 |
+---+-------+------+
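// Putting it together outside the REPL: a minimal sketch of the read / create /
// insert flow as a standalone app. The object name and the id < 6 slice are
// illustrative; submit with --packages org.apache.kudu:kudu-spark2_2.11:1.8.0.
import org.apache.kudu.client.CreateTableOptions
import org.apache.kudu.spark.kudu._
import org.apache.spark.sql.SparkSession
import scala.collection.JavaConverters._

object KuduSparkDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("KuduSparkDemo").getOrCreate()
    val kuduMaster = "centos00:7051"
    val kc = new KuduContext(kuduMaster, spark.sparkContext)

    // Read the source table through the kudu-spark connector.
    val df = spark.read
      .options(Map("kudu.master" -> kuduMaster, "kudu.table" -> "my_kudu_table"))
      .kudu

    // Create the target table once, keyed and hash-partitioned on id.
    if (!kc.tableExists("mykudu")) {
      kc.createTable("mykudu", df.schema, Seq("id"),
        new CreateTableOptions().setNumReplicas(1).addHashPartitions(List("id").asJava, 3))
    }

    // Copy a slice of the source rows into the new table.
    kc.insertRows(df.filter("id < 6"), "mykudu")

    spark.stop()
  }
}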