map与flatMap的区别
spark版本:spark 2.0.2
scala版本:2.11.8
服务器版本:CentOS 6.7
对比map和flatMap在RDD中的使用:map对每个元素应用函数,每个输入元素恰好得到一个输出元素(一对一);flatMap对每个元素应用函数后,会把返回的集合展平,一个输入元素可以得到零个或多个输出元素。
// Three identical lines of text, distributed as an RDD[String].
val rdd1 = sc.parallelize(Seq.fill(3)("one two three four five six seven"))

// map is one-to-one: every line yields exactly one Array[String],
// so the collected result is an array of arrays.
rdd1.map(line => line.split(" ")).collect()
/*
REPL output:
Array[Array[String]] = Array(Array(one, two, three, four, five, six, seven),
Array(one, two, three, four, five, six, seven),
Array(one, two, three, four, five, six, seven))
*/

// flatMap flattens the per-line arrays into a single sequence of words.
rdd1.flatMap(line => line.split(" ")).collect()
/*
REPL output:
Array[String] = Array(one, two, three, four, five, six, seven,
one, two, three, four, five, six, seven,
one, two, three, four, five, six, seven)
*/
// Pair RDD: an Int id paired with a line of text.
val rdd2 = sc.parallelize(Seq(
  (1, "one two three four five six seven"),
  (2, "one two three four five six seven"),
  (3, "one two three four five six seven")
))

// map keeps one output record per input record; only the text is split,
// so each id ends up paired with an Array[String].
rdd2.map { case (id, text) => (id, text.split(" ")) }.collect()
/*
REPL output:
Array[(Int, Array[String])] = Array((1,Array(one, two, three, four, five, six, seven)),
(2,Array(one, two, three, four, five, six, seven)),
(3,Array(one, two, three, four, five, six, seven)))
*/
// Same mapping as a plain split, but the words are exposed as an Iterable[String]
// (the array is wrapped, hence WrappedArray in the output below).
rdd2.map { case (id, text) =>
  (id, text.split(" ").toIterable)
}.collect()
/*
REPL output:
Array[(Int, Iterable[String])] = Array((1,WrappedArray(one, two, three, four, five, six, seven)),
(2,WrappedArray(one, two, three, four, five, six, seven)),
(3,WrappedArray(one, two, three, four, five, six, seven)))
*/
// Step 1 (map): pair each id with its words as an Iterable[String].
// Step 2 (flatMap): emit one (id, word) pair per word and flatten them,
// so every input record expands into several output records.
rdd2.map { case (id, text) =>
  (id, text.split(" ").toIterable)
}.flatMap { case (id, words) =>
  words.map(word => (id, word))
}.collect()
/*
REPL output:
Array[(Int, String)] = Array((1,one), (1,two), (1,three), (1,four), (1,five), (1,six), (1,seven),
(2,one), (2,two), (2,three), (2,four), (2,five), (2,six), (2,seven),
(3,one), (3,two), (3,three), (3,four), (3,five), (3,six), (3,seven))
*/

浙公网安备 33010602011771号