TransformOperator2

package com.bjsxt.sparkstreaming;

import java.util.ArrayList;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.broadcast.Broadcast;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

import com.google.common.base.Optional;

import scala.Tuple2;

/**
 * Spark Streaming example that filters out blacklisted names using a broadcast variable.
 *
 * <p>Lines are read from a socket text stream. The second space-separated token of each
 * line is used as the name; lines whose name is contained in the broadcast blacklist are
 * dropped, and the remaining lines are printed unchanged for every batch.
 *
 * @author root
 */
public class TransformOperator2 {

	/**
	 * Builds the streaming job, starts it and blocks until it terminates.
	 *
	 * @param args command-line arguments (not used)
	 */
	public static void main(String[] args) {
		SparkConf conf = new SparkConf();
		conf.setMaster("local[2]").setAppName("transform");
		JavaStreamingContext jsc = new JavaStreamingContext(conf, Durations.seconds(5));

		// Simulated blacklist.
		List<String> blackList = new ArrayList<String>();
		blackList.add("zhangsan");
		// Broadcast the blacklist; the filter function below reads it through this handle.
		final Broadcast<List<String>> broadcastList = jsc.sparkContext().broadcast(blackList);

		// Receive lines from the socket source.
		JavaReceiverInputDStream<String> nameList = jsc.socketTextStream("node5", 9999);

		// Skip lines that have no second token (empty line, single word, trailing space):
		// indexing split(" ")[1] on them would throw ArrayIndexOutOfBoundsException and
		// fail the batch.
		JavaDStream<String> wellFormedLines = nameList.filter(new Function<String, Boolean>() {
			private static final long serialVersionUID = 1L;

			@Override
			public Boolean call(String line) throws Exception {
				return line.split(" ").length > 1;
			}
		});

		// Key every line by its second token (the name); the value is the full line.
		JavaPairDStream<String, String> pairNameList = wellFormedLines.mapToPair(new PairFunction<String, String, String>() {
			private static final long serialVersionUID = 1L;

			@Override
			public Tuple2<String, String> call(String s) throws Exception {
				return new Tuple2<String, String>(s.split(" ")[1], s);
			}
		});

		JavaDStream<String> transFormResult = pairNameList.transform(new Function<JavaPairRDD<String,String>, JavaRDD<String>>() {
			private static final long serialVersionUID = 1L;

			@Override
			public JavaRDD<String> call(JavaPairRDD<String, String> nameRDD)
					throws Exception {
				// Keep only the pairs whose name is not on the blacklist.
				JavaPairRDD<String, String> filter =
						nameRDD.filter(new Function<Tuple2<String,String>, Boolean>() {
					private static final long serialVersionUID = 1L;

					@Override
					public Boolean call(Tuple2<String, String> v1)
							throws Exception {
						// Fetch the broadcast blacklist.
						List<String> blackList = broadcastList.value();

						return !blackList.contains(v1._1);
					}
				});
				// Drop the key and return the original line.
				return filter.map(new Function<Tuple2<String,String>, String>() {
					private static final long serialVersionUID = 1L;

					@Override
					public String call(Tuple2<String, String> v1)
							throws Exception {
						return v1._2;
					}
				});
			}
		});
		transFormResult.print();
		try {
			jsc.start();
			jsc.awaitTermination();
		} finally {
			// Stop the context even when start/awaitTermination throws, so it is not
			// left running after a failure.
			jsc.stop();
		}
	}
}

  

posted @ 2018-06-18 13:54  uuhh  阅读(39)  评论(0)    收藏  举报