package MapFilterPk {
  import org.apache.hadoop.conf.Configuration
  import org.apache.hadoop.fs.{FileSystem, Path}
  import org.apache.hadoop.io.{LongWritable, NullWritable, Text}
  import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
  import org.apache.hadoop.mapreduce.{Job, Mapper}
  /*
   * Requirement
   * 1. Filter out Shu and Wei personnel (keep only Wu personnel)
   */
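  // Hypothetical example of the expected input file (src/main/data/input/1.txt),
  // with space-separated names per line. This sample is an assumption inferred
  // from the split(" +") in the mapper below, not taken from the original data:
  //   刘备 曹操 孙权
  //   关羽 周瑜 曹仁
  // Only names in the Wu list (孙权, 张昭, 周瑜) would appear in the output.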
  // Mapper class
  class MFilterMapper extends Mapper[LongWritable, Text, Text, NullWritable] {
    private val outkey = new Text()

    override def map(key: LongWritable, value: Text, context: Mapper[LongWritable, Text, Text, NullWritable]#Context): Unit = {
      // Split each input line on one or more spaces
      val strings = value.toString.split(" +")
      //val one: List[String] = List("曹操", "曹仁", "曹植")   // Wei personnel
      //val two: List[String] = List("张飞", "刘备", "关羽")   // Shu personnel
      val three: List[String] = List("孙权", "张昭", "周瑜")   // Wu personnel (kept)
      // Emit only the names found in the Wu list; all other tokens are filtered out
      strings.foreach(e => if (three.contains(e)) {
        outkey.set(e)
        context.write(outkey, NullWritable.get)
      })
    }
  }
  // Reducer class: not needed, this is a map-only job (numReduceTasks is set to 0 in the driver)
  // Driver
  object MFilterDriver {
    def main(args: Array[String]): Unit = {
      //1. Get the configuration and create the Job instance
      //   Configuration loads core-default.xml and core-site.xml
      val configuration = new Configuration
      val job: Job = Job.getInstance(configuration)
      //2. Register the jar of this Driver program
      job.setJarByClass(this.getClass)
      job.setJobName("Map Filter")
      //3. Set the Mapper class (map-only job, so no Reducer is configured)
      job.setMapperClass(classOf[MFilterMapper])
      //4. Set the Mapper output key/value types
      //   (not needed here: they default to the final output types set below)
      // job.setMapOutputKeyClass(classOf[Text])
      // job.setMapOutputValueClass(classOf[NullWritable])
      //5. Set the final output key/value types
      job.setOutputKeyClass(classOf[Text])
      job.setOutputValueClass(classOf[NullWritable])
      //6. Set the input and output paths
      FileInputFormat.setInputPaths(job, "src/main/data/input/1.txt")
      val outputPath = new Path("src/main/data/output")
      FileOutputFormat.setOutputPath(job, outputPath)
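      // The job fails if the output directory already exists, so it can help to
      // remove it before submission. A minimal sketch, assuming the default
      // FileSystem resolved from `configuration` (adjust if running against HDFS):
      val fs = FileSystem.get(configuration)
      if (fs.exists(outputPath)) fs.delete(outputPath, true)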
      //7. The mapper output is written directly; no Reduce phase is needed
      job.setNumReduceTasks(0)
      //8. Submit the job and wait for completion; exit 0 on success, 1 on failure
      val success: Boolean = job.waitForCompletion(false)
      System.exit(if (success) 0 else 1)
    }
  }
}