数据筛选

数据准备

route_log

Apr 23 11:49:54 hostapd: wlan0: STA 14:7d:c5:9e:fb:84

Apr 23 11:49:52 hostapd: wlan0: STA 74:e5:0b:04:28:f2

Apr 23 11:49:50 hostapd: wlan0: STA cc:af:78:cc:d5:5d

Apr 23 11:49:44 hostapd: wlan0: STA cc:af:78:cc:d5:5d

Apr 23 11:49:43 hostapd: wlan0: STA 14:7d:c5:9e:fb:84

Apr 23 11:49:42 hostapd: wlan0: STA 74:e5:0b:04:28:f2

将 route_log 上传到 HDFS(本例路径为 /value/route_log),然后从每行日志中筛选出三个字段:month(月)、day(日)、mac(MAC 地址)。

代码编写

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Driver for the "route_filter" MapReduce job.
 *
 * <p>Configures a job that runs {@link RouteMap} over a router log and writes
 * one {@code "month day mac"} line per input record. No reducer class is set,
 * so the framework's default reduce stage is used.
 *
 * <p>Usage: {@code Route_filter [inputPath [outputPath]]}. When a path is
 * omitted the original hard-coded HDFS location is used.
 */
public class Route_filter extends Configured implements Tool {

    /** HDFS input path used when no input argument is supplied. */
    private static final String DEFAULT_INPUT_PATH = "/value/route_log";

    /** HDFS output path used when no output argument is supplied. */
    private static final String DEFAULT_OUTPUT_PATH = "/outvalue/outroute_log";

    /**
     * Builds the job, runs it, and blocks until it finishes.
     *
     * @param args optional {@code [inputPath [outputPath]]}; may be null or empty
     * @return 0 if the job completed successfully, 1 otherwise
     * @throws Exception if the job cannot be configured, submitted, or monitored
     */
    @Override
    public int run(String[] args) throws Exception {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PATH;

        Configuration conf = getConf();
        // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(conf, "route_filter");
        job.setJarByClass(Route_filter.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        job.setMapperClass(RouteMap.class);
        FileInputFormat.addInputPath(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // submit() returns immediately, so querying isSuccessful() right after it
        // reports failure for a job that is still running. waitForCompletion
        // blocks until the job ends and returns its real outcome.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Command-line entry point; the process exit code mirrors the job result.
     *
     * @param args generic Hadoop options followed by optional input/output paths
     * @throws Exception if the tool fails to run
     */
    public static void main(String[] args) throws Exception {
        // Forward args so ToolRunner can parse generic options (-D, -conf, ...).
        int exitCode = ToolRunner.run(new Configuration(), new Route_filter(), args);
        System.exit(exitCode);
    }
}

class RouteMap extends Mapper<LongWritable, Text, Text, NullWritable> {
private Text result = new Text();

protected void map(LongWritable key, Text value, Context context)
throws java.io.IOException, InterruptedException {
String lineValue = value.toString();
String[] lineSplit = lineValue.split(" ");
String month = lineSplit[0];
String day = lineSplit[1];
String mac = lineSplit[6];
result.set(month + " " + day + " " + mac);
context.write(result, NullWritable.get());
}
}

  

最终输出结果:

Apr 23 14:7d:c5:9e:fb:84
Apr 23 14:7d:c5:9e:fb:84
Apr 23 74:e5:0b:04:28:f2
Apr 23 74:e5:0b:04:28:f2
Apr 23 cc:af:78:cc:d5:5d
Apr 23 cc:af:78:cc:d5:5d

posted @ 2015-12-10 19:36  李小新  阅读(206)  评论(0)  编辑  收藏  举报