大数据学习第11天

天气比较代码

 

package com.bjsxt.weather;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver for the "top-N hottest days per month" MapReduce job.
 *
 * Pipeline: KeyValueTextInputFormat (tab-split: date / temperature)
 * -> WeatherMapper (emits MyKey(year, month, temp))
 * -> MyPartitioner (partition by year)
 * -> MyKey.compareTo (sort: year, month asc; temperature desc)
 * -> MyGroupCompareTo (group by year+month only)
 * -> WeatherReducer (write first 3 records of each group).
 */
public class RunJob {

    public static void main(String[] args) {
        // Windows-only workaround: point Hadoop at a local install containing winutils.exe.
        System.setProperty("hadoop.home.dir", "E:\\peiyou\\hadoop-2.6.5");
        Configuration conf = new Configuration();
        try {
            Job job = Job.getInstance(conf);
            FileSystem fs = FileSystem.get(conf);
            // Fix: original name "wc" was left over from a word-count example.
            job.setJobName("weather");
            job.setJarByClass(RunJob.class);

            job.setMapperClass(WeatherMapper.class);
            job.setReducerClass(WeatherReducer.class);
            // Group reducer input by (year, month) only, ignoring temperature.
            job.setGroupingComparatorClass(MyGroupCompareTo.class);
            // KeyValueTextInputFormat splits each line at the first tab:
            // text before the tab becomes the key, text after it the value.
            job.setInputFormatClass(KeyValueTextInputFormat.class);

            job.setPartitionerClass(MyPartitioner.class);
            job.setMapOutputKeyClass(MyKey.class);
            job.setMapOutputValueClass(Text.class);

            job.setNumReduceTasks(3); // must match MyPartitioner's expectation of 3 partitions

            // Input directory holding the raw weather records.
            FileInputFormat.addInputPath(job, new Path("/input/weather"));
            // The output directory must not pre-exist or the framework aborts,
            // so remove any stale results from a previous run.
            Path outpath = new Path("/output/weather/");
            if (fs.exists(outpath)) {
                fs.delete(outpath, true);
            }
            FileOutputFormat.setOutputPath(job, outpath);

            // Submit the job and block until completion (true = print progress).
            boolean ok = job.waitForCompletion(true);
            if (ok) {
                System.out.println("mapreduce程序执行成功");
            }
            // Fix: propagate the job result as the process exit code so shell
            // scripts / schedulers can detect failure.
            System.exit(ok ? 0 : 1);
        } catch (Exception e) {
            // Boundary catch for the launcher: report and fail the process.
            e.printStackTrace();
            System.exit(1);
        }
    }
}

----------------------------------------------------------------------------------

package com.bjsxt.weather;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapTask;
import org.apache.hadoop.mapreduce.Mapper;


/**
 * Parses one weather record per call and emits a composite sort key.
 *
 * Input (from KeyValueTextInputFormat): key = date string "yyyy-MM-dd",
 * value = temperature text with a trailing unit character (e.g. "38c" —
 * assumed format, confirm against the input data).
 * Output: MyKey(year, month, temperature) -> "date\ttemperature".
 */
public class WeatherMapper extends Mapper<Text, Text, MyKey, Text> {

    // NOTE: SimpleDateFormat is not thread-safe. This is tolerable here only
    // because each map task runs in its own JVM and calls map() from a single
    // thread; do not share this pattern in multi-threaded code.
    static SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");

    protected void map(Text key, Text value,
            Context context)
            throws IOException, InterruptedException {
        try {
            Date date = sdf.parse(key.toString());
            Calendar c = Calendar.getInstance();
            c.setTime(date);
            int year = c.get(Calendar.YEAR);
            // Fix: Calendar.MONTH is 0-based (JANUARY == 0); +1 yields the
            // human-readable month 1..12. Grouping/partitioning are unaffected
            // because the shift is applied uniformly to every record.
            int month = c.get(Calendar.MONTH) + 1;
            // Strip the trailing unit character: "38c" -> 38.0.
            String raw = value.toString();
            double temp = Double.parseDouble(raw.substring(0, raw.length() - 1));
            MyKey outKey = new MyKey(year, month, temp);
            Text outValue = new Text(key + "\t" + value);
            context.write(outKey, outValue);
        } catch (Exception e) {
            // Deliberate best-effort: a malformed record is reported and
            // skipped so one bad line does not fail the whole job.
            e.printStackTrace();
        }
    }
}

----------------------------------------------------------------------------------------------------------------------------------------------------------

package com.bjsxt.weather;

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Emits the first {@code TOP_N} records of each (year, month) group.
 *
 * Because MyKey sorts temperatures in descending order within a month and
 * MyGroupCompareTo groups by year+month, the iterator delivers each month's
 * records hottest-first — so the first three are the three hottest days.
 */
public class WeatherReducer extends Reducer<MyKey, Text, Text, NullWritable> {

    /** How many records to keep per group. */
    private static final int TOP_N = 3;

    protected void reduce(MyKey key, Iterable<Text> iter,
            Context context)
            throws IOException, InterruptedException {
        int emitted = 0;
        for (Text record : iter) {
            // The record already carries "date\ttemperature"; the key is not needed.
            context.write(record, NullWritable.get());
            if (++emitted >= TOP_N) {
                break; // stop early — remaining values in the group are ignored
            }
        }
    }
}

 

---------------------------------------------------------------------------------------------------------

package com.bjsxt.weather;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class MyPartitioner extends Partitioner<MyKey, Text>{

/**
* @param numPartitions 代表reduce的数量,当前是3
*/
public int getPartition(MyKey key, Text value, int numPartitions) {
return key.getYear()%numPartitions;
}

}

----------------------------------------------------------------------------------------------------

package com.bjsxt.weather;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

/**
* 自定义的key
* @author root
*
*/
public class MyKey implements WritableComparable<MyKey>{

private int year;
private int month;
private double temperature;
public MyKey(){
super();
}

public MyKey(int year, int month, double temperature) {
super();
this.year = year;
this.month = month;
this.temperature = temperature;
}
public void write(DataOutput out) throws IOException {
out.writeInt(year);
out.writeInt(month);
out.writeDouble(temperature);
}
public void readFields(DataInput in) throws IOException {
this.year=in.readInt();
this.month=in.readInt();
this.temperature=in.readDouble();
}

/**
* 当前Key 的比较方法,是在排序的时候调用的
*/
public int compareTo(MyKey o) {
int r1=Integer.compare(this.getYear(), o.getYear());
if(r1==0){
int r2 =Integer.compare(this.getMonth(), o.getMonth());
if(r2==0){
return -Double.compare(this.getTemperature(), o.getTemperature());
}
return r2;
}
return r1;
}
public int getYear() {
return year;
}
public void setYear(int year) {
this.year = year;
}
public int getMonth() {
return month;
}
public void setMonth(int month) {
this.month = month;
}
public double getTemperature() {
return temperature;
}
public void setTemperature(double temperature) {
this.temperature = temperature;
}


}

-------------------------------------------------------------------------------------------------------------------

package com.bjsxt.weather;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Grouping comparator: treats two keys as equal when year and month match,
 * ignoring temperature. This makes one reduce() call receive all records of
 * a month, already sorted hottest-first by MyKey.compareTo.
 */
public class MyGroupCompareTo extends WritableComparator {

    /** A comparator needs this constructor: registers MyKey and asks the
     *  parent to instantiate keys for comparison (createInstances = true). */
    public MyGroupCompareTo() {
        super(MyKey.class, true);
    }

    public int compare(WritableComparable a, WritableComparable b) {
        MyKey left = (MyKey) a;
        MyKey right = (MyKey) b;

        int byYear = Integer.compare(left.getYear(), right.getYear());
        if (byYear != 0) {
            return byYear;
        }
        // Same year: fall back to the month; temperature deliberately ignored.
        return Integer.compare(left.getMonth(), right.getMonth());
    }
}

 

posted @ 2019-06-19 10:03  lkoooox  阅读(137)  评论(0)    收藏  举报