Notes on custom types in Hadoop
A custom type used as a key must implement the WritableComparable interface (I had originally implemented only Writable, and the job failed with an error).
The main reason is that the partitioning stage relies on the custom type's hashCode() method, and sorting relies on compareTo(); it is also worth reading the source of IntWritable to see how the built-in types handle this.
So it pays to understand the whole Hadoop data flow; only then can you deal with this kind of problem with ease.
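To see where hashCode() comes in, here is a minimal sketch of hash-based partitioning, written as my own simplified partitioner rather than the exact library source; the class name Sketch_Hash_Partitioner is just a placeholder for illustration:

import org.apache.hadoop.mapreduce.Partitioner;

// Sketch: the key's hashCode() decides which reduce task receives the record,
// which is why a custom key type must provide a stable hashCode().
public class Sketch_Hash_Partitioner<K, V> extends Partitioner<K, V> {
    @Override
    public int getPartition(K key, V value, int numReduceTasks) {
        // Mask off the sign bit so the partition index is never negative.
        return (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
    }
}

The custom key class from the post follows.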
import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class Byes_Myself_Key implements WritableComparable<Byes_Myself_Key> {

    public String s_key;
    public String s_value;

    public Byes_Myself_Key() {
        this("", "");
    }

    public Byes_Myself_Key(String key, String value) {
        this.s_key = key;
        this.s_value = value;
    }

    public String getFirst() {
        return s_key;
    }

    public String getValue() {
        return s_value;
    }

    public void set(String key, String value) {
        this.s_key = key;
        this.s_value = value;
    }

    @Override
    public String toString() {
        return s_key + ":" + s_value;
    }

    // Deserialize the two fields in the same order they were written.
    @Override
    public void readFields(DataInput in) throws IOException {
        this.s_key = in.readUTF();
        this.s_value = in.readUTF();
    }

    // Serialize both fields so the framework can move the key between tasks.
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(s_key);
        out.writeUTF(s_value);
    }

    // Sorting during the shuffle phase is driven by compareTo().
    @Override
    public int compareTo(Byes_Myself_Key other) {
        return this.s_key.compareToIgnoreCase(other.s_key);
    }

    /*
     * Implementing hashCode() matters: Hadoop's partitioners use it to decide
     * which reducer a key goes to (more on this later).
     */
    @Override
    public int hashCode() {
        return this.toString().hashCode();
    }
}
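For completeness, here is a hedged sketch of how such a key class might be wired into a job driver, assuming the Hadoop 2.x mapreduce API; the driver class name and the commented-out mapper/reducer are placeholders I added, not part of the original post:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Byes_Myself_Driver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "custom key demo");
        job.setJarByClass(Byes_Myself_Driver.class);

        // A real job would register its own mapper/reducer here;
        // the class names are placeholders for this sketch.
        // job.setMapperClass(My_Mapper.class);
        // job.setReducerClass(My_Reducer.class);

        // The custom WritableComparable is used as the map output key, so the
        // framework can serialize it (write/readFields), sort it (compareTo)
        // and partition it (hashCode) during the shuffle.
        job.setMapOutputKeyClass(Byes_Myself_Key.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}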