Things to watch out for with custom types in Hadoop

A custom type needs to implement the WritableComparable interface (at first I implemented only Writable, and the job failed with an error).

The main cause of the problem is that the Partitioner stage uses the custom type's hashCode() method, and the sort uses compareTo(); the implementation of IntWritable is also worth a look as a reference.

So it still pays to understand the whole Hadoop data flow; only then can you deal with problems like this with ease.
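For context, the default HashPartitioner picks the reducer for a key directly from the key's hashCode(), which is why a custom key without a sensible hashCode() misbehaves. A simplified sketch of that logic (the class name HashPartitionerSketch is mine, for illustration only):

import org.apache.hadoop.mapreduce.Partitioner;

public class HashPartitionerSketch<K, V> extends Partitioner<K, V> {

  @Override
  public int getPartition(K key, V value, int numReduceTasks) {
    // Clear the sign bit of the hash, then take it modulo the number of reducers.
    return (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
  }
}

The full custom key class is below.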

import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;


public class Byes_Myself_Key implements WritableComparable<Byes_Myself_Key> {

  public String  s_key;
  public String s_value;


  public Byes_Myself_Key() {
    this("", "");
  }

  public Byes_Myself_Key(String key, String value) {
    this.s_key = key;
    this.s_value = value;
  }

  /**
   * @return the key part of this record
   */
  public String getFirst() {
    return s_key;
  }
  public String getValue() {
    return s_value;
  }
  public void set(String key,String value) {
    this.s_key=key;
    this.s_value=value;
  }
  @Override
  public String toString() {
    return s_key + ":" + s_value;
  }
  
  @Override
  public void readFields(DataInput in) throws IOException {
    // Deserialize the two fields in the same order they were written.
    this.s_key = in.readUTF();
    this.s_value = in.readUTF();
  }

  @Override
  public void write(DataOutput out) throws IOException {
    // Serialize the key first, then the value.
    out.writeUTF(s_key);
    out.writeUTF(s_value);
  }

  @Override
  public int compareTo(Byes_Myself_Key other) {
    // Sort records by the key field, ignoring case.
    return this.s_key.compareToIgnoreCase(other.s_key);
  }

  /*
   * Implementing hashCode() properly is important:
   * Hadoop's Partitioner uses it to decide which reducer a key is sent to.
   */
  @Override
  public int hashCode() {
    return this.toString().hashCode();
  }



}
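
For completeness, here is a minimal sketch of how such a key could be wired into a job. Everything here (Byes_Myself_Demo, DemoMapper, tab-separated input) is my own assumption for illustration, not from the original problem; it assumes the org.apache.hadoop.mapreduce API.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Byes_Myself_Demo {

  // Hypothetical mapper: splits each input line at the first tab and emits the custom key.
  public static class DemoMapper extends Mapper<LongWritable, Text, Byes_Myself_Key, Text> {
    private final Byes_Myself_Key outKey = new Byes_Myself_Key();

    @Override
    protected void map(LongWritable offset, Text line, Context context)
        throws IOException, InterruptedException {
      String[] parts = line.toString().split("\t", 2);
      if (parts.length == 2) {
        outKey.set(parts[0], parts[1]);
        context.write(outKey, line);
      }
    }
  }

  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "custom key demo");
    job.setJarByClass(Byes_Myself_Demo.class);
    job.setMapperClass(DemoMapper.class);

    // Registering the custom type: Hadoop needs it to be Writable for serialization,
    // Comparable for the sort, and to have a sane hashCode() for the default partitioner.
    job.setMapOutputKeyClass(Byes_Myself_Key.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Byes_Myself_Key.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}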
