用Spark向HBase中插入数据

Java代码如下:

package db.insert;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;

/**
 * Reads vehicle records from a text file with Spark and writes them into the
 * HBase table "mapCar" through {@code HBaseDBDao}.
 *
 * <p>Each input line is split on whitespace and its first four tokens are read
 * as cid, lat, lon and time. The row key is {@code cid + "_" + time}, where the
 * time token is left-padded with '0' up to {@link #TIME_WIDTH} characters.
 */
public class SparkInsertData {
    /** Width the time token is left-padded to (with '0') inside the row key. */
    private static final int TIME_WIDTH = 13;

    /** Minimum number of whitespace-separated tokens a usable input line must have. */
    private static final int FIELD_COUNT = 4;

    /**
     * Recreates the "mapCar" table and bulk-loads it from the input file.
     *
     * @param args unused
     */
    public static void main(String[] args){
        // Initialise the Spark context (local mode, 2 threads).
        SparkConf sparkConf = new SparkConf().setAppName("HBaseTest").setMaster("local[2]");
        JavaSparkContext sc = new JavaSparkContext(sparkConf);
        // NOTE(review): hb is captured by the Spark closure below, so HBaseDBDao
        // presumably has to be serializable — confirm against its definition.
        final HBaseDBDao hb = new HBaseDBDao();
        try {
            final String tableName = "mapCar";
            // Drop any previous copy of the table. Guarded by isExist so a first
            // run does not depend on how deleteTable treats a missing table.
            if (hb.isExist(tableName)) {
                hb.deleteTable(tableName);
            }
            // Step 1: create the table "mapCar".
            String[] columnFamilys = { "cids", "gis", "times"};
            if(!hb.isExist(tableName)){
                hb.createTable(tableName, columnFamilys);
            }
            hb.initHTable(tableName);
            // Step 2: add every vehicle record from the file to the table.
            JavaRDD<String> fcar = sc.textFile(
                    "/usr/local/myjar/mongo/地图数据/mongo/MongoDB/mapCar.txt", 10);
            fcar.foreachPartition(new VoidFunction<Iterator<String>>(){
                private static final long serialVersionUID = 1L;

                /**
                 * Queues the four cells of every record in this partition and
                 * flushes them once the partition has been consumed.
                 */
                @Override
                public void call(Iterator<String> iter) throws Exception {
                    while(iter.hasNext()){
                        String s = iter.next();
                        StringTokenizer stk = new StringTokenizer(s);
                        // Skip blank/short lines: nextToken() would otherwise throw
                        // NoSuchElementException and abort the whole partition.
                        if (stk.countTokens() < FIELD_COUNT) {
                            System.err.println("skipping malformed line: " + s);
                            continue;
                        }
                        String cid = stk.nextToken();
                        String lat = stk.nextToken();
                        String lon = stk.nextToken();
                        String time = stk.nextToken();
                        String row = buildRowKey(cid, time);
                        hb.addRowBatch(tableName, row, "cids", "cid", cid);
                        hb.addRowBatch(tableName, row, "gis", "lat", lat);
                        hb.addRowBatch(tableName, row, "gis", "lon", lon);
                        hb.addRowBatch(tableName, row, "times", "time", time);
                        System.out.println("row: " + row + ", cid : " + cid
                                + ", lat: " + lat + ", lon: " + lon + ", time: "
                                + time);
                    }
                    hb.flushCommits(tableName);
                }

            });
            System.out.println("插入完毕!");
            // Step 3 (disabled in the original): read back all rows via
            // hb.getAllRows(tableName).

        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Always release the Spark context, on success and on failure.
            sc.stop();
        }
    }

    /**
     * Builds the row key {@code cid + "_" + time}, left-padding {@code time}
     * with '0' to {@link #TIME_WIDTH} characters. A time token that is already
     * that long or longer is used unchanged.
     */
    private static String buildRowKey(String cid, String time) {
        StringBuilder sb = new StringBuilder(cid).append('_');
        for (int i = time.length(); i < TIME_WIDTH; i++) {
            sb.append('0');
        }
        return sb.append(time).toString();
    }
}

所需jar包如下:

posted @ 2015-11-28 11:17  ~风轻云淡~  阅读(1365)  评论(0)  编辑  收藏  举报