搭建同步的静态的MongoDB连接器,适用于Spark

package com.excellence.mongo;

/**
 * Mutable holder for the settings needed to reach one MongoDB server:
 * host, port, database name, user name and password.
 *
 * <p>Every field starts with a built-in default and can be overridden through
 * its setter. The fields stay {@code public} because code outside this class
 * may already read or assign them directly.
 *
 * <p>NOTE(review): the default user name and password are hard-coded here;
 * consider supplying real credentials from configuration instead.
 */
public class MongoDBInfo {
    /** Host name or IP address of the server. */
    public String hostName = "127.0.0.1";
    /** TCP port of the server. */
    public int port = 27017;
    /** Name of the database to open. */
    public String dbName = "exiaresource";
    /** User name used for authentication. */
    public String user = "admin";
    /** Password used for authentication; may be {@code null}. */
    public char[] pwd = { '1','2','3','4','5','6' };

    /** @return the configured host name */
    public String getHostName() {
        return hostName;
    }

    /** @param hostName host name or IP address of the server */
    public void setHostName(String hostName) {
        this.hostName = hostName;
    }

    /** @return the configured port */
    public int getPort() {
        return port;
    }

    /** @param port TCP port of the server */
    public void setPort(int port) {
        this.port = port;
    }

    /** @return the configured database name */
    public String getDbName() {
        return dbName;
    }

    /** @param dbName name of the database to open */
    public void setDbName(String dbName) {
        this.dbName = dbName;
    }

    /** @return the configured user name */
    public String getUser() {
        return user;
    }

    /** @param user user name used for authentication */
    public void setUser(String user) {
        this.user = user;
    }

    /**
     * Returns a copy of the password so that callers cannot alter (or wipe)
     * the stored value through the returned array.
     *
     * @return a fresh copy of the password, or {@code null} if none is set
     */
    public char[] getPwd() {
        return pwd == null ? null : pwd.clone();
    }

    /**
     * Stores a copy of the given password so that later changes to the
     * caller's array do not leak into this object.
     *
     * @param pwd the password, or {@code null} to clear it
     */
    public void setPwd(char[] pwd) {
        this.pwd = pwd == null ? null : pwd.clone();
    }
}

MongoDataWorker

package com.excellence.mongo;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import org.apache.hadoop.hive.ql.parse.HiveParser.insertClause_return;
import org.bson.Document;
import org.bson.conversions.Bson;

import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import com.mongodb.MongoCredential;
import com.mongodb.ServerAddress;
import com.mongodb.client.FindIterable;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoCursor;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.model.Filters;
import com.mongodb.client.model.Projections;
import com.mongodb.client.result.UpdateResult;
import com.sun.javafx.collections.MappingChange.Map;

import scala.tools.nsc.symtab.classfile.ClassfileParser.LazyAliasType;


/**
 * Small data-access helper on top of the legacy {@code com.mongodb.DB} API.
 *
 * <p>It opens one client/database pair in the constructor and offers two
 * operations: reading every document of a collection into string maps, and
 * recording a sentence together with the document it came from.
 */
public class MongoDataWorker {
    private final MongoClient mongoClient;
    private final DB database;

    /**
     * Creates the client and opens the database named by {@code info}.
     *
     * @param info       host, port, database name and credentials to use
     * @param collection not used by this constructor; kept so existing callers compile
     */
    public MongoDataWorker(MongoDBInfo info,String collection){
        List<MongoCredential> credentials = new ArrayList<MongoCredential>();
        // NOTE(review): the second argument of createCredential is the database
        // the user is defined in, yet the user name is passed here. This only
        // works when both strings coincide (e.g. user "admin" in database
        // "admin"). Left unchanged because MongoDBInfo carries no separate
        // authentication-database setting -- confirm the intended value.
        credentials.add(MongoCredential.createCredential(info.getUser(), info.getUser(), info.getPwd()));
        mongoClient = new MongoClient(new ServerAddress(info.getHostName(), info.getPort()), credentials);
        database = mongoClient.getDB(info.getDbName());
        System.out.println("获取数据库成功");
    }

    /**
     * Reads every document of {@code collection} and converts each one into a
     * map with the keys {@code content}, {@code id} (taken from {@code _id}),
     * {@code libNum} and {@code title}, all rendered via {@code toString()}.
     *
     * @param collection name of the collection to read
     * @return one map per document, or {@code null} if anything fails --
     *         including a document that lacks one of the four fields
     */
    public List<HashMap<String, String>> findAllContent(String collection){
        List<HashMap<String, String>> rows = new ArrayList<>();
        DBCursor cursor = null;
        try {
            cursor = database.getCollection(collection).find();
            while (cursor.hasNext()) {
                DBObject record = cursor.next();
                HashMap<String, String> row = new HashMap<>();
                row.put("content", record.get("content").toString());
                row.put("id", record.get("_id").toString());
                row.put("libNum", record.get("libNum").toString());
                row.put("title", record.get("title").toString());
                rows.add(row);
            }
            return rows;
        } catch (Exception e) {
            // Report the failure the same way insertContent does instead of
            // hiding it; the null return value is kept for existing callers.
            e.printStackTrace();
            return null;
        } finally {
            if (cursor != null) {
                cursor.close();
            }
        }
    }

    /**
     * Records one sentence in {@code updateCollection}.
     *
     * <p>If a document with the same {@code content} already exists, its
     * {@code frequency} is incremented and the new origin is appended to its
     * {@code source} array in a single update, so concurrent writers cannot
     * overwrite each other's counts. Otherwise a new document is inserted with
     * {@code frequency} 1, the given {@code tags}, the origin as the only
     * {@code source} entry and, when the sentence is long enough, the
     * {@code top5}/{@code top8}/{@code top11} prefix arrays.
     *
     * <p>Failures are printed and otherwise ignored (best effort).
     *
     * @param findCollection   collection that was searched; not used by this method
     * @param updateCollection collection that is updated or inserted into
     * @param content          the sentence
     * @param map              origin of the sentence; the keys {@code id},
     *                         {@code title} and {@code libNum} are read
     * @param tags             long/short sentence marker
     */
    public void insertContent(String findCollection,String updateCollection,String content,HashMap<String, String> map,String tags) {
        try {
            DBObject query = new BasicDBObject("content", content);

            // Does the sentence already exist?
            if (database.getCollection(updateCollection).findOne(query) != null) {
                // One atomic update: bump the counter and append the origin.
                BasicDBObject update = new BasicDBObject("$inc", new BasicDBObject("frequency", 1))
                        .append("$push", new BasicDBObject("source", sourceOf(map)));
                database.getCollection(updateCollection).update(query, update);
                return;
            }

            DBObject document = new BasicDBObject();
            document.put("content", content);
            putPrefix(document, "top5", content, 5);
            putPrefix(document, "top8", content, 8);
            putPrefix(document, "top11", content, 11);
            document.put("frequency", 1);
            document.put("tags", tags);

            List<HashMap<String, String>> sources = new ArrayList<>();
            sources.add(sourceOf(map));
            document.put("source", sources);

            database.getCollection(updateCollection).insert(document);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Copies the origin fields (everything except the content) out of {@code map}. */
    private static HashMap<String, String> sourceOf(HashMap<String, String> map) {
        HashMap<String, String> source = new HashMap<>();
        source.put("id", map.get("id"));
        source.put("title", map.get("title"));
        source.put("libNum", map.get("libNum"));
        return source;
    }

    /** Stores the first {@code length} characters of {@code content}, split one per element, under {@code key} when the sentence is long enough. */
    private static void putPrefix(DBObject document, String key, String content, int length) {
        if (content.length() >= length) {
            document.put(key, content.substring(0, length).split(""));
        }
    }
}

MongoUtils

package com.excellence.mongo;


/**
 * Static entry point that hands out one shared {@link MongoDataWorker}.
 *
 * <p>The connection settings live in the public static fields below; they are
 * read when the worker is created on the first call to
 * {@link #getMongoDataWorker()}.
 */
public class MongoUtils {
    public static String hostname="127.0.0.1";
    public static int port = 27017;
    public static String dbname = "exiaresource";
    public static String User = "admin";
    public static String pwd = "123456";

    /** The shared worker; stays {@code null} until the first successful creation. */
    private static MongoDataWorker worker = null;

    /**
     * Returns the shared worker, creating it from the current static settings
     * if it does not exist yet. The method is synchronized on the class, so
     * the creation step runs in one thread at a time.
     *
     * @return the shared worker instance
     */
    public static synchronized MongoDataWorker getMongoDataWorker() {
        if (worker == null) {
            worker = new MongoDataWorker(currentSettings(), "");
        }
        return worker;
    }

    /** Copies the static settings of this class into a fresh {@link MongoDBInfo}. */
    private static MongoDBInfo currentSettings() {
        MongoDBInfo settings = new MongoDBInfo();
        settings.setHostName(hostname);
        settings.setPort(port);
        settings.setPwd(pwd.toCharArray());
        settings.setUser(User);
        settings.setDbName(dbname);
        return settings;
    }
}

 

posted @ 2018-11-13 17:28  一朵包纸  阅读(298)  评论(0)    收藏  举报