搭建同步的静态的MongoDB连接器,适用于Spark
package com.excellence.mongo; public class MongoDBInfo { public String hostName = "127.0.0.1"; public int port = 27017; public String dbName = "exiaresource"; public String user = "admin"; public char[] pwd = { '1','2','3','4','5','6' }; public String getHostName() { return hostName; } public void setHostName(String hostName) { this.hostName = hostName; } public int getPort() { return port; } public void setPort(int port) { this.port = port; } public String getDbName() { return dbName; } public void setDbName(String dbName) { this.dbName = dbName; } public String getUser() { return user; } public void setUser(String user) { this.user = user; } public char[] getPwd() { return pwd; } public void setPwd(char[] pwd) { this.pwd = pwd; } }
MongoDataWorker
package com.excellence.mongo;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import org.apache.hadoop.hive.ql.parse.HiveParser.insertClause_return;
import org.bson.Document;
import org.bson.conversions.Bson;

import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import com.mongodb.MongoCredential;
import com.mongodb.ServerAddress;
import com.mongodb.client.FindIterable;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoCursor;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.model.Filters;
import com.mongodb.client.model.Projections;
import com.mongodb.client.result.UpdateResult;
import com.sun.javafx.collections.MappingChange.Map;

import scala.tools.nsc.symtab.classfile.ClassfileParser.LazyAliasType;

/**
 * Opens one MongoDB connection and offers two operations on it: reading every
 * record of a collection, and recording a sentence together with the document
 * it came from.
 *
 * <p>Both operations are best-effort: failures are printed to standard error
 * instead of being propagated to the caller.
 */
public class MongoDataWorker {

    private final MongoClient mongoClient;
    private final DB database;

    /**
     * Connects to the server described by {@code info} and selects its database.
     *
     * @param info       host, port, database name and credentials to connect with
     * @param collection not used by this constructor; kept for existing callers
     */
    public MongoDataWorker(MongoDBInfo info, String collection) {
        List<MongoCredential> credentials = new ArrayList<MongoCredential>();
        // NOTE(review): the second argument of createCredential is the database the
        // user is defined in, yet the user name is passed there. This only works
        // while the user name equals that database's name -- confirm the intended
        // authentication database before changing it.
        credentials.add(MongoCredential.createCredential(info.getUser(), info.getUser(), info.getPwd()));
        mongoClient = new MongoClient(new ServerAddress(info.getHostName(), info.getPort()), credentials);
        database = mongoClient.getDB(info.getDbName());
        System.out.println("获取数据库成功");
    }

    /**
     * Reads every record of {@code collection} into a list of string maps.
     *
     * <p>Each map carries the keys {@code content}, {@code id} (taken from the
     * record's {@code _id}), {@code libNum} and {@code title}.
     *
     * @param collection name of the collection to read
     * @return one map per record, or {@code null} when anything fails, including
     *         a record that lacks one of the four fields
     */
    public List<HashMap<String, String>> findAllContent(String collection) {
        List<HashMap<String, String>> contentlist = new ArrayList<>();
        try (DBCursor cursor = database.getCollection(collection).find()) {
            while (cursor.hasNext()) {
                DBObject record = cursor.next();
                HashMap<String, String> row = new HashMap<>();
                row.put("content", record.get("content").toString());
                row.put("id", record.get("_id").toString());
                row.put("libNum", record.get("libNum").toString());
                row.put("title", record.get("title").toString());
                contentlist.add(row);
            }
            return contentlist;
        } catch (Exception e) {
            // Report the cause instead of hiding it; callers still receive null.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Records one occurrence of a sentence in {@code updateCollection}.
     *
     * <p>If a document with the same {@code content} already exists, its
     * {@code frequency} is increased by one and the origin is appended to its
     * {@code source} array, both in a single update. Otherwise a new document is
     * inserted with {@code frequency} 1, the given {@code tags}, a one-element
     * {@code source} array and, for sentences that are long enough, the leading
     * 5, 8 and 11 characters under {@code top5}, {@code top8} and {@code top11}.
     *
     * <p>Any failure is printed to standard error and otherwise ignored.
     *
     * @param findCollection   the collection that was searched; not used here
     * @param updateCollection the collection to update or insert into
     * @param content          the sentence
     * @param map              origin of the sentence; its {@code id}, {@code title}
     *                         and {@code libNum} entries are stored
     * @param tags             label identifying the sentence as long or short
     */
    public void insertContent(String findCollection, String updateCollection, String content,
            HashMap<String, String> map, String tags) {
        try {
            DBObject query = new BasicDBObject("content", content);
            HashMap<String, String> source = sourceOf(map);

            if (database.getCollection(updateCollection).findOne(query) != null) {
                // $inc lets the server do the addition, so concurrent writers cannot
                // overwrite each other's count; $push rides along in the same update.
                BasicDBObject change = new BasicDBObject("$inc", new BasicDBObject("frequency", 1))
                        .append("$push", new BasicDBObject("source", source));
                database.getCollection(updateCollection).update(query, change);
                return;
            }

            DBObject document = new BasicDBObject();
            document.put("content", content);
            putLeadingChars(document, "top5", content, 5);
            putLeadingChars(document, "top8", content, 8);
            putLeadingChars(document, "top11", content, 11);
            document.put("frequency", 1);
            document.put("tags", tags);
            List<HashMap<String, String>> listSource = new ArrayList<>();
            listSource.add(source);
            document.put("source", listSource);
            database.getCollection(updateCollection).insert(document);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Copies the id, title and libNum entries of {@code map} into a new map, leaving the content out. */
    private static HashMap<String, String> sourceOf(HashMap<String, String> map) {
        HashMap<String, String> source = new HashMap<>();
        source.put("id", map.get("id"));
        source.put("title", map.get("title"));
        source.put("libNum", map.get("libNum"));
        return source;
    }

    /** Stores the first {@code count} characters of {@code content}, split one per element, when it is long enough. */
    private static void putLeadingChars(DBObject document, String key, String content, int count) {
        if (content.length() >= count) {
            document.put(key, content.substring(0, count).split(""));
        }
    }
}
MongoUtils
package com.excellence.mongo;

/**
 * Static connection settings plus a lazily created, shared
 * {@link MongoDataWorker}.
 *
 * <p>The settings are read only when the worker is first created; changing
 * them after that has no effect on the worker already handed out.
 */
public class MongoUtils {

    /** Host name or address of the server. */
    public static String hostname = "127.0.0.1";

    /** TCP port of the server. */
    public static int port = 27017;

    /** Name of the database to work with. */
    public static String dbname = "exiaresource";

    /** User name used when authenticating. */
    public static String User = "admin";

    /** Password used when authenticating. */
    public static String pwd = "123456";

    /** The shared worker; stays {@code null} until the first successful request. */
    private static MongoDataWorker worker = null;

    /**
     * Returns the shared worker, creating it from the static settings on the
     * first call. The method is synchronized, so the creation step runs in one
     * thread at a time.
     *
     * @return the shared {@link MongoDataWorker}
     */
    public synchronized static MongoDataWorker getMongoDataWorker() {
        if (worker == null) {
            MongoDBInfo settings = new MongoDBInfo();
            settings.setHostName(hostname);
            settings.setPort(port);
            settings.setDbName(dbname);
            settings.setUser(User);
            settings.setPwd(pwd.toCharArray());
            worker = new MongoDataWorker(settings, "");
        }
        return worker;
    }
}

浙公网安备 33010602011771号