第一篇
package com.zxtech.aig.java_study.similar;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Groups sentences into clusters by average sentence-vector similarity and
 * writes every sufficiently large cluster to its own text file.
 *
 * <p>Sentence vectors and pairwise similarities are obtained from
 * {@code SentenceNlpTool}, which is declared elsewhere in the project.
 */
public class SimilarCrops {

    /** Minimum average similarity a sentence needs in order to join an existing cluster. */
    private static final double SIMILARITY_THRESHOLD = 0.6;

    /** Clusters holding this many sentences or fewer are discarded from the result. */
    private static final int MAX_DISCARDED_CLUSTER_SIZE = 5;

    /** Directory prefix under which one file per cluster is written. */
    private static final String OUTPUT_DIR = "D:/result/";

    /**
     * Loads the sentences, clusters them and saves the clusters.
     *
     * @param args ignored
     * @throws IOException if the sentence file cannot be read or a result file cannot be written
     */
    public static void main(String[] args) throws IOException {
        save(simillar(loaddata()));
    }

    /**
     * Initialises {@code SentenceNlpTool} with the project data files and reads
     * the sentence file line by line.
     *
     * @return every line of the sentence file, in file order
     * @throws IOException if the sentence file cannot be opened or read; the error is
     *         propagated instead of being printed and ignored, so that clustering never
     *         runs on a partially read file
     */
    public static List<String> loaddata() throws IOException {
        String wordVectFilename = "src/data/all_word_vec_200_hs.txt";
        String allSentenceFilename = "src/data/副本导出HGE已删除价格.txt";
        String allVectFilename = "src/data/all_sentence_vect_1872.txt";
        String userDictFilename = "src/data/nze.txt";
        SentenceNlpTool.init(wordVectFilename, allSentenceFilename, allVectFilename, userDictFilename);

        List<String> strList = new ArrayList<>();
        // NOTE(review): FileReader decodes with the platform default charset; the
        // encoding of the data file is not visible here, so it is left unchanged.
        try (FileReader reader = new FileReader(allSentenceFilename);
             BufferedReader br = new BufferedReader(reader)) {
            String line;
            while ((line = br.readLine()) != null) {
                strList.add(line);
            }
        }
        return strList;
    }

    /**
     * Clusters the sentences greedily, in input order.
     *
     * <p>For each sentence the average similarity to the members of every existing
     * cluster is computed. The sentence joins the cluster with the highest average
     * when that average is at least {@code 0.6}; otherwise it starts a new cluster.
     * Finally every cluster with five or fewer sentences is dropped. The number of
     * dropped clusters and the number of remaining clusters are printed to
     * standard output.
     *
     * <p>All elements of {@code strList} are processed and the list itself is not
     * modified.
     *
     * @param strList sentences to cluster
     * @return the clusters that contain more than five sentences
     */
    public static List<List<String>> simillar(List<String> strList) {
        List<List<String>> clusters = new ArrayList<>();

        for (String sentence : strList) {
            // The current sentence's vector does not depend on the cluster being
            // inspected, so it is computed once per sentence.
            // NOTE(review): assumes getSentenceVector returns the same values for the
            // same input on every call - confirm against SentenceNlpTool.
            double[] sentenceVector = SentenceNlpTool.getSentenceVector(sentence);

            int bestIndex = -1;
            double bestSimilarity = 0;
            for (int j = 0; j < clusters.size(); j++) {
                double average = averageSimilarity(clusters.get(j), sentenceVector);
                // Track the maximum across ALL clusters; it must not be reset per cluster.
                if (average > bestSimilarity) {
                    bestSimilarity = average;
                    bestIndex = j;
                }
            }

            if (bestIndex >= 0 && bestSimilarity >= SIMILARITY_THRESHOLD) {
                clusters.get(bestIndex).add(sentence);
            } else {
                List<String> fresh = new ArrayList<>();
                fresh.add(sentence);
                clusters.add(fresh);
            }
        }

        // Keep only the clusters that are large enough.
        List<List<String>> kept = new ArrayList<>();
        for (List<String> cluster : clusters) {
            if (cluster.size() > MAX_DISCARDED_CLUSTER_SIZE) {
                kept.add(cluster);
            }
        }

        System.out.println(clusters.size() - kept.size());
        System.out.println(kept.size());
        return kept;
    }

    /** Returns the mean similarity between {@code sentenceVector} and every member of {@code cluster}. */
    private static double averageSimilarity(List<String> cluster, double[] sentenceVector) {
        double total = 0;
        for (String member : cluster) {
            double[] memberVector = SentenceNlpTool.getSentenceVector(member);
            total = total + SentenceNlpTool.calTwoSentenceSimilar(memberVector, sentenceVector);
        }
        // Clusters are never empty: each one is created with its first sentence.
        return total / cluster.size();
    }

    /**
     * Writes each cluster to {@code D:/result/<index>.txt}, one sentence per line.
     * The output directory is created when it does not exist yet; existing files
     * with the same name are overwritten.
     *
     * @param croplist clusters to write; the position in the list becomes the file name
     * @throws IOException if the output directory cannot be created or a file cannot be written
     */
    public static void save(List<List<String>> croplist) throws IOException {
        if (croplist.isEmpty()) {
            return;
        }

        File outputDir = new File(OUTPUT_DIR);
        if (!outputDir.isDirectory() && !outputDir.mkdirs()) {
            throw new IOException("Cannot create output directory: " + outputDir);
        }

        for (int i = 0; i < croplist.size(); i++) {
            File writeName = new File(OUTPUT_DIR + i + ".txt");
            // FileWriter creates (or truncates) the file, so no createNewFile() is needed.
            try (FileWriter writer = new FileWriter(writeName);
                 BufferedWriter out = new BufferedWriter(writer)) {
                for (String sentence : croplist.get(i)) {
                    out.write(sentence + "\n");
                }
            }
        }
    }
}
我不怕千万人阻挡,只怕自己投降