// Part 1

 

 

package com.zxtech.aig.java_study.similar;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Groups sentences into clusters of similar sentences and writes each
 * sufficiently large cluster to its own text file.
 *
 * <p>Sentence vectors and pairwise similarity come from {@code SentenceNlpTool}.
 */
public class SimilarCrops {

    /** Minimum average similarity a sentence needs to join an existing cluster. */
    private static final double SIMILARITY_THRESHOLD = 0.6;

    /** Clusters holding this many sentences or fewer are dropped from the result. */
    private static final int MAX_DISCARDED_CLUSTER_SIZE = 5;

    /** Directory that receives one {@code <index>.txt} file per cluster. */
    private static final String OUTPUT_DIR = "D:/result/";

    /**
     * Loads the sentences, clusters them, and writes the clusters to disk.
     *
     * @param args unused
     * @throws IOException if the sentence file cannot be read or a result file cannot be written
     */
    public static void main(String[] args) throws IOException {
        save(simillar(loaddata()));
    }

    /**
     * Initializes {@code SentenceNlpTool} with the data file paths and reads
     * the sentence file line by line.
     *
     * @return every line of the sentence file, in file order
     * @throws IOException if the sentence file cannot be opened or read; the
     *         error is propagated rather than returning a partially read list
     */
    public static List<String> loaddata() throws IOException {
        String wordVectFilename = "src/data/all_word_vec_200_hs.txt";
        String allSentenceFilename = "src/data/副本导出HGE已删除价格.txt";
        String allVectFilename = "src/data/all_sentence_vect_1872.txt";
        String userDictFilename = "src/data/nze.txt";
        SentenceNlpTool.init(wordVectFilename, allSentenceFilename, allVectFilename, userDictFilename);

        List<String> strList = new ArrayList<String>();
        try (FileReader reader = new FileReader(allSentenceFilename);
                BufferedReader br = new BufferedReader(reader)) {
            String line;
            while ((line = br.readLine()) != null) {
                strList.add(line);
            }
        }
        return strList;
    }

    /**
     * Greedily clusters sentences by average similarity.
     *
     * <p>Each sentence, in order, is compared against every existing cluster
     * using the mean similarity to the cluster's members. It joins the cluster
     * with the highest mean when that mean is at least
     * {@link #SIMILARITY_THRESHOLD}; otherwise it starts a new cluster.
     * Afterwards, clusters with {@link #MAX_DISCARDED_CLUSTER_SIZE} or fewer
     * sentences are removed. The number of removed clusters and the number of
     * remaining clusters are printed to standard output.
     *
     * <p>All sentences in {@code strList} are processed, whatever its size,
     * and the list itself is not modified.
     *
     * @param strList sentences to cluster
     * @return the remaining clusters, each a list of sentences in insertion order
     */
    public static List<List<String>> simillar(List<String> strList) {
        List<List<String>> cropslist = new ArrayList<List<String>>();

        for (String sentence : strList) {
            // The sentence's vector does not change while scanning clusters, so
            // compute it once (assumes getSentenceVector is deterministic).
            double[] sentenceVect = SentenceNlpTool.getSentenceVector(sentence);

            // Track the best cluster across ALL clusters for this sentence.
            double maxsim = 0;
            int index = -1;
            for (int j = 0; j < cropslist.size(); j++) {
                double sim = averageSimilarity(cropslist.get(j), sentenceVect);
                if (sim > maxsim) {
                    index = j;
                    maxsim = sim;
                }
            }

            if (index >= 0 && maxsim >= SIMILARITY_THRESHOLD) {
                cropslist.get(index).add(sentence);
            } else {
                List<String> list = new ArrayList<String>();
                list.add(sentence);
                cropslist.add(list);
            }
        }

        // Walk backwards so removals do not shift the indices still to be visited.
        int removed = 0;
        for (int i = cropslist.size() - 1; i >= 0; i--) {
            if (cropslist.get(i).size() <= MAX_DISCARDED_CLUSTER_SIZE) {
                cropslist.remove(i);
                removed++;
            }
        }
        System.out.println(removed);
        System.out.println(cropslist.size());
        return cropslist;
    }

    /** Returns the mean similarity between {@code sentenceVect} and every sentence in a non-empty cluster. */
    private static double averageSimilarity(List<String> cluster, double[] sentenceVect) {
        double total = 0;
        for (String member : cluster) {
            double[] memberVect = SentenceNlpTool.getSentenceVector(member);
            total += SentenceNlpTool.calTwoSentenceSimilar(memberVect, sentenceVect);
        }
        return total / cluster.size();
    }

    /**
     * Writes each cluster to {@code D:/result/<index>.txt}, one sentence per
     * line, creating the output directory if it does not exist yet.
     *
     * @param croplist clusters to write; nothing is written when it is empty
     * @throws IOException if the output directory cannot be created or a file cannot be written
     */
    public static void save(List<List<String>> croplist) throws IOException {
        if (croplist.isEmpty()) {
            return;
        }

        File outputDir = new File(OUTPUT_DIR);
        if (!outputDir.isDirectory() && !outputDir.mkdirs()) {
            throw new IOException("Cannot create output directory: " + outputDir);
        }

        for (int i = 0; i < croplist.size(); i++) {
            File writeName = new File(OUTPUT_DIR + i + ".txt");
            // FileWriter creates (or truncates) the file itself.
            try (FileWriter writer = new FileWriter(writeName);
                    BufferedWriter out = new BufferedWriter(writer)) {
                for (String sentence : croplist.get(i)) {
                    out.write(sentence + "\n");
                }
            }
        }
    }
}

 

// Posted 2019-08-22 15:23 by 寒星暖月 | views (218) | comments (0)