从Excel读取数据并分析其中相似的数据,使用多线程处理(多线程比较相似的字符串,统计出相似的数量及匹配到的字符串)

之前的jar包有问题,现已修改.

需要的jar包,已修改

 

自己去Maven中央仓库下载jar包.

excel数据:

 

 

直接上代码.

程序又优化了一遍.之后如果想进一步提高匹配精度,可能需要建模,最近没空继续做了.

实体类:

package org.analysisitem20181016.pojo;

/**
 * Mutable data holder for one item record.
 *
 * <p>Accessor names keep the snake_case spelling of the underlying columns
 * (including {@code selled_count}) because other classes call them by
 * these exact names.
 */
public class Item {

    /** Item name. */
    private String item_name;

    /** Activity identifier, stored as text. */
    private String activity_id;

    /** Item type. */
    private String type;

    /** User identifier, stored as text. */
    private String user_id;

    /** Sold count, stored as text. */
    private String selled_count;

    /** Number of similar items counted for this item; starts at 0. */
    private int similarity;

    /** Text recorded as the match for this item. */
    private String matchText;

    /** Positional index of this item. */
    private int index;

    /** Length of the matched text. */
    private int match_text_length;

    // ---- item_name ----

    /** @return the item name, or {@code null} if never set */
    public String getItem_name() {
        return this.item_name;
    }

    /** @param item_name the item name to store */
    public void setItem_name(String item_name) {
        this.item_name = item_name;
    }

    // ---- activity_id ----

    /** @return the activity id, or {@code null} if never set */
    public String getActivity_id() {
        return this.activity_id;
    }

    /** @param activity_id the activity id to store */
    public void setActivity_id(String activity_id) {
        this.activity_id = activity_id;
    }

    // ---- type ----

    /** @return the item type, or {@code null} if never set */
    public String getType() {
        return this.type;
    }

    /** @param type the item type to store */
    public void setType(String type) {
        this.type = type;
    }

    // ---- user_id ----

    /** @return the user id, or {@code null} if never set */
    public String getUser_id() {
        return this.user_id;
    }

    /** @param user_id the user id to store */
    public void setUser_id(String user_id) {
        this.user_id = user_id;
    }

    // ---- selled_count ----

    /** @return the sold count, or {@code null} if never set */
    public String getSelled_count() {
        return this.selled_count;
    }

    /** @param selled_count the sold count to store */
    public void setSelled_count(String selled_count) {
        this.selled_count = selled_count;
    }

    // ---- similarity ----

    /** @return the similarity counter (0 until set) */
    public int getSimilarity() {
        return this.similarity;
    }

    /** @param similarity the new similarity counter value */
    public void setSimilarity(int similarity) {
        this.similarity = similarity;
    }

    // ---- matchText ----

    /** @return the recorded match text, or {@code null} if never set */
    public String getMatchText() {
        return this.matchText;
    }

    /** @param matchText the match text to record */
    public void setMatchText(String matchText) {
        this.matchText = matchText;
    }

    // ---- index ----

    /** @return the positional index (0 until set) */
    public int getIndex() {
        return this.index;
    }

    /** @param index the positional index to store */
    public void setIndex(int index) {
        this.index = index;
    }

    // ---- match_text_length ----

    /** @return the stored match-text length (0 until set) */
    public int getMatch_text_length() {
        return this.match_text_length;
    }

    /** @param match_text_length the match-text length to store */
    public void setMatch_text_length(int match_text_length) {
        this.match_text_length = match_text_length;
    }

}

 

线程处理类(改良后使用了calculate2方法来匹配):

package org.analysisitem20181016.main;

import org.analysisitem20181016.pojo.Item;

/**
 * Task that compares one item's name against the name of every other item
 * in {@link CompareMain#itemList}.
 *
 * <p>For each other item whose name shares a substring with this item's name
 * (as decided by {@link #calculate2}), the task increments this item's
 * similarity counter and records the matched substring. When the scan is
 * finished the item is appended to {@link CompareMain#calculatedItemList}.
 *
 * <p>Each task mutates only its own {@link Item}; other items are only read.
 */
public class ThreadMain implements Runnable{

    /** Position of {@link #item} inside {@code CompareMain.itemList}; used to skip self-comparison. */
    private final int index;

    /** The item whose similarity count this task computes. */
    private final Item item;

    /**
     * @param index position of {@code item} in {@code CompareMain.itemList}
     * @param item  the item to compare against all others
     */
    public ThreadMain(int index, Item item){
        this.index = index;
        this.item = item;
    }

    /**
     * Scans all other items, counts those with a common substring, and
     * publishes the item to {@code CompareMain.calculatedItemList}.
     */
    @Override
    public void run() {
        System.out.println("任务" + index + "开始执行!");

        // The item's own name does not change during the scan, so read it once.
        String ownName = item.getItem_name();
        if(ownName != null){
            for(int i = 0; i < CompareMain.itemList.size(); i++){
                if(i == index){
                    continue;
                }
                String otherName = CompareMain.itemList.get(i).getItem_name();
                if(otherName == null){
                    // Nothing to compare against; treat as "no match".
                    continue;
                }

                // Always search for pieces of the shorter name inside the longer one.
                String shorter;
                String longer;
                if(ownName.length() <= otherName.length()){
                    shorter = ownName;
                    longer = otherName;
                }else{
                    shorter = otherName;
                    longer = ownName;
                }

                String matched = calculate2(shorter, shorter, longer, 0, 2);
                // null means no common window; "" means the shorter name was empty.
                if(matched != null && !matched.isEmpty()){
                    item.setMatchText(matched);
                    item.setSimilarity(item.getSimilarity() + 1);
                }
            }
        }

        // calculatedItemList is a plain ArrayList shared by all tasks; ArrayList is
        // not thread-safe, so concurrent add() calls must be serialized.
        synchronized(CompareMain.calculatedItemList){
            CompareMain.calculatedItemList.add(item);
        }
    }

    /**
     * Finds a substring of {@code initText} that occurs in {@code initText2}.
     *
     * <p>The search first tests {@code text} itself. If that is not contained,
     * it slides a window of {@code len} characters over {@code initText}
     * starting at {@code beginIndex}; once the window reaches the end of
     * {@code initText} the window length is reduced by one and the scan
     * restarts from index 0. The search stops without a result as soon as the
     * window length drops below {@link CompareMain#minTextLen}.
     *
     * @param initText   the text windows are cut from
     * @param text       the first candidate to test (callers pass {@code initText})
     * @param initText2  the text that is searched for each candidate
     * @param beginIndex start offset of the first window
     * @param len        initial window length; clamped to {@code initText.length()}
     * @return the first candidate contained in {@code initText2}, or
     *         {@code null} when no candidate of at least
     *         {@code CompareMain.minTextLen} characters matches
     */
    public static String calculate2(String initText, String text, String initText2, int beginIndex, int len){
        String candidate = text;
        int start = beginIndex;
        int window = len;

        while(!initText2.contains(candidate)){
            // A window can never be longer than the text it is cut from.
            if(window > initText.length()){
                window = initText.length();
            }
            if(window < CompareMain.minTextLen){
                return null;
            }
            if(start + window < initText.length()){
                candidate = initText.substring(start, start + window);
                start++;
            }else{
                // Last window at this length: take the tail, then restart
                // from the beginning with a shorter window.
                candidate = initText.substring(start);
                start = 0;
                window--;
            }
        }
        return candidate;
    }

    /**
     * Earlier matching routine, kept for reference; {@link #run()} uses
     * {@link #calculate2} instead.
     *
     * <p>Tests {@code text} first, then slides a fixed window of {@code len}
     * characters over {@code initText} starting at {@code beginIndex}. Unlike
     * {@code calculate2}, the window length never shrinks and no window is
     * tried at all when {@code len >= initText.length()}.
     *
     * @param initText   the text windows are cut from
     * @param text       the first candidate to test
     * @param text2      the text that is searched for each candidate
     * @param beginIndex start offset of the first window
     * @param len        fixed window length
     * @return the first candidate contained in {@code text2}, or {@code null}
     */
    public static String calculate(String initText, String text, String text2, int beginIndex, int len){
        String candidate = text;
        int start = beginIndex;

        while(!text2.contains(candidate)){
            if(len >= initText.length()){
                return null;
            }
            String window;
            if(start + len < initText.length()){
                window = initText.substring(start, start + len);
            }else{
                window = initText.substring(start);
            }
            // A shorter tail means the window ran off the end of initText.
            if(window.length() != len){
                return null;
            }
            start++;
            candidate = window;
        }
        return candidate;
    }
}

 

修复了一个bug.

分析主类(改变了一点代码,逻辑没变):

package org.analysisitem20181016.main;

import java.io.File;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import org.analysisitem20181016.pojo.Item;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;

/**
 * Entry point of the similarity analysis: loads items from an .xls sheet,
 * runs one {@link ThreadMain} task per item on a thread pool, and writes the
 * per-item similarity count and matched text to a new .xls file.
 */
public class CompareMain{

    /** Items loaded by {@link #readExcel()}; read by the {@link ThreadMain} tasks. */
    public static ArrayList<Item> itemList = new ArrayList<Item>();

    /** Regex matching every run of characters outside the range U+4E00..U+9FA5; such runs are stripped from item names. */
    private static final String replaceReg = "[^\u4e00-\u9fa5]+";

    public static int maxTextLen = 4;

    /** Smallest window length {@code ThreadMain.calculate2} will try. */
    public static int minTextLen = 2;

    /**
     * Items whose comparison has finished; appended to by the
     * {@link ThreadMain} tasks and read by {@link #writeExcel()}.
     * {@code ArrayList} is not thread-safe, so concurrent writers must
     * synchronize on this list.
     */
    public static ArrayList<Item> calculatedItemList = new ArrayList<Item>();

    /** Input workbook used when no path is given. */
    private static final String DEFAULT_INPUT_PATH = "G:/Database/Item20181016_YangBing/notitle.xls";

    /** Output workbook used when no path is given. */
    private static final String DEFAULT_OUTPUT_PATH = "G:/Database/Item20181016_YangBing/notitle2.xls";

    /** Header titles of the output sheet, in column order. */
    private static final String[] HEADER_TITLES = {
        "item_name", "activity_id", "type", "user_id", "selled_count", "相似数量", "匹配字符串"
    };

    /**
     * Runs the whole pipeline: read, compare, write.
     *
     * @param args optional: {@code args[0]} input .xls path, {@code args[1]}
     *             output .xls path; the built-in default paths are used for
     *             any argument that is absent
     */
    public static void main(String[] args){
        try{
            String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
            String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PATH;

            CompareMain compareMain = new CompareMain();
            compareMain.readExcel(new File(inputPath));
            compareMain.subsectionCalculate();
            compareMain.show();
            compareMain.writeExcel(new File(outputPath));
        }catch(Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes {@link #calculatedItemList} to the default output path.
     *
     * @throws Exception if the workbook cannot be written
     */
    public void writeExcel() throws Exception{
        writeExcel(new File(DEFAULT_OUTPUT_PATH));
    }

    /**
     * Writes a header row followed by one row per entry of
     * {@link #calculatedItemList} to {@code file} in .xls format.
     *
     * <p>Row {@code i + 1} holds list entry {@code i}; {@code null} entries
     * leave their row empty.
     *
     * @param file destination .xls file; overwritten if it exists
     * @throws Exception if the file cannot be opened or written
     */
    public void writeExcel(File file) throws Exception{
        Workbook wb = new HSSFWorkbook();
        try{
            Sheet sheet = wb.createSheet();

            Row header = sheet.createRow(0);
            for(int col = 0; col < HEADER_TITLES.length; col++){
                header.createCell(col).setCellValue(HEADER_TITLES[col]);
            }

            for (int i = 0; i < calculatedItemList.size(); i++) {
                Item item = calculatedItemList.get(i);
                if(item == null){
                    continue;
                }
                Row row = sheet.createRow(i + 1);
                row.createCell(0).setCellValue(item.getItem_name());
                row.createCell(1).setCellValue(item.getActivity_id());
                row.createCell(2).setCellValue(item.getType());
                row.createCell(3).setCellValue(item.getUser_id());
                row.createCell(4).setCellValue(item.getSelled_count());
                row.createCell(5).setCellValue(item.getSimilarity());
                row.createCell(6).setCellValue(item.getMatchText());
            }

            FileOutputStream fos = new FileOutputStream(file);
            try{
                wb.write(fos);
                fos.flush();
            }finally{
                // Release the file handle even when write() fails.
                fos.close();
            }
        }finally{
            wb.close();
        }
        System.out.println("写入Excel文件完成!");
    }

    /**
     * Console dump of the results. Output is currently disabled, so this
     * method has no effect.
     */
    public void show(){
        // Intentionally empty: per-item console output is switched off.
    }

    /**
     * Runs one {@link ThreadMain} task for every entry of {@link #itemList}
     * and blocks until all of them have finished.
     *
     * <p>The pool is capped at the number of available processors rather than
     * one thread per item, so large sheets do not spawn thousands of threads.
     * An empty {@code itemList} is a no-op.
     *
     * @throws Exception if the calling thread is interrupted while waiting;
     *                   tasks still running are then cancelled
     */
    public void subsectionCalculate() throws Exception{
        int size = itemList.size();
        if(size == 0){
            // ThreadPoolExecutor rejects a maximum pool size of 0.
            return;
        }

        int threads = Math.min(size, Runtime.getRuntime().availableProcessors());
        ThreadPoolExecutor executor = new ThreadPoolExecutor(
                threads, threads, 0L, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>());
        try{
            for(int i = 0; i < size; i++){
                executor.execute(new ThreadMain(i, itemList.get(i)));
            }
            // No more submissions; let queued tasks drain, then wait for them.
            executor.shutdown();
            executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
        }finally{
            // No-op after normal termination; cancels leftovers on interrupt/error.
            executor.shutdownNow();
        }
    }

    /**
     * Loads items from the default input path into {@link #itemList}.
     *
     * @throws Exception if the workbook cannot be opened or parsed
     */
    public void readExcel() throws Exception{
        readExcel(new File(DEFAULT_INPUT_PATH));
    }

    /**
     * Reads the first sheet of {@code file} and appends one {@link Item} per
     * data row to {@link #itemList}. Row 0 is skipped.
     *
     * <p>Expected columns: 0 item name (text), 1 activity id (numeric),
     * 2 type (text), 3 user id (numeric), 4 sold count (numeric). Item names
     * are stripped of every character matched by {@link #replaceReg}. Missing
     * rows are skipped; a missing name cell yields an empty name and any
     * other missing cell leaves the corresponding field unset.
     *
     * @param file source .xls file
     * @throws Exception if the file cannot be opened, or a cell has a type
     *                   incompatible with the column (for example text in a
     *                   numeric column)
     */
    public void readExcel(File file) throws Exception{
        POIFSFileSystem fs = new POIFSFileSystem(file);
        try{
            Workbook wb = new HSSFWorkbook(fs);
            try{
                Sheet sheet = wb.getSheetAt(0);
                // getLastRowNum() is an index, so rows after a gap are still reached;
                // getRow() returns null for rows that were never defined.
                int lastRow = sheet.getLastRowNum();
                for(int i = 1; i <= lastRow; i++){
                    Row row = sheet.getRow(i);
                    if(row == null){
                        continue;
                    }
                    Item item = new Item();

                    String rawName = readText(row, 0);
                    if(rawName == null){
                        rawName = "";
                    }
                    item.setItem_name(rawName.replaceAll(replaceReg, ""));

                    String activityId = readWholeNumber(row, 1);
                    if(activityId != null){
                        item.setActivity_id(activityId);
                    }
                    String type = readText(row, 2);
                    if(type != null){
                        item.setType(type);
                    }
                    String userId = readWholeNumber(row, 3);
                    if(userId != null){
                        item.setUser_id(userId);
                    }
                    String selledCount = readWholeNumber(row, 4);
                    if(selledCount != null){
                        item.setSelled_count(selledCount);
                    }

                    itemList.add(item);
                }
            }finally{
                wb.close();
            }
        }finally{
            fs.close();
        }
    }

    /** Returns the string value of the cell at {@code col}, or {@code null} when the cell is absent. */
    private static String readText(Row row, int col){
        Cell cell = row.getCell(col);
        return cell == null ? null : cell.getStringCellValue();
    }

    /** Returns the numeric cell at {@code col} truncated to a whole number as text, or {@code null} when the cell is absent. */
    private static String readWholeNumber(Row row, int col){
        Cell cell = row.getCell(col);
        if(cell == null){
            return null;
        }
        return (long)cell.getNumericCellValue() + "";
    }

}

 

现在可以匹配多个字符了,但还有一点bug,暂时没空解决.

好了,有兴趣的自己看代码吧!

 解析结果:

 

 非常有问题,但是暂时没空也没心思解决.

posted @ 2018-10-18 16:12  ラピスラズリ(Dawn)  阅读(680)  评论(0)  编辑  收藏  举报