JAVA多线程处理大量数据(二)--推荐

背景说明：要对服务器上一个目录进行全量文件读取

1、多线程执行类--FileThreadUtils.java

import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.io.FileUtil;
import com.alibaba.fastjson.JSONObject;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.*;
import java.util.stream.Collectors;

/**
 * @Description: 文件多线程工具类
 * @Date: 2023/3/9 14:12
 * @Version: 1.0
 */
@
Slf4j@ Component
public class FileThreadUtils {

    /**
     * @description:  根据文件数量动态创建线程数
     * @date: 2023/3/10 15:55
     * @param fileNumber
     * @return java.lang.Long
     */
    public Integer getThreadSize(int fileNumber) {　　　　　　
        int prcessors = Runtime.getRuntime().availableProcessors();　　　　　　
        prcessors *= 2;　　　　　　
        prcessors += 1;　　　　　　
        return (fileNumber + prcessors - 1) / prcessors;
    }

    /**
     * @description:  使用多线程处理文件
     * @date: 2023/3/10 16:00
     * @param scanDTO
     * @return java.lang.String
     */
    public void scanAllFile(List < String > fileList) throws InterruptedException, ExecutionException {
        //所少条数据开启一个子线程
        Integer perCount = getThreadSize(fileList.size());
        List < List < String >> groupList = ListUtils.partition(fileList, perCount);
        //初始化线程池方式1
        //ExecutorService es = Executors.newFixedThreadPool(groupList.size());    
        //初始化线程池方式2（推荐）
        ThreadPoolExecutor es = new ThreadPoolExecutor(groupList.size(), groupList.size(), 1, TimeUnit.SECONDS, new ArrayBlockingQueue < > (1));
        List < FileTask > tasks = new ArrayList < > ();
        for (int i = 0; i < groupList.size(); i++) {
            log.info("一批数据加入线程池: " + i);
            tasks.add(new FileTask(groupList.get(i)));
        }
        List < Future < Map < String, Object >>> futures = es.invokeAll(tasks);
        es.shutdown();
        //汇总各个子线程的数据结果 
        //这里处理线程结果 
        for (int i = 0; i < futures.size(); i++) {
            Future < Map < String, Object >> mapFuture = futures.get(i);
            //System.out.println("index:" + i + ",future:"+ futures.get(i).get()); 
            Map < String, Object > stringObjectMap = mapFuture.get();
        }
    }

    /**
     * @description: 文件处理单线程
     * @date: 2023/3/10 16:01
     * @return
     */
    public static class FileTask implements Callable < Map < String, Object >> {
        List < String > subFileList;
        public FileTask(List < String > list) {
            this.subFileList = list;
        }
      
       @Override
        public Map < String, Object > call() throws Exception {
            for (String filePath: subFileList) {
                filePath = filePath.replaceAll("\\\\", "/");
                log.info(Thread.currentThread().getName() + " 线程开始处理文件 ：" + filePath);
            }
            Map < String, Object > subResultMap = new HashMap < > ();
            //log.info("subMap{}",subResultMap); 
            return subResultMap;
        }
    }
}

2、数据分组工具类-- ListUtils.java

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * @Description: List helper that splits a list into fixed-size chunks.
 * @Date: 2023/3/8 9:39
 * @Version: 1.0
 */
public class ListUtils {
    /**
     * Splits a list into consecutive chunks of at most {@code size} elements.
     *
     * <p>Every chunk is a newly allocated list. All chunks hold exactly {@code size}
     * elements except possibly the last one, which holds the remainder. An empty input
     * produces an empty result.
     *
     * @param list the list to split
     * @param size maximum number of elements per chunk
     * @param <T>  element type
     * @return the chunks, in the original element order
     * @throws IllegalArgumentException if {@code list} is null or {@code size} is not positive
     */
    public static <T> List<List<T>> partition(final List<T> list, final int size) {
        if (list == null) {
            throw new IllegalArgumentException("List must not be null");
        }
        if (size <= 0) {
            throw new IllegalArgumentException("Size must be greater than 0");
        }

        final List<List<T>> chunks = new ArrayList<>();
        List<T> current = new ArrayList<>();
        for (final T element : list) {
            current.add(element);
            // Chunk is full: publish it and start a fresh one.
            if (current.size() == size) {
                chunks.add(current);
                current = new ArrayList<>();
            }
        }
        // Publish the trailing, partially filled chunk (if any).
        if (!current.isEmpty()) {
            chunks.add(current);
        }
        return chunks;
    }
}

Tips: 异步处理数据

// Run the supplier on a dedicated single-thread pool instead of the common ForkJoinPool.
ExecutorService executor = Executors.newFixedThreadPool(1);
try {
    CompletableFuture<Map<String, Object>> map1Future =
            CompletableFuture.supplyAsync(() -> new HashMap<>(), executor);
    // Block until every listed future has completed; a failed task surfaces here
    // as an ExecutionException.
    CompletableFuture.allOf(map1Future).get();
} finally {
    // The pool's worker thread is non-daemon: without shutdown() it keeps the JVM alive.
    executor.shutdown();
}

posted @ 2023-03-10 19:19 bug毁灭者 阅读(774) 评论(0) 收藏 举报

刷新页面返回顶部

BUG工厂

物必先腐，而后虫生

JAVA多线程处理大量数据(二)--推荐

公告