JAVA多线程处理大量数据(二)--推荐
背景说明:要对服务器上一个目录进行全量文件读取
1、多线程执行类--FileThreadUtils.java
import cn.hutool.core.collection.CollUtil; import cn.hutool.core.io.FileUtil; import com.alibaba.fastjson.JSONObject; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.StringUtils; import org.springframework.stereotype.Component; import java.io.IOException; import java.util.*; import java.util.concurrent.*; import java.util.stream.Collectors; /** * @Description: 文件多线程工具类 * @Date: 2023/3/9 14:12 * @Version: 1.0 */ @ Slf4j@ Component public class FileThreadUtils { /** * @description: 根据文件数量动态创建线程数 * @date: 2023/3/10 15:55 * @param fileNumber * @return java.lang.Long */ public Integer getThreadSize(int fileNumber) { int prcessors = Runtime.getRuntime().availableProcessors(); prcessors *= 2; prcessors += 1; return (fileNumber + prcessors - 1) / prcessors; } /** * @description: 使用多线程处理文件 * @date: 2023/3/10 16:00 * @param scanDTO * @return java.lang.String */ public void scanAllFile(List < String > fileList) throws InterruptedException, ExecutionException { //所少条数据开启一个子线程 Integer perCount = getThreadSize(fileList.size()); List < List < String >> groupList = ListUtils.partition(fileList, perCount); //初始化线程池方式1 //ExecutorService es = Executors.newFixedThreadPool(groupList.size()); //初始化线程池方式2(推荐) ThreadPoolExecutor es = new ThreadPoolExecutor(groupList.size(), groupList.size(), 1, TimeUnit.SECONDS, new ArrayBlockingQueue < > (1)); List < FileTask > tasks = new ArrayList < > (); for (int i = 0; i < groupList.size(); i++) { log.info("一批数据加入线程池: " + i); tasks.add(new FileTask(groupList.get(i))); } List < Future < Map < String, Object >>> futures = es.invokeAll(tasks); es.shutdown(); //汇总各个子线程的数据结果 //这里处理线程结果 for (int i = 0; i < futures.size(); i++) { Future < Map < String, Object >> mapFuture = futures.get(i); //System.out.println("index:" + i + ",future:"+ futures.get(i).get()); Map < String, Object > stringObjectMap = mapFuture.get(); } } /** * @description: 文件处理单线程 * @date: 
2023/3/10 16:01 * @return */ public static class FileTask implements Callable < Map < String, Object >> { List < String > subFileList; public FileTask(List < String > list) { this.subFileList = list; } @Override public Map < String, Object > call() throws Exception { for (String filePath: subFileList) { filePath = filePath.replaceAll("\\\\", "/"); log.info(Thread.currentThread().getName() + " 线程开始处理文件 :" + filePath); } Map < String, Object > subResultMap = new HashMap < > (); //log.info("subMap{}",subResultMap); return subResultMap; } } }
2、数据分组工具类-- ListUtils.java
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * List helper methods.
 *
 * @Date: 2023/3/8 9:39
 * @Version: 1.0
 */
public class ListUtils {

    /**
     * Splits a list into consecutive chunks of at most {@code size} elements.
     *
     * <p>Element order is preserved. Every chunk except possibly the last holds exactly
     * {@code size} elements; an empty input yields an empty result. Each chunk is a new
     * list, independent of the input.
     *
     * @param list the list to split; must not be {@code null}
     * @param size maximum number of elements per chunk; must be greater than 0
     * @param <T>  element type
     * @return the chunks, in order
     * @throws IllegalArgumentException if {@code list} is {@code null} or {@code size <= 0}
     */
    public static <T> List<List<T>> partition(final List<T> list, final int size) {
        if (list == null) {
            throw new IllegalArgumentException("List must not be null");
        }
        if (size <= 0) {
            throw new IllegalArgumentException("Size must be greater than 0");
        }

        List<List<T>> chunks = new ArrayList<>();
        List<T> current = new ArrayList<>();
        for (T element : list) {
            current.add(element);
            if (current.size() == size) {
                // Chunk is full: publish it and start a fresh one.
                chunks.add(current);
                current = new ArrayList<>();
            }
        }

        // Trailing partial chunk, if any.
        if (!current.isEmpty()) {
            chunks.add(current);
        }
        return chunks;
    }
}
Tips: 异步处理数据
// Run a task asynchronously on a dedicated executor and wait for it to finish.
ExecutorService executor = Executors.newFixedThreadPool(1);
CompletableFuture<Map<String, Object>> map1Future =
        CompletableFuture.supplyAsync(() -> new HashMap<String, Object>(), executor);
try {
    // allOf(...) completes once every listed future has completed; get() blocks until then.
    CompletableFuture.allOf(map1Future).get();
} finally {
    // The pool's worker thread is non-daemon; without shutdown() it keeps the JVM alive.
    executor.shutdown();
}

浙公网安备 33010602011771号