Java 热词计算
一、pom 引入分词依赖(含字典)
<!-- HanLP, "portable" build 1.8.4: supplies the HanLP.segment tokenizer used by the hot-word code -->
<dependency>
<groupId>com.hankcs</groupId>
<artifactId>hanlp</artifactId>
<version>portable-1.8.4</version>
</dependency>
<!-- Apache commons-compress dependency -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.26.0</version>
</dependency>
二、配置文件添加过滤词
# Stop words excluded from hot-word counting; bound to HotWordProperties.stopWords
# through the "hotword" configuration prefix.
hotword:
  stop-words:
    - 的
    - 了
    - 在
    - 是
    - 我
    - 有
    - 和
    - 就
    - 不
    - 人
    - 都
    - 一
    - 一个
    - 上
    - 也
    - 很
    - 到
    - 说
    - 要
    - 去
    - 你
    - 会
    - 着
    - 没有
    - 看
    - 好
    - 自己
    - 这
    - 那
    - 他
    - 她
    - 它
    - 们
    - 这个
    - 那个
    - 什么
    - 怎么
    - 可以
    - 没
    - 与
    - 还
    - 之
    - 大
    - 小
    - 多
    - 少
    - 啊
    - 呀
    - 呢
    - 吧
    - 嘛
    - 啦
    - 哦
    - 嗯
    - 哎
    - 一个人
    - 一头
    - 一只
三、读取配置
import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;

import java.util.ArrayList;
import java.util.List;

/**
 * Configuration holder for the {@code hotword} prefix of the application configuration.
 */
@Data
@Component
@ConfigurationProperties(prefix = "hotword")
public class HotWordProperties {

    /**
     * Stop-word list, bound from {@code hotword.stop-words}.
     * Starts as an empty list so that {@code getStopWords()} does not return
     * {@code null} when the property is absent from the configuration.
     */
    private List<String> stopWords = new ArrayList<>();
}
四、热词处理
import cn.hutool.core.date.DateUtil; import xxx.config.HotWordProperties; import com.hankcs.hanlp.HanLP; import com.hankcs.hanlp.seg.common.Term; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.jdbc.core.JdbcTemplate; import org.springframework.stereotype.Component; import java.time.YearMonth; import java.time.format.DateTimeFormatter; import java.util.*; import java.util.stream.Collectors; @Slf4j @Component public class HotWordCalculator { @Autowired private JdbcTemplate jdbcTemplate; @Autowired private HotWordProperties hotWordProperties; private static final DateTimeFormatter MONTH_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM"); // private static final Set<String> STOP_WORDS = new HashSet<>(Arrays.asList( // "的", "了", "在", "是", "我", "有", "和", "就", "不", "人", // "都", "一", "一个", "上", "也", "很", "到", "说", "要", "去", // "你", "会", "着", "没有", "看", "好", "自己", "这", "那", // "他", "她", "它", "们", "这个", "那个", "什么", "怎么", "可以", // "没", "与", "还", "而", "之", "大", "小", "多", "少", // "啊", "呀", "呢", "吧", "嘛", "啦", "哦", "嗯", "哎", // "一个人", "一头", "一只", "一个" // )); private Set<String> getStopWords() { List<String> stopWords = hotWordProperties.getStopWords(); return stopWords != null ? 
new HashSet<>(stopWords) : new HashSet<>(); } public void calculateHotWords(String startDate, String endDate) { log.info("开始计算热词,时间范围:{} 至 {}", startDate, endDate); String sql = "SELECT A_CONTENT, B_TIME FROM TAB " + "WHERE B_TIME >= TO_DATE(?, 'yyyy-MM-dd') " + "AND B_TIME < TO_DATE(?, 'yyyy-MM-dd')"; List<Map<String, Object>> events = jdbcTemplate.queryForList(sql, startDate, endDate); log.info("共查询到 {} 条事件数据", events.size()); Map<String, Map<String, Integer>> monthlyHotWords = new HashMap<>(); for (Map<String, Object> event : events) { String content = (String) event.get("A_CONTENT"); Date registerTime = (Date) event.get("B_TIME"); if (content == null || registerTime == null) { continue; } String month = DateUtil.format(registerTime, "yyyy-MM"); List<String> words = extractWords(content); monthlyHotWords.computeIfAbsent(month, k -> new HashMap<>()); Map<String, Integer> wordCount = monthlyHotWords.get(month); for (String word : words) { wordCount.put(word, wordCount.getOrDefault(word, 0) + 1); } } for (Map.Entry<String, Map<String, Integer>> entry : monthlyHotWords.entrySet()) { String month = entry.getKey(); log.info("++ 处理月份:{}", month); Map<String, Integer> wordCount = entry.getValue(); // 按数量降序排序,相同数量按名称升序排序 LinkedHashMap<String, Integer> sortedWordCount = wordCount.entrySet() .stream() .sorted(Map.Entry.<String, Integer>comparingByValue().reversed() .thenComparing(Map.Entry.comparingByKey())) .collect(Collectors.toMap( Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e1, LinkedHashMap::new )); for (Map.Entry<String, Integer> wordEntry : sortedWordCount.entrySet()) { String word = wordEntry.getKey(); Integer num = wordEntry.getValue(); log.info("{} =》 {}", word, num); } } log.info("热词计算完成"); } private List<String> extractWords(String text) { if (text == null || text.trim().isEmpty()) { return new ArrayList<>(); } Set<String> stopWords = getStopWords(); List<Term> terms = HanLP.segment(text); return terms.stream() .map(term -> term.word.trim()) 
.filter(word -> !word.isEmpty()) .filter(word -> !stopWords.contains(word)) .filter(word -> word.length() >= 2) .filter(word -> !word.matches(".*[0-9a-zA-Z].*") || word.length() > 2) .collect(Collectors.toList()); } }
五、调用
import xx.utils.HotWordCalculator; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.CommandLineRunner; import org.springframework.stereotype.Component; import javax.annotation.PostConstruct; @Component @Slf4j public class TestJob implements CommandLineRunner { @Autowired private HotWordCalculator hotWordCalculator; @Override public void run(String... args) throws Exception { log.info("this is => CommandLineRunner"); hotWordCalculator.calculateHotWords("2026-01-01", "2026-03-30"); } @PostConstruct public void test() { log.info("this is => PostConstruct"); } }
有些事情,没经历过不知道原理,没失败过不明白奥妙,没痛苦过不了解真谛。临渊羡鱼,不如退而结网!

浙公网安备 33010602011771号