木心

毕竟几人真得鹿,不知终日梦为鱼

导航

lucene案例-blog

  本demo功能:

  1)新增博客,并添加lucene索引;以及更新、删除博客(同时维护索引);以及通过lucene索引搜索博客;

  2)添加lucene索引时使用lucene-analyzers-smartcn中文分词,搜索的结果进行高亮显示。

 

  demo结构

 

  pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.1.1.RELEASE</version>
        <relativePath /> <!-- lookup parent from repository -->
    </parent>
    <groupId>com.oy</groupId>
    <artifactId>blog</artifactId>
    <version>1.0.0</version>
    <packaging>jar</packaging>
    <name>blog-demo</name>
    <description>Demo project for Spring Boot</description>

    <properties>
        <java.version>1.8</java.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-tomcat</artifactId>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-thymeleaf</artifactId>
        </dependency>

        <dependency>
            <groupId>org.mybatis.spring.boot</groupId>
            <artifactId>mybatis-spring-boot-starter</artifactId>
            <version>1.3.2</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.36</version>
        </dependency>

        <dependency>
            <groupId>commons-lang</groupId>
            <artifactId>commons-lang</artifactId>
            <version>2.5</version>
        </dependency>

        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.75</version>
        </dependency>

        <!-- 百度编辑器ueditor -->
        <dependency>
            <groupId>commons-fileupload</groupId>
            <artifactId>commons-fileupload</artifactId>
            <version>1.3.1</version>
        </dependency>

        <!-- lucene -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>5.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>5.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-smartcn</artifactId>
            <version>5.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>5.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>5.3.1</version>
        </dependency>

    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>

</project>

  application.properties

server.port=80
server.servlet.context-path=/

logging.level.root=info
logging.file=d:/logs/boot-demo.log

#datasource
spring.datasource.driver-class-name=com.mysql.jdbc.Driver
spring.datasource.url=jdbc:mysql://127.0.0.1:3306/db_blog?useUnicode=true&characterEncoding=utf8&serverTimezone=GMT%2B8
spring.datasource.username=root
spring.datasource.password=
spring.datasource.tomcat.min-idle=5

##################### MyBatis相关配置 [start] #####################
#MyBatis映射文件
mybatis.mapper-locations=classpath:com/oy/mapping/*.xml
#扫描生成实体的别名,需要和注解@Alias联合使用
mybatis.type-aliases-package=com.oy.entity
#MyBatis配置文件,当你的配置比较复杂的时候,可以使用
#mybatis.config-location=
#级联延迟加载。true:开启延迟加载
mybatis.configuration.lazy-loading-enabled=true
#积极的懒加载。false:按需加载
mybatis.configuration.aggressive-lazy-loading=false
##################### MyBatis相关配置 [end]  ######################

# 博客索引库目录
indexDir=D:/blogLuceneIndexDir
# 通过关键字查询博客索引库,对结果分页展示时每页的记录数
blogLuceneIndexShowRows=10

  sql.txt

CREATE TABLE `blog` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `title` varchar(200) DEFAULT NULL,
  `summary` varchar(400) DEFAULT NULL,
  `releaseDate` datetime DEFAULT NULL,
  `content` text,
  `keyWord` varchar(200) DEFAULT NULL,
  `contentNoTag` text,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

  BlogController:新增博客,并添加lucene索引;以及更新、删除博客(同时维护索引);以及通过lucene索引搜索博客

package com.oy.controller;

import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.servlet.config.annotation.ViewControllerRegistry;
import org.springframework.web.servlet.config.annotation.WebMvcConfigurer;

import com.oy.entity.Blog;
import com.oy.lucene.BlogIndex;
import com.oy.service.BlogService;
import com.oy.util.StringUtil;

/**
 * MVC controller for the blog pages: list, edit, save, delete, fetch-by-id and
 * keyword search.
 * <p>
 * Add, update and delete operations write to the database through
 * {@link BlogService} and then apply the same change to the Lucene index at
 * {@code indexDir} through {@link BlogIndex}.
 *
 * @author oy
 * @version 1.0
 * @date 2021-01-24
 * @time 17:04:21
 */
@Controller
@RequestMapping("/blog")
public class BlogController implements WebMvcConfigurer {

    /** Lucene index directory, injected from the {@code indexDir} property. */
    @Value("${indexDir}")
    private String indexDir;

    /** Number of search hits shown per page, injected from {@code blogLuceneIndexShowRows}. */
    @Value("${blogLuceneIndexShowRows}")
    private Integer rows;

    @Autowired
    private BlogService blogService;

    /**
     * Maps the static path {@code /page/blog/save} straight to the
     * {@code blog/save} view, without a handler method.
     */
    @Override
    public void addViewControllers(ViewControllerRegistry registry) {
        registry.addViewController("/page/blog/save").setViewName("blog/save");
    }

    /**
     * Shows the blog list page.
     *
     * @param model receives the {@code blogList} attribute
     * @return the {@code blog/list} view name
     */
    @GetMapping("/list")
    public String list(Model model) {
        // An empty query map is passed, i.e. no filter criteria are supplied.
        Map<String, Object> queryInfo = new HashMap<>();
        model.addAttribute("blogList", blogService.findBlog(queryInfo));
        return "blog/list";
    }

    /**
     * Loads one blog and opens the edit form with it.
     *
     * @param id    id of the blog to edit
     * @param model receives the {@code blog} attribute
     * @return the {@code blog/save} view name
     */
    @GetMapping("/preEdit/{id}")
    public String preEdit(@PathVariable Integer id, Model model) {
        Blog blog = blogService.findById(id);
        model.addAttribute("blog", blog);
        return "blog/save";
    }

    /**
     * Creates or updates a blog and keeps the Lucene index in step.
     * <p>
     * A blog that already carries an id is updated; otherwise it is stamped
     * with the current time as release date and inserted.
     * <p>
     * NOTE(review): the method returns no view name — confirm the response
     * the client is expected to receive from this endpoint.
     *
     * @param blog  form-bound blog
     * @param model unused
     * @throws Exception if the index operation fails
     */
    @PostMapping("/save")
    public void save(Blog blog, Model model) throws Exception {
        BlogIndex blogIndex = new BlogIndex(indexDir);

        if (blog.getId() != null) { // update
            blogService.update(blog);
            blogIndex.updateIndex(blog);
        } else { // insert
            blog.setReleaseDate(new Date());
            blogService.add(blog);
            blogIndex.addIndex(blog);
        }
    }

    /**
     * Deletes a blog from the database and removes its index entry.
     *
     * @param id    id of the blog to delete
     * @param model unused
     * @return redirect to the blog list
     * @throws Exception if the index operation fails
     */
    @GetMapping("/del/{id}")
    public String del(@PathVariable Integer id, Model model) throws Exception {
        blogService.deleteById(id);
        BlogIndex blogIndex = new BlogIndex(indexDir);
        blogIndex.deleteIndex("" + id);
        return "redirect:/blog/list";
    }

    /**
     * Returns one blog as the response body.
     *
     * @param id    id of the blog to fetch
     * @param model unused
     * @return the blog returned by the service for that id
     */
    @GetMapping("/{id}")
    @ResponseBody
    public Blog findById(@PathVariable Integer id, Model model) {
        Blog blog = blogService.findById(id);
        return blog;
    }

    // ==================================================================
    /**
     * Searches the Lucene index for the given keyword and shows one page of
     * the hits.
     * <p>
     * The page size is the configured {@code rows} value. A missing,
     * non-numeric or non-positive {@code page} is treated as page 1, and a
     * page beyond the last hit yields an empty list instead of an exception.
     *
     * @param q     search keyword; when empty the request is redirected to the list page
     * @param page  1-based page number, optional
     * @param model receives {@code blogList} (the current page), {@code q} and
     *              {@code resultTotal} (total number of hits)
     * @return the {@code blog/query} view name, or a redirect to the list page
     * @throws Exception if the index cannot be searched
     */
    @RequestMapping("/q")
    public String search(@RequestParam(value = "q", required = true) String q,
            @RequestParam(value = "page", required = false) String page, Model model) throws Exception {
        if (StringUtil.isEmpty(q)) {
            return "redirect:/blog/list";
        }

        int pageNo = parsePageNumber(page);

        BlogIndex blogIndex = new BlogIndex(indexDir);
        List<Blog> blogList = blogIndex.searchBlog(q);
        System.out.println("据关键字查询相关博客信息, blogList:" + blogList);

        // Both bounds are derived from the configured page size and clamped to
        // the result size; long arithmetic keeps a huge page number from
        // overflowing into a negative offset.
        int total = blogList.size();
        int start = (int) Math.min((long) (pageNo - 1) * rows, total);
        int end = Math.min(start + rows, total);

        // subList covers [start, end): start inclusive, end exclusive.
        List<Blog> pageItems = blogList.subList(start, end);

        model.addAttribute("blogList", pageItems);
        model.addAttribute("q", q);
        model.addAttribute("resultTotal", total);
        return "blog/query";
    }

    /** Converts the raw {@code page} parameter to a page number of at least 1. */
    private static int parsePageNumber(String page) {
        if (StringUtil.isEmpty(page)) {
            return 1;
        }
        try {
            return Math.max(Integer.parseInt(page.trim()), 1);
        } catch (NumberFormatException e) {
            // A malformed page number is a client mistake, not a server error.
            return 1;
        }
    }
}

  BlogIndex:操作lucene索引

package com.oy.lucene;

import java.io.StringReader;
import java.nio.file.Paths;
import java.util.LinkedList;
import java.util.List;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import com.oy.entity.Blog;
import com.oy.util.DateUtil;
import com.oy.util.StringUtil;

/**
 * 给博客添加索引
 * 
 * @author oy
 * @version 1.0
 * @date 2018年12月5日
 * @time 下午4:22:55
 */
public class BlogIndex {
    private String indexDir; // 索引库目录

    /**
     * 构造方法
     * 
     * @param indexDir
     *            索引库目录
     * @throws Exception
     */
    public BlogIndex(String indexDir) throws Exception {
        this.indexDir = indexDir;
    }

    /**
     * 获取IndexWriter实例
     * 
     * @return
     * @throws Exception
     */
    public IndexWriter getIndexWriter() throws Exception {
        // 索引库目录
        Directory dir = FSDirectory.open(Paths.get(indexDir));

        // 使用中文分词器SmartChineseAnalyzer
        SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();

        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        IndexWriter writer = new IndexWriter(dir, config);
        return writer;
    }

    /**
     * 添加索引
     * 
     * @param dataDir
     *            数据源目录
     * @throws Exception
     */
    public void addIndex(Blog blog) throws Exception {
        IndexWriter writer = getIndexWriter();
        Document doc = new Document();
        doc.add(new StringField("id", String.valueOf(blog.getId()), Field.Store.YES));
        doc.add(new TextField("title", blog.getTitle(), Field.Store.YES));
        doc.add(new StringField("releaseDate", DateUtil.formatDate(blog.getReleaseDate(), "yyyy-MM-dd HH:mm:ss"),
                Field.Store.YES));
        // content实际存储的是contentNoTag,即去除html标签后的内容
        doc.add(new TextField("content", blog.getContentNoTag(), Field.Store.YES));
        writer.addDocument(doc);
        writer.close();
    }

    /**
     * 删除指定博客的索引
     * 
     * @param blogId
     * @throws Exception
     */
    public void deleteIndex(String blogId) throws Exception {
        IndexWriter writer = getIndexWriter();
        writer.deleteDocuments(new Term("id", blogId));
        writer.forceMergeDeletes(); // 强制删除
        writer.commit();
        writer.close();
    }

    /**
     * 更新博客索引
     * 
     * @param blog
     * @throws Exception
     */
    public void updateIndex(Blog blog) throws Exception {
        IndexWriter writer = getIndexWriter();
        Document doc = new Document();
        doc.add(new StringField("id", String.valueOf(blog.getId()), Field.Store.YES));
        doc.add(new TextField("title", blog.getTitle(), Field.Store.YES));
        doc.add(new StringField("releaseDate", DateUtil.formatDate(blog.getReleaseDate(), "yyyy-MM-dd HH:mm:ss"),
                Field.Store.YES));
        doc.add(new TextField("content", blog.getContentNoTag(), Field.Store.YES));
        writer.updateDocument(new Term("id", String.valueOf(blog.getId())), doc);
        writer.close();
    }

    /**
     * 通过关键字搜索博客
     * 
     * @param queryStr
     *            搜索关键字
     * @return
     * @throws Exception
     */
    public List<Blog> searchBlog(String queryStr) throws Exception {
        // 创建IndexSearch对象
        Directory dir = FSDirectory.open(Paths.get(indexDir));
        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher indexSearcher = new IndexSearcher(reader);

        // 组合查询BooleanQuery
        BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();

        // 中文分词器smartcn
        SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();

        // 第一个查询条件:查询title
        QueryParser parser = new QueryParser("title", analyzer);
        Query query = parser.parse(queryStr);

        // 第二个查询条件:查询content
        QueryParser parser2 = new QueryParser("content", analyzer);
        Query query2 = parser2.parse(queryStr);

        booleanQuery.add(query, BooleanClause.Occur.SHOULD);
        booleanQuery.add(query2, BooleanClause.Occur.SHOULD);

        // 执行搜索
        TopDocs hits = indexSearcher.search(booleanQuery.build(), 100);

        // 对搜索结果进行高亮设置
        QueryScorer scorer = new QueryScorer(query);
        Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b><font color='blue'>", "</font></b>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);
        highlighter.setTextFragmenter(fragmenter);

        List<Blog> blogList = new LinkedList<Blog>();
        for (ScoreDoc scoreDoc : hits.scoreDocs) {
            Document doc = indexSearcher.doc(scoreDoc.doc);
            Blog blog = new Blog();
            blog.setId(Integer.parseInt(doc.get("id")));
            blog.setReleaseDate(DateUtil.formatString(doc.get("releaseDate"), "yyyy-MM-dd HH:mm:ss"));

            // 先获取title文本
            String title = doc.get("title");

            // 先获取content文本,并对文本中特殊字符进行转义
            // String content = StringEscapeUtils.escapeHtml(doc.get("content"));
            String content = doc.get("content");
            System.out.println("索引库存储的content:" + content);

            // 然后,对title文本中"命中率最高的部分"进行高亮显示
            if (title != null) {
                TokenStream tokenStream = analyzer.tokenStream("title", new StringReader(title));
                // 获取title文本中"命中率最高的部分"
                String hTitle = highlighter.getBestFragment(tokenStream, title);
                if (StringUtil.isEmpty(hTitle)) {
                    // 如果没有命中,将整个title文本设置给blog对象
                    blog.setTitle(title);
                } else {
                    blog.setTitle(hTitle);
                }
            }

            // 然后,对content文本中"命中率最高的部分"进行高亮显示
            if (content != null) {
                TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(content));
                // 获取content文本中"命中率最高的部分"
                String hContent = highlighter.getBestFragment(tokenStream, content);
                if (StringUtil.isEmpty(hContent)) {
                    // 如果没有命中,将content文本前200个字符设置给blog对象
                    if (content.length() <= 2000) {
                        blog.setContent(content);
                    } else {
                        blog.setContent(content.substring(0, 2000));
                    }
                } else {
                    blog.setContent(hContent);
                }
            }
            blogList.add(blog);
        }

        return blogList;
    }
}

 

  列表

 

  添加博客

 

  搜索

---

posted on 2021-01-24 22:16  wenbin_ouyang  阅读(118)  评论(0)  编辑  收藏  举报