Lucene简单使用一

一:功能说明

   添加数据时,会对数据进行分词并建立索引,存储到索引库中,然后将真正的内容,存储到文档区域

   查询数据时,将查询条件分词,先在索引库中查找,会返回文档id,然后根据文档ID,再到存储文档的区域查找真正的内容

二:核心pom.xml依赖配置

 

        <!-- lucene的核心 -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>6.6.0</version>
        </dependency>
        <!-- lucene的分词器,有标准的英文相关的分词器,没有中文的 -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>6.6.0</version>
        </dependency>
        <!-- 查询解析器 -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>6.6.0</version>
        </dependency>
        <!-- 各种查询方式 -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queries</artifactId>
            <version>6.6.0</version>
        </dependency>
        <!-- 关键字高亮 -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>6.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-demo</artifactId>
            <version>6.6.0</version>
        </dependency>

三:文档对象对应的类

/**
 * Entity that is written to and read back from the Lucene index.
 *
 * <p>{@link #toDocument()} converts an instance into a Lucene {@code Document};
 * {@link #parseArticle(Document)} performs the reverse conversion from the
 * stored fields of a document.
 */
public class Article {
    private Long id;
    private String title;
    private String content;
    private String author;
    private String url;

    /** Creates an empty article; populate it through the setters. */
    public Article() {
    }

    /**
     * Creates a fully populated article.
     *
     * @param id      business identifier of the article
     * @param title   article title
     * @param content article body
     * @param author  author name
     * @param url     address of the article
     */
    public Article(Long id, String title, String content, String author,
            String url) {
        this.id = id;
        this.title = title;
        this.content = content;
        this.author = author;
        this.url = url;
    }

    public Long getId() {
        return id;
    }

    public void setId(Long id) {
        this.id = id;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    public String getAuthor() {
        return author;
    }

    public void setAuthor(String author) {
        this.author = author;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    /**
     * Converts this article into a Lucene document.
     *
     * <p>Properties that are {@code null} are left out of the document instead
     * of failing: a {@code null} id would otherwise cause a
     * {@link NullPointerException} during unboxing, and Lucene field
     * constructors reject {@code null} values.
     *
     * @return a new document holding every non-null property of this article
     */
    public Document toDocument() {
        // A Lucene document is essentially a collection of named fields
        Document doc = new Document();
        if (id != null) {
            // LongPoint only indexes the value (exact / range queries) ...
            doc.add(new LongPoint("id", id));
            // ... so a StoredField is added as well to keep the value in the document
            doc.add(new StoredField("id", id));
        }
        if (title != null) {
            // TextField: tokenized and indexed; Store.YES also keeps the text in the document
            doc.add(new TextField("title", title, Store.YES));
        }
        if (content != null) {
            // Store.NO would still index the text but not keep it in the document
            doc.add(new TextField("content", content, Store.YES));
        }
        if (author != null) {
            // StringField: indexed as a single token, so queries must supply the full value
            doc.add(new StringField("author", author, Store.YES));
        }
        if (url != null) {
            // StoredField: neither tokenized nor indexed, only kept in the document
            doc.add(new StoredField("url", url));
        }
        return doc;
    }

    /**
     * Rebuilds an article from the stored fields of a Lucene document.
     *
     * @param doc document read from the index
     * @return the article; properties whose field is not stored in {@code doc}
     *         are {@code null}
     * @throws NumberFormatException if the stored id is not a valid long
     */
    public static Article parseArticle(Document doc) {
        String storedId = doc.get("id");
        // doc.get returns null for a field that is not stored in the document
        Long id = storedId == null ? null : Long.valueOf(storedId);
        return new Article(id, doc.get("title"), doc.get("content"),
                doc.get("author"), doc.get("url"));
    }

    @Override
    public String toString() {
        return "id : " + id + " , title : " + title + " , content : " + content + " , author : " + author + " , url : " + url;
    }
}

四:添加数据模块

    /**
     * 往用lucene写入数据
     * @throws IOException
     */
    @Test
    public void testCreate() throws IOException {
        Article article = new Article();
//      即使重复也可以 article.setId(
108L); article.setAuthor("张三"); article.setTitle("学习lucene"); article.setContent("lucene,单机程序!"); article.setUrl("http://www.edu360.cn/a10011"); // 指定目录 数据写入目录 String indexPath = "/Users/zx/Documents/dev/lucene/index"; FSDirectory fsDirectory = FSDirectory.open(Paths.get(indexPath)); //创建一个标准分词器,一个字分一次 无法分中文 例如:“老师”会分成“老”“师” //Analyzer analyzer = new StandardAnalyzer(); Analyzer analyzer = new IKAnalyzer(true); //写入索引的配置,设置了分词器 IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); //指定了写入数据目录和配置 IndexWriter indexWriter = new IndexWriter(fsDirectory, indexWriterConfig); //创建一个文档对象 Document document = article.toDocument(); //通过IndexWriter写入 indexWriter.addDocument(document); indexWriter.close(); }

五:搜索模块

   /**
    * Searches the "content" field for a keyword and prints every matching article.
    *
    * @throws IOException    if the index cannot be opened or read
    * @throws ParseException if the query string cannot be parsed
    */
    @Test
    public void testSearch() throws IOException, ParseException {
        String indexPath = "/Users/zx/Documents/dev/lucene/index";
        // Searching must use the same analyzer that was used for writing
        Analyzer analyzer = new IKAnalyzer(true);
        // Keyword to look for
        String queryStr = "数据";

        // try-with-resources releases the reader and the directory even when the search fails
        try (FSDirectory fsDirectory = FSDirectory.open(Paths.get(indexPath));
             DirectoryReader directoryReader = DirectoryReader.open(fsDirectory)) {
            IndexSearcher indexSearcher = new IndexSearcher(directoryReader);
            // Query parser; "content" is the field that is searched
            QueryParser parser = new QueryParser("content", analyzer);
            Query query = parser.parse(queryStr);
            // Alternative: a TermQuery treats the condition as one fixed term, e.g.
            // Query query = new TermQuery(new Term("url", "http://www.edu360.cn/a10010"));

            // Look up the index; 10 limits the result to the first 10 hits
            TopDocs topDocs = indexSearcher.search(query, 10);
            // Each hit carries the Lucene document id and its score
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // Lucene's internal document id, unrelated to the entity's own id
                int doc = scoreDoc.doc;
                // Load the stored document by that id and map it back to the entity
                Document document = indexSearcher.doc(doc);
                Article article = Article.parseArticle(document);
                System.out.println(article);
            }
        }
    }

六:删除模块

@Test
    public void testDelete() throws IOException, ParseException {

        String indexPath = "/Users/zx/Documents/dev/lucene/index";
        Analyzer analyzer = new IKAnalyzer(true);
        FSDirectory fsDirectory = FSDirectory.open(Paths.get(indexPath));
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
        IndexWriter indexWriter = new IndexWriter(fsDirectory, indexWriterConfig);

        //Term词条查找,内容必须完全匹配,不分词
        //indexWriter.deleteDocuments(new Term("content", "学好"));
        //以分词器作为查询条件
//QueryParser parser = new QueryParser("title", analyzer); //Query query = parser.parse("大数据老师"); //LongPoint是建立索引的 范围查找 //Query query = LongPoint.newRangeQuery("id", 99L, 120L);

        //等值查找 Query query = LongPoint.newExactQuery("id", 105L); indexWriter.deleteDocuments(query); indexWriter.commit(); indexWriter.close(); }

七:更新模块  (就是先删除再插入)

   /**
    * Replaces the documents whose author is "张三" with a new article.
    *
    * <p>Lucene has no in-place update: {@code updateDocument} deletes the
    * documents matching the term and then adds the new document.
    *
    * @throws IOException    if the index cannot be opened or written
    * @throws ParseException declared for signature compatibility; not thrown here
    */
    @Test
    public void testUpdate() throws IOException, ParseException {
        String indexPath = "/Users/zx/Documents/dev/lucene/index";
        // Use the same analyzer as the other write and search snippets so the
        // replacement document is tokenized consistently with the rest of the index
        Analyzer analyzer = new IKAnalyzer(true);
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);

        Article article = new Article();
        article.setId(106L);
        article.setAuthor("李四");
        article.setTitle("不学");
        article.setContent("adsafsa工在!!!");
        article.setUrl("http://www.baidu.cn");
        Document document = article.toDocument();

        // try-with-resources closes the writer and the directory even when the update fails
        try (FSDirectory fsDirectory = FSDirectory.open(Paths.get(indexPath));
             IndexWriter indexWriter = new IndexWriter(fsDirectory, indexWriterConfig)) {
            // "author" is a StringField (not tokenized), so the term must be the full value
            indexWriter.updateDocument(new Term("author", "张三"), document);
            indexWriter.commit();
        }
    }

八:多字段查找

    /**
     * Searches several fields ("title" and "content") with one query string
     * and prints every matching article.
     *
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if the query string cannot be parsed
     */
    @Test
    public void testMultiField() throws IOException, ParseException {
        String indexPath = "/Users/zx/Documents/dev/lucene/index";
        Analyzer analyzer = new IKAnalyzer(true);
        // Fields the query is applied to
        String[] fields = {"title", "content"};

        // try-with-resources releases the reader and the directory even when the search fails
        try (FSDirectory fsDirectory = FSDirectory.open(Paths.get(indexPath));
             DirectoryReader directoryReader = DirectoryReader.open(fsDirectory)) {
            IndexSearcher indexSearcher = new IndexSearcher(directoryReader);
            // Parser that expands the query string over all listed fields
            MultiFieldQueryParser queryParser = new MultiFieldQueryParser(fields, analyzer);
            Query query = queryParser.parse("三");

            TopDocs topDocs = indexSearcher.search(query, 10);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.println(Article.parseArticle(document));
            }
        }
    }


九:查找全部数据(MatchAllDocsQuery,不带任何查询条件)

    /**
     * Fetches documents without any condition (at most the first 10) and
     * prints them as articles.
     *
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException declared for signature compatibility; not thrown here
     */
    @Test
    public void testMatchAll() throws IOException, ParseException {
        String indexPath = "/Users/zx/Documents/dev/lucene/index";

        // try-with-resources releases the reader and the directory even when the search fails
        try (FSDirectory fsDirectory = FSDirectory.open(Paths.get(indexPath));
             DirectoryReader directoryReader = DirectoryReader.open(fsDirectory)) {
            IndexSearcher indexSearcher = new IndexSearcher(directoryReader);
            // Matches every document in the index
            Query query = new MatchAllDocsQuery();

            TopDocs topDocs = indexSearcher.search(query, 10);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.println(Article.parseArticle(document));
            }
        }
    }


十:组合查询 布尔查询(组合多个查询条件)

    /**
     * Combines several conditions in one boolean query: the title must
     * contain the term "三" and the content must not contain the term "a".
     *
     * @throws Exception if the index cannot be opened or searched
     */
    @Test
    public void testBooleanQuery() throws Exception {
        String indexPath = "/Users/zx/Documents/dev/lucene/index";

        Query query1 = new TermQuery(new Term("title", "三"));
        Query query2 = new TermQuery(new Term("content", "a"));
        // MUST: the clause has to match
        BooleanClause bc1 = new BooleanClause(query1, BooleanClause.Occur.MUST);
        // MUST_NOT: the clause has to not match
        BooleanClause bc2 = new BooleanClause(query2, BooleanClause.Occur.MUST_NOT);
        BooleanQuery boolQuery = new BooleanQuery.Builder().add(bc1).add(bc2).build();
        System.out.println(boolQuery);

        // try-with-resources releases the reader and the directory even when the search fails
        try (FSDirectory fsDirectory = FSDirectory.open(Paths.get(indexPath));
             DirectoryReader directoryReader = DirectoryReader.open(fsDirectory)) {
            IndexSearcher indexSearcher = new IndexSearcher(directoryReader);
            TopDocs topDocs = indexSearcher.search(boolQuery, 10);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.println(Article.parseArticle(document));
            }
        }
    }

十一:非连续范围查找 (相当于in or)

    /**
     * Uses the query-parser syntax to OR two conditions on the "title" field
     * and prints every matching article.
     *
     * @throws Exception if the index cannot be opened, or the query cannot be
     *                   parsed or executed
     */
    @Test
    public void testQueryParser() throws Exception {
        String indexPath = "/Users/zx/Documents/dev/lucene/index";

        // try-with-resources releases the reader and the directory even when the search fails
        try (FSDirectory fsDirectory = FSDirectory.open(Paths.get(indexPath));
             DirectoryReader directoryReader = DirectoryReader.open(fsDirectory)) {
            IndexSearcher indexSearcher = new IndexSearcher(directoryReader);
            // First argument: default search field; second argument: analyzer
            QueryParser queryParser = new QueryParser("title", new IKAnalyzer(true));
            // Alternative: a plain keyword searched in the default field
            //Query query = queryParser.parse("数据");
            Query query = queryParser.parse("title:学好 OR title:学习");
            System.out.println(query);

            TopDocs topDocs = indexSearcher.search(query, 10);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.println(Article.parseArticle(document));
            }
        }
    }

 

十二:连续范围查询(相当于 >= 和 <=,上下界都包含在内)

 /**
  * Finds the articles whose indexed "id" lies in the range 107 to 108
  * (both bounds inclusive) and prints them.
  *
  * @throws Exception if the index cannot be opened or searched
  */
 @Test
    public void testRangeQuery() throws Exception {
        String indexPath = "/Users/zx/Documents/dev/lucene/index";
        // Range condition on the LongPoint field; lower and upper value are both included
        Query query = LongPoint.newRangeQuery("id", 107L, 108L);
        System.out.println(query);

        // try-with-resources releases the reader and the directory even when the search fails
        try (FSDirectory fsDirectory = FSDirectory.open(Paths.get(indexPath));
             DirectoryReader directoryReader = DirectoryReader.open(fsDirectory)) {
            IndexSearcher indexSearcher = new IndexSearcher(directoryReader);
            TopDocs topDocs = indexSearcher.search(query, 10);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.println(Article.parseArticle(document));
            }
        }
    }

 

作笔记用!

 

posted on 2018-08-10 17:54  CodeArtist  阅读(433)  评论(0)    收藏  举报