Lucene简单使用一
一:功能说明
添加数据时,会对数据进行分词并建立索引,存储到索引库中,然后将真正的内容,存储到文档区域
查询数据时,将查询条件分词,先在索引库中查找,会返回文档id,然后根据文档ID,再到存储文档的区域查找真正的内容
二:核心pom.xml依赖配置
<!-- lucene的核心 -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>6.6.0</version>
</dependency>
<!-- lucene的分词器,包含标准分词器等通用分词器,没有按词切分的中文分词器(下文用到的IKAnalyzer是第三方中文分词器,需要另外引入依赖) -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>6.6.0</version>
</dependency>
<!-- 查询解析器 -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>6.6.0</version>
</dependency>
<!-- 各种查询方式 -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queries</artifactId>
<version>6.6.0</version>
</dependency>
<!-- 关键字高亮 -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>6.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-demo</artifactId>
<version>6.6.0</version>
</dependency>
三:文档对象对应的类
public class Article { private Long id; private String title; private String content; private String author; private String url; public Article(){} public Article(Long id, String title, String content, String author, String url) { super(); this.id = id; this.title = title; this.content = content; this.author = author; this.url = url; } public Long getId() { return id; } public void setId(Long id) { this.id = id; } public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public String getContent() { return content; } public void setContent(String content) { this.content = content; } public String getAuthor() { return author; } public void setAuthor(String author) { this.author = author; } public String getUrl() { return url; } public void setUrl(String url) { this.url = url; } public Document toDocument(){ //Lucene存储的格式(Map装的k,v) Document doc = new Document(); //向文档中添加一个long类型的属性,建立索引 doc.add(new LongPoint("id", id)); //在文档中存储 doc.add(new StoredField("id", id)); //设置一个文本类型,会对内容进行分词,建立索引,并将内容在文档中存储 doc.add(new TextField("title", title, Store.YES)); //设置一个文本类型,会对内容进行分词,建立索引,存在文档中存储 / No代表不存储 //Store.No只是不在文档中存储 doc.add(new TextField("content", content, Store.YES)); //StringField,不分词,建立索引,文档中存储,因为不分词,所以查询时要输入全内容 doc.add(new StringField("author", author, Store.YES)); //不分词,不建立索引,在文档中存储, doc.add(new StoredField("url", url)); return doc; } public static Article parseArticle(Document doc){ Long id = Long.parseLong(doc.get("id")); String title = doc.get("title"); String content = doc.get("content"); String author = doc.get("author"); String url = doc.get("url"); Article article = new Article(id, title, content, author, url); return article; } @Override public String toString() { return "id : " + id + " , title : " + title + " , content : " + content + " , author : " + author + " , url : " + url; } }
四:添加数据模块
/** * 往用lucene写入数据 * @throws IOException */ @Test public void testCreate() throws IOException { Article article = new Article();
// 即使重复也可以 article.setId(108L); article.setAuthor("张三"); article.setTitle("学习lucene"); article.setContent("lucene,单机程序!"); article.setUrl("http://www.edu360.cn/a10011"); // 指定目录 数据写入目录 String indexPath = "/Users/zx/Documents/dev/lucene/index"; FSDirectory fsDirectory = FSDirectory.open(Paths.get(indexPath)); //创建一个标准分词器,一个字分一次 无法分中文 例如:“老师”会分成“老”“师” //Analyzer analyzer = new StandardAnalyzer(); Analyzer analyzer = new IKAnalyzer(true); //写入索引的配置,设置了分词器 IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); //指定了写入数据目录和配置 IndexWriter indexWriter = new IndexWriter(fsDirectory, indexWriterConfig); //创建一个文档对象 Document document = article.toDocument(); //通过IndexWriter写入 indexWriter.addDocument(document); indexWriter.close(); }
五:搜索模块
@Test public void testSearch() throws IOException, ParseException { String indexPath = "/Users/zx/Documents/dev/lucene/index"; // 读跟写要用相同的分词器 Analyzer analyzer = new IKAnalyzer(true); //Analyzer analyzer = new IKAnalyzer(true); DirectoryReader directoryReader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath))); //索引查询器 IndexSearcher indexSearcher = new IndexSearcher(directoryReader); // 要查找的关键字 String queryStr = "数据"; //创建一个查询条件解析器 "content"表示从content中查找 QueryParser parser = new QueryParser("content", analyzer); //对查询条件进行解析 Query query = parser.parse(queryStr); //TermQuery将查询条件当成是一个固定的词 //Query query = new TermQuery(new Term("url", "http://www.edu360.cn/a10010")); //在【索引库】中进行查找 10 表示查找前10个 TopDocs topDocs = indexSearcher.search(query, 10); //获取到查找到的文文档ID和得分 ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (ScoreDoc scoreDoc : scoreDocs) { //从索引中查询到文档的ID,跟我们自定的实体类id没有关系 int doc = scoreDoc.doc; //在根据ID到文档中查找文档内容 Document document = indexSearcher.doc(doc); //将文档转换成对应的实体类 Article article = Article.parseArticle(document); System.out.println(article); } // 释放资源 directoryReader.close(); }
六:删除模块
@Test public void testDelete() throws IOException, ParseException { String indexPath = "/Users/zx/Documents/dev/lucene/index"; Analyzer analyzer = new IKAnalyzer(true); FSDirectory fsDirectory = FSDirectory.open(Paths.get(indexPath)); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); IndexWriter indexWriter = new IndexWriter(fsDirectory, indexWriterConfig); //Term词条查找,内容必须完全匹配,不分词 //indexWriter.deleteDocuments(new Term("content", "学好"));
//以分词器作为查询条件 //QueryParser parser = new QueryParser("title", analyzer); //Query query = parser.parse("大数据老师"); //LongPoint是建立索引的 范围查找 //Query query = LongPoint.newRangeQuery("id", 99L, 120L);
//等值查找 Query query = LongPoint.newExactQuery("id", 105L); indexWriter.deleteDocuments(query); indexWriter.commit(); indexWriter.close(); }
七:更新模块 (就是先删除再插入)
/**
 * Replaces the documents written by author "张三" with one new article.
 *
 * <p>Lucene has no in-place update: {@code updateDocument} deletes every document that
 * contains the given term and then adds the new document.
 *
 * @throws IOException    if the index cannot be opened or written
 * @throws ParseException kept in the signature for callers; not thrown by this body
 */
@Test
public void testUpdate() throws IOException, ParseException {
    String indexPath = "/Users/zx/Documents/dev/lucene/index";

    Article article = new Article();
    article.setId(106L);
    article.setAuthor("李四");
    article.setTitle("不学");
    article.setContent("adsafsa工在!!!");
    article.setUrl("http://www.baidu.cn");

    // Use the same analyzer as the create/search code: text indexed with a different
    // analyzer would be tokenized differently from the queries run against it.
    // try-with-resources closes writer, directory and analyzer even if the update fails.
    try (Analyzer analyzer = new IKAnalyzer(true);
         FSDirectory fsDirectory = FSDirectory.open(Paths.get(indexPath));
         IndexWriter indexWriter = new IndexWriter(fsDirectory, new IndexWriterConfig(analyzer))) {
        Document document = article.toDocument();
        // "author" is a StringField, so the term must equal the whole stored value.
        indexWriter.updateDocument(new Term("author", "张三"), document);
        indexWriter.commit();
    }
}
八:多字段查找
/** * 可以从多个字段中查找 * @throws IOException * @throws ParseException */ @Test public void testMultiField() throws IOException, ParseException { String indexPath = "/Users/zx/Documents/dev/lucene/index"; Analyzer analyzer = new IKAnalyzer(true); DirectoryReader directoryReader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath))); IndexSearcher indexSearcher = new IndexSearcher(directoryReader); //多字段数组 String[] fields = {"title", "content"}; //多字段的查询转换器 MultiFieldQueryParser queryParser = new MultiFieldQueryParser(fields, analyzer); Query query = queryParser.parse("三"); TopDocs topDocs = indexSearcher.search(query, 10); ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (ScoreDoc scoreDoc : scoreDocs) { int doc = scoreDoc.doc; Document document = indexSearcher.doc(doc); Article article = Article.parseArticle(document); System.out.println(article); } directoryReader.close(); }
九:查找全部数据 (MatchAllDocsQuery)
/**
 * Matches every document in the index and prints the first 10 as articles.
 *
 * @throws IOException    if the index cannot be opened or read
 * @throws ParseException kept in the signature for callers; not thrown by this body
 */
@Test
public void testMatchAll() throws IOException, ParseException {
    String indexPath = "/Users/zx/Documents/dev/lucene/index";

    // try-with-resources releases the reader and the directory even if the search fails.
    try (FSDirectory fsDirectory = FSDirectory.open(Paths.get(indexPath));
         DirectoryReader directoryReader = DirectoryReader.open(fsDirectory)) {
        IndexSearcher indexSearcher = new IndexSearcher(directoryReader);

        Query query = new MatchAllDocsQuery();
        TopDocs topDocs = indexSearcher.search(query, 10);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document document = indexSearcher.doc(scoreDoc.doc);
            Article article = Article.parseArticle(document);
            System.out.println(article);
        }
    }
}
十:组合查询 布尔查询(组合多个查询条件)
/** * 布尔查询,可以组合多个查询条件 * @throws Exception */ @Test public void testBooleanQuery() throws Exception { String indexPath = "/Users/zx/Documents/dev/lucene/index"; DirectoryReader directoryReader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath))); IndexSearcher indexSearcher = new IndexSearcher(directoryReader); Query query1 = new TermQuery(new Term("title", "三")); Query query2 = new TermQuery(new Term("content", "a")); BooleanClause bc1 = new BooleanClause(query1, BooleanClause.Occur.MUST);//必须满足 BooleanClause bc2 = new BooleanClause(query2, BooleanClause.Occur.MUST_NOT);//必须不满足 BooleanQuery boolQuery = new BooleanQuery.Builder().add(bc1).add(bc2).build(); System.out.println(boolQuery); TopDocs topDocs = indexSearcher.search(boolQuery, 10); ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (ScoreDoc scoreDoc : scoreDocs) { int doc = scoreDoc.doc; Document document = indexSearcher.doc(doc); Article article = Article.parseArticle(document); System.out.println(article); } directoryReader.close(); }
十一:非连续范围查找 (相当于in or)
@Test public void testQueryParser() throws Exception { String indexPath = "/Users/zx/Documents/dev/lucene/index"; DirectoryReader directoryReader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath))); IndexSearcher indexSearcher = new IndexSearcher(directoryReader); //创建一个QueryParser对象。参数1:默认搜索域 参数2:分析器对象。 QueryParser queryParser = new QueryParser("title", new IKAnalyzer(true)); //Query query = queryParser.parse("数据"); Query query = queryParser.parse("title:学好 OR title:学习"); System.out.println(query); TopDocs topDocs = indexSearcher.search(query, 10); ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (ScoreDoc scoreDoc : scoreDocs) { int doc = scoreDoc.doc; Document document = indexSearcher.doc(doc); Article article = Article.parseArticle(document); System.out.println(article); } directoryReader.close(); }
十二:连续范围查询(相当于>= ,<=,两端都包含)
/**
 * Finds the articles whose {@code id} lies in the range 107..108 and prints them.
 *
 * @throws Exception if the index cannot be opened or searched
 */
@Test
public void testRangeQuery() throws Exception {
    String indexPath = "/Users/zx/Documents/dev/lucene/index";

    // try-with-resources releases the reader and the directory even if the search fails.
    try (FSDirectory fsDirectory = FSDirectory.open(Paths.get(indexPath));
         DirectoryReader directoryReader = DirectoryReader.open(fsDirectory)) {
        IndexSearcher indexSearcher = new IndexSearcher(directoryReader);

        // Range over the indexed LongPoint "id"; both bounds are inclusive.
        Query query = LongPoint.newRangeQuery("id", 107L, 108L);
        System.out.println(query);

        TopDocs topDocs = indexSearcher.search(query, 10);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document document = indexSearcher.doc(scoreDoc.doc);
            Article article = Article.parseArticle(document);
            System.out.println(article);
        }
    }
}
作笔记用!
posted on 2018-08-10 17:54 CodeArtist 阅读(433) 评论(0) 收藏 举报
浙公网安备 33010602011771号