Lucene创建索引与搜索索引试手
由于仿写的源码的版本是Lucene2.1.0,而我用的Lucene已经是4.5.0了,所以像创建IndexWriter、IndexSearcher这样的地方,源码里的写法已经不能用了,只好自己查api摸索,要是有个老师在旁边指导该多好。(注:下面的代码使用了FSDirectory.open(Path)这样的写法,需要Lucene 5.0及以上版本才能编译。)
首先我创建的是中文的索引。
CJKAnalyzer是:对中文汉字,每两个字作为一个词条
StandardAnalyzer是:单个汉字作为一个词条
所以如果要查询像“大禹”这样两个字的词条,就用CJKAnalyzer;查询像“水”这样单个字的词条,则需要改用StandardAnalyzer。注意:建立索引和搜索时必须使用同一种Analyzer,否则查询的词条和索引里的词条对不上,就搜不到结果。我在这里纠结了很久,不知道哪里错了。
还有就是StringField和TextField的区别。api的解释分别是:
TextField:A field that is indexed and tokenized, without term vectors. For example this would be used on a 'body' field, that contains the bulk of a document's text.
StringField:A field that is indexed but not tokenized: the entire String value is indexed as a single token. For example this might be used for a 'country' field or an 'id' field, or any field that you intend to use for sorting or access through the field cache.
现在看看也没很多错的地方,但是写了仨小时。期间各种查api啊,还是那句话,有个老师指点一下的话,我就能少走很多弯路,节省很多时间了。唉。。。
package org.apache.lucene;
import java.awt.BorderLayout;
import java.awt.Container;
import java.awt.GridLayout;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.Date;
import javax.swing.JButton;
import javax.swing.JFileChooser;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTextArea;
import javax.swing.JTextField;
import javax.swing.SwingUtilities;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import com.wb.tool.FileList;
import com.wb.tool.FileText;
/**
 * Small Swing front end that builds a Lucene index over the HTML files found
 * under a user-selected directory.
 *
 * <p>The window offers two directory pickers (source files, index location), a
 * button that starts indexing, and a text area that lists every file added.
 * Indexing runs synchronously on the thread that invokes the button listener.
 */
public class LuceneIndexer {
    /** Path of the directory whose files are indexed. */
    private JTextField jtfa;
    /** Opens the chooser for the source directory. */
    private JButton jba;
    /** Path of the directory that receives the index. */
    private JTextField jtfb;
    /** Opens the chooser for the index directory. */
    private JButton jbb;
    /** Starts indexing. */
    private JButton jbc;
    /** Progress log; static so the nested tool class can write to it. */
    private static JTextArea jta;

    /**
     * Builds the window and shows it. Must be called on the event dispatch thread.
     */
    private void createAndShowGUI() {
        // The default (Java) look and feel is used; call UIManager.setLookAndFeel(...)
        // before creating the frame to switch to another one.
        JFrame.setDefaultLookAndFeelDecorated(true);
        JFrame frame = new JFrame("TEST");
        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);

        final JFileChooser fc = new JFileChooser();
        fc.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);

        Container con = frame.getContentPane();
        con.setLayout(new BorderLayout());

        JPanel jpup = new JPanel();
        jpup.setLayout(new GridLayout(3, 2));

        jtfa = new JTextField(30);
        jba = new JButton("选择被索引的文件存放路径");
        jba.addActionListener(directoryPicker(fc, jtfa));

        jtfb = new JTextField(30);
        // Assign the field (the original declared a shadowing local, leaving the field null).
        jbb = new JButton("选择索引的存放路径");
        jbb.addActionListener(directoryPicker(fc, jtfb));

        // Empty label that only fills the left cell of the last grid row.
        JLabel jl = new JLabel("");

        jbc = new JButton("建立索引");
        jbc.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                try {
                    LuceneIndexerTool.index(jtfa.getText(), jtfb.getText());
                } catch (Exception ee) {
                    // Keep the full trace for diagnosis; the dialog only tells the user it failed.
                    ee.printStackTrace();
                    jbc.setEnabled(true);
                    JOptionPane.showMessageDialog(null, "索引创建失败!");
                }
            }
        });

        jpup.add(jtfa);
        jpup.add(jba);
        jpup.add(jtfb);
        jpup.add(jbb);
        jpup.add(jl);
        jpup.add(jbc);

        jta = new JTextArea(10, 60);
        JScrollPane jsp = new JScrollPane(jta);

        con.add(jpup, BorderLayout.NORTH);
        con.add(jsp, BorderLayout.CENTER);

        // pack() sizes the frame from its contents, so no explicit setSize is needed.
        frame.pack();
        frame.setVisible(true);
    }

    /**
     * Creates a listener that lets the user pick a directory and copies the
     * chosen path into {@code target}.
     *
     * @param chooser the chooser to display
     * @param target  the text field that receives the selected path
     * @return the listener to attach to a "browse" button
     */
    private ActionListener directoryPicker(final JFileChooser chooser, final JTextField target) {
        return new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                if (chooser.showOpenDialog(null) == JFileChooser.APPROVE_OPTION) {
                    target.setText(chooser.getSelectedFile().getPath());
                    jbc.setEnabled(true);
                }
            }
        };
    }

    /**
     * Launches the indexer window on the event dispatch thread.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        SwingUtilities.invokeLater(new Runnable() {
            @Override
            public void run() {
                new LuceneIndexer().createAndShowGUI();
            }
        });
    }

    /** Indexing logic, kept separate from the Swing wiring. */
    static class LuceneIndexerTool {
        /** Number of leading characters of the text stored as the "digest" field. */
        private static final int DIGEST_LENGTH = 200;

        /**
         * Indexes every {@code .htm}/{@code .html} file returned by
         * {@code FileList.getFiles(filePath)} into the index at {@code indexPath}.
         *
         * <p>Each document gets the stored fields {@code fileName}, {@code uri},
         * {@code date} (last-modified, {@code yyyy-MM-dd}), {@code size},
         * {@code text} and {@code digest} (first 200 characters of the text).
         *
         * @param filePath  directory containing the files to index
         * @param indexPath directory in which the index is written
         * @throws IOException              if the index cannot be opened or written
         * @throws IllegalArgumentException if either path is null or blank
         */
        public static void index(String filePath, String indexPath) throws IOException {
            // An empty path would resolve to the working directory and index into it by accident.
            if (filePath == null || filePath.trim().isEmpty()
                    || indexPath == null || indexPath.trim().isEmpty()) {
                throw new IllegalArgumentException("filePath and indexPath must both be non-empty");
            }

            Path path = Paths.get(indexPath);
            // "MM" is month; the original "mm" silently wrote the minute instead.
            SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
            StringBuilder log = new StringBuilder();

            // try-with-resources releases the index write lock even when a file fails to index.
            try (Directory dir = FSDirectory.open(path);
                 Analyzer analyzer = new StandardAnalyzer();
                 IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
                String[] files = FileList.getFiles(filePath);
                for (String name : files) {
                    File file = new File(name);
                    String ext = getExt(file);
                    if (!ext.equalsIgnoreCase("htm") && !ext.equalsIgnoreCase("html")) {
                        continue;
                    }

                    // NOTE(review): assumes FileText.getText never returns null - confirm.
                    String text = FileText.getText(file);
                    String digest = text.length() > DIGEST_LENGTH
                            ? text.substring(0, DIGEST_LENGTH)
                            : text;

                    Document doc = new Document();
                    doc.add(new TextField("fileName", file.getName(), Field.Store.YES));
                    doc.add(new TextField("uri", file.getPath(), Field.Store.YES));
                    doc.add(new TextField("date", sdf.format(new Date(file.lastModified())),
                            Field.Store.YES));
                    doc.add(new TextField("size", formatSize(file.length()), Field.Store.YES));
                    doc.add(new TextField("text", text, Field.Store.YES));
                    doc.add(new TextField("digest", digest, Field.Store.YES));
                    writer.addDocument(doc);

                    log.append("已经加入索引:").append(file).append("\n");
                }
            } finally {
                // Report whatever was added, even if a later file aborted the run.
                if (jta != null && log.length() > 0) {
                    jta.append(log.toString());
                }
            }
        }

        /**
         * Renders a file length as whole kilobytes ("12K") above 1024 bytes,
         * otherwise as a byte count ("512Bytes").
         */
        private static String formatSize(long length) {
            if (length > 1024) {
                return (length / 1024) + "K";
            }
            return length + "Bytes";
        }

        /**
         * Returns the text after the last '.' in the file name.
         *
         * @param file the file to inspect
         * @return the extension without the dot, or an empty string when the
         *         name contains no dot
         */
        public static String getExt(File file) {
            String name = file.getName();
            int dot = name.lastIndexOf('.');
            // Without this guard a dot-less name such as "html" would be returned whole.
            return dot < 0 ? "" : name.substring(dot + 1);
        }
    }
}
package org.apache.lucene;
import java.awt.BorderLayout;
import java.awt.Container;
import java.awt.FlowLayout;
import java.awt.GridLayout;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.file.Path;
import java.nio.file.Paths;
import javax.swing.JButton;
import javax.swing.JFileChooser;
import javax.swing.JFrame;
import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTextArea;
import javax.swing.JTextField;
import javax.swing.SwingUtilities;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Small Swing front end that runs a query against a Lucene index and lets the
 * user export the displayed results to a file.
 *
 * <p>The query is parsed against the {@code text} field with a
 * {@code StandardAnalyzer}; at most ten hits are shown per search, and each
 * search appends its hits to the result area.
 */
public class LuceneSearcher {
    /** Path of the index directory. */
    private JTextField jtfa;
    /** Opens the chooser for the index directory. */
    private JButton jba;
    /** Query text. */
    private JTextField jtfb;
    /** Runs the search. */
    private JButton jbb;
    /** Declared by the original class but never assigned or used in this file. */
    private JButton jbc;
    /** Search results; static so the nested tool class can write to it. */
    private static JTextArea jta;
    /** Path of the export file. */
    private JTextField jtfc;
    /** Opens the chooser for the export file. */
    private JButton jbd;
    /** Writes the result area to the export file. */
    private JButton jbe;

    /**
     * Builds the window and shows it. Must be called on the event dispatch thread.
     */
    private void createAndShowGUI() {
        // The default (Java) look and feel is used; call UIManager.setLookAndFeel(...)
        // before creating the frame to switch to another one.
        JFrame.setDefaultLookAndFeelDecorated(true);
        JFrame frame = new JFrame("Tianen Searcher! yutianen@163.com");
        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);

        // Two choosers: the original shared one instance and then switched it to
        // FILES_AND_DIRECTORIES, which also un-restricted the index-directory picker.
        final JFileChooser indexChooser = new JFileChooser();
        indexChooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
        final JFileChooser exportChooser = new JFileChooser();
        exportChooser.setFileSelectionMode(JFileChooser.FILES_AND_DIRECTORIES);

        Container con = frame.getContentPane();
        con.setLayout(new BorderLayout());

        JPanel jpup = new JPanel();
        jpup.setLayout(new GridLayout(2, 2));

        jtfa = new JTextField(30);
        jba = new JButton("选择索引的存放路径");
        jba.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                if (indexChooser.showOpenDialog(null) == JFileChooser.APPROVE_OPTION) {
                    jtfa.setText(indexChooser.getSelectedFile().getPath());
                }
            }
        });

        jtfb = new JTextField(30);
        // Assign the field (the original declared a shadowing local, leaving the field null).
        jbb = new JButton("搜索");
        jbb.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                try {
                    new LuceneSearcherTool().search(jtfb.getText(), jtfa.getText());
                } catch (Exception ex) {
                    // Keep the cause visible; the dialog alone gives no hint of what went wrong.
                    ex.printStackTrace();
                    JOptionPane.showMessageDialog(null, "搜索失败!", "提示",
                            JOptionPane.ERROR_MESSAGE);
                }
            }
        });

        jpup.add(jtfa);
        jpup.add(jba);
        jpup.add(jtfb);
        jpup.add(jbb);

        jta = new JTextArea(10, 30);
        JScrollPane jsp = new JScrollPane(jta);

        JPanel jpdown = new JPanel();
        jpdown.setLayout(new FlowLayout());

        jtfc = new JTextField(35);
        jbd = new JButton("设定导出路径");
        jbd.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                // A save dialog, since the export target usually does not exist yet.
                if (exportChooser.showSaveDialog(null) == JFileChooser.APPROVE_OPTION) {
                    jtfc.setText(exportChooser.getSelectedFile().getPath());
                }
            }
        });

        jbe = new JButton("导出搜索结果");
        jbe.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                try {
                    exportResults(new File(jtfc.getText()));
                    JOptionPane.showMessageDialog(null, "写入文件成功!", "提示",
                            JOptionPane.INFORMATION_MESSAGE);
                } catch (IOException ioe) {
                    ioe.printStackTrace();
                    JOptionPane.showMessageDialog(null, "写入文件失败!", "提示",
                            JOptionPane.ERROR_MESSAGE);
                }
            }
        });

        jpdown.add(jtfc);
        jpdown.add(jbd);
        jpdown.add(jbe);

        con.add(jpup, BorderLayout.NORTH);
        con.add(jsp, BorderLayout.CENTER);
        con.add(jpdown, BorderLayout.SOUTH);

        // pack() sizes the frame from its contents, so no explicit setSize is needed.
        frame.pack();
        frame.setVisible(true);
    }

    /**
     * Writes the current contents of the result area to {@code target},
     * replacing any existing file content.
     *
     * @param target the file to write
     * @throws IOException if the file cannot be opened or the write fails
     */
    private static void exportResults(File target) throws IOException {
        try (PrintWriter pw = new PrintWriter(new FileWriter(target))) {
            pw.write(jta.getText());
            // PrintWriter swallows write errors; checkError() flushes and reports them.
            if (pw.checkError()) {
                throw new IOException("Failed to write search results to " + target);
            }
        }
    }

    /**
     * Launches the searcher window on the event dispatch thread.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        SwingUtilities.invokeLater(new Runnable() {
            @Override
            public void run() {
                new LuceneSearcher().createAndShowGUI();
            }
        });
    }

    /** Search logic, kept separate from the Swing wiring. */
    static class LuceneSearcherTool {
        /** Maximum number of hits requested per search. */
        private static final int MAX_HITS = 10;

        /**
         * Searches the {@code text} field of the index at {@code indexPath} and
         * appends up to ten hits (URI, file name, date, digest) to the result area.
         *
         * @param phrase    query in Lucene classic query-parser syntax
         * @param indexPath directory holding the index
         * @throws IOException    if the index cannot be opened or read
         * @throws ParseException if {@code phrase} is not a valid query
         */
        public void search(String phrase, String indexPath) throws IOException, ParseException {
            Path path = Paths.get(indexPath);
            StringBuilder results = new StringBuilder();

            // try-with-resources closes reader and directory even when parsing or searching fails.
            try (Directory dir = FSDirectory.open(path);
                 IndexReader reader = DirectoryReader.open(dir);
                 Analyzer analyzer = new StandardAnalyzer()) {
                IndexSearcher searcher = new IndexSearcher(reader);
                Query query = new QueryParser("text", analyzer).parse(phrase);
                TopDocs hits = searcher.search(query, MAX_HITS);

                for (ScoreDoc hit : hits.scoreDocs) {
                    Document doc = searcher.doc(hit.doc);
                    // Document.get yields null for a missing stored field instead of
                    // failing like the original getField(...).stringValue() chain.
                    results.append("URI:").append(doc.get("uri")).append("\n");
                    results.append("filename:").append(doc.get("fileName")).append("\n");
                    results.append("date:").append(doc.get("date")).append("\n");
                    results.append("digest:").append(doc.get("digest")).append("\n");
                    results.append("------------------------------------\n");
                }
            }

            if (jta != null) {
                jta.append(results.toString());
            }
        }
    }
}

浙公网安备 33010602011771号