java基于lucene创建数据库索引

package com.cn.ninemax;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

/**
 * Static JDBC helpers for the SQL Server test database accessed through the
 * jTDS driver: opening a connection, releasing resources, and ending a
 * transaction.
 *
 * <p>All helpers report {@link SQLException}s by printing the stack trace
 * instead of propagating them, so they are safe to call from {@code finally}
 * blocks.
 */
public class JDBCUtil {
 /** Fully qualified class name of the jTDS JDBC driver. */
 private static final String DRIVER_CLASS = "net.sourceforge.jtds.jdbc.Driver";
 /** JDBC URL of the database to connect to. */
 private static final String DB_URL =
   "jdbc:jtds:sqlserver://10.1.72.103:1433;DatabaseName=nstl_test";
 // NOTE(review): credentials are hard-coded; consider loading them from
 // configuration instead of source code.
 private static final String DB_USER = "sa";
 private static final String DB_PASSWORD = "sa";

 /**
  * Loads the jTDS driver and opens a new connection.
  *
  * @return a new connection, or {@code null} when the driver cannot be
  *         loaded or the connection attempt fails (the cause is printed to
  *         standard error); callers must check for {@code null}
  */
 public static Connection getConn() {
  Connection conn = null;
  try {
   Class.forName(DRIVER_CLASS).newInstance();
   conn = DriverManager.getConnection(DB_URL, DB_USER, DB_PASSWORD);
  } catch (ClassNotFoundException e) {
   e.printStackTrace();
  } catch (SQLException e) {
   e.printStackTrace();
  } catch (InstantiationException e) {
   e.printStackTrace();
  } catch (IllegalAccessException e) {
   e.printStackTrace();
  }
  return conn;
 }

 /**
  * Releases JDBC resources. Every argument may be {@code null}; each
  * resource is closed independently, so a failure while closing one does not
  * prevent the others from being closed.
  *
  * @param object optional resource closed first; it is closed only when it
  *               is a {@link ResultSet}, any other value is ignored
  * @param stmt   statement to close, may be {@code null}
  * @param conn   connection to close, may be {@code null}
  */
 public static void close(Object object, Statement stmt, Connection conn) {
  if (object instanceof ResultSet) {
   try {
    ((ResultSet) object).close();
   } catch (SQLException e) {
    e.printStackTrace();
   }
  }
  if (stmt != null) {
   try {
    stmt.close();
   } catch (SQLException e) {
    e.printStackTrace();
   }
  }
  if (conn != null) {
   try {
    conn.close();
   } catch (SQLException e) {
    e.printStackTrace();
   }
  }
 }

 /**
  * Rolls back the current transaction of {@code conn}.
  *
  * @param conn connection to roll back; {@code null} is ignored
  */
 public static void rollback(Connection conn) {
  if (conn == null) {
   return;
  }
  try {
   conn.rollback();
  } catch (SQLException e) {
   e.printStackTrace();
  }
 }

 /**
  * Commits the current transaction of {@code conn}.
  *
  * @param conn connection to commit; {@code null} is ignored
  */
 public static void commit(Connection conn) {
  if (conn == null) {
   return;
  }
  try {
   conn.commit();
  } catch (SQLException e) {
   e.printStackTrace();
  }
 }

 /**
  * Connectivity smoke test: prints the connection (or {@code null} on
  * failure) and releases it again.
  */
 public static void main(String[] args) {
  Connection conn = JDBCUtil.getConn();
  System.out.println(conn);
  close(null, null, conn);
 }
}

 

 

package com.cn.ninemax;

import java.io.File;
import java.io.IOException;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class IndexerDB {
 private String directory = "e://youyajie";

 public static void main(String[] args) throws IOException, SQLException {
  IndexerDB db = new IndexerDB();
  // db.SearDataByIndex();
  db.CreateDBIndex();
 }

 private void SearDataByIndex() throws IOException {
  File file = new File(directory);
  FSDirectory directory = FSDirectory.open(file);
  IndexReader reader = IndexReader.open(directory);
  // 简单索引
  IndexSearcher searcher = new IndexSearcher(reader);
  // Blood pyruvate curves of normal and obese subjects following the
  // ingestion of carbohydrate.
  Term t = new Term("ti", "obese");
  TermQuery quenry = new TermQuery(t);
  // 获得得分靠前的两个匹配记录
  ScoreDoc[] docs = searcher.search(quenry, 3).scoreDocs;
  for (int i = 0; i < docs.length; i++) {
   Date start = new Date();
   String ti = searcher.doc(docs[i].doc).get("ti");
   Date end = new Date();
   System.out.println(ti + (end.getTime() - start.getTime()) + "ms");
  }

 }

 private int pageSize = 10000;
 private int count = 0;

 public void CreateDBIndex() throws IOException, SQLException {
  Connection conn = JDBCUtil.getConn();
  Statement st = conn.createStatement();
  String sql = "select count(*) from dbo.pb18001949_test";
  ResultSet rs = st.executeQuery(sql);
  while (rs.next()) {
   count = rs.getInt(1);
  }
  int thread = count / pageSize;
  for (int i = 0; i < thread; i++) {
   sql = "select top ("
     + pageSize
     + ") ti,ab from(select row_number() over(order by id asc) as rownumber,id,ti,kw,ab from dbo.pb18001949_test) as tb where rownumber>("
     + (i * pageSize) + ")";
   rs = st.executeQuery(sql);
   while (rs.next()) {
    CreateFileIndex(rs.getString(1), rs.getString(2));
   }
   System.out.println("第" + (i + 1) + "批数据建立索引完成");
  }
  JDBCUtil.close(null, st, conn);
 }

 private void CreateFileIndex(String str1, String str2)
   throws CorruptIndexException, IOException {
  File file = new File(directory);
  FSDirectory directory = FSDirectory.open(file);
  // 用来创建索引
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36,
    new StandardAnalyzer(Version.LUCENE_36));
  IndexWriter writer = new IndexWriter(directory, conf);
  Document doc = new Document();
  Field f1 = null;
  if (str1.length() > 0) {
   f1 = new Field("ti", str1, Store.YES, Index.ANALYZED);
   doc.add(f1);
  }
  if (str2 != null && str2.length() > 0) {
   Field f2 = new Field("ab", str2, Store.YES, Index.ANALYZED);

   doc.add(f2);
  }
  writer.addDocument(doc);
  writer.close();
 }
}

主要就是这两个类。底层需要两个 jar 包：一个是用来连接数据库的 jTDS，另一个是 Lucene 的核心 jar 包。
posted @ 2013-01-31 17:46  6小贝  阅读(448)  评论(0)    收藏  举报