以前搜索数据库内容时,我们经常使用带 LIKE '%…%' 的 SQL 语句;当数据量大、又涉及多表查询时,速度实在让人难以忍受。使用 Lucene 3.5 就可以解决这个恼人的问题。
本例用 Lucene 3.5 搜索 photo 表的 title、user_name、tag_name、descr 四个字段的内容。
用一个例题来说明更直观;此例题能搜索中文分词;
(需要mysql5的jdbc包和lucene3.5的包):
1、数据库我用mysql5;建一个photo表;数据库名是test。
--
-- Table structure for `photo`
--
CREATE TABLE IF NOT EXISTS `photo` (
  `photo_id`  int(10) unsigned NOT NULL AUTO_INCREMENT,
  `title`     varchar(11) DEFAULT NULL,
  `descr`     text,
  `user_name` varchar(11) DEFAULT NULL,
  `tag_name`  varchar(11) DEFAULT NULL,
  PRIMARY KEY (`photo_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=REDUNDANT AUTO_INCREMENT=5;
--
-- Sample data for table `photo`
--
INSERT INTO `photo` (`photo_id`, `title`, `descr`, `user_name`, `tag_name`) VALUES
  (1, '美女', '美女', '好人5', '美女'),
  (2, '美女', '美女', '美女', '美女'),
  (3, 'hagh', '说的就是我的是', '', NULL),
  (4, 'hagh', '说的就是我的是', ' ', NULL);
2、java文件有4个:
文件Photo.java是数据库的photo表的操作文件;内容如下:
package test;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

/**
 * One row of the {@code photo} table, together with a loader that reads
 * every row of that table through a JDBC connection.
 */
public class Photo {
    private long photoId;
    private String title;
    private String description;
    private String userName;
    private String tag;

    public String getDescription() {
        return description;
    }

    public void setDescription(String description) {
        this.description = description;
    }

    public long getPhotoId() {
        return photoId;
    }

    public void setPhotoId(long photoId) {
        this.photoId = photoId;
    }

    public String getTag() {
        return tag;
    }

    public void setTag(String tag) {
        this.tag = tag;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getUserName() {
        return userName;
    }

    public void setUserName(String userName) {
        this.userName = userName;
    }

    /**
     * Loads every row of the {@code photo} table.
     *
     * <p>The connection is used but not closed; closing it stays the
     * caller's responsibility.
     *
     * @param con open JDBC connection to the database holding {@code photo}
     * @return all rows in result-set order; empty when the table is empty
     * @throws IllegalArgumentException if {@code con} is {@code null}
     * @throws SQLException if preparing, executing or reading the query fails
     */
    public static Photo[] loadPhotos(Connection con) throws Exception {
        if (con == null) {
            throw new IllegalArgumentException("con must not be null");
        }
        String sql = "select photo_id,title,descr,user_name,tag_name from photo";
        List<Photo> list = new ArrayList<Photo>();
        PreparedStatement pstm = null;
        ResultSet rs = null;
        try {
            pstm = con.prepareStatement(sql);
            rs = pstm.executeQuery();
            while (rs.next()) {
                Photo photo = new Photo();
                photo.setPhotoId(rs.getLong(1));
                photo.setTitle(rs.getString(2));
                photo.setDescription(rs.getString(3));
                photo.setUserName(rs.getString(4));
                photo.setTag(rs.getString(5));
                list.add(photo);
            }
            // No catch here on purpose: a failed query must reach the caller
            // instead of being reported as a partial or empty result.
        } finally {
            // Nested so the statement is still closed when closing the
            // result set throws.
            try {
                if (rs != null) {
                    rs.close();
                }
            } finally {
                if (pstm != null) {
                    pstm.close();
                }
            }
        }
        return list.toArray(new Photo[list.size()]);
    }
}
文件IndexerFile.java是把数据库的内容备份成索引文件到磁盘中去;
内容如下:
package test;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Writes the rows loaded from the {@code photo} table into an on-disk
 * Lucene index.
 */
public class IndexerFile {

    /**
     * Rebuilds the index in {@code indexDir} from the given photos.
     *
     * <p>The writer is opened with {@link OpenMode#CREATE}. Each photo
     * becomes one document: {@code photoId} is stored but not indexed, and
     * {@code title}, {@code description}, {@code userName} and {@code tag}
     * are stored and analyzed when they are non-null and non-empty.
     *
     * @param indexDir directory that holds the index files
     * @param list photos to index
     * @return the writer's {@code maxDoc()} after all documents were added
     * @throws IOException if the index cannot be opened or written
     */
    public static int indexFile(String indexDir, Photo[] list) throws IOException {
        IndexWriterConfig conf = new IndexWriterConfig(
                Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35));
        conf.setOpenMode(OpenMode.CREATE);
        IndexWriter writer = new IndexWriter(FSDirectory.open(new File(indexDir)), conf);
        try {
            for (int i = 0; i < list.length; i++) {
                Photo photo = list[i];
                Document doc = new Document();
                doc.add(new Field("photoId", String.valueOf(photo.getPhotoId()),
                        Field.Store.YES, Field.Index.NO));
                addIfPresent(doc, "title", photo.getTitle());
                addIfPresent(doc, "description", photo.getDescription());
                addIfPresent(doc, "userName", photo.getUserName());
                addIfPresent(doc, "tag", photo.getTag());
                writer.addDocument(doc);
            }
            int numIndexed = writer.maxDoc();
            writer.forceMerge(1);
            return numIndexed;
        } finally {
            // Close on every path so a failure while adding or merging does
            // not leave the writer (and its lock on the directory) open.
            writer.close();
        }
    }

    /** Adds a stored, analyzed field unless the value is null or empty. */
    private static void addIfPresent(Document doc, String name, String value) {
        if (value != null && value.length() > 0) {
            doc.add(new Field(name, value, Field.Store.YES, Field.Index.ANALYZED));
        }
    }
}
文件SearcherFile.java是搜索磁盘索引文件内容的;
内容如下:
package test;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;

/**
 * Runs a multi-field query against the photo index and prints the hits.
 */
public class SearcherFile {

    /**
     * Searches the fields {@code title}, {@code description}, {@code tag}
     * and {@code userName} and prints the stored {@code photoId} of each hit.
     *
     * <p>The query is built with
     * {@code MultiFieldQueryParser.parse(Version, String[], String[], Analyzer)};
     * per the Lucene 3.5 Javadoc that overload pairs {@code q[i]} with the
     * i-th field and requires both arrays to have the same length, so
     * {@code q} is expected to hold four entries.
     *
     * @param searcher searcher opened on the photo index
     * @param q query strings, one per searched field
     * @throws IOException if reading the index fails
     * @throws ParseException if a query string cannot be parsed
     */
    public static void search(IndexSearcher searcher, String[] q)
            throws IOException, ParseException {
        Analyzer standardAnalyzer = new StandardAnalyzer(Version.LUCENE_35);
        String[] searchedFields = {"title", "description", "tag", "userName"};
        Query multiFieldQuery =
                MultiFieldQueryParser.parse(Version.LUCENE_35, q, searchedFields, standardAnalyzer);

        // 100 is the maximum number of hits requested from the searcher.
        TopDocs results = searcher.search(multiFieldQuery, 100);
        ScoreDoc[] matches = results.scoreDocs;
        System.out.println("共有" + searcher.maxDoc() + "条索引,命中" + matches.length + "条");

        for (ScoreDoc match : matches) {
            Document stored = searcher.doc(match.doc);
            System.out.println("photoId===" + stored.get("photoId"));
        }
    }
}
文件TestDb.java是操作的主文件;
内容如下:
package test; import java.io.File; import java.io.IOException; import java.sql.Connection; import java.sql.SQLException; import java.util.Date;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.store.FSDirectory; public class TestDb { public final static String indexDir ="E:\\TestLucene"; private static Connection getConnection() { Connection conn = null; String url = "jdbc:mysql://localhost:3306/test"; String userName = "root"; String password = "root"; try { Class.forName("com.mysql.jdbc.Driver"); conn = java.sql.DriverManager .getConnection(url, userName, password); } catch (Exception e) { e.printStackTrace(); System.out.println("Error Trace in getConnection() : " + e.getMessage()); } return conn; } public static void main(String[] args) throws IOException, ParseException, SQLException { index();//做索引 IndexSearcher searcher=null; try{ IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexDir)),false); searcher = new IndexSearcher(reader); search(searcher);//搜索 }catch(Exception e){ e.printStackTrace(); }finally{ if(searcher!=null) searcher.close(); } } public static void search(IndexSearcher searcher) throws IOException, ParseException{ //以下是搜索的关键词 String[] q = {"美女1","美女2","好人3","好人5"}; long start=new Date().getTime(); SearcherFile.search(searcher,q); long end=new Date().getTime(); System.out.println("花费时间:"+(double)(end-start)/1000+"秒"); } public static void index() throws SQLException{ Connection conn = null; try { conn = getConnection(); Photo[] list = Photo.loadPhotos(conn); IndexerFile.indexFile(indexDir,list); } catch (Exception e) { e.printStackTrace(); } finally { if (conn != null) { conn.close(); } } } }
二、下面是lucene3.5搜索txt文本文件
建一个 E:\TestLucene\fileS 文件夹,用来存放需要搜索的文件。
在该文件夹里面随便建三个txt文件,"1.txt","2.txt"和"3.txt"
其中1.txt的内容如下:
老周 北京人民 2009年
2.txt和3.txt也随便写些。
再建一个 E:\TestLucene\fileIndex 文件夹,用来存放索引文件。
java文件TestQueryFile:内容如下
package test;
import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.apache.lucene.document.Field; public class TestQueryFile { public static void main(String[] args) throws Exception { indexF(); String queryString = "北京"; Query query = null; IndexReader reader = IndexReader.open(FSDirectory.open(new File("E:\\TestLucene\\fileIndex")),true);//read-only IndexSearcher searcher = new IndexSearcher(reader); String fields = "body"; try { QueryParser qp = new QueryParser(Version.LUCENE_35, fields, new StandardAnalyzer(Version.LUCENE_35));//有变化的地方 query = qp.parse(queryString); } catch (ParseException e) { } if (searcher != null) { TopDocs topDocs = searcher.search(query, 100);//100是显示队列的Size ScoreDoc[] hits = topDocs.scoreDocs; System.out.println("共有" + searcher.maxDoc() + "条索引,命中" + hits.length + "条"); } } private static void indexF() throws Exception { File fileDir = new File("E:\\TestLucene\\fileS"); File indexDir = new File("E:\\TestLucene\\fileIndex"); IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)); |