Rss订阅

首页 »编程综合 » lucene:全文检索Lucene的一次尝试 »正文

lucene:全文检索Lucene的一次尝试

来源: 发布时间:星期四, 2009年1月15日浏览:61次评论:0

　　由于系统搜索速度一直不理想，今天决定用 Lucene 进行索引，然后全表检索。

　　核心代码如下

package net.java2000.forum.util;

import java.io.IOException;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import javax.sql.DataSource;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.WildcardQuery;
publicLucene{ 　
　privatefinalStringindexPath="d:/index/www.java2000.net"; 　
　/**
　*@paramargs
　*@throwsException
　*/　
　publicvoid(Stringargs)throwsException{ 　
　　//create; 　
　　Lucenel=Lucene; 　
　　.out.prln("模糊搜索n------------------------------"); 　
　　l.seacherWildcard("域名"); 　
　　.out.prln("索引搜索n------------------------------"); 　
　　l.seacherIndex("域名AND系统"); 　
　} 　
　publicvoidrebuildAll{ 　
　　synchronized(indexPath){ 　
　　　Lucenel=Lucene; 　
　　　DataSourceds=(DataSource)Factory.getBean("dataSource"); 　
　　　Connectioncon=null; 　
　　　Statementstat=null; 　
　　　ResultSetrs=null; 　
　　　try{ 　
　　　　con=ds.getConnection; 　
　　　　stat=con.createStatement; 　
　　　　rs=stat.executeQuery("selectid,subject,contentfromt_post"); 　
　　　　(rs!=null){ 　
　　　　　l.Index(rs); 　
　　　　} 　
　　　}catch(Exceptionex){ 　
　　　　ex.prStackTrace; 　
　　　}finally{ 　
　　　　(rs!=null){ 　
　　　　　try{ 　
　　　　　　rs.close; 　
　　　　　}catch(Exceptionex){} 　
　　　　} 　
　　　　(stat!=null){ 　
　　　　　try{ 　
　　　　　　stat.close; 　
　　　　　}catch(Exceptionex){} 　
　　　　} 　
　　　　(con!=null){ 　
　　　　　try{ 　
　　　　　　con.close; 　
　　　　　}catch(Exceptionex){} 　
　　　　} 　
　　　} 　
　　} 　
　} 　
　publicsynchronizedAnalyzergetAnalyzer{ 　
　　StandardAnalyzer; 　
　} 　
　privatesynchronizedvoidIndex(ResultSetrs){//通过结果集就可以获得数据源了 　
　　try{ 　
　　　IndexWriterwriter=IndexWriter(indexPath,getAnalyzer,true); 　
　　　Datestart=Date; 　
　　　while(rs.next){ 　
　　　　Documentdoc=Document;//个文档相当和表条记录 　
　　　　doc.add(Field("id",rs.getString("id"),Field.Store.YES,Field.Index.UN_TOKENIZED));//字段id放是数据库表中idlucene条记录个字段下数据可以放多个值这点和数据库表区别 　
　　　　doc.add(Field("subject",rs.getString("subject"),Field.Store.YES,Field.Index.TOKENIZED)); 　
　　　　doc.add(Field("content",rs.getString("content"),Field.Store.YES,Field.Index.TOKENIZED)); 　
　　　　writer.addDocument(doc); 　
　　　} 　
　　　writer.optimize;//优化 　
　　　writer.close;//定要关闭否则不能把内存中数据写到文件 　
　　　Dateend=Date; 　
　　　.out.prln("重建索引成功！！！！"+"用时"+(end.getTime-start.getTime)+"毫秒"); 　
　　}catch(IOExceptione){ 　
　　　.out.prln(e); 　
　　}catch(SQLExceptione){ 　
　　　.out.prln(e); 　
　　} 　
　} 　
　publicvoidIndexSingle(longid,Stringsubject,Stringcontent){//通过结果集就可以获得数据源了 　
　　synchronized(indexPath){ 　
　　　try{ 　
　　　　IndexWriterwriter=IndexWriter(indexPath,getAnalyzer,false); 　
　　　　Datestart=Date; 　
　　　　Documentdoc=Document;//个文档相当和表条记录 　
　　　　doc.add(Field("id",Long.toString(id),Field.Store.YES,Field.Index.UN_TOKENIZED));//字段id放是数据库表中idlucene条记录个字段下数据可以放多个值这点和数据库表区别 　
　　　　doc.add(Field("subject",subject,Field.Store.YES,Field.Index.TOKENIZED)); 　
　　　　doc.add(Field("content",content,Field.Store.YES,Field.Index.TOKENIZED)); 　
　　　　writer.addDocument(doc); 　
//　　　　writer.optimize;//优化 　
　　　　writer.close;//定要关闭否则不能把内存中数据写到文件 　
　　　　Dateend=Date; 　
　　　　.out.prln("索引建立成功！！！！"+"用时"+(end.getTime-start.getTime)+"毫秒"); 　
　　　}catch(IOExceptione){ 　
　　　　.out.prln(e); 　
　　　} 　
　　} 　
　} 　
　publicHitsseacherWildcard(StringqueryString){//根据关键字搜索 　
　　Hitshits=null; 　
　　try{ 　
　　　Queryquery=WildcardQuery(Term("subject","*"+queryString+"*"));//模糊查询下 　
　　　Searchersearcher=IndexSearcher(indexPath); 　
　　　hits=searcher.search(query); 　
　　　for(i=0;i<hits.length;i){ 　
　　　　Documentdoc=hits.doc(i); 　
　　　} 　
　　　searcher.close; 　
　　}catch(Exceptione){ 　
　　　.out.pr(e); 　
　　} 　
　　hits; 　
　} 　
　publicList<Long>seacherIndex(StringqueryString){//根据关键字搜索 　
　　.out.prln(queryString); 　
　　Hitshits=null; 　
　　try{ 　
　　　IndexSearcherisearcher=IndexSearcher(indexPath); 　
　　　//Parseasimplequerythatsearchesfor"text": 　
　　　MultiFieldQueryParserparser=MultiFieldQueryParser(String{"subject","content"},getAnalyzer); 　
//　　　QueryParserparser=QueryParser("subject",getAnalyzer); 　
　　　Queryquery=parser.parse(queryString); 　
　　　hits=isearcher.search(query); 　
　　　List<Long>rtn=ArrayList<Long>; 　
　　　for(i=0;i<hits.length;i){ 　
　　　　Documentdoc=hits.doc(i); 　
　　　　rtn.add(Long.parseLong(doc.get("id").trim)); 　
　　　} 　
　　　isearcher.close; 　
　　　rtn; 　
　　}catch(Exceptione){ 　
　　　.out.pr(e); 　
　　　null; 　
　　} 　
　} 　
}

　　介绍说明

　　1 StandardAnalyzer 支持中文

所以不用再找其它

了

　　2 Index 方法里

IndexWriter writer = new IndexWriter(indexPath, getAnalyzer(), true);

　　最后面这个 true 是重新建立索引；在第一次运行时，必须重新索引所有数据。

　　Field.Index.UN_TOKENIZED 是不进行单词拆分

　　Field.Index.TOKENIZED 是进行拆分

　　3 seacherWildcard 里面可以进行模糊搜索

　　4 seacherIndex 里面，MultiFieldQueryParser 支持多个字段的联合查询。

　　5 我测试的结果：我的网站有 11M 的索引（应该算很一般的数据量，6000 多个帖子），检索竟然几乎不需要时间，或者说在 10 毫秒以内。

　　6 l.seacherIndex("域名 AND 系统");

　　AND、OR、NOT 用在单词之间进行组合查询；+ 代表 AND，- 代表 NOT。

　　7 每次有新的数据时，比如发言或者回复，则调用

public void IndexSingle(long id, String subject, String content)

　　进行索引更新。注意里面的参数是 false：

IndexWriter writer = new IndexWriter(indexPath, getAnalyzer(), false);

　　就这些。第一次使用，效果很好。

专注于互联网--专注于架构

首页 »编程综合 » lucene:全文检索Lucene的一次尝试 »正文

lucene:全文检索Lucene的一次尝试

相关文章

读者评论

发表评论

热门标签

精华推荐

最新标签

Dig排行

阅读排行

最新文章