首頁新隨筆新文章聯系

2025年7月

日

一

二

三

四

五

六

29

30

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

1

2

3

4

5

6

7

8

9

blog是收集資料并且作為技術交流的平臺，發布一些本人常用資料或開發經驗，希望能和大家一起討論、進步。

訪問統計

留言簿(6)

我參與的團隊

牛虻(0/0)

隨筆檔案(8)

文章分類(149)

新聞分類(1)

鐵血軍事(1)

相冊

收藏夾(21)

友情鏈接

lpj的博客
三生石的博客
中文愛百科
莊陸的博客
狼的博客

我的鏈接

搜索

積分與排名

積分 - 151373
排名 - 409

閱讀排行榜

評論排行榜

lucene全文檢索應用示例及代碼簡析

使用Lucene實現全文檢索，主要有下面三個步驟：
　　1、建立索引庫：根據網站新聞信息庫中的已有的數據資料建立Lucene索引文件。
　　2、通過索引庫搜索：有了索引后，即可使用標準的詞法分析器或直接的詞法分析器進行全文檢索。
　　3、維護索引庫：網站新聞信息庫中的信息會不斷的變動，包括新增、修改及刪除等，這些信息的變動都需要進一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相關代碼!

一、索引管理(建立及維護)
　　索引管理類MyRssIndexManage主要實現根據網站信息庫中的數據建立索引，維護索引等。由于索引的過程需要消耗一定的時間，因此，索引管理類實現Runnable接口，使得我們可以在程序中開新線程來運行。

1

package com.easyjf.lucene;
2

import java.util.Date;
3

import java.util.List;
4

import org.apache.lucene.analysis.standard.StandardAnalyzer;
5

import org.apache.lucene.document.Document;
6

import org.apache.lucene.document.Field;
7

import org.apache.lucene.index.IndexReader;
8

import org.apache.lucene.index.IndexWriter;
9

import org.apache.lucene.queryParser.MultiFieldQueryParser;
10

import org.apache.lucene.queryParser.QueryParser;
11

import org.apache.lucene.search.Hits;
12

import org.apache.lucene.search.IndexSearcher;
13

import org.apache.lucene.search.Query;
14

import org.apache.lucene.search.Searcher;
15

import com.easyjf.dbo.EasyJDB;
16

import com.easyjf.news.business.NewsDir;
17

import com.easyjf.news.business.NewsDoc;
18

import com.easyjf.news.business.NewsUtil;
19

import com.easyjf.web.tools.IPageList;
20

public class MyRssIndexManage implements Runnable {
21

private String indexDir;
22

private String indexType="add";
23

public void run() {
24

// TODO Auto-generated method stub
25

if("add".equals(indexType))
26

normalIndex();
27

else if ("init".equals(indexType)) reIndexAll();
28

}
29

public void normalIndex()
30

{
31

try{
32

Date start = new Date();
33

int num=0;
34

IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
35

//NewsDir dir=NewsDir.readBySn();
36

String scope="(needIndex<2) or(needIndex is null)";
37

IPageList pList=NewsUtil.pageList(scope,1,50);
38

for(int p=0;p {
39

pList=NewsUtil.pageList(scope,p,100);
40

List list=pList.getResult();
41

for(int i=0;i {
42

NewsDoc doc=(NewsDoc)list.get(i);
43

writer.addDocument(newsdoc2lucenedoc(doc));
44

num++;
45

}
46

}
47

writer.optimize();
48

writer.close();
49

EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
50

Date end = new Date();
51

System.out.print("新增索引"+num+"條信息，一共花："+(end.getTime() - start.getTime())/60000+"分鐘!");
52

}
53

catch(Exception e)
54

{
55

e.printStackTrace();
56

}
57

}
58

public void reIndexAll()
59

{
60

try{
61

Date start = new Date();
62

int num=0;
63

IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
64

NewsDir dir=NewsDir.readBySn("easyjf");
65

IPageList pList=NewsUtil.pageList(dir,1,50);
66

for(int p=0;p {
67

pList=NewsUtil.pageList(dir,p,100);
68

List list=pList.getResult();
69

for(int i=0;i {
70

NewsDoc doc=(NewsDoc)list.get(i);
71

writer.addDocument(newsdoc2lucenedoc(doc));
72

num++;
73

}
74

}
75

writer.optimize();
76

writer.close();
77

EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
78

Date end = new Date();
79

System.out.print("全部重新做了一次索引，一共處理了"+num+"條信息，花："+(end.getTime() - start.getTime())/60000+"分鐘!");
80

}
81

catch(Exception e)
82

{
83

e.printStackTrace();
84

}
85

}
86

private Document newsdoc2lucenedoc(NewsDoc doc)
87

{
88

Document lDoc=new Document();
89

lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
90

lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
91

lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
92

lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
93

lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
94

lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
95

return lDoc;
96

}
97

public String getIndexDir() {
98

return indexDir;
99

}
100

public void setIndexDir(String indexDir) {
101

this.indexDir = indexDir;
102

}
103

104

public String getIndexType() {
105

return indexType;
106

}
107

public void setIndexType(String indexType) {
108

this.indexType = indexType;
109

}
110

}
111

二、使用Lucene實現全文搜索
下面是MyRssSearch類的源碼，該類主要實現使用Lucene中Searcher及QueryParser實現從索引庫中搜索關鍵詞。

1

package com.easyjf.lucene;
2

3

import java.util.List;
4

import org.apache.lucene.analysis.standard.StandardAnalyzer;
5

import org.apache.lucene.document.Document;
6

import org.apache.lucene.index.IndexReader;
7

import org.apache.lucene.queryParser.MultiFieldQueryParser;
8

import org.apache.lucene.queryParser.QueryParser;
9

import org.apache.lucene.search.Hits;
10

import org.apache.lucene.search.IndexSearcher;
11

import org.apache.lucene.search.Query;
12

import org.apache.lucene.search.Searcher;
13

14

import com.easyjf.search.MyRssUtil;
15

import com.easyjf.search.SearchContent;
16

import com.easyjf.web.tools.IPageList;
17

import com.easyjf.web.tools.PageList;
18

19

public class MyRssSearch {
20

private String indexDir;
21

IndexReader ir;
22

Searcher search;
23

public IPageList search(String key,int pageSize,int currentPage)
24

{
25

IPageList pList=new PageList(new HitsQuery(doSearch(key)));
26

pList.doList(pageSize,currentPage,"","",null);
27

if(pList!=null)
28

{
29

List list=pList.getResult();
30

if(list!=null){
31

for(int i=0;i {
32

list.set(i,lucene2searchObj((Document)list.get(i),key));
33

}
34

}
35

}
36

try{
37

if(search!=null)search.close();
38

if(ir!=null)ir.close();
39

}
40

catch(Exception e)
41

{
42

e.printStackTrace();
43

}
44

return pList;
45

}
46

private SearchContent lucene2searchObj(Document doc,String key)
47

{
48

SearchContent searchObj=new SearchContent();
49

String title=doc.getField("title").stringValue();
50

searchObj.setTitle(title.replaceAll(key,""+key+""));
51

searchObj.setTvalue(doc.getField("cid").stringValue());
52

searchObj.setUrl(doc.getField("url").stringValue());
53

searchObj.setSource(doc.getField("source").stringValue());
54

searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
55

searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
56

return searchObj;
57

}
58

public Hits doSearch(String key)
59

{
60

Hits hits=null;
61

try{
62

ir=IndexReader.open(indexDir);
63

search=new IndexSearcher(ir);
64

String fields[]={"title","content"};
65

QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
66

Query query=parser.parse(key);
67

hits=search.search(query);
68

}
69

catch(Exception e)
70

{
71

e.printStackTrace();
72

}
73

//System.out.println("搜索結果:"+hits.length());
74

return hits;
75

}
76

77

public String getIndexDir() {
78

return indexDir;
79

}
80

public void setIndexDir(String indexDir) {
81

this.indexDir = indexDir;
82

}
83

}
84

　　在上面的代碼中，search方法返回一個封裝了分頁查詢結果的IPageList，IPageList是EasyJWeb Tools業務引擎中的分頁引擎，對于IPageList的使用，請看本人寫的這篇文章《EasyJWeb Tools中業務引擎分頁的設計實現》：

　　我們針對Lucene的查詢結果Hits結構，寫了一個查詢器HitsQuery。代碼如下所示：

1

package com.easyjf.lucene;
2

import java.util.ArrayList;
3

import java.util.Collection;
4

import java.util.List;
5

import org.apache.lucene.search.Hits;
6

import com.easyjf.web.tools.IQuery;
7

public class HitsQuery implements IQuery {
8

private int begin=0;
9

private int max=0;
10

private Hits hits;
11

public HitsQuery()
12

{
13

14

}
15

public HitsQuery(Hits hits)
16

{
17

if(hits!=null)
18

{
19

this.hits=hits;
20

this.max=hits.length();
21

}
22

}
23

public int getRows(String arg0) {
24

// TODO Auto-generated method stub
25

return (hits==null?0:hits.length());
26

}
27

public List getResult(String arg0) {
28

// TODO Auto-generated method stub
29

List list=new ArrayList();
30

for(int i=begin;i<(begin+max)&&(i {
31

try{
32

list.add(hits.doc(i));
33

}
34

catch(Exception e)
35

{
36

e.printStackTrace();
37

}
38

}
39

return list;
40

}
41

public void setFirstResult(int begin) {
42

// TODO Auto-generated method stub
43

this.begin=begin;
44

}
45

public void setMaxResults(int max) {
46

// TODO Auto-generated method stub
47

this.max=max;
48

}
49

public void setParaValues(Collection arg0) {
50

// TODO Auto-generated method stub
51

52

}
53

public List getResult(String condition, int begin, int max) {
54

// TODO Auto-generated method stub
55

if((begin>=0)&&(begin if(!(max>hits.length()))this.max=max;
56

return getResult(condition);
57

}
58

}
59

三、Web調用
　　下面我們來看看在Web中如何調用商業邏輯層的全文檢索功能。下面是處理用戶請求的Action中關于搜索部分的源碼：

1

package com.easyjf.news.action;
2

public class SearchAction implements IWebAction {
3

public Page doSearch(WebForm form,Module module)throws Exception
4

{
5

String key=CommUtil.null2String(form.get("v"));
6

key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
7

form.set("v",key);
8

form.addResult("v2",URLEncoder.encode(key,"utf-8"));
9

if(key.getBytes().length>2){
10

String orderBy=CommUtil.null2String(form.get("order"));
11

int currentPage=CommUtil.null2Int(form.get("page"));
12

int pageSize=CommUtil.null2Int(form.get("pageSize"));
13

if(currentPage<1)currentPage=1;
14

if(pageSize<1)pageSize=15;
15

SearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
16

search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
17

search.doSearchByLucene();
18

IPageList pList=search.getResult();
19

if(pList!=null && pList.getRowCount()>0){
20

form.addResult("list",pList.getResult());
21

form.addResult("pages",new Integer(pList.getPages()));
22

form.addResult("rows",new Integer(pList.getRowCount()));
23

form.addResult("page",new Integer(pList.getCurrentPage()));
24

form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
25

}
26

else
27

{
28

form.addResult("notFound","true");//找不到數據
29

}
30

}
31

else
32

form.addResult("errMsg","您輸入的關鍵字太短!");
33

form.addResult("hotSearch",SearchEngine.getHotSearch(20));
34

return null;
35

}
36

}
37

其中調用的SearchEngine類中有關Lucene部分的源碼：
38

public class SearchEngine {
39

private MyRssSearch luceneSearch=new MyRssSearch();
40

public void doSearchByLucene()
41

{
42

SearchKey keyObj=readCache();
43

if(keyObj!=null){
44

result=luceneSearch.search(key,pageSize,currentPage);
45

if(updateStatus){
46

keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
47

keyObj.update();
48

}
49

}
50

else//緩存中沒有該關鍵字信息,生成關鍵字搜索結果
51

{
52

keyObj=new SearchKey();
53

keyObj.setTitle(key);
54

keyObj.setLastUpdated(new Date());
55

keyObj.setReadTimes(new Integer(1));
56

keyObj.setStatus(new Integer(0));
57

keyObj.setSequence(new Integer(1));
58

keyObj.setVdate(new Date());
59

keyObj.save();
60

result=luceneSearch.search(key,pageSize,currentPage);;
61

62

}
63

}
64

}
65

本文轉自：http://java.ccidnet.com/art/3749/20060704/595099_1.html

---------------------------------------------------------------------------------------------------------------------------------
說人之短，乃護己之短。夸己之長，乃忌人之長。皆由存心不厚，識量太狹耳。能去此弊，可以進德，可以遠怨。
http://www.aygfsteel.com/szhswl
------------------------------------------------------------------------------------------------------ ----------------- ---------

posted on 2007-12-05 17:08 宋針還閱讀(404) 評論(0) 編輯收藏所屬分類: 搜索引擎

新用戶注冊刷新評論列表


只有注冊用戶登錄后才能發表評論。




網站導航: 博客園 IT新聞 Chat2DB C++博客博問管理
相關文章: LUCENE學習筆記3(轉載) 用Lucene加速Web搜索應用程序的開發給Compass搜索添加高亮(highlight) Compass: 在你的應用中集成搜索功能 lunece查詢 Lucene的工作原理(轉載) lucene全文檢索應用示例及代碼簡析 LUCENE簡單實例

留言簿(6)

我參與的團隊

隨筆檔案(8)

文章分類(149)

新聞分類(1)

相冊

收藏夾(21)

友情鏈接

我的鏈接

搜索

積分與排名

最新評論

閱讀排行榜

評論排行榜