首頁 新隨筆 新文章 聯繫 聚合

posts - 7,comments - 10,trackbacks - 0

<

2009年12月

>

日

一

二

三

四

五

六

29

30

2

4

6

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

1

2

3

4

5

6

7

8

9

唯美古典博客公告唯美古典剛剛開博，希望大家多多支持！

常用鏈接

留言簿

隨筆分類

隨筆檔案

搜索

閱讀排行榜

評論排行榜

Lucene版Hello world（世界，你好）

1、首先從lucene官網上下載lucene2.4.0（也可以點擊直接下載，我這裡用的這個版本，現在最高版本是3.0）

2、從極易軟件下載極易分詞器jar包（為漢語的世界，你好提供支持）

3、在Eclipse中新建Java工程，并將所需jar包（lucene-core-2.4.0.jar,lucene-analyzer-2.4.0.jar,lucene-highlighter-2.4.0.jar,je-analysis-1.5.3.jar）加入工程

4、差不多該開始了，在開始之前還需要建立兩個文件夾，我這裡是luceneDataSource放文件（用來建立索引庫），luceneIndexs(存放索引庫的位置)，最終的結構是：

5、好，我們開始，首先建立HelloWorld類，類里有兩個方法createIndex和search分別是創建索引庫和搜索，搜索出來的結果高亮顯示，具體實現為：

package com.lucene.helloworld;

import java.util.logging.SimpleFormatter;

import jeasy.analysis.MMAnalyzer;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.index.IndexWriter.MaxFieldLength;

import org.apache.lucene.queryParser.MultiFieldQueryParser;

import org.apache.lucene.queryParser.QueryParser;

import org.apache.lucene.search.Filter;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.ScoreDoc;

import org.apache.lucene.search.TopDocs;

import org.apache.lucene.search.highlight.Formatter;

import org.apache.lucene.search.highlight.Fragmenter;

import org.apache.lucene.search.highlight.Highlighter;

import org.apache.lucene.search.highlight.QueryScorer;

import org.apache.lucene.search.highlight.Scorer;

import org.apache.lucene.search.highlight.SimpleFragmenter;

import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

import org.junit.Test;

import com.lucene.util.File2DocumentUtils;

public class HelloWorld {

String zhFilePath = "F:""java""workspaces""LuceneTest""luceneDatasource""世界，你好.txt";

String filePath = "F:""java""workspaces""LuceneTest""luceneDatasource""IndexWriter addDocument's a javadoc .txt";

String indexPath = "F:""java""workspaces""LuceneTest""luceneIndexs";

// Analyzer analyzer = new StandardAnalyzer();

Analyzer mmAnalyzer = new MMAnalyzer(); // 詞庫分析，極易分詞

/**

* 創(chuàng)建索引

*

* @throws Exception

*

*/

@Test

public void createIndex() throws Exception {

IndexWriter indexWriter = new IndexWriter(indexPath, mmAnalyzer, true, MaxFieldLength.LIMITED);

// Document doc = File2DocumentUtils.file2Document(filePath);

Document zhDoc = File2DocumentUtils.file2Document(zhFilePath);

// indexWriter.addDocument(doc);

indexWriter.addDocument(zhDoc);

indexWriter.close();

}

/**

* 從索引庫搜索

*

* @throws Exception

*/

@Test

public void search() throws Exception {

// String queryString = "hello world";

String queryString = "世界,你好";

// 1、將搜索文件解析為Query對象

String[] fields = { "name", "content" };

QueryParser queryParser = new MultiFieldQueryParser(fields, mmAnalyzer);

Query query = queryParser.parse(queryString);

// 2、查詢

IndexSearcher indexSearcher = new IndexSearcher(indexPath);

Filter filter = null;

TopDocs topDocs = indexSearcher.search(query, filter, 10000);

System.out.println("總共有【" + topDocs.totalHits + "】條結(jié)果匹配");

// start 準備高亮器

Formatter formatter = new SimpleHTMLFormatter("<font color=red>", "</font>");

Scorer scorer = new QueryScorer(query);

Highlighter highlighter = new Highlighter(formatter, scorer);

Fragmenter fragmenter = new SimpleFragmenter(50);

highlighter.setTextFragmenter(fragmenter);

// end 結(jié)束高亮器

// 3、打印輸出結(jié)果

for (ScoreDoc scoreDoc : topDocs.scoreDocs) {

int docSn = scoreDoc.doc;

Document doc = indexSearcher.doc(docSn);

// start 高亮

// 返回高亮后的結(jié)果，如果當前屬性值中沒有出現(xiàn)關(guān)鍵字，會返回 null

String hc = highlighter.getBestFragment(mmAnalyzer, "content", doc.get("content"));

if (hc == null) {

String content = doc.get("content");

int endIndex = Math.min(50, content.length());

hc = content.substring(0, endIndex);

}

doc.getField("content").setValue(hc);

// end 高亮

File2DocumentUtils.printDocumentInfo(doc);

}

該類需要有一個工具類支持，來將file轉換為Document，具體實現如下：

package com.lucene.util;

import java.io.BufferedReader;

import java.io.File;

import java.io.FileInputStream;

import java.io.InputStreamReader;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.document.NumberTools;

import org.apache.lucene.document.Field.Index;

import org.apache.lucene.document.Field.Store;

publicclass File2DocumentUtils {

publicstatic Document file2Document(String path) {

File file = new File(path);

Document doc = new Document();

doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));

doc.add(new Field("content", readFileContent(file), Store.YES, Index.ANALYZED));

doc.add(new Field("size", NumberTools.longToString(file.length()), Store.YES, Index.NOT_ANALYZED));

doc.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.NOT_ANALYZED));

return doc;

}

// public static void document2File(Document doc ){

//

// }

publicstatic String readFileContent(File file) {

try {

BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));

StringBuffer content = new StringBuffer();

for (String line = null; (line = reader.readLine()) != null;) {

content.append(line).append(""n");

}

return content.toString();

} catch (Exception e) {

thrownew RuntimeException(e);

}

publicstaticvoid printDocumentInfo(Document doc) {

// Field f = doc.getField("name");

// f.stringValue();

System.out.println("------------------------------");

System.out.println("name = " + doc.get("name"));

System.out.println("content = " + doc.get("content"));

System.out.println("size = " + NumberTools.stringToLong(doc.get("size")));

System.out.println("path = " + doc.get("path"));

}

6、到此我們結(jié)束，看下成果，英文版的我就不寫了，想對來說比較容易，來看下中文版的結(jié)果

唯美古典的工作室

posted on 2009-12-03 15:27 唯美古典閱讀(2900) 評論(0) 編輯收藏所屬分類: Java入門、lucene

新用戶注冊刷新評論列表


只有注冊用戶登錄后才能發表評論。




網站導航: 博客園 IT新聞 Chat2DB C++博客博問管理

常用鏈接

留言簿

隨筆分類

隨筆檔案

搜索

最新評論

閱讀排行榜

評論排行榜