躺在沙灘上的小豬

快樂的每一天

Lucene Highlighter

Lucene 的最新版本為 1.9，從 Apache SVN 中 checkout 出來的原始碼已經包含了 Highlighter。

測試一下：

package org.apache.lucene.search.highlight;

import junit.framework.TestCase;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.analysis.TokenStream;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.index.IndexReader;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.queryParser.QueryParser;

import org.apache.lucene.search.Hits;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.Searcher;

import org.apache.lucene.store.RAMDirectory;

import java.io.IOException;

import java.io.StringReader;

/**

* @author martin

*/

public class WordsHighlighterTest extends TestCase {

private IndexReader reader;

RAMDirectory ramDirectory;

final private static String FIELD_NAME = "contents";

final private static String queryString = "索引";

String [] words = {

"1:索引內容結構：Document，以及包含于Document的多個Field索",

"2:索引內容優先性調整因子,boost(可對整個Document或Field指定).",

"3:索引的寫入IndexWriter,索引的寫入目標Directory,實現包括FsDirectory跟RamDirectory等",

"4:索引創建速度的調整"

};

protected void setUp() throws Exception {

ramDirectory = new RAMDirectory();

IndexWriter indexWriter = new IndexWriter(ramDirectory, new StandardAnalyzer(), true);

for (String s : words) {

addDoc(indexWriter, s);

}

indexWriter.optimize();

indexWriter.close();

reader = IndexReader.open(ramDirectory);

}

private void addDoc(IndexWriter indexWriter, String s) throws IOException {

Document doc = new Document();

doc.add(new Field(FIELD_NAME, s, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));

indexWriter.addDocument(doc);

}

public void testSimpleWords() throws Exception {

Query query = new QueryParser(FIELD_NAME, new StandardAnalyzer()).parse(queryString);

query = query.rewrite(reader);

System.out.println("Searching for: " + query.toString(FIELD_NAME));

Searcher searcher = new IndexSearcher(ramDirectory);

Hits hits = searcher.search(query);

Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<font color=\"red\">", "</font>"), new QueryScorer(query));

highlighter.setTextFragmenter(new SimpleFragmenter(20));

Analyzer analyzer = new StandardAnalyzer();

for (int i = 0; i < hits.length(); i++) {

String text = hits.doc(i).get(FIELD_NAME);

TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));

String result = highlighter.getBestFragments(tokenStream, text, 2, "

");

System.out.println("\t" + result);

}

protected void tearDown() throws Exception {

super.tearDown();

}

輸出：

Searching for: "索引"
4:索引創建速度的調整
3:索引的寫入IndexWriter,索引的寫入目標Directory,實現包
1:索引內容結構：Document，以索
2:索引內容優先性調整因子,boost(可對整個Document或Field指

posted on 2006-02-09 19:11 martin xus 閱讀(1401) 評論(0) 編輯收藏所屬分類: java 、lucene

躺在沙灘上的小豬

Lucene Highlighter

My Links

Blog Stats

留言簿(10)

隨筆檔案

搜索

最新評論