1、首先從lucene官網上下載lucene2.4.0(也可以點擊直接下載,我這里用的這個版本,現在最高版本是3.0)
2、從極易軟件下載極易分詞器jar包(為漢語的“世界,你好”提供支持)
3、在Eclipse中新建Java工程,并將所需jar包(lucene-core-2.4.0.jar,lucene-analyzer-2.4.0.jar,lucene-highlighter-2.4.0.jar,je-analysis-1.5.3.jar)加入工程
4、差不多該開始了,在開始之前還需要建立兩個文件夾,我這里是luceneDatasource(放文件,用來建立索引庫)和luceneIndexs(存放索引庫的位置),最終的結構是:
5、好,我們開始,首先建立HelloWorld類,類里有兩個方法createIndex和search,分別是創建索引庫和搜索,搜索出來的結果高亮顯示,具體實現為:
package com.lucene.helloworld; import java.util.logging.SimpleFormatter; import jeasy.analysis.MMAnalyzer; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter.MaxFieldLength; import org.apache.lucene.queryParser.MultiFieldQueryParser; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.Filter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Formatter; import org.apache.lucene.search.highlight.Fragmenter; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.Scorer; import org.apache.lucene.search.highlight.SimpleFragmenter; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.junit.Test; import com.lucene.util.File2DocumentUtils; public class HelloWorld { String zhFilePath = "F:""java""workspaces""LuceneTest""luceneDatasource""世界,你好.txt"; String filePath = "F:""java""workspaces""LuceneTest""luceneDatasource""IndexWriter addDocument's a javadoc .txt"; String indexPath = "F:""java""workspaces""LuceneTest""luceneIndexs"; // Analyzer analyzer = new StandardAnalyzer(); Analyzer mmAnalyzer = new MMAnalyzer(); // 詞庫(kù)分析,極易分詞 /** * 創(chuàng)建索引 * * @throws Exception * */ @Test public void createIndex() throws Exception { IndexWriter indexWriter = new IndexWriter(indexPath, mmAnalyzer, true, MaxFieldLength.LIMITED); // Document doc = File2DocumentUtils.file2Document(filePath); Document zhDoc = File2DocumentUtils.file2Document(zhFilePath); // indexWriter.addDocument(doc); indexWriter.addDocument(zhDoc); indexWriter.close(); } /** * 從索引庫(kù)搜索 * * @throws Exception */ @Test public void 
search() throws Exception { // String queryString = "hello world"; String queryString = "世界,你好"; // 1、將搜索文件解析為Query對(duì)象 String[] fields = { "name", "content" }; QueryParser queryParser = new MultiFieldQueryParser(fields, mmAnalyzer); Query query = queryParser.parse(queryString); // 2、查詢 IndexSearcher indexSearcher = new IndexSearcher(indexPath); Filter filter = null; TopDocs topDocs = indexSearcher.search(query, filter, 10000); System.out.println("總共有【" + topDocs.totalHits + "】條結(jié)果匹配"); // start 準(zhǔn)備高亮器 Formatter formatter = new SimpleHTMLFormatter("<font color=red>", "</font>"); Scorer scorer = new QueryScorer(query); Highlighter highlighter = new Highlighter(formatter, scorer); Fragmenter fragmenter = new SimpleFragmenter(50); highlighter.setTextFragmenter(fragmenter); // end 結(jié)束高亮器 // 3、打印輸出結(jié)果 for (ScoreDoc scoreDoc : topDocs.scoreDocs) { int docSn = scoreDoc.doc; Document doc = indexSearcher.doc(docSn); // start 高亮 // 返回高亮后的結(jié)果,如果當(dāng)前屬性值中沒(méi)有出現(xiàn)關(guān)鍵字,會(huì)返回 null String hc = highlighter.getBestFragment(mmAnalyzer, "content", doc.get("content")); if (hc == null) { String content = doc.get("content"); int endIndex = Math.min(50, content.length()); hc = content.substring(0, endIndex); } doc.getField("content").setValue(hc); // end 高亮 File2DocumentUtils.printDocumentInfo(doc); } } } |
該類需要有一個工具類支持,來將file轉換為Document,具體實現如下:
package com.lucene.util; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.InputStreamReader; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumberTools; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; publicclass File2DocumentUtils { publicstatic Document file2Document(String path) { File file = new File(path); Document doc = new Document(); doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED)); doc.add(new Field("content", readFileContent(file), Store.YES, Index.ANALYZED)); doc.add(new Field("size", NumberTools.longToString(file.length()), Store.YES, Index.NOT_ANALYZED)); doc.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.NOT_ANALYZED)); return doc; } // public static void document2File(Document doc ){ // // } publicstatic String readFileContent(File file) { try { BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file))); StringBuffer content = new StringBuffer(); for (String line = null; (line = reader.readLine()) != null;) { content.append(line).append(""n"); } return content.toString(); } catch (Exception e) { thrownew RuntimeException(e); } }
publicstaticvoid printDocumentInfo(Document doc) { // Field f = doc.getField("name"); // f.stringValue(); System.out.println("------------------------------"); System.out.println("name = " + doc.get("name")); System.out.println("content = " + doc.get("content")); System.out.println("size = " + NumberTools.stringToLong(doc.get("size"))); System.out.println("path = " + doc.get("path")); } } |
6、到此我們結束,看下成果,英文版的我就不寫了,相對來說比較容易,來看下中文版的結果:
