1、首先從lucene官網上下載lucene2.4.0(也可以點擊直接下載,我這里用的這個版本,現在最高版本是3.0)
2、從極易軟件下載極易分詞器jar包(為漢語的「世界,你好」提供分詞支持)
3、在Eclipse中新建Java工程,并將所需jar包(lucene-core-2.4.0.jar,lucene-analyzer-2.4.0.jar,lucene-highlighter-2.4.0.jar,je-analysis-1.5.3.jar)加入工程
4、差不多該開始了,在開始之前還需要建立兩個文件夾,我這里是luceneDataSource放文件(用來建立索引庫),luceneIndexs(存放索引庫的位置),最終的結構是:
5、好,我們開始,首先建立HelloWorld類,類里有兩個方法createIndex和search分別是創建索引庫和搜索,搜索出來的結果高亮顯示,具體實現為:
package com.lucene.helloworld; import java.util.logging.SimpleFormatter; import jeasy.analysis.MMAnalyzer; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter.MaxFieldLength; import org.apache.lucene.queryParser.MultiFieldQueryParser; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.Filter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Formatter; import org.apache.lucene.search.highlight.Fragmenter; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.Scorer; import org.apache.lucene.search.highlight.SimpleFragmenter; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.junit.Test; import com.lucene.util.File2DocumentUtils; public class HelloWorld { String zhFilePath = "F:""java""workspaces""LuceneTest""luceneDatasource""世界,你好.txt"; String filePath = "F:""java""workspaces""LuceneTest""luceneDatasource""IndexWriter addDocument's a javadoc .txt"; String indexPath = "F:""java""workspaces""LuceneTest""luceneIndexs"; // Analyzer analyzer = new StandardAnalyzer(); Analyzer mmAnalyzer = new MMAnalyzer(); // 詞庫分析,極易分詞 /** * 創建索引 * * @throws Exception * */ @Test public void createIndex() throws Exception { IndexWriter indexWriter = new IndexWriter(indexPath, mmAnalyzer, true, MaxFieldLength.LIMITED); // Document doc = File2DocumentUtils.file2Document(filePath); Document zhDoc = File2DocumentUtils.file2Document(zhFilePath); // indexWriter.addDocument(doc); indexWriter.addDocument(zhDoc); indexWriter.close(); } /** * 從索引庫搜索 * * @throws Exception */ @Test public void search() throws 
Exception { // String queryString = "hello world"; String queryString = "世界,你好"; // 1、將搜索文件解析為Query對象 String[] fields = { "name", "content" }; QueryParser queryParser = new MultiFieldQueryParser(fields, mmAnalyzer); Query query = queryParser.parse(queryString); // 2、查詢 IndexSearcher indexSearcher = new IndexSearcher(indexPath); Filter filter = null; TopDocs topDocs = indexSearcher.search(query, filter, 10000); System.out.println("總共有【" + topDocs.totalHits + "】條結果匹配"); // start 準備高亮器 Formatter formatter = new SimpleHTMLFormatter("<font color=red>", "</font>"); Scorer scorer = new QueryScorer(query); Highlighter highlighter = new Highlighter(formatter, scorer); Fragmenter fragmenter = new SimpleFragmenter(50); highlighter.setTextFragmenter(fragmenter); // end 結束高亮器 // 3、打印輸出結果 for (ScoreDoc scoreDoc : topDocs.scoreDocs) { int docSn = scoreDoc.doc; Document doc = indexSearcher.doc(docSn); // start 高亮 // 返回高亮后的結果,如果當前屬性值中沒有出現關鍵字,會返回 null String hc = highlighter.getBestFragment(mmAnalyzer, "content", doc.get("content")); if (hc == null) { String content = doc.get("content"); int endIndex = Math.min(50, content.length()); hc = content.substring(0, endIndex); } doc.getField("content").setValue(hc); // end 高亮 File2DocumentUtils.printDocumentInfo(doc); } } } |
該類需要有一個工具類支持,來將file轉換為Document,具體實現如下:
package com.lucene.util; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.InputStreamReader; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumberTools; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; publicclass File2DocumentUtils { publicstatic Document file2Document(String path) { File file = new File(path); Document doc = new Document(); doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED)); doc.add(new Field("content", readFileContent(file), Store.YES, Index.ANALYZED)); doc.add(new Field("size", NumberTools.longToString(file.length()), Store.YES, Index.NOT_ANALYZED)); doc.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.NOT_ANALYZED)); return doc; } // public static void document2File(Document doc ){ // // } publicstatic String readFileContent(File file) { try { BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file))); StringBuffer content = new StringBuffer(); for (String line = null; (line = reader.readLine()) != null;) { content.append(line).append(""n"); } return content.toString(); } catch (Exception e) { thrownew RuntimeException(e); } }
publicstaticvoid printDocumentInfo(Document doc) { // Field f = doc.getField("name"); // f.stringValue(); System.out.println("------------------------------"); System.out.println("name = " + doc.get("name")); System.out.println("content = " + doc.get("content")); System.out.println("size = " + NumberTools.stringToLong(doc.get("size"))); System.out.println("path = " + doc.get("path")); } } |
6、到此我們結束,看下成果。英文版的我就不寫了,相對來說比較容易,來看下中文版的結果:
