唯美古典
          Java入門,Struts,Hibernate,Spring,Groovy,Grails
          posts - 7,comments - 10,trackbacks - 0

          1、首先從lucene官網上下載lucene2.4.0(也可以點擊直接下載,我這里用的這個版本,現在最高版本是3.0)

          2、從極易軟件下載極易分詞器jar包(為漢語的世界,你好提供支持)

          3、Eclipse中新建Java工程,并將所需jar包(lucene-core-2.4.0.jar,lucene-analyzer-2.4.0.jar,lucene-highlighter-2.4.0.jar,je-analysis-1.5.3.jar)加入工程

          4、差不多該開始了,在開始之前還需要建立兩個文件夾,我這里是luceneDataSource放文件(用來建立索引庫),luceneIndexs(存放索引庫的位置),最終的結構是:

          5、好,我們開始,首先建立HelloWorld類,類里有兩個方法createIndex和search,分別是創建索引庫和搜索,搜索出來的結果高亮顯示,具體實現為:

          package com.lucene.helloworld;

          import java.util.logging.SimpleFormatter;

          import jeasy.analysis.MMAnalyzer;

          import org.apache.lucene.analysis.Analyzer;

          import org.apache.lucene.analysis.standard.StandardAnalyzer;

          import org.apache.lucene.document.Document;

          import org.apache.lucene.index.IndexWriter;

          import org.apache.lucene.index.IndexWriter.MaxFieldLength;

          import org.apache.lucene.queryParser.MultiFieldQueryParser;

          import org.apache.lucene.queryParser.QueryParser;

          import org.apache.lucene.search.Filter;

          import org.apache.lucene.search.IndexSearcher;

          import org.apache.lucene.search.Query;

          import org.apache.lucene.search.ScoreDoc;

          import org.apache.lucene.search.TopDocs;

          import org.apache.lucene.search.highlight.Formatter;

          import org.apache.lucene.search.highlight.Fragmenter;

          import org.apache.lucene.search.highlight.Highlighter;

          import org.apache.lucene.search.highlight.QueryScorer;

          import org.apache.lucene.search.highlight.Scorer;

          import org.apache.lucene.search.highlight.SimpleFragmenter;

          import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

          import org.junit.Test;

          import com.lucene.util.File2DocumentUtils;

          public class HelloWorld {

              String zhFilePath = "F:""java""workspaces""LuceneTest""luceneDatasource""世界,你好.txt";

              String filePath = "F:""java""workspaces""LuceneTest""luceneDatasource""IndexWriter addDocument's a javadoc .txt";

              String indexPath = "F:""java""workspaces""LuceneTest""luceneIndexs";

          // Analyzer analyzer = new StandardAnalyzer();

              Analyzer mmAnalyzer = new MMAnalyzer(); // 詞庫(kù)分析,極易分詞

              /**

               * 創(chuàng)建索引

               *

               * @throws Exception

               *

               */

              @Test

              public void createIndex() throws Exception {

                 IndexWriter indexWriter = new IndexWriter(indexPath, mmAnalyzer, true, MaxFieldLength.LIMITED);

          //     Document doc = File2DocumentUtils.file2Document(filePath);

                 Document zhDoc = File2DocumentUtils.file2Document(zhFilePath);

          //     indexWriter.addDocument(doc);

                 indexWriter.addDocument(zhDoc);

                 indexWriter.close();

              }

              /**

               * 從索引庫(kù)搜索

               *

               * @throws Exception

               */

              @Test

              public void search() throws Exception {

                 // String queryString = "hello world";

                 String queryString = "世界,你好";

                 // 1、將搜索文件解析為Query對(duì)象

                 String[] fields = { "name", "content" };

                 QueryParser queryParser = new MultiFieldQueryParser(fields, mmAnalyzer);

                 Query query = queryParser.parse(queryString);

                 // 2、查詢

                 IndexSearcher indexSearcher = new IndexSearcher(indexPath);

                 Filter filter = null;

                 TopDocs topDocs = indexSearcher.search(query, filter, 10000);

                 System.out.println("總共有【" + topDocs.totalHits + "】條結(jié)果匹配");

                 // start 準(zhǔn)備高亮器

                 Formatter formatter = new SimpleHTMLFormatter("<font color=red>", "</font>");

                 Scorer scorer = new QueryScorer(query);

                 Highlighter highlighter = new Highlighter(formatter, scorer);

                 Fragmenter fragmenter = new SimpleFragmenter(50);

                 highlighter.setTextFragmenter(fragmenter);

                 // end 結(jié)束高亮器

                 // 3、打印輸出結(jié)果

                 for (ScoreDoc scoreDoc : topDocs.scoreDocs) {

                     int docSn = scoreDoc.doc;

                     Document doc = indexSearcher.doc(docSn);

                     // start 高亮

                     // 返回高亮后的結(jié)果,如果當(dāng)前屬性值中沒(méi)有出現(xiàn)關(guān)鍵字,會(huì)返回 null

                     String hc = highlighter.getBestFragment(mmAnalyzer, "content", doc.get("content"));

                     if (hc == null) {

                        String content = doc.get("content");

                        int endIndex = Math.min(50, content.length());

                        hc = content.substring(0, endIndex);

                     }

                     doc.getField("content").setValue(hc);

                     // end 高亮

                     File2DocumentUtils.printDocumentInfo(doc);

                 }

              }

          }

          該類需要有一個工具類支持,來將file轉換為Document,具體實現如下:

          package com.lucene.util;

          import java.io.BufferedReader;

          import java.io.File;

          import java.io.FileInputStream;

          import java.io.InputStreamReader;

          import org.apache.lucene.document.Document;

          import org.apache.lucene.document.Field;

          import org.apache.lucene.document.NumberTools;

          import org.apache.lucene.document.Field.Index;

          import org.apache.lucene.document.Field.Store;

          publicclass File2DocumentUtils {

              publicstatic Document file2Document(String path) {

                 File file = new File(path);

                 Document doc = new Document();

                 doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));

                 doc.add(new Field("content", readFileContent(file), Store.YES, Index.ANALYZED));

                 doc.add(new Field("size", NumberTools.longToString(file.length()), Store.YES, Index.NOT_ANALYZED));

                 doc.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.NOT_ANALYZED));

                 return doc;

              }

              // public static void document2File(Document doc ){

              //    

              // }

              publicstatic String readFileContent(File file) {

                 try {

                     BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));

                     StringBuffer content = new StringBuffer();

                     for (String line = null; (line = reader.readLine()) != null;) {

                        content.append(line).append(""n");

                     }

                     return content.toString();

                 } catch (Exception e) {

                     thrownew RuntimeException(e);

                 }

              }

             

              publicstaticvoid printDocumentInfo(Document doc) {

                 // Field f = doc.getField("name");

                 // f.stringValue();

                 System.out.println("------------------------------");

                 System.out.println("name     = " + doc.get("name"));

                 System.out.println("content = " + doc.get("content"));

                 System.out.println("size     = " + NumberTools.stringToLong(doc.get("size")));

                 System.out.println("path     = " + doc.get("path"));

              }

          }

          6、到此我們結束,看下成果,英文版的我就不寫了,相對來說比較容易,來看下中文版的結果




          唯美古典的工作室
          posted on 2009-12-03 15:27 唯美古典 閱讀(2894) 評(píng)論(0)  編輯  收藏 所屬分類: Java入門lucene

          只有注冊(cè)用戶登錄后才能發(fā)表評(píng)論。


          網(wǎng)站導(dǎo)航:
           
          主站蜘蛛池模板: 苏尼特右旗| 自治县| 九江市| 景谷| 城口县| 永胜县| 元阳县| 永福县| 连州市| 平原县| 凉山| 达孜县| 桑植县| 开原市| 连州市| 南投县| 太和县| 泽州县| 富顺县| 桃园市| 巩义市| 保靖县| 遵化市| 鸡东县| 璧山县| 静宁县| 榆树市| 明光市| 浮梁县| 洞头县| 密山市| 眉山市| 贵阳市| 台江县| 梁河县| 开封市| 阜新市| 汽车| 聂拉木县| 海口市| 方城县|