隨筆-20  評論-3  文章-9  trackbacks-0

          package com.laozizhu.article.util;

          import java.io.IOException;
          import java.sql.Connection;
          import java.sql.ResultSet;
          import java.sql.SQLException;
          import java.sql.Statement;
          import java.util.ArrayList;
          import java.util.Date;
          import java.util.List;
          import javax.sql.DataSource;
          import net.paoding.analysis.analyzer.PaodingAnalyzer;
          import org.apache.lucene.analysis.Analyzer;
          import org.apache.lucene.document.Document;
          import org.apache.lucene.document.Field;
          import org.apache.lucene.index.IndexWriter;
          import org.apache.lucene.queryParser.MultiFieldQueryParser;
          import org.apache.lucene.search.BooleanClause;
          import org.apache.lucene.search.IndexSearcher;
          import org.apache.lucene.search.Query;
          import org.apache.lucene.search.ScoreDoc;
          import org.apache.lucene.search.TopDocCollector;

          /**
          * 基于庖丁解牛的Lucene 2.4的全文搜索代碼。
          *
          * @author 老紫竹研究室(laozizhu.com)
          */
          public class LucenePaoDing {
          private static final String indexPath = "d:/indexpaoding/www.laozizhu.com";

          /**
          ?? * @param args
          ?? * @throws Exception
          ?? */
          public static void main(String[] args) throws Exception {
          ??? rebuildAll();
          ??? String keyword = "Spring.jar";
          ??? LucenePaoDing l = new LucenePaoDing();
          ??? System.out.println("索引搜索\n------------------------------");
          ??? System.out.println(l.seacherIndex(keyword));
          }

          public static void rebuildAll() {
          ??? synchronized (indexPath) {
          ????? LucenePaoDing l = new LucenePaoDing();
          ????? DataSource ds = (DataSource) Factory.getBean("dataSource");
          ????? Connection con = null;
          ????? Statement stat = null;
          ????? ResultSet rs = null;
          ????? try {
          ??????? con = ds.getConnection();
          ??????? stat = con.createStatement();
          ??????? rs = stat.executeQuery("select id,subject,content from t_article");
          ??????? if (rs != null) {
          ????????? l.Index(rs);
          ??????? }
          ????? } catch (Exception ex) {
          ??????? ex.printStackTrace();
          ????? } finally {
          ??????? if (rs != null) {
          ????????? try {
          ??????????? rs.close();
          ????????? } catch (Exception ex) {}
          ??????? }
          ??????? if (stat != null) {
          ????????? try {
          ??????????? stat.close();
          ????????? } catch (Exception ex) {}
          ??????? }
          ??????? if (con != null) {
          ????????? try {
          ??????????? con.close();
          ????????? } catch (Exception ex) {}
          ??????? }
          ????? }
          ??? }
          }

          public synchronized Analyzer getAnalyzer() {
          ??? return new PaodingAnalyzer();
          }

          private synchronized void Index(ResultSet rs) {// 通過結(jié)果集就可以獲得數(shù)據(jù)源了
          ??? try {
          ????? IndexWriter writer = new IndexWriter(indexPath, getAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
          ????? writer.setMaxFieldLength(10000000);
          ????? Date start = new Date();
          ????? int index = 1;
          ????? while (rs.next()) {
          ??????? Document doc = new Document();// 一個文檔相當(dāng)與表的一條記錄
          ??????? doc.add(new Field("id", rs.getString("id"), Field.Store.YES, Field.Index.NOT_ANALYZED));// 字段id放的是數(shù)據(jù)庫表中的id,lucene的一條記錄的一個字段下的數(shù)據(jù)可以放多個值,這點與數(shù)據(jù)庫表不同
          ??????? doc.add(new Field("subject", rs.getString("subject"), Field.Store.YES, Field.Index.ANALYZED));
          ??????? doc.add(new Field("content", rs.getString("content"), Field.Store.YES, Field.Index.ANALYZED));
          ??????? writer.addDocument(doc);
          ??????? if (index++ == 1000) {
          ????????? writer.commit();
          ????????? index = 0;
          ??????? }
          ????? }
          ????? writer.commit();
          ????? writer.optimize();// 優(yōu)化
          ????? writer.close();// 一定要關(guān)閉,否則不能把內(nèi)存中的數(shù)據(jù)寫到文件
          ????? Date end = new Date();
          ????? System.out.println("重建索引成功!!!!" + "用時" + (end.getTime() - start.getTime()) + "毫秒");
          ??? } catch (IOException e) {
          ????? System.out.println(e);
          ??? } catch (SQLException e) {
          ????? System.out.println(e);
          ??? }
          }

          public void IndexSingle(long id, String subject, String content) {// 通過結(jié)果集就可以獲得數(shù)據(jù)源了
          ??? synchronized (indexPath) {
          ????? try {
          ??????? IndexWriter writer = new IndexWriter(indexPath, getAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
          ??????? writer.setMaxFieldLength(10000000);
          ??????? Date start = new Date();
          ??????? Document doc = new Document();// 一個文檔相當(dāng)與表的一條記錄
          ??????? doc.add(new Field("id", Long.toString(id), Field.Store.YES, Field.Index.NOT_ANALYZED));// 字段id放的是數(shù)據(jù)庫表中的id,lucene的一條記錄的一個字段下的數(shù)據(jù)可以放多個值,這點與數(shù)據(jù)庫表不同
          ??????? doc.add(new Field("subject", subject, Field.Store.YES, Field.Index.ANALYZED));
          ??????? doc.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED));
          ??????? writer.addDocument(doc);
          ??????? // writer.optimize();// 優(yōu)化
          ??????? writer.close();// 一定要關(guān)閉,否則不能把內(nèi)存中的數(shù)據(jù)寫到文件
          ??????? Date end = new Date();
          ??????? System.out.println("索引建立成功!!!!" + "用時" + (end.getTime() - start.getTime()) + "毫秒");
          ????? } catch (IOException e) {
          ??????? System.out.println(e);
          ????? }
          ??? }
          }

          /**
          ?? * 最主要的搜索方法。
          ?? *
          ?? * @param queryString
          ?? * @return
          ?? */
          public List<Long> seacherIndex(String queryString) {// 根據(jù)關(guān)鍵字搜索
          ??? try {
          ????? IndexSearcher isearcher = new IndexSearcher(indexPath);
          ????? /* 下面這個表示要同時搜索這兩個域,而且只要一個域里面有滿足我們搜索的內(nèi)容就行 */
          ????? BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
          ????? TopDocCollector collector = new TopDocCollector(10);
          ????? Query query = MultiFieldQueryParser.parse(queryString, new String[] { "subject", "content" }, clauses, getAnalyzer());
          ????? isearcher.search(query, collector);
          ????? ScoreDoc[] hits = collector.topDocs().scoreDocs;
          ????? List<Long> rtn = new ArrayList<Long>();
          ????? Long id;
          ????? int docId;
          ????? for (int i = 0; i < hits.length; i++) {
          ??????? docId = hits[i].doc;
          ??????? Document doc = isearcher.doc(docId);
          ??????? id = Long.parseLong(doc.get("id").trim());
          ??????? if (!rtn.contains(id)) {
          ????????? rtn.add(id);
          ??????? }
          ????? }
          ????? isearcher.close();
          ????? return rtn;
          ??? } catch (Exception e) {
          ????? e.printStackTrace();
          ????? return null;
          ??? }
          }
          }

          posted on 2009-03-09 17:24 藍山 閱讀(489) 評論(0)  編輯  收藏

          只有注冊用戶登錄后才能發表評論。


          網站導航:
           

          窩窩影視:www.wowoys.com是一個程序員開的電影網(wǎng)站!


          常用鏈接

          留言簿

          隨筆檔案(20)

          文章分類(8)

          文章檔案(9)

          搜索

          •  

          積分與排名

          • 積分 - 20393
          • 排名 - 1716

          最新評論

          閱讀排行榜

          評論排行榜

          主站蜘蛛池模板: 高要市| 历史| 阿图什市| 扶沟县| 农安县| 贵阳市| 扬中市| 曲靖市| 仪征市| 虹口区| 临清市| 鲁山县| 泾源县| 盐亭县| 崇阳县| 临武县| 湖南省| 平舆县| 锦州市| 苏尼特右旗| 沙雅县| 马关县| 吴堡县| 莱阳市| 奉化市| 拉孜县| 都江堰市| 遂宁市| 四子王旗| 德昌县| 奉化市| 海南省| 长治县| 青阳县| 丰顺县| 祥云县| 玛纳斯县| 朔州市| 太和县| 霍邱县| 麻城市|