隨筆-20  評論-3  文章-9  trackbacks-0

          package com.laozizhu.article.util;

          import java.io.IOException;
          import java.sql.Connection;
          import java.sql.ResultSet;
          import java.sql.SQLException;
          import java.sql.Statement;
          import java.util.ArrayList;
          import java.util.Date;
          import java.util.List;
          import javax.sql.DataSource;
          import net.paoding.analysis.analyzer.PaodingAnalyzer;
          import org.apache.lucene.analysis.Analyzer;
          import org.apache.lucene.document.Document;
          import org.apache.lucene.document.Field;
          import org.apache.lucene.index.IndexWriter;
          import org.apache.lucene.queryParser.MultiFieldQueryParser;
          import org.apache.lucene.search.BooleanClause;
          import org.apache.lucene.search.IndexSearcher;
          import org.apache.lucene.search.Query;
          import org.apache.lucene.search.ScoreDoc;
          import org.apache.lucene.search.TopDocCollector;

          /**
          * 基于庖丁解牛的Lucene 2.4的全文搜索代碼。
          *
          * @author 老紫竹研究室(laozizhu.com)
          */
          public class LucenePaoDing {
          private static final String indexPath = "d:/indexpaoding/www.laozizhu.com";

          /**
          ?? * @param args
          ?? * @throws Exception
          ?? */
          public static void main(String[] args) throws Exception {
          ??? rebuildAll();
          ??? String keyword = "Spring.jar";
          ??? LucenePaoDing l = new LucenePaoDing();
          ??? System.out.println("索引搜索\n------------------------------");
          ??? System.out.println(l.seacherIndex(keyword));
          }

          public static void rebuildAll() {
          ??? synchronized (indexPath) {
          ????? LucenePaoDing l = new LucenePaoDing();
          ????? DataSource ds = (DataSource) Factory.getBean("dataSource");
          ????? Connection con = null;
          ????? Statement stat = null;
          ????? ResultSet rs = null;
          ????? try {
          ??????? con = ds.getConnection();
          ??????? stat = con.createStatement();
          ??????? rs = stat.executeQuery("select id,subject,content from t_article");
          ??????? if (rs != null) {
          ????????? l.Index(rs);
          ??????? }
          ????? } catch (Exception ex) {
          ??????? ex.printStackTrace();
          ????? } finally {
          ??????? if (rs != null) {
          ????????? try {
          ??????????? rs.close();
          ????????? } catch (Exception ex) {}
          ??????? }
          ??????? if (stat != null) {
          ????????? try {
          ??????????? stat.close();
          ????????? } catch (Exception ex) {}
          ??????? }
          ??????? if (con != null) {
          ????????? try {
          ??????????? con.close();
          ????????? } catch (Exception ex) {}
          ??????? }
          ????? }
          ??? }
          }

          public synchronized Analyzer getAnalyzer() {
          ??? return new PaodingAnalyzer();
          }

          private synchronized void Index(ResultSet rs) {// 通過(guò)結(jié)果集就可以獲得數(shù)據(jù)源了
          ??? try {
          ????? IndexWriter writer = new IndexWriter(indexPath, getAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
          ????? writer.setMaxFieldLength(10000000);
          ????? Date start = new Date();
          ????? int index = 1;
          ????? while (rs.next()) {
          ??????? Document doc = new Document();// 一個(gè)文檔相當(dāng)與表的一條記錄
          ??????? doc.add(new Field("id", rs.getString("id"), Field.Store.YES, Field.Index.NOT_ANALYZED));// 字段id放的是數(shù)據(jù)庫(kù)表中的id,lucene的一條記錄的一個(gè)字段下的數(shù)據(jù)可以放多個(gè)值,這點(diǎn)與數(shù)據(jù)庫(kù)表不同
          ??????? doc.add(new Field("subject", rs.getString("subject"), Field.Store.YES, Field.Index.ANALYZED));
          ??????? doc.add(new Field("content", rs.getString("content"), Field.Store.YES, Field.Index.ANALYZED));
          ??????? writer.addDocument(doc);
          ??????? if (index++ == 1000) {
          ????????? writer.commit();
          ????????? index = 0;
          ??????? }
          ????? }
          ????? writer.commit();
          ????? writer.optimize();// 優(yōu)化
          ????? writer.close();// 一定要關(guān)閉,否則不能把內(nèi)存中的數(shù)據(jù)寫(xiě)到文件
          ????? Date end = new Date();
          ????? System.out.println("重建索引成功!!!!" + "用時(shí)" + (end.getTime() - start.getTime()) + "毫秒");
          ??? } catch (IOException e) {
          ????? System.out.println(e);
          ??? } catch (SQLException e) {
          ????? System.out.println(e);
          ??? }
          }

          public void IndexSingle(long id, String subject, String content) {// 通過(guò)結(jié)果集就可以獲得數(shù)據(jù)源了
          ??? synchronized (indexPath) {
          ????? try {
          ??????? IndexWriter writer = new IndexWriter(indexPath, getAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
          ??????? writer.setMaxFieldLength(10000000);
          ??????? Date start = new Date();
          ??????? Document doc = new Document();// 一個(gè)文檔相當(dāng)與表的一條記錄
          ??????? doc.add(new Field("id", Long.toString(id), Field.Store.YES, Field.Index.NOT_ANALYZED));// 字段id放的是數(shù)據(jù)庫(kù)表中的id,lucene的一條記錄的一個(gè)字段下的數(shù)據(jù)可以放多個(gè)值,這點(diǎn)與數(shù)據(jù)庫(kù)表不同
          ??????? doc.add(new Field("subject", subject, Field.Store.YES, Field.Index.ANALYZED));
          ??????? doc.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED));
          ??????? writer.addDocument(doc);
          ??????? // writer.optimize();// 優(yōu)化
          ??????? writer.close();// 一定要關(guān)閉,否則不能把內(nèi)存中的數(shù)據(jù)寫(xiě)到文件
          ??????? Date end = new Date();
          ??????? System.out.println("索引建立成功!!!!" + "用時(shí)" + (end.getTime() - start.getTime()) + "毫秒");
          ????? } catch (IOException e) {
          ??????? System.out.println(e);
          ????? }
          ??? }
          }

          /**
          ?? * 最主要的搜索方法。
          ?? *
          ?? * @param queryString
          ?? * @return
          ?? */
          public List<Long> seacherIndex(String queryString) {// 根據(jù)關(guān)鍵字搜索
          ??? try {
          ????? IndexSearcher isearcher = new IndexSearcher(indexPath);
          ????? /* 下面這個(gè)表示要同時(shí)搜索這兩個(gè)域,而且只要一個(gè)域里面有滿足我們搜索的內(nèi)容就行 */
          ????? BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
          ????? TopDocCollector collector = new TopDocCollector(10);
          ????? Query query = MultiFieldQueryParser.parse(queryString, new String[] { "subject", "content" }, clauses, getAnalyzer());
          ????? isearcher.search(query, collector);
          ????? ScoreDoc[] hits = collector.topDocs().scoreDocs;
          ????? List<Long> rtn = new ArrayList<Long>();
          ????? Long id;
          ????? int docId;
          ????? for (int i = 0; i < hits.length; i++) {
          ??????? docId = hits[i].doc;
          ??????? Document doc = isearcher.doc(docId);
          ??????? id = Long.parseLong(doc.get("id").trim());
          ??????? if (!rtn.contains(id)) {
          ????????? rtn.add(id);
          ??????? }
          ????? }
          ????? isearcher.close();
          ????? return rtn;
          ??? } catch (Exception e) {
          ????? e.printStackTrace();
          ????? return null;
          ??? }
          }
          }

          posted on 2009-03-09 17:24 藍山 閱讀(489) 評論(0)  編輯  收藏

          只有注冊用戶登錄后才能發表評論。


          網站導航:
           
          主站蜘蛛池模板: 彭阳县| 萨迦县| 舒兰市| 平昌县| 册亨县| 宿松县| 大同县| 麻城市| 广南县| 临沭县| 泌阳县| 加查县| 都江堰市| 调兵山市| 紫云| 鄱阳县| 宜春市| 上饶市| 垣曲县| 灵石县| 宜君县| 江安县| 衢州市| 襄垣县| 波密县| 新安县| 闻喜县| 改则县| 南汇区| 库尔勒市| 饶平县| 油尖旺区| 元阳县| 盖州市| 崇左市| 临洮县| 松江区| 华容县| 名山县| 巫山县| 砚山县|