日韩欧美国产高清,一本久久a久久精品亚洲,激情小说亚洲图片

利用lucene給網站、系統增加搜索功能

Posted on 2010-10-07 15:53 penngo 閱讀(3337) 評論(4) 編輯收藏所屬分類: Java

有些網站的搜索功能都是直接使用like %關鍵詞%方式對數據庫進行關鍵詞查找，不過這種方式速度比較慢，而且影響數據庫服務器性能。
其實我們可以先把數據從數據庫查詢出來，利用lucene建立索引。以後每次查找都從索引中查找，可以提高查詢速度和減輕服務器負擔。
本篇用到的技術：lucene3.0.2,IKAnalyzer3.2.5
search.properties主要是配置搜索的信息，內容：

# 指定查找sql，需要建立索引的數據
sql=select iId,title,content,credate from archeive

update.field=iId

update.value=

# 搜索時的查找字段
search.condition=title,content

# 索引的保存地址
index.path=D:/project/Java/lucene/WebContent/WEB-INF/classes/Index

# 更新索引的時間間隔（毫秒）
period=10000

com.search.util.SearchConfig主要是讀取search.properties的信息。

public class SearchConfig {

private Properties searchPro;

private String searchFile = "search.properties";

private String SQL = "sql";

private String CONDITION = "search.condition";

private String INDEX = "index.path";

public SearchConfig(){

initSearch();

}

public void initSearch(){

searchPro = PropertiesUtil.getProperties(searchFile);

}

public String getSql(){

return searchPro.getProperty(SQL, "");

}

public String getCondition(){

return searchPro.getProperty(CONDITION, "");

}

public File getIndexPath(){

String path = searchPro.getProperty(INDEX, "");

File file = new File(path);

if (!file.exists()) {

file.mkdir();

}

return file;

}

public long getPeriod(){

String period = searchPro.getProperty("period", "0");

return Integer.valueOf(period);

}

public String getUpdateField(){

return searchPro.getProperty("update.field", "");

}

public String getUpdateValue(){

return searchPro.getProperty("update.value", "");

}

public void save(){

PropertiesUtil.saveProperties(searchPro, searchFile);

}

com.search.util.LuceneUtil代碼介紹，主要是生成索引和搜索。

public class LuceneUtil {

private File indexpath = null;

private String sql = null;

private String condition = null;

private String updateField = null;

private String updateValue = null;

private SearchConfig sc = null;

public LuceneUtil() {

sc = new SearchConfig();

indexpath = sc.getIndexPath();

sql = sc.getSql();

condition = sc.getCondition();

updateField = sc.getUpdateField();

updateValue = sc.getUpdateValue();

if(!updateValue.equals("")){

sql = sql + " where " + updateField + " > " + updateValue;

}

public void createIndex() {

System.out.println("==========正在生成數據庫索引。");

//把數據庫中的數據查詢出來，

ResultSet rs = SQLHelper.getResultSet(sql);

try {

//打開索引文件

FSDirectory directory = FSDirectory.open(indexpath);

Analyzer analyzer = new IKAnalyzer();

IndexWriter writer = new IndexWriter(FSDirectory.open(indexpath),

analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

while (rs.next()) {

Document doc = new Document();

doc.add(new Field("id", String.valueOf(rs.getInt(1)),

Field.Store.YES, Field.Index.ANALYZED));

doc.add(new Field("title", rs.getString(2), Field.Store.YES,

Field.Index.ANALYZED));

doc.add(new Field("content", rs.getString(3), Field.Store.YES,

Field.Index.ANALYZED));

writer.addDocument(doc);

}

writer.close();

directory.close();

} catch (Exception e) {

e.printStackTrace();

}

public List<Document> search(String keyword) {

List<Document> list = new ArrayList<Document>();

try {

FSDirectory directory = FSDirectory.open(indexpath);

IndexReader reader = IndexReader.open(directory, true);

IndexSearcher isearcher = new IndexSearcher(reader);

isearcher.setSimilarity(new IKSimilarity());

if(keyword == null || keyword.equals("")){

return list;

}

Query query = IKQueryParser.parseMultiField(condition.split(","), keyword);

// 搜索相似度最高的10條記錄

TopDocs topDocs = isearcher.search(query, 10);

// 輸出結果

ScoreDoc[] scoreDocs = topDocs.scoreDocs;

for (int i = 0; i < topDocs.totalHits; i++) {

Document targetDoc = isearcher.doc(scoreDocs[i].doc);

list.add(targetDoc);

}

isearcher.close();

directory.close();

} catch (Exception e) {

e.printStackTrace();

}

return list;

}

com.search.listener.Indexlistener啟動索引更新程序

public class Indexlistener implements ServletContextListener {

public void contextInitialized(javax.servlet.ServletContextEvent arg0) {

new IndexTask();

}

public void contextDestroyed(javax.servlet.ServletContextEvent arg0) {

}

com.search.listener.IndexTask定時更新索引

public class IndexTask {

public IndexTask(){

Timer timer = new Timer();

SearchConfig sc = new SearchConfig();

timer.schedule(new Task(), new Date(), sc.getPeriod());

}

static class Task extends TimerTask{

public void run(){

LuceneUtil lu = new LuceneUtil();

lu.createIndex();

}

com.search.util.RedHighlighter關鍵詞高亮顯示

public class RedHighlighter {

public static String getBestFragment(String keyword, String field, String word){

SearchConfig sc = new SearchConfig();

String condition = sc.getCondition();

try{

Query query = IKQueryParser.parseMultiField(condition.split(","), keyword);

SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(

"<font color='red'>", "</font>");

Highlighter highlighter = new Highlighter(simpleHTMLFormatter,

new QueryScorer(query));

highlighter.setTextFragmenter(new SimpleFragmenter(100));

String c = highlighter.getBestFragment(new IKAnalyzer(),

field, word);

return c;

}

catch(Exception e){

e.printStackTrace();

}

return "";

}

index.jsp搜索頁面

<%@ page language="java" contentType="text/html; charset=GBK"
    pageEncoding="GBK"%>
<%@page import="com.search.util.LuceneUtil" %>
<%@page import="java.util.*" %>
<%@page import="org.apache.lucene.document.Document" %>
<%@page import="com.search.util.RedHighlighter" %>
<%@page import="java.net.URLEncoder"%>
<%--
  Search page: reads the "w" request parameter, runs it through LuceneUtil.search and
  lists the highlighted results together with the hit count and elapsed time.

  NOTE(review): the published listing had lost its scriptlet delimiters, the opening
  <script> and <form> tags and several opening <div> tags. They are restored here with
  the minimum markup needed for a well-formed page; confirm the attributes against the
  original project.
--%>
<%!
    /** Escapes the characters that are significant in HTML text and attribute values. */
    private static String escapeHtml(String s) {
        StringBuilder sb = new StringBuilder(s.length() + 16);
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case '&': sb.append("&amp;"); break;
                case '<': sb.append("&lt;"); break;
                case '>': sb.append("&gt;"); break;
                case '"': sb.append("&quot;"); break;
                case '\'': sb.append("&#39;"); break;
                default: sb.append(c);
            }
        }
        return sb.toString();
    }
%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
    "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
</head>
<%
    //request.setCharacterEncoding("GBK");
    String w = request.getParameter("w");
    int size = 0;
    long time = 0;
    List<Document> list = null;
    if(w != null && !w.equals("")){
        // Re-decode the parameter: it is read as ISO8859-1 bytes and interpreted as GBK.
        w = new String(w.getBytes("ISO8859-1"), "GBK");
    }
    else{
        w = "";
    }
    LuceneUtil lu = new LuceneUtil();
    // Time the search so the page can report how long it took.
    Date start = new Date();
    list = lu.search(w);
    Date end = new Date();
    size = list.size();
    time = end.getTime() - start.getTime();
%>
<script type="text/javascript">
function submit(){
}
</script>
<body>
<div>
<%-- NOTE(review): the form's original attributes were lost; GET to the current URL is assumed. --%>
<form method="get">
<%-- The keyword is user input, so it is escaped before being echoed into the attribute. --%>
<input type="text" class="txtSeach" id="w" name="w" value="<%=escapeHtml(w) %>"
><input type="submit"
class="btnSearch" onclick="submit" value="找一下">    <br>
</form>
</div>
<div id="searchInfo"><span style="float: left; margin-left: 15px;"></span>找到相關內容<%=size%>篇，
用時<%=time%>毫秒
</div>
<div>
<%
    if(list != null && list.size() > 0){
        for(Document doc:list){
            String title = RedHighlighter.getBestFragment(w, "title", doc.get("title"));
            String content = RedHighlighter.getBestFragment(w, "content", doc.get("content"));
%>
<div>
<%-- NOTE(review): the listing computed the title but its output markup was lost; a plain line is assumed. --%>
<%=title %><br>
<%=content %>
</div>
<%
        }
    }
%>
</div>
</body>
</html>

運行效果:

附件:完整代碼

# re: 利用lucene給網站、系統增加搜索功能[未登錄] 回復 更多評論

2010-10-07 20:40 by semovy

如何做得更強大,更專業呀

# re: 利用lucene給網站、系統增加搜索功能回復 更多評論

2010-10-07 21:09 by os

不錯啊~ 不過要想搜索更準確點,就不能這么簡單了貌似.

# re: 利用lucene給網站、系統增加搜索功能回復 更多評論

2010-10-07 22:08 by pengo

@os
搜索不準的話，可以研究下分詞，我用的中文分詞是IKAnalyzer。

# re: 利用lucene給網站、系統增加搜索功能 回復 更多評論

2010-10-12 15:58 by xpf7622

沒有數據庫腳本。

新用戶注冊刷新評論列表


只有注冊用戶登錄后才能發表評論。




網站導航: 博客園 IT新聞 Chat2DB C++博客博問管理
相關文章: java獲取剪貼板中的鏈接 java攝像頭截圖 swing程序在任務欄閃動效果實現數據庫反向生成實體類 apache與tomcat負載集群的3種方法頁面緩存的小測試瀏覽器客戶端js中調用java代碼客戶端調用服務器端方法的簡單例子 Mouse Hook java實現文件監控

penngo

利用lucene給網站、系統增加搜索功能

評論

# re: 利用lucene給網站、系統增加搜索功能[未登錄] 回復 更多評論

# re: 利用lucene給網站、系統增加搜索功能回復 更多評論

# re: 利用lucene給網站、系統增加搜索功能回復 更多評論

# re: 利用lucene給網站、系統增加搜索功能 回復 更多評論

日歷

公告

常用鏈接

留言簿(14)

隨筆分類

隨筆檔案

我參與的項目

本人的其它博客

搜索

最新評論

閱讀排行榜

評論排行榜

penngo

利用lucene給網站、系統增加搜索功能

評論

# re: 利用lucene給網站、系統增加搜索功能[未登錄] 回復 更多評論

# re: 利用lucene給網站、系統增加搜索功能 回復 更多評論

# re: 利用lucene給網站、系統增加搜索功能 回復 更多評論

# re: 利用lucene給網站、系統增加搜索功能 回復 更多評論

日歷

公告

常用鏈接

留言簿(14)

隨筆分類

隨筆檔案

我參與的項目

本人的其它博客

搜索

最新評論

閱讀排行榜

評論排行榜

# re: 利用lucene給網站、系統增加搜索功能[未登錄] 回復更多評論

# re: 利用lucene給網站、系統增加搜索功能回復更多評論

# re: 利用lucene給網站、系統增加搜索功能回復更多評論

# re: 利用lucene給網站、系統增加搜索功能回復更多評論