|
Posted on 2010-10-07 15:53 penngo 閱讀(3328) 評論(4) 編輯 收藏 所屬分類: Java
有些網站的搜索功能都是直接使用like %關鍵詞%方式對數據庫進行關鍵詞查找,不過這種方式速度比較慢,而且影響數據庫服務器性能。
其實我們可以先把數據從數據庫查詢出來,利用lucene建立索引。以后每次查找都從索引中查找,可以提高查詢速度和減輕服務器負擔。
本篇用到的技術:lucene3.0.2,IKAnalyzer3.2.5
search.properties主要是配置搜索的信息,內容:
sql=select iId,title,content,credate from archeive //指定查找sql,需要建立索引的數據
update.field=iId
update.value=
search.condition=title,content //搜索時的查找字段
index.path=D:/project/Java/lucene/WebContent/WEB-INF/classes/Index //索引的保存地址
period=10000 //更新索引的時間間隔
com.search.util.SearchConfig主要是讀取search.properties的信息。
 /**
  * Read/write access to the search settings kept in {@code search.properties}.
  *
  * <p>The properties are loaded once when the object is constructed; call
  * {@link #initSearch()} to reload them and {@link #save()} to write them back.
  */
 public class SearchConfig {
     /** Name of the properties file handed to {@code PropertiesUtil}. */
     private static final String SEARCH_FILE = "search.properties";
     /** Key of the SQL statement that selects the rows to index. */
     private static final String SQL = "sql";
     /** Key of the comma-separated list of fields to search in. */
     private static final String CONDITION = "search.condition";
     /** Key of the directory in which the index is stored. */
     private static final String INDEX = "index.path";
     /** Key of the interval between two index rebuilds. */
     private static final String PERIOD = "period";
     /** Key of the column name used for the incremental {@code where} clause. */
     private static final String UPDATE_FIELD = "update.field";
     /** Key of the value the update column is compared against. */
     private static final String UPDATE_VALUE = "update.value";

     private Properties searchPro;

     /** Creates the configuration and immediately loads the properties file. */
     public SearchConfig() {
         initSearch();
     }

     /** (Re)loads the settings from {@code search.properties}. */
     public void initSearch() {
         searchPro = PropertiesUtil.getProperties(SEARCH_FILE);
     }

     /** @return the configured SQL statement, or an empty string when not set */
     public String getSql() {
         return searchPro.getProperty(SQL, "");
     }

     /** @return the configured search fields, or an empty string when not set */
     public String getCondition() {
         return searchPro.getProperty(CONDITION, "");
     }

     /**
      * Returns the index directory, creating it when it does not exist yet.
      *
      * @return the directory named by {@code index.path}
      */
     public File getIndexPath() {
         File dir = new File(searchPro.getProperty(INDEX, ""));
         if (!dir.exists()) {
             // mkdirs() rather than mkdir(): the configured path is usually several
             // levels deep and mkdir() fails when a parent directory is missing.
             dir.mkdirs();
         }
         return dir;
     }

     /**
      * Returns the configured rebuild interval.
      *
      * @return the value of {@code period}, or 0 when the key is absent
      * @throws NumberFormatException if the configured value is not an integer
      */
     public long getPeriod() {
         // Parse as long (the declared return type) and tolerate surrounding blanks.
         return Long.parseLong(searchPro.getProperty(PERIOD, "0").trim());
     }

     /** @return the value of {@code update.field}, or an empty string when not set */
     public String getUpdateField() {
         return searchPro.getProperty(UPDATE_FIELD, "");
     }

     /** @return the value of {@code update.value}, or an empty string when not set */
     public String getUpdateValue() {
         return searchPro.getProperty(UPDATE_VALUE, "");
     }

     /** Writes the current settings back to {@code search.properties}. */
     public void save() {
         PropertiesUtil.saveProperties(searchPro, SEARCH_FILE);
     }
 }
com.search.util.LuceneUtil代碼介紹,主要是生成索引和搜索。
 /**
  * Builds the Lucene index from the configured SQL query and runs keyword
  * searches against that index.
  *
  * <p>All settings (index directory, SQL, search fields) come from
  * {@link SearchConfig} and are read once in the constructor.
  */
 public class LuceneUtil {
     /** Maximum number of hits requested from the searcher. */
     private static final int MAX_RESULTS = 10;

     private File indexpath = null;
     private String sql = null;
     private String condition = null;
     private String updateField = null;
     private String updateValue = null;
     private SearchConfig sc = null;

     /**
      * Loads the configuration. When {@code update.value} is set, a
      * {@code where <update.field> > <update.value>} clause is appended to the SQL.
      */
     public LuceneUtil() {
         sc = new SearchConfig();
         indexpath = sc.getIndexPath();
         sql = sc.getSql();
         condition = sc.getCondition();
         updateField = sc.getUpdateField();
         updateValue = sc.getUpdateValue();
         if (!updateValue.equals("")) {
             // NOTE(review): both parts come from the configuration file, not from
             // user input; they are concatenated verbatim into the statement.
             sql = sql + " where " + updateField + " > " + updateValue;
         }
     }

     /**
      * Runs the configured SQL and writes one document per row (columns 1-3 as
      * {@code id}, {@code title}, {@code content}) into a freshly created index.
      *
      * <p>Failures are printed and swallowed. The writer, the directory and the
      * result set are always released so that a failed run does not keep the
      * index locked for the next one.
      */
     public void createIndex() {
         System.out.println("==========正在生成數(shù)據(jù)庫索引。");
         ResultSet rs = null;
         FSDirectory directory = null;
         IndexWriter writer = null;
         try {
             // Fetch the rows that have to be indexed.
             rs = SQLHelper.getResultSet(sql);
             // Open the index directory once and share the handle with the writer.
             directory = FSDirectory.open(indexpath);
             Analyzer analyzer = new IKAnalyzer();
             writer = new IndexWriter(directory, analyzer, true,
                     IndexWriter.MaxFieldLength.LIMITED);
             while (rs.next()) {
                 Document doc = new Document();
                 doc.add(new Field("id", String.valueOf(rs.getInt(1)),
                         Field.Store.YES, Field.Index.ANALYZED));
                 doc.add(new Field("title", nullToEmpty(rs.getString(2)),
                         Field.Store.YES, Field.Index.ANALYZED));
                 doc.add(new Field("content", nullToEmpty(rs.getString(3)),
                         Field.Store.YES, Field.Index.ANALYZED));
                 writer.addDocument(doc);
             }
         } catch (Exception e) {
             e.printStackTrace();
         } finally {
             // Release in reverse order of acquisition; a failing close must not
             // prevent the remaining resources from being released.
             try {
                 if (writer != null) {
                     writer.close();
                 }
             } catch (Exception e) {
                 e.printStackTrace();
             }
             try {
                 if (directory != null) {
                     directory.close();
                 }
             } catch (Exception e) {
                 e.printStackTrace();
             }
             try {
                 if (rs != null) {
                     rs.close();
                 }
             } catch (Exception e) {
                 e.printStackTrace();
             }
         }
     }

     /**
      * Searches the configured fields for {@code keyword}.
      *
      * @param keyword text to search for; {@code null} or empty yields no hits
      * @return the stored documents of the best matches (at most
      *         {@value #MAX_RESULTS}); empty when nothing matches or the search fails
      */
     public List<Document> search(String keyword) {
         List<Document> list = new ArrayList<Document>();
         // Nothing to look up: return before any index resource is opened.
         if (keyword == null || keyword.equals("")) {
             return list;
         }
         FSDirectory directory = null;
         IndexReader reader = null;
         IndexSearcher isearcher = null;
         try {
             directory = FSDirectory.open(indexpath);
             reader = IndexReader.open(directory, true);
             isearcher = new IndexSearcher(reader);
             isearcher.setSimilarity(new IKSimilarity());
             Query query = IKQueryParser.parseMultiField(condition.split(","), keyword);

             // Fetch the best-scoring documents.
             TopDocs topDocs = isearcher.search(query, MAX_RESULTS);

             // Iterate over the hits actually returned: totalHits counts every
             // match in the index and can be larger than scoreDocs.length.
             for (ScoreDoc hit : topDocs.scoreDocs) {
                 list.add(isearcher.doc(hit.doc));
             }
         } catch (Exception e) {
             e.printStackTrace();
         } finally {
             try {
                 if (isearcher != null) {
                     isearcher.close();
                 }
             } catch (Exception e) {
                 e.printStackTrace();
             }
             // The reader was opened here, so it is closed here as well.
             try {
                 if (reader != null) {
                     reader.close();
                 }
             } catch (Exception e) {
                 e.printStackTrace();
             }
             try {
                 if (directory != null) {
                     directory.close();
                 }
             } catch (Exception e) {
                 e.printStackTrace();
             }
         }
         return list;
     }

     /** Maps a SQL NULL to an empty string so a single NULL column cannot abort indexing. */
     private static String nullToEmpty(String value) {
         return value == null ? "" : value;
     }
 }
com.search.listener.Indexlistener啟動索引更新程序
 /**
  * Servlet context listener that starts the index update job together with
  * the web application.
  */
 public class Indexlistener implements ServletContextListener {

     /**
      * Called when the web application starts: creates an {@link IndexTask},
      * whose constructor schedules the index job.
      *
      * @param event the context event supplied by the container (not used)
      */
     public void contextInitialized(javax.servlet.ServletContextEvent event) {
         new IndexTask();
     }

     /**
      * Called when the web application stops; nothing is done here.
      *
      * @param event the context event supplied by the container (not used)
      */
     public void contextDestroyed(javax.servlet.ServletContextEvent event) {
         // intentionally empty
     }
 }
com.search.listener.IndexTask定時更新索引
 /**
  * Schedules the job that rebuilds the Lucene index.
  *
  * <p>The job runs immediately and then repeats every {@code period}
  * milliseconds as configured in {@link SearchConfig}. When no positive period
  * is configured the index is built once only.
  */
 public class IndexTask {

     /** Creates the timer and schedules the index job, starting now. */
     public IndexTask() {
         // Daemon timer: its thread must not keep the JVM (and the servlet
         // container) alive after the web application has been stopped.
         Timer timer = new Timer(true);
         long period = new SearchConfig().getPeriod();
         if (period > 0) {
             timer.schedule(new Task(), new Date(), period);
         } else {
             // Timer.schedule rejects a non-positive period with an
             // IllegalArgumentException, so fall back to a single run.
             timer.schedule(new Task(), new Date());
         }
     }

     /** Timer job that rebuilds the index through {@link LuceneUtil}. */
     static class Task extends TimerTask {
         public void run() {
             try {
                 LuceneUtil lu = new LuceneUtil();
                 lu.createIndex();
             } catch (RuntimeException e) {
                 // An exception escaping run() cancels the whole Timer; report it
                 // and let the next scheduled run try again.
                 e.printStackTrace();
             }
         }
     }
 }
com.search.util.RedHighlighter關鍵詞高亮顯示
 /**
  * Wraps the search keywords found in a piece of text in red {@code <font>} tags.
  */
 public class RedHighlighter {

     /**
      * Returns the best-matching fragment of {@code word} with every query term
      * highlighted in red.
      *
      * @param keyword the search text entered by the user
      * @param field   name of the index field {@code word} was read from
      * @param word    the stored field text to highlight
      * @return the highlighted fragment; the unmodified {@code word} when it
      *         contains no query term, when {@code keyword} is empty or when
      *         highlighting fails; an empty string when {@code word} is {@code null}
      */
     public static String getBestFragment(String keyword, String field, String word) {
         if (word == null) {
             return "";
         }
         if (keyword == null || keyword.equals("")) {
             return word;
         }
         SearchConfig sc = new SearchConfig();
         String condition = sc.getCondition();
         try {
             Query query = IKQueryParser.parseMultiField(condition.split(","), keyword);
             SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
                     "<font color='red'>", "</font>");
             Highlighter highlighter = new Highlighter(simpleHTMLFormatter,
                     new QueryScorer(query));
             highlighter.setTextFragmenter(new SimpleFragmenter(100));
             String fragment = highlighter.getBestFragment(new IKAnalyzer(),
                     field, word);
             // The highlighter returns null when the text contains none of the
             // query terms; show the plain text instead of the string "null".
             if (fragment != null) {
                 return fragment;
             }
         } catch (Exception e) {
             e.printStackTrace();
         }
         return word;
     }
 }
index.jsp搜索頁面
 <%@ page language="java" contentType="text/html; charset=GBK"
 pageEncoding="GBK"%>
<%@ page import="com.search.util.LuceneUtil" %>
<%@ page import="java.util.*" %>
<%@ page import="org.apache.lucene.document.Document" %>
<%@ page import="com.search.util.RedHighlighter" %>
<%@ page import="java.net.URLEncoder"%>
<%--
  Search page: reads the keyword from request parameter "w", queries the
  Lucene index through LuceneUtil and lists the highlighted hits.
--%>
<%!
    /**
     * Escapes the characters that are significant in HTML text and attribute
     * values, so request data can be echoed into the page safely.
     */
    private static String escapeHtml(String text) {
        StringBuilder out = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':  out.append("&amp;");  break;
                case '<':  out.append("&lt;");   break;
                case '>':  out.append("&gt;");   break;
                case '"':  out.append("&quot;"); break;
                case '\'': out.append("&#39;");  break;
                default:   out.append(c);
            }
        }
        return out.toString();
    }
%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=GBK">
<title>搜索</title>
<link rel="stylesheet" href="./style/style.css" type="text/css">
</head>
<%
    //request.setCharacterEncoding("GBK");
    String w = request.getParameter("w");
    int size = 0;
    long time = 0;
    List<Document> list = null;
    if (w != null && !w.equals("")) {
        // Re-decode the parameter: the bytes are read back as ISO8859-1 and
        // interpreted as GBK.
        w = new String(w.getBytes("ISO8859-1"), "GBK");
    } else {
        w = "";
    }
    LuceneUtil lu = new LuceneUtil();
    Date start = new Date();
    list = lu.search(w);
    Date end = new Date();
    size = list.size();
    // Elapsed search time in milliseconds.
    time = end.getTime() - start.getTime();
%>
<script type="text/javascript">
function submit() {
}
</script>
<body>
<div class="seachInput" align="center">
<form method="get" action="index.jsp"><br>
<%-- The keyword is user input: escape it before echoing it into the attribute. --%>
<input type="text" class="txtSeach" id="w" name="w" value="<%=escapeHtml(w) %>"
><input type="submit"
class="btnSearch" onclick="submit" value="找一下"> <br>
</form>
</div>
<div id="searchInfo"><span style="float: left; margin-left: 15px;"></span>找到相關(guān)內(nèi)容<%=size%>篇,

用時(shí)<%=time%>毫秒
</div>
<div id="main">
<div id="searchResult">
<div class="forflow">
<%
    if (list != null && list.size() > 0) {
        for (Document doc : list) {
            // These values carry the highlighter's <font> markup and are
            // therefore written to the page as-is.
            String title = RedHighlighter.getBestFragment(w, "title", doc.get("title"));
            String content = RedHighlighter.getBestFragment(w, "content", doc.get("content"));
%>
<div class="searchItem">
<a href="#" class="searchItemTitle" target="_blank"><%=title %></a>
<div class="searchCon">
<%=content %>
</div>
</div>
<%
        }
    }
%>
</div>
</div>
</div>
</body>
</html>
運行效果:
附件: 完整代碼
評論
# re: 利用lucene給網站、系統增加搜索功能[未登錄] 回復 更多評論
2010-10-07 20:40 by
如何做得更強大,更專業呀
# re: 利用lucene給網站、系統增加搜索功能 回復 更多評論
2010-10-07 21:09 by
不錯啊~ 不過要想搜索更準確點,就不能這么簡單了貌似.
# re: 利用lucene給網站、系統增加搜索功能 回復 更多評論
2010-10-07 22:08 by
@os 搜索不準的話,可以研究下分詞,我用的中文分詞是IKAnalyzer。
# re: 利用lucene給網站、系統增加搜索功能 回復 更多評論
2010-10-12 15:58 by
沒有數據庫腳本。
|