posts - 431,  comments - 344,  trackbacks - 0

          HTML 解析器
          package com.rain.util;

          import java.io.FileInputStream;
          import java.io.FileNotFoundException;
          import java.io.IOException;
          import java.io.InputStream;
          import java.io.InputStreamReader;
          import java.io.Reader;
          import java.io.UnsupportedEncodingException;

          import org.apache.lucene.demo.html.HTMLParser;

          public class HTMLDocParser {

           private String htmlPath;
           private HTMLParser htmlParser;
           
           public HTMLDocParser(String htmlPath){
            this.htmlPath=htmlPath;
            initHtmlParser();
           }
           public void initHtmlParser(){
            InputStream inputStream=null;
            try{
             inputStream=new FileInputStream(htmlPath);
            }catch(FileNotFoundException e){
             e.printStackTrace();
            }
            if(null!=inputStream){
             try{
              htmlParser=new HTMLParser(new InputStreamReader(inputStream,"utf-8"));
             }catch(UnsupportedEncodingException e){
              e.printStackTrace();
             }
            }
           }
           public String getTitle(){
            if(null!=htmlParser){
             try{
              return htmlParser.getTitle();
             }catch(IOException e){
              e.printStackTrace();
             }catch(InterruptedException e){
              e.printStackTrace();
             }
            }
            return "";
           }
           public Reader getContent(){
            if(null!=htmlParser){
             try{
              return htmlParser.getReader();
             }catch(IOException e){
              e.printStackTrace();
             }
            }
            return null;
           }
           public String getPath(){
            return this.htmlPath;
           }
          }


          描述搜索結果的結構實體Bean
          package com.rain.search;

          /**
           * Simple value holder for one search hit: the path of the matching HTML
           * file and its title. Both properties are {@code null} until set.
           */
          public class SearchResultBean {

              /** Path of the matching HTML document. */
              private String htmlPath;

              /** Title of the matching HTML document. */
              private String htmlTitle;

              /** @return the stored document path, or {@code null} if never set */
              public String getHtmlPath() { return htmlPath; }

              /** @return the stored document title, or {@code null} if never set */
              public String getHtmlTitle() { return htmlTitle; }

              /** @param htmlPath the document path to store */
              public void setHtmlPath(String htmlPath) { this.htmlPath = htmlPath; }

              /** @param htmlTitle the document title to store */
              public void setHtmlTitle(String htmlTitle) { this.htmlTitle = htmlTitle; }
          }


          索引子系統的實現

          package com.rain.index;

          import java.io.File;
          import java.io.IOException;
          import java.io.Reader;

          import org.apache.lucene.analysis.Analyzer;
          import org.apache.lucene.analysis.standard.StandardAnalyzer;
          import org.apache.lucene.document.Document;
          import org.apache.lucene.index.IndexWriter;
          import org.apache.lucene.store.Directory;
          import org.apache.lucene.store.FSDirectory;
          import org.apache.lucene.document.Field;

          import com.rain.util.HTMLDocParser;

          public class IndexManager {
           
           //the directory that stores HTML files
           private final String dataDir="E:\\dataDir";
           
           //the directory that is used to store a Lucene index
           private final String indexDir="E:\\indexDir";
           
           public boolean creatIndex()throws IOException{
            if(true==inIndexExist()){
             return true;
            }
            File dir=new File(dataDir);
            if(!dir.exists()){
             return false;
            }
            File[] htmls=dir.listFiles();
            Directory fsDirectory=FSDirectory.getDirectory(indexDir,true);
            Analyzer analyzer=new StandardAnalyzer();
            IndexWriter indexWriter=new IndexWriter(fsDirectory,analyzer,true);
            for(int i=0;i<htmls.length;i++){
             String htmlPath=htmls[i].getAbsolutePath();
             if(htmlPath.endsWith(".html")||htmlPath.endsWith("htm")){
              addDocument(htmlPath,indexWriter);
             }
            }
            indexWriter.optimize();
            indexWriter.close();
            return true;
           }
           
           public void addDocument(String htmlPath,IndexWriter indexWriter){
            HTMLDocParser htmlParser=new HTMLDocParser(htmlPath);
            String path=htmlParser.getPath();
            String title=htmlParser.getTitle();
            Reader content=htmlParser.getContent();
            
            Document document=new Document();
            document.add(new Field("path",path,Field.Store.YES,Field.Index.NO));
            document.add(new Field("title",title,Field.Store.YES,Field.Index.TOKENIZED));
               document.add(new Field("content",content));
               try{
                indexWriter.addDocument(document);
               }catch(IOException e){
                e.printStackTrace();
               }
           }
           public String getDataDir(){
            return this.dataDir;
           }
           
           public String getIndexDir(){
            return this.indexDir;
           }
           
           public boolean inIndexExist(){
            File directory=new File(indexDir);
            if(0<directory.listFiles().length){
             return true;
            }else{
             return false;
            }
           }
          }


          搜索功能的實現
          package com.rain.search;

          import java.io.IOException;
          import java.util.ArrayList;
          import java.util.List;

          import org.apache.lucene.analysis.Analyzer;
          import org.apache.lucene.analysis.standard.StandardAnalyzer;
          import org.apache.lucene.queryParser.ParseException;
          import org.apache.lucene.queryParser.QueryParser;
          import org.apache.lucene.search.Hits;
          import org.apache.lucene.search.IndexSearcher;
          import org.apache.lucene.search.Query;

          import com.rain.index.IndexManager;

          public class SearchManager {
           private String searchWord;
           private IndexManager indexManager;
           private Analyzer analyzer;
           
           public SearchManager(String searchWord){
            this.searchWord=searchWord;
            this.indexManager=new IndexManager();
            this.analyzer=new StandardAnalyzer();
           }
           
           /**
               * do search
               */
           public List search(){
            List searchResult=new ArrayList();
            if(false==indexManager.inIndexExist()){
             try{
              if(false==indexManager.creatIndex()){
               return searchResult;
              }
             }catch(IOException e){
              e.printStackTrace();
              return searchResult;
             }
            }
            IndexSearcher indexSearcher=null;
            try{
             indexSearcher=new IndexSearcher(indexManager.getIndexDir());
            }catch(IOException e){
             e.printStackTrace();
            }
            QueryParser queryParser=new QueryParser("content",analyzer);
            Query query=null;
            try{
             query=queryParser.parse(searchWord);
            }catch(ParseException e){
             e.printStackTrace();
            }
            if(null!=query&&null!=indexSearcher){
             try{
              Hits hits=indexSearcher.search(query);
              for(int i=0;i<hits.length();i++){
               SearchResultBean resultBean=new SearchResultBean();
               resultBean.setHtmlPath(hits.doc(i).get("path"));
               resultBean.setHtmlTitle(hits.doc(i).get("title"));
               searchResult.add(resultBean);
              }
             }catch(IOException e){
              e.printStackTrace();
             }
            }
             return searchResult;
           }

          }


          請求管理器的實現

          package com.rain.servlet;

          import java.io.IOException;
          import java.util.List;

          import javax.servlet.RequestDispatcher;
          import javax.servlet.ServletException;
          import javax.servlet.http.HttpServlet;
          import javax.servlet.http.HttpServletRequest;
          import javax.servlet.http.HttpServletResponse;

          import com.rain.search.SearchManager;

          /**
           * @author zhourui
           * 2007-1-28
           */
          public class SearchController extends HttpServlet {
           private static final long serialVersionUID=1L;
           
           /* (non-Javadoc)
            * @see javax.servlet.http.HttpServlet#doPost(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse)
            */
           @Override
           protected void doPost(HttpServletRequest arg0, HttpServletResponse arg1) throws ServletException, IOException {
            // TODO Auto-generated method stub
            String searchWord=arg0.getParameter("searchWord");
            SearchManager searchManager=new SearchManager(searchWord);
            List searchResult=null;
            searchResult=searchManager.search();
            RequestDispatcher dispatcher=arg0.getRequestDispatcher("search.jsp");
            arg0.setAttribute("searchResult",searchResult);
                  dispatcher.forward(arg0, arg1);
           }
           
          }




          向Web服務器提交搜索請求
          <!-- Search form: POSTs the text field "searchWord" to the relative URL
               "SearchController" (presumably mapped to the SearchController
               servlet - confirm against the deployment descriptor). -->
          <form action="SearchController" method="post">
                <table>
                  <tr>
                    <td colspan="3">
                      SearchWord:<input type="text" name="searchWord" id="searchWord" size="40">
                      <input id="doSearch" type="submit" value="search">
                    </td>
                  </tr>
                </table>
              </form>
          顯示搜索結果
           <table class="result">
                <%
                  List searchResult=(List)request.getAttribute("searchResult");
                  int resultCount=0;
                  if(null!=searchResult){
                   resultCount=searchResult.size();
                  }
                  for(int i=0;i<resultCount;i++){
                   SearchResultBean resultBean=(SearchResultBean)searchResult.get(i);
                   String title=resultBean.getHtmlTitle();
                   String path=resultBean.getHtmlPath();
                   %>
                   <tr>
                     <td class="title"><h3><a href="<%=path%>"><%=title%></a></h3></td>
                   </tr>
                   <%
                  }
                %>
              </table>
          posted on 2007-01-29 09:57 周銳 閱讀(840) 評論(0)  編輯  收藏 所屬分類: Lucene
          主站蜘蛛池模板: 思南县| 泽州县| 荃湾区| 江源县| 乳山市| 敦煌市| 永顺县| 龙南县| 平阴县| 武定县| 黄石市| 雅江县| 渭源县| 西青区| 玛曲县| 万州区| 洛浦县| 鄂伦春自治旗| 平乡县| 浦城县| 屏边| 绿春县| 宜春市| 清水河县| 江门市| 惠安县| 多伦县| 泾阳县| 安丘市| 鄂尔多斯市| 鄢陵县| 鄯善县| 吴川市| 赞皇县| 太康县| 兴文县| 楚雄市| 武功县| 镇沅| 鸡泽县| 莫力|