posts - 431,  comments - 344,  trackbacks - 0
          公告
           Don't Repeat Yourself
          座右銘:you can lose your money, you can spend all of it, and if you work hard you get it all back. But if you waste your time, you're never gonna get it back.
          公告本博客在此聲明部分文章為轉摘,只做資料收集使用。


          微信: szhourui
          QQ:109450684
          Email
          lsi.zhourui@gmail.com
          <2007年1月>
          31123456
          78910111213
          14151617181920
          21222324252627
          28293031123
          45678910

          留言簿(15)

          隨筆分類(1019)

          文章分類(3)

          文章檔案(21)

          收藏夾

          Link

          好友博客

          最新隨筆

          搜索

          •  

          積分與排名

          • 積分 - 860072
          • 排名 - 44

          最新評論

          閱讀排行榜

          HTML 解析器
          package com.rain.util;

          import java.io.FileInputStream;
          import java.io.FileNotFoundException;
          import java.io.IOException;
          import java.io.InputStream;
          import java.io.InputStreamReader;
          import java.io.Reader;
          import java.io.UnsupportedEncodingException;

          import org.apache.lucene.demo.html.HTMLParser;

          public class HTMLDocParser {

           private String htmlPath;
           private HTMLParser htmlParser;
           
           public HTMLDocParser(String htmlPath){
            this.htmlPath=htmlPath;
            initHtmlParser();
           }
           public void initHtmlParser(){
            InputStream inputStream=null;
            try{
             inputStream=new FileInputStream(htmlPath);
            }catch(FileNotFoundException e){
             e.printStackTrace();
            }
            if(null!=inputStream){
             try{
              htmlParser=new HTMLParser(new InputStreamReader(inputStream,"utf-8"));
             }catch(UnsupportedEncodingException e){
              e.printStackTrace();
             }
            }
           }
           public String getTitle(){
            if(null!=htmlParser){
             try{
              return htmlParser.getTitle();
             }catch(IOException e){
              e.printStackTrace();
             }catch(InterruptedException e){
              e.printStackTrace();
             }
            }
            return "";
           }
           public Reader getContent(){
            if(null!=htmlParser){
             try{
              return htmlParser.getReader();
             }catch(IOException e){
              e.printStackTrace();
             }
            }
            return null;
           }
           public String getPath(){
            return this.htmlPath;
           }
          }


          描述搜索結果的結構實體Bean
          package com.rain.search;

          /**
           * Value holder for a single search hit: the path of the matched HTML file
           * and the title stored for it. Both properties start out as {@code null}
           * and are plain read/write fields with no validation.
           */
          public class SearchResultBean {

              /** Title of the matched document. */
              private String htmlTitle;

              /** Path of the matched document. */
              private String htmlPath;

              /** @param htmlTitle title to store for this hit */
              public void setHtmlTitle(String htmlTitle) {
                  this.htmlTitle = htmlTitle;
              }

              /** @return the stored title, or {@code null} if none was set */
              public String getHtmlTitle() {
                  return this.htmlTitle;
              }

              /** @param htmlPath path to store for this hit */
              public void setHtmlPath(String htmlPath) {
                  this.htmlPath = htmlPath;
              }

              /** @return the stored path, or {@code null} if none was set */
              public String getHtmlPath() {
                  return this.htmlPath;
              }
          }


          索引子系統的實現

          package com.rain.index;

          import java.io.File;
          import java.io.IOException;
          import java.io.Reader;

          import org.apache.lucene.analysis.Analyzer;
          import org.apache.lucene.analysis.standard.StandardAnalyzer;
          import org.apache.lucene.document.Document;
          import org.apache.lucene.index.IndexWriter;
          import org.apache.lucene.store.Directory;
          import org.apache.lucene.store.FSDirectory;
          import org.apache.lucene.document.Field;

          import com.rain.util.HTMLDocParser;

          public class IndexManager {
           
           //the directory that stores HTML files
           private final String dataDir="E:\\dataDir";
           
           //the directory that is used to store a Lucene index
           private final String indexDir="E:\\indexDir";
           
           public boolean creatIndex()throws IOException{
            if(true==inIndexExist()){
             return true;
            }
            File dir=new File(dataDir);
            if(!dir.exists()){
             return false;
            }
            File[] htmls=dir.listFiles();
            Directory fsDirectory=FSDirectory.getDirectory(indexDir,true);
            Analyzer analyzer=new StandardAnalyzer();
            IndexWriter indexWriter=new IndexWriter(fsDirectory,analyzer,true);
            for(int i=0;i<htmls.length;i++){
             String htmlPath=htmls[i].getAbsolutePath();
             if(htmlPath.endsWith(".html")||htmlPath.endsWith("htm")){
              addDocument(htmlPath,indexWriter);
             }
            }
            indexWriter.optimize();
            indexWriter.close();
            return true;
           }
           
           public void addDocument(String htmlPath,IndexWriter indexWriter){
            HTMLDocParser htmlParser=new HTMLDocParser(htmlPath);
            String path=htmlParser.getPath();
            String title=htmlParser.getTitle();
            Reader content=htmlParser.getContent();
            
            Document document=new Document();
            document.add(new Field("path",path,Field.Store.YES,Field.Index.NO));
            document.add(new Field("title",title,Field.Store.YES,Field.Index.TOKENIZED));
               document.add(new Field("content",content));
               try{
                indexWriter.addDocument(document);
               }catch(IOException e){
                e.printStackTrace();
               }
           }
           public String getDataDir(){
            return this.dataDir;
           }
           
           public String getIndexDir(){
            return this.indexDir;
           }
           
           public boolean inIndexExist(){
            File directory=new File(indexDir);
            if(0<directory.listFiles().length){
             return true;
            }else{
             return false;
            }
           }
          }


          搜索功能的實現
          package com.rain.search;

          import java.io.IOException;
          import java.util.ArrayList;
          import java.util.List;

          import org.apache.lucene.analysis.Analyzer;
          import org.apache.lucene.analysis.standard.StandardAnalyzer;
          import org.apache.lucene.queryParser.ParseException;
          import org.apache.lucene.queryParser.QueryParser;
          import org.apache.lucene.search.Hits;
          import org.apache.lucene.search.IndexSearcher;
          import org.apache.lucene.search.Query;

          import com.rain.index.IndexManager;

          public class SearchManager {
           private String searchWord;
           private IndexManager indexManager;
           private Analyzer analyzer;
           
           public SearchManager(String searchWord){
            this.searchWord=searchWord;
            this.indexManager=new IndexManager();
            this.analyzer=new StandardAnalyzer();
           }
           
           /**
               * do search
               */
           public List search(){
            List searchResult=new ArrayList();
            if(false==indexManager.inIndexExist()){
             try{
              if(false==indexManager.creatIndex()){
               return searchResult;
              }
             }catch(IOException e){
              e.printStackTrace();
              return searchResult;
             }
            }
            IndexSearcher indexSearcher=null;
            try{
             indexSearcher=new IndexSearcher(indexManager.getIndexDir());
            }catch(IOException e){
             e.printStackTrace();
            }
            QueryParser queryParser=new QueryParser("content",analyzer);
            Query query=null;
            try{
             query=queryParser.parse(searchWord);
            }catch(ParseException e){
             e.printStackTrace();
            }
            if(null!=query&&null!=indexSearcher){
             try{
              Hits hits=indexSearcher.search(query);
              for(int i=0;i<hits.length();i++){
               SearchResultBean resultBean=new SearchResultBean();
               resultBean.setHtmlPath(hits.doc(i).get("path"));
               resultBean.setHtmlTitle(hits.doc(i).get("title"));
               searchResult.add(resultBean);
              }
             }catch(IOException e){
              e.printStackTrace();
             }
            }
             return searchResult;
           }

          }


          請求管理器的實現

          package com.rain.servlet;

          import java.io.IOException;
          import java.util.List;

          import javax.servlet.RequestDispatcher;
          import javax.servlet.ServletException;
          import javax.servlet.http.HttpServlet;
          import javax.servlet.http.HttpServletRequest;
          import javax.servlet.http.HttpServletResponse;

          import com.rain.search.SearchManager;

          /**
           * @author zhourui
           * 2007-1-28
           */
          public class SearchController extends HttpServlet {
           private static final long serialVersionUID=1L;
           
           /* (non-Javadoc)
            * @see javax.servlet.http.HttpServlet#doPost(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse)
            */
           @Override
           protected void doPost(HttpServletRequest arg0, HttpServletResponse arg1) throws ServletException, IOException {
            // TODO Auto-generated method stub
            String searchWord=arg0.getParameter("searchWord");
            SearchManager searchManager=new SearchManager(searchWord);
            List searchResult=null;
            searchResult=searchManager.search();
            RequestDispatcher dispatcher=arg0.getRequestDispatcher("search.jsp");
            arg0.setAttribute("searchResult",searchResult);
                  dispatcher.forward(arg0, arg1);
           }
           
          }




          向Web服務器提交搜索請求
          <!-- Search form: POSTs the text field "searchWord" to the SearchController URL. -->
          <form action="SearchController" method="post">
                <table>
                  <tr>
                    <td colspan="3">
                      SearchWord:<input type="text" name="searchWord" id="searchWord" size="40">
                      <input id="doSearch" type="submit" value="search">
                    </td>
                  </tr>
                </table>
              </form>
          顯示搜索結果
           <table class="result">
                <%
                  List searchResult=(List)request.getAttribute("searchResult");
                  int resultCount=0;
                  if(null!=searchResult){
                   resultCount=searchResult.size();
                  }
                  for(int i=0;i<resultCount;i++){
                   SearchResultBean resultBean=(SearchResultBean)searchResult.get(i);
                   String title=resultBean.getHtmlTitle();
                   String path=resultBean.getHtmlPath();
                   %>
                   <tr>
                     <td class="title"><h3><a href="<%=path%>"><%=title%></a></h3></td>
                   </tr>
                   <%
                  }
                %>
              </table>
          posted on 2007-01-29 09:57 周銳 閱讀(839) 評論(0)  編輯  收藏 所屬分類: Lucene

          只有注冊用戶登錄后才能發表評論。


          網站導航:
           
          主站蜘蛛池模板: 东台市| 四会市| 饶阳县| 蓬安县| 澄江县| 三门峡市| 江达县| 阳新县| 双鸭山市| 昌江| 靖西县| 濉溪县| 辽宁省| 奉节县| 龙门县| 楚雄市| 萨迦县| 昔阳县| 利川市| 白河县| 松桃| 宿松县| 开封县| 横峰县| 三穗县| 张家界市| 小金县| 达州市| 繁昌县| 洪雅县| 当涂县| 原平市| 馆陶县| 离岛区| 梁河县| 达孜县| 黑河市| 潼南县| 娱乐| 达州市| 伊吾县|