posts - 8, comments - 0, trackbacks - 0, articles - 0

          置頂隨筆

          這是我做畢業設計時畫的,感覺還可以就放過來共享,有什麼不足的地方,請多指點。

          看Lucene代碼也算是一種享受,根據下圖可以先看看關鍵類的代碼。
          Lucene2.3.1發布不久,源代碼下載地址:http://apache.mirror.phpchina.com/lucene/java/


           

           

           org.apache.lucene.search/

           搜索入口

           org.apache.lucene.index/

           索引入口

           org.apache.lucene.analysis/

           語言分析器

           org.apache.lucene.queryParser/

          查詢分析器

           org.apache.lucene.document/

           存儲結構

           org.apache.lucene.store/ 

           底層IO/存儲結構

           org.apache.lucene.util/

           一些公用的數據結構


           

           

          posted @ 2008-04-06 17:02 HanLab 閱讀(417) | 評論 (0)編輯 收藏

          2008年6月1日

           

          package indexer;
          //package ch2.lucenedemo.process;

          import java.io.File;

          import java.io.IOException;
          import java.util.ArrayList;
          import java.util.logging.Level;
          import java.util.logging.Logger;
          import jeasy.analysis.MMAnalyzer;

          import org.apache.lucene.document.Document;
          import org.apache.lucene.document.Field;
          import org.apache.lucene.index.IndexWriter;
          import org.apache.lucene.store.Directory;
          import org.apache.lucene.store.FSDirectory;
          import parameters.Param;
          import pretreat.FileControl;

          public class IndexOnFS implements IIndexTool{
              
          // 成員變量存儲創建的索引文件存放的位置
              private String INDEX_STORE_PATH = Param.INDEX_STORE_PATH;
                  
          //建立索引的目標文件
                  private String INDEX_WANTED_PATH = "e:\\";
                  
          //目錄數組總數
                  private int NumOfDir = 0;
                  
          //存放根目錄下的所有子目錄
                  private ArrayList<String> DirList = new ArrayList<String>();
                  
          //地址映射
                  private Directory dir = null;
                  
                  
          private IndexWriter writer;
                  
                  
          public IndexOnFS(String path){
                          
          try {
                              dir 
          = FSDirectory.getDirectory(INDEX_STORE_PATH);
                          } 
          catch (IOException ex) {
                              Logger.getLogger(IndexOnFS.
          class.getName()).log(Level.SEVERE, null, ex);
                          }
                          INDEX_WANTED_PATH 
          = path;
                          makeSegments();
                          searchDirectorys(path);
                  }

                  
          //建立索引之前遍歷所有目錄并存放,這是為了迎合IndexWriter的同步機制
                  public void searchDirectorys(String rootDir){
                  
                          File rootfile 
          = new File(rootDir);
                          File[] files 
          = rootfile.listFiles();
                          
          if(files!=null)
                          
          for (int i = 0; i < files.length; i++){
                              
          if(files[i].isDirectory()){
                                 DirList.add(files[i].getPath());
                                 searchDirectorys(files[i].getPath()); 
                              }
                          }
                  }
                  
          public void printAllDirectorys(){
                          
          for(int i = 0;i<DirList.size();i++)
                                 System.out.println(DirList.get(i));
                  }
                  
          public void createIndexs() {
                          createIndex(INDEX_WANTED_PATH);
                          
          for(int k = 0;k<DirList.size();k++)
                              createIndex(DirList.get(k));
                  }
                  
          public Document preIndexWrite(File file){
                          
          // 創建一個新的Document
                      Document doc = new Document();
                      
          // 文件名對應的Field
                      Field field = new Field("filename", file.getName(), 
                                           Field.Store.YES, Field.Index.TOKENIZED); 
                  doc.add(field);
                  
          // 文件內容對應的Filed
                  field = new Field("content", FileControl.fileToString(file),//轉到控制器
                             Field.Store.NO, Field.Index.TOKENIZED);
                  doc.add(field);
                          
          //文件路徑對應的Filed
                          field = new Field("filepath", file.getPath(), 
                                     Field.Store.YES, Field.Index.TOKENIZED); 
                  doc.add(field);
                          
                          
          return doc;
                  }
              
          /*單目錄創建索引*/
              
          public void createIndex(String inputDir) {
                  
          try {
                      
          /*MMAnalyzer作為分詞工具創建一個IndexWriter*/
                          writer 
          = new IndexWriter(dir,new MMAnalyzer(), false); /*第一次創建索引時為true*/
                      File filesDir 
          = new File(inputDir);
                      
          /*取得所有需要建立索引的文件數組*/
                      File[] files 
          = filesDir.listFiles();
                      
          /*遍歷數組*/
                                  
          if(files!=null)
                      
          for (int i = 0; i < files.length; i++) { 
                                        
          /*判斷是否為文件*/
                                        
          if(files[i].isFile()){ 
                              
          /*把Document加入IndexWriter*/
                              writer.addDocument(preIndexWrite(files[i]));  
                                                  System.out.println( files[i].getPath());
                                  }
                                              } 
                                  writer.optimize(); 
          /*索引優化*/

                  } 
          catch (Exception e) { e.printStackTrace(); }
                          
                          
          finally
                              
          try{writer.close();
                              }
          catch(Exception ee){ ee.printStackTrace(); }
                          }

              }
                  
          //初始化空索引庫
                  public void makeSegments(){
                      
          if(new File(INDEX_STORE_PATH).list().length==0){
                          
          try {
                              IndexWriter iw 
          = new IndexWriter(dir, new MMAnalyzer(), true);
                              writer.addDocument(preIndexWrite(
          new File(Param.INITFILE_PATH)));
                          } 
          catch (Exception ex) { ex.printStackTrace(); } 
                          
                          
          finally
                                
          try{writer.close();
                                }
          catch(Exception ee){ ee.printStackTrace(); }
                            }
                      }
                  }
                  
          public ArrayList getDirs(){
                      
          return this.DirList;
                  }
                  
                  
          public void startIndex() {
                      makeSegments();
                      createIndexs();
                  }

              
          public static void main(String[] args) {
                  IndexOnFS processor 
          = new IndexOnFS("e:\\畢業論文");
                          
          //processor.searchDirectorys("e:\\1");
                          processor.startIndex();
              }


          }

          posted @ 2008-06-01 05:11 HanLab 閱讀(232) | 評論 (0)編輯 收藏

          2008年4月6日

          這是我做畢業設計時畫的,感覺還可以就放過來共享,有什麼不足的地方,請多指點。

          看Lucene代碼也算是一種享受,根據下圖可以先看看關鍵類的代碼。
          Lucene2.3.1發布不久,源代碼下載地址:http://apache.mirror.phpchina.com/lucene/java/


           

           

           org.apache.lucene.search/

           搜索入口

           org.apache.lucene.index/

           索引入口

           org.apache.lucene.analysis/

           語言分析器

           org.apache.lucene.queryParser/

          查詢分析器

           org.apache.lucene.document/

           存儲結構

           org.apache.lucene.store/ 

           底層IO/存儲結構

           org.apache.lucene.util/

           一些公用的數據結構


           

           

          posted @ 2008-04-06 17:02 HanLab 閱讀(417) | 評論 (0)編輯 收藏

          2008年4月4日

             /*首先利用PL/SQL創建一個過程

          CREATE OR REPLACE PROCEDURE INS_FILES(P1 IN VARCHAR2,P2 IN VARCHAR2,P3 IN number)
          AS
          BEGIN
               INSERT INTO tb_files(fid,fname,fpath,indexed) VALUES (SEQ_fid.nextval,P1,P2,P3);
          END INS_FILES;*/


           

           public static void recursion(String path){
            
                  File file=new File(path);
                  File[] files=file.listFiles();
                  try{
                        for(int i=0;i<files.length;i++)
                          { 
                             if(files[i].isFile())
                                   {   
                                        conn.callablestatement.setString(1,files[i].getName().toString());
                                        conn.callablestatement.setString(2,files[i].getPath().toString());
                                        conn.callablestatement.setInt(3,0);
                                        conn.executeCall();
                                        count++;
                                    }
                             else if(files[i].isDirectory())
                                       recursion(files[i].getAbsolutePath());//對于目錄進一步檢索
                            
                           }
                       }catch(Exception e){ e.printStackTrace(); }
                }
              public void storeFilesToDB(String rootpath){
                 conn = new JDBCConnection();
                 conn.setCallableStatement(SQL.call_ins_files);
                 recursion(rootpath);
                 conn.close();
                 System.out.println("共有"+count+"個文件.");
              }

          posted @ 2008-04-04 06:36 HanLab 閱讀(895) | 評論 (0)編輯 收藏

          public static String Procedure_Ins_path(){
              String dir = "e:\\";
              IndexProcesser p;
              p = new IndexProcesser();
              p.searchDirectorys(dir,true);
              String s0 ="DROP SEQUENCE  SEQ_pid;" +
                         " CREATE SEQUENCE  SEQ_pid INCREMENT BY 1 START WITH 1 NOCYCLE; ";
              String s1 = "declare " +
                         "type path_varray is varray(2000) of varchar2(200); " +
                         "p_v path_varray:=path_varray (";
              String s3 =  "); begin " +
                                "for i in 1..2000 loop " +
                                   "if p_v(i) = 'null' then " +
                                       "p_v(i):='null'; " +
                                   "else " +
                                       "INSERT INTO tb_allpath (ID ,path) VALUES (SEQ_PID.NEXTVAL,p_v(i));"+
                                   "end if; " +
                                 "end loop;" +
                              "commit;" +
                              "end;";
              String s2 ="";
                     for(int i = 0;i<1999;i++)
                         s2 = s2+"'"+p.Directorys[i]+"',";
                     s2 = s2 +"'"+ p.Directorys[1999]+"'";
                     return s1+s2+s3;
          }

          posted @ 2008-04-04 06:30 HanLab 閱讀(908) | 評論 (0)編輯 收藏

          主站蜘蛛池模板: 安宁市| 应城市| 黎川县| 界首市| 晋中市| 偃师市| 乐东| 辽阳县| 弋阳县| 龙里县| 收藏| 琼中| 康马县| 城口县| 会同县| 汤阴县| 包头市| 屯门区| 武隆县| 武鸣县| 平顶山市| 明溪县| 阳曲县| 清镇市| 五河县| 高阳县| 梁河县| 宣城市| 禄丰县| 饶河县| 聂拉木县| 信宜市| 阿合奇县| 安义县| 腾冲县| 甘泉县| 达尔| 滦南县| 香港| 德令哈市| 浦城县|