posts - 8, comments - 0, trackbacks - 0, articles - 0

          索引讀寫器_FSDirectory模式

          Posted on 2008-06-01 05:11 HanLab 閱讀(232) 評論(0)  編輯  收藏 所屬分類: Lucene

           

          package indexer;
          //package ch2.lucenedemo.process;

          import java.io.File;

          import java.io.IOException;
          import java.util.ArrayList;
          import java.util.logging.Level;
          import java.util.logging.Logger;
          import jeasy.analysis.MMAnalyzer;

          import org.apache.lucene.document.Document;
          import org.apache.lucene.document.Field;
          import org.apache.lucene.index.IndexWriter;
          import org.apache.lucene.store.Directory;
          import org.apache.lucene.store.FSDirectory;
          import parameters.Param;
          import pretreat.FileControl;

          public class IndexOnFS implements IIndexTool{
              
          // 成員變量存儲創建的索引文件存放的位置
              private String INDEX_STORE_PATH = Param.INDEX_STORE_PATH;
                  
          //建立索引的目標文件
                  private String INDEX_WANTED_PATH = "e:\\";
                  
          //目錄數組總數
                  private int NumOfDir = 0;
                  
          //存放根目錄下的所有子目錄
                  private ArrayList<String> DirList = new ArrayList<String>();
                  
          //地址映射
                  private Directory dir = null;
                  
                  
          private IndexWriter writer;
                  
                  
          public IndexOnFS(String path){
                          
          try {
                              dir 
          = FSDirectory.getDirectory(INDEX_STORE_PATH);
                          } 
          catch (IOException ex) {
                              Logger.getLogger(IndexOnFS.
          class.getName()).log(Level.SEVERE, null, ex);
                          }
                          INDEX_WANTED_PATH 
          = path;
                          makeSegments();
                          searchDirectorys(path);
                  }

                  
          //建立索引之前遍歷所有目錄并存放,這是為了迎合IndexWriter的同步機制
                  public void searchDirectorys(String rootDir){
                  
                          File rootfile 
          = new File(rootDir);
                          File[] files 
          = rootfile.listFiles();
                          
          if(files!=null)
                          
          for (int i = 0; i < files.length; i++){
                              
          if(files[i].isDirectory()){
                                 DirList.add(files[i].getPath());
                                 searchDirectorys(files[i].getPath()); 
                              }
                          }
                  }
                  
          public void printAllDirectorys(){
                          
          for(int i = 0;i<DirList.size();i++)
                                 System.out.println(DirList.get(i));
                  }
                  
          public void createIndexs() {
                          createIndex(INDEX_WANTED_PATH);
                          
          for(int k = 0;k<DirList.size();k++)
                              createIndex(DirList.get(k));
                  }
                  
          public Document preIndexWrite(File file){
                          
          // 創建一個新的Document
                      Document doc = new Document();
                      
          // 文件名對應的Field
                      Field field = new Field("filename", file.getName(), 
                                           Field.Store.YES, Field.Index.TOKENIZED); 
                  doc.add(field);
                  
          // 文件內容對應的Filed
                  field = new Field("content", FileControl.fileToString(file),//轉到控制器
                             Field.Store.NO, Field.Index.TOKENIZED);
                  doc.add(field);
                          
          //文件路徑對應的Filed
                          field = new Field("filepath", file.getPath(), 
                                     Field.Store.YES, Field.Index.TOKENIZED); 
                  doc.add(field);
                          
                          
          return doc;
                  }
              
          /*單目錄創建索引*/
              
          public void createIndex(String inputDir) {
                  
          try {
                      
          /*MMAnalyzer作為分詞工具創建一個IndexWriter*/
                          writer 
          = new IndexWriter(dir,new MMAnalyzer(), false); /*第一次創建索引時為true*/
                      File filesDir 
          = new File(inputDir);
                      
          /*取得所有需要建立索引的文件數組*/
                      File[] files 
          = filesDir.listFiles();
                      
          /*遍歷數組*/
                                  
          if(files!=null)
                      
          for (int i = 0; i < files.length; i++) { 
                                        
          /*判斷是否為文件*/
                                        
          if(files[i].isFile()){ 
                              
          /*把Document加入IndexWriter*/
                              writer.addDocument(preIndexWrite(files[i]));  
                                                  System.out.println( files[i].getPath());
                                  }
                                              } 
                                  writer.optimize(); 
          /*索引優化*/

                  } 
          catch (Exception e) { e.printStackTrace(); }
                          
                          
          finally
                              
          try{writer.close();
                              }
          catch(Exception ee){ ee.printStackTrace(); }
                          }

              }
                  
          //初始化空索引庫
                  public void makeSegments(){
                      
          if(new File(INDEX_STORE_PATH).list().length==0){
                          
          try {
                              IndexWriter iw 
          = new IndexWriter(dir, new MMAnalyzer(), true);
                              writer.addDocument(preIndexWrite(
          new File(Param.INITFILE_PATH)));
                          } 
          catch (Exception ex) { ex.printStackTrace(); } 
                          
                          
          finally
                                
          try{writer.close();
                                }
          catch(Exception ee){ ee.printStackTrace(); }
                            }
                      }
                  }
                  
          public ArrayList getDirs(){
                      
          return this.DirList;
                  }
                  
                  
          public void startIndex() {
                      makeSegments();
                      createIndexs();
                  }

              
          public static void main(String[] args) {
                  IndexOnFS processor 
          = new IndexOnFS("e:\\畢業論文");
                          
          //processor.searchDirectorys("e:\\1");
                          processor.startIndex();
              }


          }

          只有注冊用戶登錄后才能發表評論。


          網站導航:
           
          主站蜘蛛池模板: 沧州市| 彰化市| 陈巴尔虎旗| 新干县| 曲沃县| 云南省| 迁西县| 沂水县| 田阳县| 修文县| 威信县| 页游| 灵丘县| 辽阳县| 宣化县| 伽师县| 香格里拉县| 静安区| 大兴区| 深水埗区| 富川| 简阳市| 睢宁县| 贵德县| 商水县| 邮箱| 荆州市| 和龙市| 汨罗市| 巴马| 邳州市| 蓬安县| 威宁| 舟山市| 屏山县| 剑河县| 惠水县| 康马县| 都匀市| 江油市| 简阳市|