posts - 8, comments - 0, trackbacks - 0, articles - 0

          2008年6月1日

           

          package indexer;
          //package ch2.lucenedemo.process;

          import java.io.File;

          import java.io.IOException;
          import java.util.ArrayList;
          import java.util.logging.Level;
          import java.util.logging.Logger;
          import jeasy.analysis.MMAnalyzer;

          import org.apache.lucene.document.Document;
          import org.apache.lucene.document.Field;
          import org.apache.lucene.index.IndexWriter;
          import org.apache.lucene.store.Directory;
          import org.apache.lucene.store.FSDirectory;
          import parameters.Param;
          import pretreat.FileControl;

          public class IndexOnFS implements IIndexTool{
              
          // 成員變量存儲創(chuàng)建的索引文件存放的位置
              private String INDEX_STORE_PATH = Param.INDEX_STORE_PATH;
                  
          //建立索引的目標(biāo)文件
                  private String INDEX_WANTED_PATH = "e:\\";
                  
          //目錄數(shù)組總數(shù)
                  private int NumOfDir = 0;
                  
          //存放根目錄下的所有子目錄
                  private ArrayList<String> DirList = new ArrayList<String>();
                  
          //地址映射
                  private Directory dir = null;
                  
                  
          private IndexWriter writer;
                  
                  
          public IndexOnFS(String path){
                          
          try {
                              dir 
          = FSDirectory.getDirectory(INDEX_STORE_PATH);
                          } 
          catch (IOException ex) {
                              Logger.getLogger(IndexOnFS.
          class.getName()).log(Level.SEVERE, null, ex);
                          }
                          INDEX_WANTED_PATH 
          = path;
                          makeSegments();
                          searchDirectorys(path);
                  }

                  
          //建立索引之前遍歷所有目錄并存放,這是為了迎合IndexWriter的同步機(jī)制
                  public void searchDirectorys(String rootDir){
                  
                          File rootfile 
          = new File(rootDir);
                          File[] files 
          = rootfile.listFiles();
                          
          if(files!=null)
                          
          for (int i = 0; i < files.length; i++){
                              
          if(files[i].isDirectory()){
                                 DirList.add(files[i].getPath());
                                 searchDirectorys(files[i].getPath()); 
                              }
                          }
                  }
                  
          public void printAllDirectorys(){
                          
          for(int i = 0;i<DirList.size();i++)
                                 System.out.println(DirList.get(i));
                  }
                  
          public void createIndexs() {
                          createIndex(INDEX_WANTED_PATH);
                          
          for(int k = 0;k<DirList.size();k++)
                              createIndex(DirList.get(k));
                  }
                  
          public Document preIndexWrite(File file){
                          
          // 創(chuàng)建一個(gè)新的Document
                      Document doc = new Document();
                      
          // 文件名對應(yīng)的Field
                      Field field = new Field("filename", file.getName(), 
                                           Field.Store.YES, Field.Index.TOKENIZED); 
                  doc.add(field);
                  
          // 文件內(nèi)容對應(yīng)的Filed
                  field = new Field("content", FileControl.fileToString(file),//轉(zhuǎn)到控制器
                             Field.Store.NO, Field.Index.TOKENIZED);
                  doc.add(field);
                          
          //文件路徑對應(yīng)的Filed
                          field = new Field("filepath", file.getPath(), 
                                     Field.Store.YES, Field.Index.TOKENIZED); 
                  doc.add(field);
                          
                          
          return doc;
                  }
              
          /*單目錄創(chuàng)建索引*/
              
          public void createIndex(String inputDir) {
                  
          try {
                      
          /*MMAnalyzer作為分詞工具創(chuàng)建一個(gè)IndexWriter*/
                          writer 
          = new IndexWriter(dir,new MMAnalyzer(), false); /*第一次創(chuàng)建索引時(shí)為true*/
                      File filesDir 
          = new File(inputDir);
                      
          /*取得所有需要建立索引的文件數(shù)組*/
                      File[] files 
          = filesDir.listFiles();
                      
          /*遍歷數(shù)組*/
                                  
          if(files!=null)
                      
          for (int i = 0; i < files.length; i++) { 
                                        
          /*判斷是否為文件*/
                                        
          if(files[i].isFile()){ 
                              
          /*把Document加入IndexWriter*/
                              writer.addDocument(preIndexWrite(files[i]));  
                                                  System.out.println( files[i].getPath());
                                  }
                                              } 
                                  writer.optimize(); 
          /*索引優(yōu)化*/

                  } 
          catch (Exception e) { e.printStackTrace(); }
                          
                          
          finally
                              
          try{writer.close();
                              }
          catch(Exception ee){ ee.printStackTrace(); }
                          }

              }
                  
          //初始化空索引庫
                  public void makeSegments(){
                      
          if(new File(INDEX_STORE_PATH).list().length==0){
                          
          try {
                              IndexWriter iw 
          = new IndexWriter(dir, new MMAnalyzer(), true);
                              writer.addDocument(preIndexWrite(
          new File(Param.INITFILE_PATH)));
                          } 
          catch (Exception ex) { ex.printStackTrace(); } 
                          
                          
          finally
                                
          try{writer.close();
                                }
          catch(Exception ee){ ee.printStackTrace(); }
                            }
                      }
                  }
                  
          public ArrayList getDirs(){
                      
          return this.DirList;
                  }
                  
                  
          public void startIndex() {
                      makeSegments();
                      createIndexs();
                  }

              
          public static void main(String[] args) {
                  IndexOnFS processor 
          = new IndexOnFS("e:\\畢業(yè)論文");
                          
          //processor.searchDirectorys("e:\\1");
                          processor.startIndex();
              }


          }

          posted @ 2008-06-01 05:11 HanLab 閱讀(231) | 評論 (0)編輯 收藏

          主站蜘蛛池模板: 屏东县| 武威市| 原阳县| 集贤县| 太和县| 绵阳市| 集安市| 临清市| 桐梓县| 西昌市| 新巴尔虎右旗| 玉溪市| 广安市| 乐亭县| 治多县| 开鲁县| 塔城市| 高雄县| 宝鸡市| 大方县| 吉安县| 平遥县| 剑川县| 祁门县| 长乐市| 泽普县| 湛江市| 棋牌| 隆德县| 呼玛县| 固始县| 互助| 昌邑市| 额济纳旗| 吐鲁番市| 阿尔山市| 平顶山市| 平南县| 晋中市| 墨江| 扬中市|