數據加載中……
          Lucene_對文件建立索引

          package org.apache.lucene.demo;

          /**
           * Licensed to the Apache Software Foundation (ASF) under one or more
           * contributor license agreements.  See the NOTICE file distributed with
           * this work for additional information regarding copyright ownership.
           * The ASF licenses this file to You under the Apache License, Version 2.0
           * (the "License"); you may not use this file except in compliance with
           * the License.  You may obtain a copy of the License at
           *
           *     http://www.apache.org/licenses/LICENSE-2.0
           *
           * Unless required by applicable law or agreed to in writing, software
           * distributed under the License is distributed on an "AS IS" BASIS,
           * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
           * See the License for the specific language governing permissions and
           * limitations under the License.
           */

          import org.apache.lucene.analysis.standard.StandardAnalyzer;
          import org.apache.lucene.index.IndexWriter;

          import java.io.File;
          import java.io.FileNotFoundException;
          import java.io.IOException;
          import java.util.Date;

          /** Index all text files under a directory. */
          public class IndexFiles {
           
            private IndexFiles() {}

            static final File INDEX_DIR = new File("index");//索引止錄。建在當(dāng)前目錄的/index下
           
            /** Index all text files under a directory. */
            public static void main(String[] args) {//args[0] 文件路徑.  main 方法:對args[0]指定的文件路徑下的所有文件建立索引。
            final File docDir = new File(args[0]);
            if (!docDir.exists() || !docDir.canRead()) {
             System.out .println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path");
             System.exit(1);
            }
             
              Date start = new Date();
              try {
             IndexWriter writer = new IndexWriter(INDEX_DIR, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
             //IndexWriter負責(zé)創(chuàng)建和維護索引
             //IndexWriter(String path, Analyzer a, boolean create, IndexWriter.MaxFieldLength mfl)
             //path:路徑 Analyzer:文本分析器  create:是否創(chuàng)建新索引  mfl 最大field數(shù)量
             System.out.println("Indexing to directory '" + INDEX_DIR + "'...");
             indexDocs(writer, docDir);
             System.out.println("Optimizing...");
             writer.optimize();//優(yōu)化索引
             writer.close();//關(guān)閉
             Date end = new Date();
             System.out.println(end.getTime() - start.getTime() + " total milliseconds");
            } catch (IOException e) {
             System.out.println(" caught a " + e.getClass()
               + "\n with message: " + e.getMessage());
            }
            }

            static void indexDocs(IndexWriter writer, File file) throws IOException {
            // do not try to index files that cannot be read
            if (file.canRead()) {
             if (file.isDirectory()) {
              String[] files = file.list();
              // an IO error could occur
              if (files != null) {
               for (int i = 0; i < files.length; i++) {
                indexDocs(writer, new File(file, files[i]));
               }
              }
             } else {
              System.out.println("adding " + file);
              try {
               writer.addDocument(FileDocument.Document(file));
              }
              // at least on windows, some temporary files raise this
              // exception with an "access denied" message
              // checking if the file can be read doesn't help
              catch (FileNotFoundException fnfe) {
               ;
              }
             }
            }
           }
          }



          package org.apache.lucene.demo;

          /**
           * Licensed to the Apache Software Foundation (ASF) under one or more
           * contributor license agreements.  See the NOTICE file distributed with
           * this work for additional information regarding copyright ownership.
           * The ASF licenses this file to You under the Apache License, Version 2.0
           * (the "License"); you may not use this file except in compliance with
           * the License.  You may obtain a copy of the License at
           *
           *     http://www.apache.org/licenses/LICENSE-2.0
           *
           * Unless required by applicable law or agreed to in writing, software
           * distributed under the License is distributed on an "AS IS" BASIS,
           * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
           * See the License for the specific language governing permissions and
           * limitations under the License.
           */

          import java.io.File;
          import java.io.FileReader;

          import org.apache.lucene.document.DateTools;
          import org.apache.lucene.document.Document;
          import org.apache.lucene.document.Field;

          /** A utility for making Lucene Documents from a File. */

          public class FileDocument {

            public static Document Document(File f)
                 throws java.io.FileNotFoundException {
           
              // make a new, empty document
              Document doc = new Document();
             
              doc.add(new Field("contents", new FileReader(f)));
              doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
              doc.add(new Field("modified", DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),Field.Store.YES, Field.Index.ANALYZED));
              doc.add(new Field("name",f.getName(),Field.Store.YES,Field.Index.NOT_ANALYZED));
              //Field

              // return the document
              return doc;
            }

            private FileDocument() {}
          }
             


          posted on 2009-08-12 15:38 R99 閱讀(933) 評論(0)  編輯  收藏 所屬分類: Lucene


          只有注冊用戶登錄后才能發表評論。


          網站導航:
           
          主站蜘蛛池模板: 通许县| 四川省| 曲沃县| 龙海市| 东兴市| 崇仁县| 天津市| 蓬莱市| 花莲市| 即墨市| 留坝县| 磴口县| 武宁县| 丁青县| 城步| 赣榆县| 象州县| 鄢陵县| 富宁县| 尼玛县| 湖南省| 黄平县| 叙永县| 米泉市| 黄龙县| 青冈县| 河津市| 乌什县| 青海省| 淳化县| 孟州市| 沈阳市| 大姚县| 绥中县| 泌阳县| 昌平区| 崇义县| 麻城市| 平罗县| 江西省| 巴中市|