??xml version="1.0" encoding="utf-8" standalone="yes"?>
/**
 * Reads an Excel (.xls) or plain-text file line by line.
 */
import java.io.*;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import java.util.Date;
import org.apache.poi.hssf.usermodel.HSSFRow;
public class ExcelReader {
    // Reader used when the input is a plain-text (.txt) file.
    private BufferedReader reader = null;
    // File-name extension of the input ("txt" or "xls").
    private String filetype;
    // Raw binary stream of the input file.
    private InputStream is = null;
    // Index of the sheet currently being read.
    private int currSheet;
    // Index of the next row to read within the current sheet.
    private int currPosition;
    // Number of sheets in the workbook.
    private int numOfSheets;
    // POI workbook handle; only set for .xls input.
    HSSFWorkbook workbook = null;
    // Separator placed between cell values in a returned line.
    private static String EXCEL_LINE_DELIMITER = " ";
    // Last row number of the sheet most recently touched by readLine().
    public int rows = 0;

    public int getRows() {
        return rows;
    }

    /**
     * Opens the given file for reading; .txt and .xls are supported.
     *
     * @param inputfile path of the file to read
     * @throws IOException if no input file is specified
     * @throws Exception   if the extension is neither txt nor xls
     */
    public ExcelReader(String inputfile) throws IOException, Exception {
        if (inputfile == null || inputfile.trim().equals("")) {
            throw new IOException("no input file specified");
        }
        // The extension after the last dot decides how the file is parsed.
        this.filetype = inputfile.substring(inputfile.lastIndexOf(".") + 1);
        currPosition = 0; // start at the first row
        currSheet = 0;    // start at the first sheet
        is = new FileInputStream(inputfile);
        if (filetype.equalsIgnoreCase("txt")) {
            // Plain text: read through a BufferedReader.
            reader = new BufferedReader(new InputStreamReader(is));
        }
        else if (filetype.equalsIgnoreCase("xls")) {
            // Excel: parse the whole workbook with POI.
            workbook = new HSSFWorkbook(is);
            numOfSheets = workbook.getNumberOfSheets();
        }
        else {
            throw new Exception("File Type Not Supported");
        }
    }

    /**
     * Returns the next non-blank line of the file, or null once exhausted.
     * For .xls input the sheets are traversed in order.
     */
    public String readLine() throws IOException {
        if (filetype.equalsIgnoreCase("txt")) {
            String str = reader.readLine();
            // Skip blank lines; the null check fixes an NPE the old code
            // threw once end of file was reached.
            while (str != null && str.trim().equals("")) {
                str = reader.readLine();
            }
            return str;
        }
        else if (filetype.equalsIgnoreCase("xls")) {
            HSSFSheet sheet = workbook.getSheetAt(currSheet);
            rows = sheet.getLastRowNum();
            if (currPosition > sheet.getLastRowNum()) {
                // Current sheet exhausted: move on to the next non-empty sheet.
                currPosition = 0;
                while (currSheet < numOfSheets - 1) {
                    // Bug fix: the old code peeked at getSheetAt(currSheet + 1)
                    // without ever advancing currSheet, so after finishing the
                    // first sheet it kept re-reading row 0 of the next sheet
                    // forever.
                    currSheet++;
                    sheet = workbook.getSheetAt(currSheet);
                    rows = sheet.getLastRowNum();
                    if (currPosition > sheet.getLastRowNum()) {
                        continue; // empty sheet, keep looking
                    }
                    int row = currPosition;
                    currPosition++;
                    return getLine(sheet, row);
                }
                return null; // no sheets left
            }
            int row = currPosition;
            currPosition++;
            return getLine(sheet, row);
        }
        return null;
    }

    /**
     * Renders one sheet row as a single string, cells separated by
     * EXCEL_LINE_DELIMITER.
     */
    private String getLine(HSSFSheet sheet, int row) {
        HSSFRow rowline = sheet.getRow(row);
        // Bug fix: POI returns null for rows that were never written to.
        if (rowline == null) {
            return "";
        }
        StringBuffer buffer = new StringBuffer();
        int filledColumns = rowline.getLastCellNum();
        HSSFCell cell = null;
        for (int i = 0; i < filledColumns; i++) {
            cell = rowline.getCell((short) i);
            String cellvalue = null;
            if (cell != null) {
                switch (cell.getCellType()) {
                    case HSSFCell.CELL_TYPE_NUMERIC: {
                        if (HSSFDateUtil.isCellDateFormatted(cell)) {
                            // Date cell: format with the platform locale.
                            cellvalue = cell.getDateCellValue().toLocaleString();
                        }
                        else {
                            // Plain number: truncated to an integer, as before.
                            cellvalue = String.valueOf((int) cell.getNumericCellValue());
                        }
                        break;
                    }
                    case HSSFCell.CELL_TYPE_STRING:
                        // Escape single quotes for later SQL-style use.
                        cellvalue = cell.getStringCellValue().replaceAll("'", "''");
                        break;
                    default:
                        cellvalue = " ";
                }
            }
            else {
                cellvalue = "";
            }
            // Insert the delimiter after every cell value.
            buffer.append(cellvalue).append(EXCEL_LINE_DELIMITER);
        }
        return buffer.toString();
    }

    /** Closes the underlying streams; safe to call more than once. */
    public void close() {
        if (is != null) {
            try {
                is.close();
            }
            catch (IOException e) {
                // nothing more we can do
            }
            finally {
                is = null;
            }
        }
        if (reader != null) {
            try {
                reader.close();
            }
            catch (IOException e) {
                // nothing more we can do
            }
            finally {
                reader = null;
            }
        }
    }

    /** Small manual test: dump an Excel file to stdout. */
    public static void main(String[] args) {
        try {
            ExcelReader er = new ExcelReader("d:\\xp.xls");
            String line = er.readLine();
            while (line != null) {
                System.out.println(line);
                line = er.readLine();
            }
            er.close();
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }
}
package searchfileexample;
import javax.servlet.*;
import javax.servlet.http.*;
import java.io.*;
import java.util.*;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.demo.FileDocument;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import java.io.FileReader;
import org.apache.lucene.index.*;
import java.text.DateFormat;
import org.apache.poi.hdf.extractor.WordDocument;
import java.io.InputStream;
import java.io.StringWriter;
import java.io.PrintWriter;
import java.io.FileInputStream;
import java.io.*;
import org.textmining.text.extraction.WordExtractor;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
/**
 * Builds a Lucene index for every file under a given directory.
 * <p>Title: </p>
 * <p>Description: </p>
 * <p>Copyright: Copyright (c) 2007</p>
 * <p>Company: </p>
 * @author not attributable
 * @version 1.0
 * Depending on the file type, the index files can be created in different
 * folders, so that index data is stored by category.
 */
public class IndexFilesServlet
    extends HttpServlet {

    /** Directory the Lucene index is written to. */
    static final File INDEX_DIR = new File("index");

    // Initialize global variables
    public void init() throws ServletException {
    }

    /**
     * Handles every request by (re)building the index for all files under
     * the hard-coded "a" directory.
     */
    public void service(HttpServletRequest request, HttpServletResponse response) throws
        ServletException, IOException {
        final File docDir = new File("a"); // directory containing the files to index
        if (!docDir.exists() || !docDir.canRead()) {
            System.out.println("Document directory '" + docDir.getAbsolutePath() +
                               "' does not exist or is not readable, please check the path");
            // Bug fix: a servlet must never call System.exit(); abort the request instead.
            return;
        }
        Date start = new Date();
        try {
            // true  - overwrite any existing index
            // false - keep the existing index
            IndexWriter writer = new IndexWriter(INDEX_DIR, new StandardAnalyzer(), true);
            System.out.println("Indexing to directory '" + INDEX_DIR + "'...");
            indexDocs(writer, docDir);
            System.out.println("Optimizing...");
            writer.optimize();
            writer.close();
            Date end = new Date();
            System.out.println(end.getTime() - start.getTime() +
                               " total milliseconds");
        }
        catch (IOException e) {
            System.out.println(" caught a " + e.getClass() +
                               "\n with message: " + e.getMessage());
        }
    }

    // Clean up resources
    public void destroy() {
    }

    /**
     * Recursively indexes {@code file}: directories are walked, regular
     * files are dispatched to a type-specific extractor by extension.
     */
    public void indexDocs(IndexWriter writer, File file) throws IOException {
        // Do not try to index files that cannot be read.
        if (!file.canRead()) {
            return;
        }
        if (file.isDirectory()) {
            String[] files = file.list(); // may be null if an I/O error occurs
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
            return;
        }
        // Bug fix: use lastIndexOf so "report.v2.doc" yields "doc", and guard
        // against names with no dot at all (indexOf could return -1).
        String name = file.getName();
        int dot = name.lastIndexOf('.');
        String filehouzui = (dot >= 0) ? name.substring(dot + 1) : "";
        System.out.println("adding " + file);
        FileInputStream fis = null;
        try {
            fis = new FileInputStream(file);
            Document doc = null;
            if (filehouzui.equals("doc")) {
                doc = getWordDocument(file, fis);
            }
            else if (filehouzui.equals("txt")) {
                doc = getTxtDocument(file, fis);
            }
            else if (filehouzui.equals("xls")) {
                doc = getExcelDocument(file, fis);
            }
            // Extraction can fail and return null; addDocument(null) would NPE.
            if (doc != null) {
                writer.addDocument(doc);
            }
        }
        // At least on Windows, some temporary files raise an "access denied"
        // exception even though canRead() returned true.
        catch (Exception e) {
            // Bug fix: the exception used to be swallowed silently.
            System.out.println("skipping " + file + ": " + e);
        }
        finally {
            if (fis != null) {
                try {
                    fis.close(); // bug fix: the stream used to be leaked
                }
                catch (IOException ignored) {
                    // best effort only
                }
            }
        }
    }

    /**
     * Turns a plain file into a Lucene Document with path / contents /
     * title / modified fields.
     *
     * @param file the file to convert
     */
    public Document parseFile(File file) throws Exception {
        Document doc = new Document();
        // Store the absolute path so search results can locate the file.
        doc.add(new Field("path", file.getAbsolutePath(), Field.Store.YES,
                          Field.Index.UN_TOKENIZED));
        try {
            doc.add(new Field("contents", new FileReader(file))); // index the file body
            doc.add(new Field("title", file.getName(), Field.Store.YES,
                              Field.Index.UN_TOKENIZED));
            // Index the last-modified timestamp.
            doc.add(new Field("modified",
                              String.valueOf(DateFormat.getDateTimeInstance().format(
                                  new Date(file.lastModified()))),
                              Field.Store.YES, Field.Index.UN_TOKENIZED));
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        return doc;
    }

    /**
     * Extracts Word text with POI's WordDocument.
     * NOTE(review): known to truncate some documents; getWordDocument()
     * (tm-extractors) is the preferred path.
     */
    public Document getDocument(File file, FileInputStream is) throws Exception {
        String bodyText = null;
        try {
            WordDocument wd = new WordDocument(is);
            StringWriter docTextWriter = new StringWriter();
            wd.writeAllText(new PrintWriter(docTextWriter));
            bodyText = docTextWriter.toString();
            docTextWriter.close();
            System.out.println("word content====" + bodyText);
        }
        catch (Exception e) {
            // Bug fix: the failure used to be swallowed without a trace.
            System.out.println("word extraction failed: " + e);
        }
        if (bodyText != null) {
            Document doc = new Document();
            doc.add(new Field("path", file.getAbsolutePath(), Field.Store.YES,
                              Field.Index.UN_TOKENIZED));
            doc.add(new Field("contents", bodyText, Field.Store.YES,
                              Field.Index.TOKENIZED));
            return doc;
        }
        return null;
    }

    /**
     * Extracts Word text with tm-extractors-0.4.jar (works well).
     */
    public Document getWordDocument(File file, FileInputStream is) throws
        Exception {
        String bodyText = null;
        try {
            WordExtractor extractor = new WordExtractor();
            System.out.println("word文档");
            bodyText = extractor.extractText(is);
            if (bodyText != null) {
                Document doc = new Document();
                doc.add(new Field("path", file.getAbsolutePath(), Field.Store.YES,
                                  Field.Index.UN_TOKENIZED));
                doc.add(new Field("contents", bodyText, Field.Store.YES,
                                  Field.Index.TOKENIZED));
                System.out.println("word content====" + bodyText);
                return doc;
            }
        }
        catch (Exception e) {
            // Bug fix: the failure used to be swallowed without a trace.
            System.out.println("word extraction failed: " + e);
        }
        return null;
    }

    /**
     * Builds a Document for a plain-text file. The FileReader is handed to
     * Lucene, which consumes and closes it during addDocument().
     */
    public Document getTxtDocument(File file, FileInputStream is) throws
        Exception {
        try {
            Reader textReader = new FileReader(file);
            Document doc = new Document();
            doc.add(new Field("path", file.getAbsolutePath(), Field.Store.YES,
                              Field.Index.UN_TOKENIZED));
            doc.add(new Field("contents", textReader));
            return doc;
        }
        catch (Exception e) {
            // Bug fix: the failure used to be swallowed without a trace.
            System.out.println("txt extraction failed: " + e);
        }
        return null;
    }

    /**
     * Reads an .xls file via ExcelReader and indexes the concatenated rows.
     *
     * @param file File
     * @param is FileInputStream
     * @throws Exception
     * @return Document, or null if extraction failed
     */
    public Document getExcelDocument(File file, FileInputStream is) throws
        Exception {
        ExcelReader er = null;
        try {
            System.out.println("reading excel file");
            er = new ExcelReader(file.getAbsolutePath());
            // Bug fix: the old loop concatenated a possibly-null first line and
            // appended the literal string "null" once the sheet ran out.
            StringBuffer body = new StringBuffer();
            String line = er.readLine();
            if (line != null) {
                body.append(line);
            }
            int rows = er.getRows(); // last row number of the first sheet
            for (int i = 0; i < rows; i++) {
                line = er.readLine();
                if (line == null) {
                    break;
                }
                body.append(line);
            }
            String bodyText = body.toString();
            Document doc = new Document();
            doc.add(new Field("path", file.getAbsolutePath(), Field.Store.YES,
                              Field.Index.UN_TOKENIZED));
            doc.add(new Field("contents", bodyText, Field.Store.YES,
                              Field.Index.TOKENIZED));
            System.out.println("excel content====" + bodyText);
            return doc;
        }
        catch (Exception e) {
            System.out.println(e);
        }
        finally {
            if (er != null) {
                er.close(); // bug fix: the reader used to be leaked
            }
        }
        return null;
    }
}
package searchfileexample;
import javax.servlet.*;
import javax.servlet.http.*;
import java.io.*;
import java.util.*;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.FilterIndexReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;
import org.apache.lucene.queryParser.*;
public class SearchFileServlet
extends HttpServlet {
private static final String CONTENT_TYPE = "text/html; charset=GBK";
//Initialize global variables
public void init() throws ServletException {
}
/** Use the norms from one field for all fields. Norms are read into memory,
* using a byte of memory per document per searched field. This can cause
* search of large collections with a large number of fields to run out of
* memory. If all of the fields contain only a single token, then the norms
* are all identical, then single norm vector may be shared. */
private static class OneNormsReader
extends FilterIndexReader {
private String field;
public OneNormsReader(IndexReader in, String field) {
super(in);
this.field = field;
}
public byte[] norms(String field) throws IOException {
return in.norms(this.field);
}
}
//Process the HTTP Get request
public void service(HttpServletRequest request, HttpServletResponse response) throws
ServletException, IOException {
response.setContentType(CONTENT_TYPE);
PrintWriter out = response.getWriter();
String[] args = {
"a", "b"};
String usage =
"Usage: java org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field]";
if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
System.out.println(usage);
System.exit(0);
}
String index = "index"; //该值是用来存放生成的烦引文件的文g夹的名称Q不能改?br />
String field = "contents"; //不能修改 field 的?br />
String queries = null; //是用来存N要检索的关键字的一个文件?br />
queries = "D:/lfy_programe/全文?SearchFileExample/aa.txt";
System.out.println("-----------------------" + request.getContextPath());
int repeat = 1;
boolean raw = false;
String normsField = null;
for (int i = 0; i < args.length; i++) {
if ("-index".equals(args[i])) {
index = args[i + 1];
i++;
}
else if ("-field".equals(args[i])) {
field = args[i + 1];
i++;
}
else if ("-queries".equals(args[i])) {
queries = args[i + 1];
i++;
}
else if ("-repeat".equals(args[i])) {
repeat = Integer.parseInt(args[i + 1]);
i++;
}
else if ("-raw".equals(args[i])) {
raw = true;
}
else if ("-norms".equals(args[i])) {
normsField = args[i + 1];
i++;
}
}
IndexReader reader = IndexReader.open(index);
if (normsField != null) {
reader = new OneNormsReader(reader, normsField);
}
Searcher searcher = new IndexSearcher(reader); //用来打开索引文g
Analyzer analyzer = new StandardAnalyzer(); //分析?br />
//Analyzer analyzer = new StandardAnalyzer();
BufferedReader in = null;
if (queries != null) {
in = new BufferedReader(new FileReader(queries));
}
else {
in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
}
QueryParser parser = new QueryParser(field, analyzer);
out.println("<html>");
out.println("<head><title>SearchFileServlet</title></head>");
out.println("<body bgcolor=\"#ffffff\">");
while (true) {
if (queries == null) { // prompt the user
System.out.println("Enter query: ");
}
String line = in.readLine(); //l成查询关键字字W串
System.out.println("查询字符?==" + line);
if (line == null || line.length() == -1) {
break;
}
line = line.trim();
if (line.length() == 0) {
break;
}
Query query = null;
try {
query = parser.parse(line);
}
catch (ParseException ex) {
}
System.out.println("Searching for: " + query.toString(field)); //每个关键?/p>
Hits hits = searcher.search(query);
if (repeat > 0) { // repeat & time as benchmark
Date start = new Date();
for (int i = 0; i < repeat; i++) {
hits = searcher.search(query);
}
Date end = new Date();
System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
}
out.println("<p>查询刎ͼ" + hits.length() + "个含有[" +
query.toString(field) + "]的文?lt;/p>");
System.out.println("查询刎ͼ" + hits.length() + " 个含?[" +
query.toString(field) + "]的文?);
final int HITS_PER_PAGE = 10; //查询q回的最大记录数
int currentNum = 5; //当前记录?/p>
for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
//start = start + currentNum;
int end = Math.min(hits.length(), start + HITS_PER_PAGE);
for (int i = start; i < end; i++) {
//if (raw) { // output raw format
System.out.println("doc=" + hits.id(i) + " score=" + hits.score(i)); //score是接q度的意?br />
//continue;
//}
Document doc = hits.doc(i);
String path = doc.get("path");
if (path != null) {
System.out.println( (i + 1) + ". " + path);
out.println("<p>" + (i + 1) + ". " + path + "</p>");
String title = doc.get("title");
System.out.println(" modified: " + doc.get("modified"));
if (title != null) {
System.out.println(" Title: " + doc.get("title"));
}
}
else {
System.out.println( (i + 1) + ". " + "No path for this document");
}
}
if (queries != null) { // non-interactive
break;
}
if (hits.length() > end) {
System.out.println("more (y/n) ? ");
line = in.readLine();
if (line.length() == 0 || line.charAt(0) == 'n') {
break;
}
}
}
}
reader.close();
out.println("</body></html>");
}
//Clean up resources
public void destroy() {
}
}
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.demo.FileDocument;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import java.io.FileReader;
import org.apache.lucene.index.*;
import java.text.DateFormat;
import org.apache.poi.hdf.extractor.WordDocument;
import java.io.InputStream;
import java.io.StringWriter;
import java.io.PrintWriter;
import java.io.FileInputStream;
import java.io.*;
import org.textmining.text.extraction.WordExtractor;
/**
 * Builds a Lucene index for every file under a given directory.
 * <p>Title: </p>
 * <p>Description: </p>
 * <p>Copyright: Copyright (c) 2007</p>
 * <p>Company: </p>
 * @author not attributable
 * @version 1.0
 * Depending on the file type, the index files can be created in different
 * folders, so that index data is stored by category.
 */
/** Index all text files under a directory. */
public class IndexFiles {

    private IndexFiles() {}

    /** Directory the Lucene index is written to. */
    static final File INDEX_DIR = new File("index");

    /** Index all text files under a directory. */
    public static void main(String[] args) {
        final File docDir = new File("a"); // directory containing the files to index
        if (!docDir.exists() || !docDir.canRead()) {
            System.out.println("Document directory '" + docDir.getAbsolutePath() +
                               "' does not exist or is not readable, please check the path");
            System.exit(1);
        }
        Date start = new Date();
        try {
            // true  - overwrite any existing index
            // false - keep the existing index
            IndexWriter writer = new IndexWriter(INDEX_DIR, new StandardAnalyzer(), true);
            System.out.println("Indexing to directory '" + INDEX_DIR + "'...");
            indexDocs(writer, docDir);
            System.out.println("Optimizing...");
            writer.optimize();
            writer.close();
            Date end = new Date();
            System.out.println(end.getTime() - start.getTime() +
                               " total milliseconds");
        }
        catch (IOException e) {
            System.out.println(" caught a " + e.getClass() +
                               "\n with message: " + e.getMessage());
        }
    }

    /**
     * Recursively indexes {@code file}: directories are walked, regular
     * files are extracted with getDocument2() and added to the index.
     */
    static void indexDocs(IndexWriter writer, File file) throws IOException {
        // Do not try to index files that cannot be read.
        if (!file.canRead()) {
            return;
        }
        if (file.isDirectory()) {
            String[] files = file.list(); // may be null if an I/O error occurs
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
            return;
        }
        System.out.println("adding " + file);
        FileInputStream fis = null;
        try {
            fis = new FileInputStream(file);
            Document doc = getDocument2(file, fis);
            // Bug fix: extraction can fail and return null; addDocument(null)
            // would throw a NullPointerException.
            if (doc != null) {
                writer.addDocument(doc);
            }
        }
        // At least on Windows, some temporary files raise an "access denied"
        // exception even though canRead() returned true.
        catch (Exception e) {
            // Bug fix: the exception used to be swallowed silently.
            System.out.println("skipping " + file + ": " + e);
        }
        finally {
            if (fis != null) {
                try {
                    fis.close(); // bug fix: the stream used to be leaked
                }
                catch (IOException ignored) {
                    // best effort only
                }
            }
        }
    }

    /**
     * Turns a plain file into a Lucene Document with path / contents /
     * title / modified fields.
     *
     * @param file the file to convert
     */
    static Document parseFile(File file) throws Exception {
        Document doc = new Document();
        // Store the absolute path so search results can locate the file.
        doc.add(new Field("path", file.getAbsolutePath(), Field.Store.YES,
                          Field.Index.UN_TOKENIZED));
        try {
            doc.add(new Field("contents", new FileReader(file))); // index the file body
            doc.add(new Field("title", file.getName(), Field.Store.YES,
                              Field.Index.UN_TOKENIZED));
            // Index the last-modified timestamp.
            doc.add(new Field("modified",
                              String.valueOf(DateFormat.getDateTimeInstance().format(
                                  new Date(file.lastModified()))),
                              Field.Store.YES, Field.Index.UN_TOKENIZED));
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        return doc;
    }

    /**
     * Extracts Word text with POI's WordDocument.
     * NOTE(review): known to truncate some documents; getDocument2()
     * (tm-extractors) is the preferred path.
     */
    static Document getDocument(File file, FileInputStream is) throws Exception {
        String bodyText = null;
        try {
            WordDocument wd = new WordDocument(is);
            StringWriter docTextWriter = new StringWriter();
            wd.writeAllText(new PrintWriter(docTextWriter));
            bodyText = docTextWriter.toString();
            docTextWriter.close();
            System.out.println("word content====" + bodyText);
        }
        catch (Exception e) {
            // Bug fix: the failure used to be swallowed without a trace.
            System.out.println("word extraction failed: " + e);
        }
        if (bodyText != null) {
            Document doc = new Document();
            doc.add(new Field("path", file.getAbsolutePath(), Field.Store.YES,
                              Field.Index.UN_TOKENIZED));
            doc.add(new Field("contents", bodyText, Field.Store.YES,
                              Field.Index.TOKENIZED));
            return doc;
        }
        return null;
    }

    /**
     * Extracts Word text with tm-extractors-0.4.jar (works well).
     */
    static Document getDocument2(File file, FileInputStream is) throws Exception {
        String bodyText = null;
        try {
            WordExtractor extractor = new WordExtractor();
            System.out.println(is.available());
            bodyText = extractor.extractText(is);
            System.out.println("word content====" + bodyText);
        }
        catch (Exception e) {
            // Bug fix: the failure used to be swallowed without a trace.
            System.out.println("word extraction failed: " + e);
        }
        if (bodyText != null) {
            Document doc = new Document();
            doc.add(new Field("path", file.getAbsolutePath(), Field.Store.YES,
                              Field.Index.UN_TOKENIZED));
            doc.add(new Field("contents", bodyText, Field.Store.YES,
                              Field.Index.TOKENIZED));
            return doc;
        }
        return null;
    }
}
package searchfileexample;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.FilterIndexReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Fieldable;
/** Simple command-line based search demo. */
public class SearchFiles {
/** Use the norms from one field for all fields. Norms are read into memory,
* using a byte of memory per document per searched field. This can cause
* search of large collections with a large number of fields to run out of
* memory. If all of the fields contain only a single token, then the norms
* are all identical, then single norm vector may be shared. */
private static class OneNormsReader extends FilterIndexReader {
private String field;
public OneNormsReader(IndexReader in, String field) {
super(in);
this.field = field;
}
public byte[] norms(String field) throws IOException {
return in.norms(this.field);
}
}
private SearchFiles() {}
/** Simple command-line based search demo. */
public static void main(String[] arg) throws Exception {
String[] args = {"a","b"};
String usage =
"Usage: java org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field]";
if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
System.out.println(usage);
System.exit(0);
}
String index = "index";//该值是用来存放生成的烦引文件的文g夹的名称Q不能改?br /> String field = "contents";//不能修改 field 的?br /> String queries = null;//是用来存N要检索的关键字的一个文件?br /> queries = "D:/lfy_programe/全文?SearchFileExample/aa.txt";
int repeat = 1;
boolean raw = false;
String normsField = null;
for (int i = 0; i < args.length; i++) {
if ("-index".equals(args[i])) {
index = args[i+1];
i++;
} else if ("-field".equals(args[i])) {
field = args[i+1];
i++;
} else if ("-queries".equals(args[i])) {
queries = args[i+1];
i++;
} else if ("-repeat".equals(args[i])) {
repeat = Integer.parseInt(args[i+1]);
i++;
} else if ("-raw".equals(args[i])) {
raw = true;
} else if ("-norms".equals(args[i])) {
normsField = args[i+1];
i++;
}
}
IndexReader reader = IndexReader.open(index);
if (normsField != null)
reader = new OneNormsReader(reader, normsField);
Searcher searcher = new IndexSearcher(reader);//用来打开索引文g
Analyzer analyzer = new StandardAnalyzer();//分析?br />
//Analyzer analyzer = new StandardAnalyzer();
BufferedReader in = null;
if (queries != null) {
in = new BufferedReader(new FileReader(queries));
} else {
in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
}
QueryParser parser = new QueryParser(field, analyzer);
while (true) {
if (queries == null) // prompt the user
System.out.println("Enter query: ");
String line = in.readLine();//l成查询关键字字W串
System.out.println("查询字符?=="+line);
if (line == null || line.length() == -1)
break;
line = line.trim();
if (line.length() == 0)
break;
Query query = parser.parse(line);
System.out.println("Searching for: " + query.toString(field));//每个关键?/p>
Hits hits = searcher.search(query);
if (repeat > 0) { // repeat & time as benchmark
Date start = new Date();
for (int i = 0; i < repeat; i++) {
hits = searcher.search(query);
}
Date end = new Date();
System.out.println("Time: "+(end.getTime()-start.getTime())+"ms");
}
System.out.println("查询刎ͼ" + hits.length() + " 个含?["+query.toString(field)+"]的文?);
final int HITS_PER_PAGE = 10;//查询q回的最大记录数
int currentNum = 2;//当前记录?br />
for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
//start = start + currentNum;
int end = Math.min(hits.length(), start + HITS_PER_PAGE);
for (int i = start; i < end; i++) {
//if (raw) { // output raw format
System.out.println("doc="+hits.id(i)+" score="+hits.score(i));//score是接q度的意?br />
//continue;
//}
Document doc = hits.doc(i);
String path = doc.get("path");
if (path != null) {
System.out.println((i+1) + ". " + path);
String title = doc.get("title");
System.out.println(" modified: " + doc.get("modified"));
if (title != null) {
System.out.println(" Title: " + doc.get("title"));
}
} else {
System.out.println((i+1) + ". " + "No path for this document");
}
}
if (queries != null) // non-interactive
break;
if (hits.length() > end) {
System.out.println("more (y/n) ? ");
line = in.readLine();
if (line.length() == 0 || line.charAt(0) == 'n')
break;
}
}
}
reader.close();
}
}
package searchfileexample;
import javax.servlet.*;
import javax.servlet.http.*;
import java.io.*;
import java.util.*;
import org.textmining.text.extraction.WordExtractor;
public class ReadWord extends HttpServlet {

    private static final String CONTENT_TYPE = "text/html; charset=GBK";

    // Initialize global variables
    public void init() throws ServletException {
    }

    /**
     * Reads a hard-coded .doc file with tm-extractors' WordExtractor and
     * prints the extracted text to stdout.
     */
    public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        response.setContentType(CONTENT_TYPE);
        FileInputStream in = new FileInputStream("D:/lfy_programe/全文?SearchFileExample/a/aa.doc");
        try {
            WordExtractor extractor = new WordExtractor();
            System.out.println(in.available());
            String str = null;
            try {
                str = extractor.extractText(in);
            }
            catch (Exception ex) {
                // Bug fix: the failure used to be swallowed without a trace.
                System.out.println("word extraction failed: " + ex);
            }
            System.out.println(str);
        }
        finally {
            in.close(); // bug fix: the stream used to be leaked
        }
    }

    // Clean up resources
    public void destroy() {
    }
}
1. Fuzzy queries in English: append the wildcard " * " to the keyword.

2. IndexFiles.java
   The Java class used to index the files.

3. SearchFiles.java
   The Java class used to run searches.

4. ReadWord.java
   Uses tm-extractors-0.4.jar to read Word files.
import javax.servlet.*;
import javax.servlet.http.*;
import java.io.*;
import java.util.*;
import org.textmining.text.extraction.WordExtractor;
public class ReadWord extends HttpServlet {

    private static final String CONTENT_TYPE = "text/html; charset=GBK";

    // Initialize global variables
    public void init() throws ServletException {
    }

    /**
     * Reads a hard-coded .doc file with tm-extractors' WordExtractor and
     * prints the extracted text to stdout.
     */
    public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        response.setContentType(CONTENT_TYPE);
        FileInputStream in = new FileInputStream("D:/lfy_programe/全文?SearchFileExample/a/aa.doc");
        try {
            WordExtractor extractor = new WordExtractor();
            System.out.println(in.available());
            String str = null;
            try {
                str = extractor.extractText(in);
            }
            catch (Exception ex) {
                // Bug fix: the failure used to be swallowed without a trace.
                System.out.println("word extraction failed: " + ex);
            }
            System.out.println(str);
        }
        finally {
            in.close(); // bug fix: the stream used to be leaked
        }
    }

    // Clean up resources
    public void destroy() {
    }
}