使用tm-extractors-0.4.jar來讀取word文件
package searchfileexample;
import javax.servlet.*;
import javax.servlet.http.*;
import java.io.*;
import java.util.*;
import org.textmining.text.extraction.WordExtractor;
/**
 * Demo servlet that extracts the plain text of a Word (.doc) file using
 * tm-extractors' {@link WordExtractor} and prints it to standard output.
 *
 * <p>The document path is hard-coded. Apart from setting the content type,
 * nothing is written to the HTTP response; the extracted text goes to
 * {@code System.out} only.
 */
public class ReadWord extends HttpServlet {
    private static final String CONTENT_TYPE = "text/html; charset=GBK";

    /** Location of the Word document that is read on every GET request. */
    private static final String DOC_PATH = "D:/lfy_programe/全文檢索/SearchFileExample/a/aa.doc";

    /** No servlet-specific initialization is required. */
    public void init() throws ServletException {
    }

    /**
     * Handles an HTTP GET request: opens the document at {@link #DOC_PATH},
     * prints the number of bytes reported as available, extracts the text and
     * prints it to standard output.
     *
     * @param request  the incoming request (not inspected)
     * @param response the response; only its content type is set
     * @throws ServletException declared by the servlet contract; not thrown directly here
     * @throws IOException if the document cannot be opened, queried or closed
     */
    public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        response.setContentType(CONTENT_TYPE);
        FileInputStream in = new FileInputStream(DOC_PATH);
        String str = null;
        try {
            WordExtractor extractor = new WordExtractor();
            System.out.println(in.available());
            try {
                str = extractor.extractText(in);
            } catch (Exception ex) {
                // Extraction stays best-effort (str remains null), but the
                // failure is recorded instead of being silently discarded.
                log("Failed to extract text from " + DOC_PATH, ex);
            }
        } finally {
            // Always release the file handle, even when extraction fails.
            in.close();
        }
        System.out.println(str);
    }

    /** No resources are held between requests, so there is nothing to release. */
    public void destroy() {
    }
}
posted on 2008-03-18 10:33 軒轅 閱讀(5512) 評論(5) 編輯 收藏 所屬分類: java