??xml version="1.0" encoding="utf-8" standalone="yes"?>午夜久久免费观看,国产区精品区,国内精品伊人久久http://www.aygfsteel.com/dreamstone/category/24601.html开发出高质量的pȝzh-cnThu, 02 Aug 2007 19:05:52 GMTThu, 02 Aug 2007 19:05:52 GMT60lucene入门合集http://www.aygfsteel.com/dreamstone/archive/2007/07/29/133168.htmldreamstonedreamstoneSun, 29 Jul 2007 12:09:00 GMThttp://www.aygfsteel.com/dreamstone/archive/2007/07/29/133168.htmlhttp://www.aygfsteel.com/dreamstone/comments/133168.htmlhttp://www.aygfsteel.com/dreamstone/archive/2007/07/29/133168.html#Feedback0http://www.aygfsteel.com/dreamstone/comments/commentRss/133168.htmlhttp://www.aygfsteel.com/dreamstone/services/trackbacks/133168.html
http://www.aygfsteel.com/dreamstone/archive/2007/06/11/123317.html
lucene简单实例<br>http://www.aygfsteel.com/dreamstone/archive/2007/06/12/123528.html
lucene核心类<br>http://www.aygfsteel.com/dreamstone/archive/2007/06/12/123531.html
lucene索引非txt文档
http://www.aygfsteel.com/dreamstone/archive/2007/06/14/124286.html
lucene建立索引时用C些文档操?br>http://www.aygfsteel.com/dreamstone/archive/2007/06/20/125369.html
比较各种英文分析器<br>http://www.aygfsteel.com/dreamstone/archive/2007/06/20/125372.html
lucene丰富的查?br>http://www.aygfsteel.com/dreamstone/archive/2007/06/21/125573.html
lucene丰富的查?
http://www.aygfsteel.com/dreamstone/archive/2007/06/21/125574.html
lucene中文分词
http://www.aygfsteel.com/dreamstone/archive/2007/06/22/125726.html

dreamstone 2007-07-29 20:09 发表评论
]]>
lucene的中文分词器http://www.aygfsteel.com/dreamstone/archive/2007/06/22/125726.htmldreamstonedreamstoneFri, 22 Jun 2007 01:15:00 GMThttp://www.aygfsteel.com/dreamstone/archive/2007/06/22/125726.htmlhttp://www.aygfsteel.com/dreamstone/comments/125726.htmlhttp://www.aygfsteel.com/dreamstone/archive/2007/06/22/125726.html#Feedback0http://www.aygfsteel.com/dreamstone/comments/commentRss/125726.htmlhttp://www.aygfsteel.com/dreamstone/services/trackbacks/125726.html
package analyzer;

import java.io.Reader;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.cn.ChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.mira.lucene.analysis.IK_CAnalyzer;
import org.mira.lucene.analysis.MIK_CAnalyzer;

import com.sohospace.lucene.analysis.xanalyzer.XAnalyzer;
import com.sohospace.lucene.analysis.xanalyzer.XFactory;
import com.sohospace.lucene.analysis.xanalyzer.XTokenizer;
//中文分词使用了Paoding的分词技术,特表示感谢
public class TestCJKAnalyzer {
    
private static String testString1 = "中华人民共和国在1949q徏立,从此开始了C国的伟大章";
    
private static String testString2 = "比尔盖茨从事饮业和服务业方面的工作";
    
public static void testStandard(String testString) throws Exception{
        Analyzer analyzer 
= new StandardAnalyzer();      
        Reader r 
= new StringReader(testString);      
        StopFilter sf 
= (StopFilter) analyzer.tokenStream("", r);
        System.err.println(
"=====standard analyzer====");
        System.err.println(
"分析ҎQ默认没有词只有?/span>");
        Token t;      
        
while ((t = sf.next()) != null{      
            System.out.println(t.termText());      
        }
     
    }

    
public static void testCJK(String testString) throws Exception{
        Analyzer analyzer 
= new CJKAnalyzer();      
        Reader r 
= new StringReader(testString);      
        StopFilter sf 
= (StopFilter) analyzer.tokenStream("", r);
        System.err.println(
"=====cjk analyzer====");
        System.err.println(
"分析Ҏ:交叉双字分割");
        Token t;      
        
while ((t = sf.next()) != null{      
            System.out.println(t.termText());      
        }
     
    }

    
public static void testChiniese(String testString) throws Exception{
        Analyzer analyzer 
= new ChineseAnalyzer();      
        Reader r 
= new StringReader(testString);      
        TokenFilter tf 
= (TokenFilter) analyzer.tokenStream("", r);
        System.err.println(
"=====chinese analyzer====");
        System.err.println(
"分析Ҏ:基本{同StandardAnalyzer");
        Token t;      
        
while ((t = tf.next()) != null{      
            System.out.println(t.termText());      
        }
     
    }

    
public static void testPaoding(String testString) throws Exception{
        XAnalyzer analyzer 
= XFactory.getQueryAnalyzer();   
        Reader r 
= new StringReader(testString);   
        XTokenizer ts 
= (XTokenizer) analyzer.tokenStream("", r);   
        System.err.println(
"=====paoding analyzer====");
        System.err.println(
"分析Ҏ:字典分词,L停止词。在字典不能匚w的情况下使用CJKAnalyzer的分割发?/span>");
        Token t;   
        
while ((t = ts.next()) != null{   
           System.out.println(t.termText());   
        }
   
    }

    
public static void testJe(String testString) throws Exception{
//        Analyzer analyzer = new MIK_CAnalyzer();
        Analyzer analyzer = new IK_CAnalyzer();
        Reader r 
= new StringReader(testString); 
        TokenStream ts 
= (TokenStream)analyzer.tokenStream("", r);
        System.err.println(
"=====je analyzer====");
        System.err.println(
"分析Ҏ:字典分词,正反双向搜烦Q具体不?/span>");
        Token t;   
        
while ((t = ts.next()) != null{   
           System.out.println(t.termText());   
        }
   
    }

    
public static void main(String[] args) throws Exception{
//        String testString = testString1;
        String testString = testString1;
        System.out.println(testString);
        
        testStandard(testString);
        testCJK(testString);
        testPaoding(testString);
        
//        testChiniese(testString);
//        testJe(testString);
    }


}



dreamstone 2007-06-22 09:15 发表评论
]]>
lucene的丰富的各种查询Q二Q?/title><link>http://www.aygfsteel.com/dreamstone/archive/2007/06/21/125574.html</link><dc:creator>dreamstone</dc:creator><author>dreamstone</author><pubDate>Thu, 21 Jun 2007 07:08:00 GMT</pubDate><guid>http://www.aygfsteel.com/dreamstone/archive/2007/06/21/125574.html</guid><wfw:comment>http://www.aygfsteel.com/dreamstone/comments/125574.html</wfw:comment><comments>http://www.aygfsteel.com/dreamstone/archive/2007/06/21/125574.html#Feedback</comments><slash:comments>0</slash:comments><wfw:commentRss>http://www.aygfsteel.com/dreamstone/comments/commentRss/125574.html</wfw:commentRss><trackback:ping>http://www.aygfsteel.com/dreamstone/services/trackbacks/125574.html</trackback:ping><description><![CDATA[和上文一P列写lucene的查询用?br>包括了RangeQuery  prefixQuery  phraseQuery  wildcastQuery   fuzzyQuery<br>被烦引查询的文gQ按照需求自己构造即可?br> <div style="BORDER-RIGHT: #cccccc 1px solid; PADDING-RIGHT: 5px; BORDER-TOP: #cccccc 1px solid; PADDING-LEFT: 4px; FONT-SIZE: 13px; PADDING-BOTTOM: 4px; BORDER-LEFT: #cccccc 1px solid; WIDTH: 98%; WORD-BREAK: break-all; PADDING-TOP: 4px; BORDER-BOTTOM: #cccccc 1px solid; BACKGROUND-COLOR: #eeeeee"><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top><span style="COLOR: #0000ff">package</span><span style="COLOR: #000000"> search;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top><br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top></span><span style="COLOR: #0000ff">import</span><span style="COLOR: #000000"> java.io.BufferedReader;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top></span><span style="COLOR: #0000ff">import</span><span style="COLOR: #000000"> java.io.File;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top></span><span style="COLOR: #0000ff">import</span><span style="COLOR: #000000"> java.io.FileInputStream;<br><img 
src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top></span><span style="COLOR: #0000ff">import</span><span style="COLOR: #000000"> java.io.InputStreamReader;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top></span><span style="COLOR: #0000ff">import</span><span style="COLOR: #000000"> java.text.SimpleDateFormat;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top></span><span style="COLOR: #0000ff">import</span><span style="COLOR: #000000"> java.util.Date;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top><br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top></span><span style="COLOR: #0000ff">import</span><span style="COLOR: #000000"> org.apache.lucene.analysis.standard.StandardAnalyzer;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top></span><span style="COLOR: #0000ff">import</span><span style="COLOR: #000000"> org.apache.lucene.document.Document;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top></span><span style="COLOR: #0000ff">import</span><span style="COLOR: #000000"> org.apache.lucene.document.Field;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top></span><span style="COLOR: #0000ff">import</span><span style="COLOR: #000000"> org.apache.lucene.index.IndexWriter;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top></span><span style="COLOR: #0000ff">import</span><span style="COLOR: #000000"> org.apache.lucene.index.Term;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top></span><span style="COLOR: #0000ff">import</span><span style="COLOR: #000000"> org.apache.lucene.search.FuzzyQuery;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top></span><span style="COLOR: #0000ff">import</span><span 
style="COLOR: #000000"> org.apache.lucene.search.Hits;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top></span><span style="COLOR: #0000ff">import</span><span style="COLOR: #000000"> org.apache.lucene.search.IndexSearcher;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top></span><span style="COLOR: #0000ff">import</span><span style="COLOR: #000000"> org.apache.lucene.search.PhraseQuery;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top></span><span style="COLOR: #0000ff">import</span><span style="COLOR: #000000"> org.apache.lucene.search.PrefixQuery;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top></span><span style="COLOR: #0000ff">import</span><span style="COLOR: #000000"> org.apache.lucene.search.Query;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top></span><span style="COLOR: #0000ff">import</span><span style="COLOR: #000000"> org.apache.lucene.search.RangeQuery;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top></span><span style="COLOR: #0000ff">import</span><span style="COLOR: #000000"> org.apache.lucene.search.TermQuery;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top></span><span style="COLOR: #0000ff">import</span><span style="COLOR: #000000"> org.apache.lucene.search.WildcardQuery;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top></span><span style="COLOR: #0000ff">import</span><span style="COLOR: #000000"> org.apache.lucene.store.Directory;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top></span><span style="COLOR: #0000ff">import</span><span style="COLOR: #000000"> org.apache.lucene.store.RAMDirectory;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top><br><img id=Codehighlighter1_927_4230_Open_Image 
onclick="this.style.display='none'; Codehighlighter1_927_4230_Open_Text.style.display='none'; Codehighlighter1_927_4230_Closed_Image.style.display='inline'; Codehighlighter1_927_4230_Closed_Text.style.display='inline';" src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedBlockStart.gif" align=top><img id=Codehighlighter1_927_4230_Closed_Image style="DISPLAY: none" onclick="this.style.display='none'; Codehighlighter1_927_4230_Closed_Text.style.display='none'; Codehighlighter1_927_4230_Open_Image.style.display='inline'; Codehighlighter1_927_4230_Open_Text.style.display='inline';" src="http://www.aygfsteel.com/Images/OutliningIndicators/ContractedBlock.gif" align=top></span><span style="COLOR: #0000ff">public</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">class</span><span style="COLOR: #000000"> SearcherShow </span><span id=Codehighlighter1_927_4230_Closed_Text style="BORDER-RIGHT: #808080 1px solid; BORDER-TOP: #808080 1px solid; DISPLAY: none; BORDER-LEFT: #808080 1px solid; BORDER-BOTTOM: #808080 1px solid; BACKGROUND-COLOR: #ffffff"><img src="http://www.aygfsteel.com/Images/dot.gif"></span><span id=Codehighlighter1_927_4230_Open_Text><span style="COLOR: #000000">{<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>    </span><span style="COLOR: #0000ff">private</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">static</span><span style="COLOR: #000000"> Directory directory </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> RAMDirectory();<br><img id=Codehighlighter1_1034_1480_Open_Image onclick="this.style.display='none'; Codehighlighter1_1034_1480_Open_Text.style.display='none'; Codehighlighter1_1034_1480_Closed_Image.style.display='inline'; Codehighlighter1_1034_1480_Closed_Text.style.display='inline';" 
src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif" align=top><img id=Codehighlighter1_1034_1480_Closed_Image style="DISPLAY: none" onclick="this.style.display='none'; Codehighlighter1_1034_1480_Closed_Text.style.display='none'; Codehighlighter1_1034_1480_Open_Image.style.display='inline'; Codehighlighter1_1034_1480_Open_Text.style.display='inline';" src="http://www.aygfsteel.com/Images/OutliningIndicators/ContractedSubBlock.gif" align=top>    </span><span style="COLOR: #0000ff">public</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">static</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">void</span><span style="COLOR: #000000"> preIndex() </span><span style="COLOR: #0000ff">throws</span><span style="COLOR: #000000"> Exception</span><span id=Codehighlighter1_1034_1480_Closed_Text style="BORDER-RIGHT: #808080 1px solid; BORDER-TOP: #808080 1px solid; DISPLAY: none; BORDER-LEFT: #808080 1px solid; BORDER-BOTTOM: #808080 1px solid; BACKGROUND-COLOR: #ffffff"><img src="http://www.aygfsteel.com/Images/dot.gif"></span><span id=Codehighlighter1_1034_1480_Open_Text><span style="COLOR: #000000">{<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        String fileName1 </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">./data/searchShow.txt</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        String fileName2 </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">./data/searchShow2.txt</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        String 
fileName3 </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">./data/test.txt</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        IndexWriter writer </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> IndexWriter(directory,</span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> StandardAnalyzer(),</span><span style="COLOR: #0000ff">true</span><span style="COLOR: #000000">);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        Document doc1 </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> getDocument(fileName1);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        Document doc2 </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> getDocument(fileName2);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        Document doc3 </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> getDocument(fileName3);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        writer.addDocument(doc1);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        writer.addDocument(doc2);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        writer.addDocument(doc3);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        writer.close();<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        <br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif" 
align=top>    }</span></span><span style="COLOR: #000000"><br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>    </span><span style="COLOR: #008000">//</span><span style="COLOR: #008000">先徏立烦引才能执?br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>    <br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>    </span><span style="COLOR: #008000">//</span><span style="COLOR: #008000">termQuery   rangeQuery   booleanQuery的查询在SearchercM</span><span style="COLOR: #008000"><br><img id=Codehighlighter1_1601_1887_Open_Image onclick="this.style.display='none'; Codehighlighter1_1601_1887_Open_Text.style.display='none'; Codehighlighter1_1601_1887_Closed_Image.style.display='inline'; Codehighlighter1_1601_1887_Closed_Text.style.display='inline';" src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif" align=top><img id=Codehighlighter1_1601_1887_Closed_Image style="DISPLAY: none" onclick="this.style.display='none'; Codehighlighter1_1601_1887_Closed_Text.style.display='none'; Codehighlighter1_1601_1887_Open_Image.style.display='inline'; Codehighlighter1_1601_1887_Open_Text.style.display='inline';" src="http://www.aygfsteel.com/Images/OutliningIndicators/ContractedSubBlock.gif" align=top></span><span style="COLOR: #000000">    </span><span style="COLOR: #0000ff">public</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">static</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">void</span><span style="COLOR: #000000"> rangeQuery() </span><span style="COLOR: #0000ff">throws</span><span style="COLOR: #000000"> Exception</span><span id=Codehighlighter1_1601_1887_Closed_Text style="BORDER-RIGHT: #808080 1px solid; BORDER-TOP: #808080 1px solid; DISPLAY: none; BORDER-LEFT: #808080 1px solid; BORDER-BOTTOM: #808080 1px solid; BACKGROUND-COLOR: #ffffff"><img 
src="http://www.aygfsteel.com/Images/dot.gif"></span><span id=Codehighlighter1_1601_1887_Open_Text><span style="COLOR: #000000">{<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        Term startTerm </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> Term(</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">lastmodified</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">,</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">20070620</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        Term endTerm </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> Term(</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">lastmodified</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">,</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">20070622</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        RangeQuery query </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> RangeQuery(startTerm,endTerm,</span><span style="COLOR: #0000ff">true</span><span style="COLOR: #000000">);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        IndexSearcher searcher </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> IndexSearcher(directory);<br><img 
src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        Hits hits </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> searcher.search(query);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        prtHits(hits);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif" align=top>    }</span></span><span style="COLOR: #000000"><br><img id=Codehighlighter1_1939_2359_Open_Image onclick="this.style.display='none'; Codehighlighter1_1939_2359_Open_Text.style.display='none'; Codehighlighter1_1939_2359_Closed_Image.style.display='inline'; Codehighlighter1_1939_2359_Closed_Text.style.display='inline';" src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif" align=top><img id=Codehighlighter1_1939_2359_Closed_Image style="DISPLAY: none" onclick="this.style.display='none'; Codehighlighter1_1939_2359_Closed_Text.style.display='none'; Codehighlighter1_1939_2359_Open_Image.style.display='inline'; Codehighlighter1_1939_2359_Open_Text.style.display='inline';" src="http://www.aygfsteel.com/Images/OutliningIndicators/ContractedSubBlock.gif" align=top>    </span><span style="COLOR: #0000ff">public</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">static</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">void</span><span style="COLOR: #000000"> prefixQuery() </span><span style="COLOR: #0000ff">throws</span><span style="COLOR: #000000"> Exception</span><span id=Codehighlighter1_1939_2359_Closed_Text style="BORDER-RIGHT: #808080 1px solid; BORDER-TOP: #808080 1px solid; DISPLAY: none; BORDER-LEFT: #808080 1px solid; BORDER-BOTTOM: #808080 1px solid; BACKGROUND-COLOR: #ffffff"><img src="http://www.aygfsteel.com/Images/dot.gif"></span><span id=Codehighlighter1_1939_2359_Open_Text><span style="COLOR: #000000">{<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" 
align=top>        Term term </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> Term(</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">fileName</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">,</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">searchShow.txt</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        Term prefixterm </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> Term(</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">fileName</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">,</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">searchShow</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        IndexSearcher searcher </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> IndexSearcher(directory);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        Query query </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> TermQuery(term);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        Query prefixQuery </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> PrefixQuery(prefixterm);<br><img 
src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        Hits hits </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> searcher.search(query);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        Hits prefixHits </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> searcher.search(prefixQuery);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        prtHits(hits);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        System.out.println(</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">----------</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        prtHits(prefixHits);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif" align=top>    }</span></span><span style="COLOR: #000000"><br><img id=Codehighlighter1_2411_2671_Open_Image onclick="this.style.display='none'; Codehighlighter1_2411_2671_Open_Text.style.display='none'; Codehighlighter1_2411_2671_Closed_Image.style.display='inline'; Codehighlighter1_2411_2671_Closed_Text.style.display='inline';" src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif" align=top><img id=Codehighlighter1_2411_2671_Closed_Image style="DISPLAY: none" onclick="this.style.display='none'; Codehighlighter1_2411_2671_Closed_Text.style.display='none'; Codehighlighter1_2411_2671_Open_Image.style.display='inline'; Codehighlighter1_2411_2671_Open_Text.style.display='inline';" src="http://www.aygfsteel.com/Images/OutliningIndicators/ContractedSubBlock.gif" align=top>    </span><span style="COLOR: #0000ff">public</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">static</span><span style="COLOR: #000000"> </span><span style="COLOR: 
#0000ff">void</span><span style="COLOR: #000000"> phraseQuery() </span><span style="COLOR: #0000ff">throws</span><span style="COLOR: #000000"> Exception</span><span id=Codehighlighter1_2411_2671_Closed_Text style="BORDER-RIGHT: #808080 1px solid; BORDER-TOP: #808080 1px solid; DISPLAY: none; BORDER-LEFT: #808080 1px solid; BORDER-BOTTOM: #808080 1px solid; BACKGROUND-COLOR: #ffffff"><img src="http://www.aygfsteel.com/Images/dot.gif"></span><span id=Codehighlighter1_2411_2671_Open_Text><span style="COLOR: #000000">{<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        IndexSearcher searcher </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> IndexSearcher(directory);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        PhraseQuery query </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> PhraseQuery();<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        query.setSlop(</span><span style="COLOR: #000000">2</span><span style="COLOR: #000000">);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        query.add(</span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> Term(</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">contents</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">,</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">quick</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">));<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        query.add(</span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> Term(</span><span style="COLOR: 
#000000">"</span><span style="COLOR: #000000">contents</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">,</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">fox</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">));<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        Hits hits </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> searcher.search(query);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        prtHits(hits);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif" align=top>    }</span></span><span style="COLOR: #000000"><br><img id=Codehighlighter1_2725_2905_Open_Image onclick="this.style.display='none'; Codehighlighter1_2725_2905_Open_Text.style.display='none'; Codehighlighter1_2725_2905_Closed_Image.style.display='inline'; Codehighlighter1_2725_2905_Closed_Text.style.display='inline';" src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif" align=top><img id=Codehighlighter1_2725_2905_Closed_Image style="DISPLAY: none" onclick="this.style.display='none'; Codehighlighter1_2725_2905_Closed_Text.style.display='none'; Codehighlighter1_2725_2905_Open_Image.style.display='inline'; Codehighlighter1_2725_2905_Open_Text.style.display='inline';" src="http://www.aygfsteel.com/Images/OutliningIndicators/ContractedSubBlock.gif" align=top>    </span><span style="COLOR: #0000ff">public</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">static</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">void</span><span style="COLOR: #000000"> wildcardQuery() </span><span style="COLOR: #0000ff">throws</span><span style="COLOR: #000000"> Exception</span><span id=Codehighlighter1_2725_2905_Closed_Text style="BORDER-RIGHT: #808080 1px solid; BORDER-TOP: #808080 1px solid; DISPLAY: none; BORDER-LEFT: #808080 
1px solid; BORDER-BOTTOM: #808080 1px solid; BACKGROUND-COLOR: #ffffff"><img src="http://www.aygfsteel.com/Images/dot.gif"></span><span id=Codehighlighter1_2725_2905_Open_Text><span style="COLOR: #000000">{<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        IndexSearcher searcher </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> IndexSearcher(directory);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        Query query </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> WildcardQuery(</span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> Term(</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">contents</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">,</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">?ild*</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">));<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        Hits hits </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> searcher.search(query);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        prtHits(hits);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif" align=top>    }</span></span><span style="COLOR: #000000"><br><img id=Codehighlighter1_2956_3158_Open_Image onclick="this.style.display='none'; Codehighlighter1_2956_3158_Open_Text.style.display='none'; Codehighlighter1_2956_3158_Closed_Image.style.display='inline'; Codehighlighter1_2956_3158_Closed_Text.style.display='inline';" src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif" align=top><img 
id=Codehighlighter1_2956_3158_Closed_Image style="DISPLAY: none" onclick="this.style.display='none'; Codehighlighter1_2956_3158_Closed_Text.style.display='none'; Codehighlighter1_2956_3158_Open_Image.style.display='inline'; Codehighlighter1_2956_3158_Open_Text.style.display='inline';" src="http://www.aygfsteel.com/Images/OutliningIndicators/ContractedSubBlock.gif" align=top>    </span><span style="COLOR: #0000ff">public</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">static</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">void</span><span style="COLOR: #000000"> fuzzyQuery() </span><span style="COLOR: #0000ff">throws</span><span style="COLOR: #000000"> Exception</span><span id=Codehighlighter1_2956_3158_Closed_Text style="BORDER-RIGHT: #808080 1px solid; BORDER-TOP: #808080 1px solid; DISPLAY: none; BORDER-LEFT: #808080 1px solid; BORDER-BOTTOM: #808080 1px solid; BACKGROUND-COLOR: #ffffff"><img src="http://www.aygfsteel.com/Images/dot.gif"></span><span id=Codehighlighter1_2956_3158_Open_Text><span style="COLOR: #000000">{<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        IndexSearcher searcher </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> IndexSearcher(directory);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        Term term </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> Term(</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">contents</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">,</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">wuzza</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">);<br><img 
src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        FuzzyQuery query </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> FuzzyQuery(term);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        Hits hits </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> searcher.search(query);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        prtHits(hits);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif" align=top>    }</span></span><span style="COLOR: #000000"><br><img id=Codehighlighter1_3229_3837_Open_Image onclick="this.style.display='none'; Codehighlighter1_3229_3837_Open_Text.style.display='none'; Codehighlighter1_3229_3837_Closed_Image.style.display='inline'; Codehighlighter1_3229_3837_Closed_Text.style.display='inline';" src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif" align=top><img id=Codehighlighter1_3229_3837_Closed_Image style="DISPLAY: none" onclick="this.style.display='none'; Codehighlighter1_3229_3837_Closed_Text.style.display='none'; Codehighlighter1_3229_3837_Open_Image.style.display='inline'; Codehighlighter1_3229_3837_Open_Text.style.display='inline';" src="http://www.aygfsteel.com/Images/OutliningIndicators/ContractedSubBlock.gif" align=top>    </span><span style="COLOR: #0000ff">public</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">static</span><span style="COLOR: #000000"> Document getDocument(String fileName) </span><span style="COLOR: #0000ff">throws</span><span style="COLOR: #000000"> Exception</span><span id=Codehighlighter1_3229_3837_Closed_Text style="BORDER-RIGHT: #808080 1px solid; BORDER-TOP: #808080 1px solid; DISPLAY: none; BORDER-LEFT: #808080 1px solid; BORDER-BOTTOM: #808080 1px solid; BACKGROUND-COLOR: 
#ffffff"><img src="http://www.aygfsteel.com/Images/dot.gif"></span><span id=Codehighlighter1_3229_3837_Open_Text><span style="COLOR: #000000">{<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        File file </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> File(fileName);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        Document doc </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> Document();<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        doc.add(Field.Keyword(</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">fileName</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">,file.getName() ));<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        Date modified </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> Date(file.lastModified());<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        String lastmodified </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> SimpleDateFormat(</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">yyyyMMdd</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">).format(modified);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        doc.add(Field.Keyword(</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">lastmodified</span><span style="COLOR: #000000">"</span><span style="COLOR: 
#000000">, lastmodified));<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        BufferedReader br </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> BufferedReader(</span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> InputStreamReader(<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>                </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> FileInputStream(file)));<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        StringBuffer sb </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">new</span><span style="COLOR: #000000"> StringBuffer();<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        String line </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">null</span><span style="COLOR: #000000">;<br><img id=Codehighlighter1_3732_3756_Open_Image onclick="this.style.display='none'; Codehighlighter1_3732_3756_Open_Text.style.display='none'; Codehighlighter1_3732_3756_Closed_Image.style.display='inline'; Codehighlighter1_3732_3756_Closed_Text.style.display='inline';" src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif" align=top><img id=Codehighlighter1_3732_3756_Closed_Image style="DISPLAY: none" onclick="this.style.display='none'; Codehighlighter1_3732_3756_Closed_Text.style.display='none'; Codehighlighter1_3732_3756_Open_Image.style.display='inline'; Codehighlighter1_3732_3756_Open_Text.style.display='inline';" src="http://www.aygfsteel.com/Images/OutliningIndicators/ContractedSubBlock.gif" align=top>        </span><span style="COLOR: #0000ff">while</span><span style="COLOR: #000000"> ((line </span><span 
style="COLOR: #000000">=</span><span style="COLOR: #000000"> br.readLine()) </span><span style="COLOR: #000000">!=</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">null</span><span style="COLOR: #000000">) </span><span id=Codehighlighter1_3732_3756_Closed_Text style="BORDER-RIGHT: #808080 1px solid; BORDER-TOP: #808080 1px solid; DISPLAY: none; BORDER-LEFT: #808080 1px solid; BORDER-BOTTOM: #808080 1px solid; BACKGROUND-COLOR: #ffffff"><img src="http://www.aygfsteel.com/Images/dot.gif"></span><span id=Codehighlighter1_3732_3756_Open_Text><span style="COLOR: #000000">{<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>            sb.append(line);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif" align=top>        }</span></span><span style="COLOR: #000000"><br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        br.close();<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        doc.add(Field.Text(</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">contents</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">,sb.toString() ));<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        </span><span style="COLOR: #0000ff">return</span><span style="COLOR: #000000"> doc;<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif" align=top>    }</span></span><span style="COLOR: #000000"><br><img id=Codehighlighter1_3894_4060_Open_Image onclick="this.style.display='none'; Codehighlighter1_3894_4060_Open_Text.style.display='none'; Codehighlighter1_3894_4060_Closed_Image.style.display='inline'; Codehighlighter1_3894_4060_Closed_Text.style.display='inline';" src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif" align=top><img 
id=Codehighlighter1_3894_4060_Closed_Image style="DISPLAY: none" onclick="this.style.display='none'; Codehighlighter1_3894_4060_Closed_Text.style.display='none'; Codehighlighter1_3894_4060_Open_Image.style.display='inline'; Codehighlighter1_3894_4060_Open_Text.style.display='inline';" src="http://www.aygfsteel.com/Images/OutliningIndicators/ContractedSubBlock.gif" align=top>    </span><span style="COLOR: #0000ff">public</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">static</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">void</span><span style="COLOR: #000000"> prtHits(Hits hits) </span><span style="COLOR: #0000ff">throws</span><span style="COLOR: #000000"> Exception</span><span id=Codehighlighter1_3894_4060_Closed_Text style="BORDER-RIGHT: #808080 1px solid; BORDER-TOP: #808080 1px solid; DISPLAY: none; BORDER-LEFT: #808080 1px solid; BORDER-BOTTOM: #808080 1px solid; BACKGROUND-COLOR: #ffffff"><img src="http://www.aygfsteel.com/Images/dot.gif"></span><span id=Codehighlighter1_3894_4060_Open_Text><span style="COLOR: #000000">{<br><img id=Codehighlighter1_3930_4057_Open_Image onclick="this.style.display='none'; Codehighlighter1_3930_4057_Open_Text.style.display='none'; Codehighlighter1_3930_4057_Closed_Image.style.display='inline'; Codehighlighter1_3930_4057_Closed_Text.style.display='inline';" src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif" align=top><img id=Codehighlighter1_3930_4057_Closed_Image style="DISPLAY: none" onclick="this.style.display='none'; Codehighlighter1_3930_4057_Closed_Text.style.display='none'; Codehighlighter1_3930_4057_Open_Image.style.display='inline'; Codehighlighter1_3930_4057_Open_Text.style.display='inline';" src="http://www.aygfsteel.com/Images/OutliningIndicators/ContractedSubBlock.gif" align=top>        </span><span style="COLOR: #0000ff">for</span><span style="COLOR: #000000">(</span><span style="COLOR: #0000ff">int</span><span style="COLOR: #000000"> 
i</span><span style="COLOR: #000000">=</span><span style="COLOR: #000000">0</span><span style="COLOR: #000000">;i</span><span style="COLOR: #000000"><</span><span style="COLOR: #000000">hits.length();i</span><span style="COLOR: #000000">++</span><span style="COLOR: #000000">)</span><span id=Codehighlighter1_3930_4057_Closed_Text style="BORDER-RIGHT: #808080 1px solid; BORDER-TOP: #808080 1px solid; DISPLAY: none; BORDER-LEFT: #808080 1px solid; BORDER-BOTTOM: #808080 1px solid; BACKGROUND-COLOR: #ffffff"><img src="http://www.aygfsteel.com/Images/dot.gif"></span><span id=Codehighlighter1_3930_4057_Open_Text><span style="COLOR: #000000">{<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>            Document doc </span><span style="COLOR: #000000">=</span><span style="COLOR: #000000"> hits.doc(i);<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>            System.out.println(doc.get(</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">fileName</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">));<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>            System.out.println(doc.get(</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">lastmodified</span><span style="COLOR: #000000">"</span><span style="COLOR: #000000">));<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif" align=top>        }</span></span><span style="COLOR: #000000"><br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif" align=top>    }</span></span><span style="COLOR: #000000"><br><img id=Codehighlighter1_4118_4228_Open_Image onclick="this.style.display='none'; Codehighlighter1_4118_4228_Open_Text.style.display='none'; Codehighlighter1_4118_4228_Closed_Image.style.display='inline'; Codehighlighter1_4118_4228_Closed_Text.style.display='inline';" 
src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif" align=top><img id=Codehighlighter1_4118_4228_Closed_Image style="DISPLAY: none" onclick="this.style.display='none'; Codehighlighter1_4118_4228_Closed_Text.style.display='none'; Codehighlighter1_4118_4228_Open_Image.style.display='inline'; Codehighlighter1_4118_4228_Open_Text.style.display='inline';" src="http://www.aygfsteel.com/Images/OutliningIndicators/ContractedSubBlock.gif" align=top>    </span><span style="COLOR: #0000ff">public</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">static</span><span style="COLOR: #000000"> </span><span style="COLOR: #0000ff">void</span><span style="COLOR: #000000"> main(String[] args) </span><span style="COLOR: #0000ff">throws</span><span style="COLOR: #000000"> Exception</span><span id=Codehighlighter1_4118_4228_Closed_Text style="BORDER-RIGHT: #808080 1px solid; BORDER-TOP: #808080 1px solid; DISPLAY: none; BORDER-LEFT: #808080 1px solid; BORDER-BOTTOM: #808080 1px solid; BACKGROUND-COLOR: #ffffff"><img src="http://www.aygfsteel.com/Images/dot.gif"></span><span id=Codehighlighter1_4118_4228_Open_Text><span style="COLOR: #000000">{<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top>        preIndex();<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top></span><span style="COLOR: #008000">//</span><span style="COLOR: #008000">        rangeQuery();<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top></span><span style="COLOR: #008000">//</span><span style="COLOR: #008000">        prefixQuery();<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top></span><span style="COLOR: #008000">//</span><span style="COLOR: #008000">        phraseQuery();<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top></span><span style="COLOR: #008000">//</span><span 
style="COLOR: #008000">        wildcardQuery();</span><span style="COLOR: #008000"><br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/InBlock.gif" align=top></span><span style="COLOR: #000000">        fuzzyQuery();<br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif" align=top>    }</span></span><span style="COLOR: #000000"><br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/ExpandedBlockEnd.gif" align=top>}</span></span><span style="COLOR: #000000"><br><img src="http://www.aygfsteel.com/Images/OutliningIndicators/None.gif" align=top></span></div> <img src ="http://www.aygfsteel.com/dreamstone/aggbug/125574.html" width = "1" height = "1" /><br><br><div align=right><a style="text-decoration:none;" href="http://www.aygfsteel.com/dreamstone/" target="_blank">dreamstone</a> 2007-06-21 15:08 <a href="http://www.aygfsteel.com/dreamstone/archive/2007/06/21/125574.html#Feedback" target="_blank" style="text-decoration:none;">发表评论</a></div>]]></description></item><item><title>lucene的丰富的各种查询(一)http://www.aygfsteel.com/dreamstone/archive/2007/06/21/125573.htmldreamstonedreamstoneThu, 21 Jun 2007 07:06:00 GMThttp://www.aygfsteel.com/dreamstone/archive/2007/06/21/125573.htmlhttp://www.aygfsteel.com/dreamstone/comments/125573.htmlhttp://www.aygfsteel.com/dreamstone/archive/2007/06/21/125573.html#Feedback0http://www.aygfsteel.com/dreamstone/comments/commentRss/125573.htmlhttp://www.aygfsteel.com/dreamstone/services/trackbacks/125573.htmlterm查询、queryParser查询 ,booleanQuery
package search;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class Searcher {
    
public static void termQuery() throws Exception{
        Directory directory 
= FSDirectory.getDirectory("./index"false); 
        IndexSearcher searcher 
= new IndexSearcher(directory);
        Term t 
= new Term("body","document");
        Query query 
= new TermQuery(t);
        Hits hits 
= searcher.search(query);
        System.out.println(hits.length());
    }

    
public static void queryParser() throws Exception{
        Directory directory 
= FSDirectory.getDirectory("./index"false); 
        IndexSearcher searcher 
= new IndexSearcher(directory);
        Query query 
= QueryParser.parse("text","body",new StandardAnalyzer());
        Hits hits 
= searcher.search(query);
        System.out.println(hits.length());
    }

    
public static void booleanQuery() throws Exception{
        Query parseQuery 
= QueryParser.parse("text","body",new StandardAnalyzer());
        Term t 
= new Term("body","document");
        Query termQuery 
= new TermQuery(t);
        BooleanQuery boolQuery 
= new BooleanQuery();
        boolQuery.add(parseQuery,
true,false);
        boolQuery.add(termQuery,
true,false);
        
        Directory directory 
= FSDirectory.getDirectory("./index"false); 
        IndexSearcher searcher 
= new IndexSearcher(directory);
        Hits hits 
= searcher.search(boolQuery);
        System.out.println(hits.length());
    }

    
public static void main(String[] args) throws Exception{
        termQuery();
        queryParser();
        booleanQuery();
    }

}




dreamstone 2007-06-21 15:06 发表评论
]]>
比较lucene各种英文分析器Analyzerhttp://www.aygfsteel.com/dreamstone/archive/2007/06/20/125372.htmldreamstonedreamstoneWed, 20 Jun 2007 08:46:00 GMThttp://www.aygfsteel.com/dreamstone/archive/2007/06/20/125372.htmlhttp://www.aygfsteel.com/dreamstone/comments/125372.htmlhttp://www.aygfsteel.com/dreamstone/archive/2007/06/20/125372.html#Feedback1http://www.aygfsteel.com/dreamstone/comments/commentRss/125372.htmlhttp://www.aygfsteel.com/dreamstone/services/trackbacks/125372.htmlSimpleAnalyzer
StandardAnalyzer
WhitespaceAnalyzer
StopAnalyzer
package analyzer;

import java.io.Reader;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

public class TestAnalyzer {
    
private static String testString1 = "The quick brown fox jumped over the lazy dogs";
    
private static String testString2 = "xy&z mail is - xyz@sohu.com";
    
public static void testWhitespace(String testString) throws Exception{
        Analyzer analyzer 
= new WhitespaceAnalyzer();      
        Reader r 
= new StringReader(testString);      
        Tokenizer ts 
= (Tokenizer) analyzer.tokenStream("", r);      
        System.err.println(
"=====Whitespace analyzer====");
        System.err.println(
"分析ҎQ空格分?/span>");
        Token t;      
        
while ((t = ts.next()) != null{      
           System.out.println(t.termText());      
        }
     
    }

    
public static void testSimple(String testString) throws Exception{
        Analyzer analyzer 
= new SimpleAnalyzer();      
        Reader r 
= new StringReader(testString);      
        Tokenizer ts 
= (Tokenizer) analyzer.tokenStream("", r);      
        System.err.println(
"=====Simple analyzer====");
        System.err.println(
"分析ҎQ空格及各种W号分割");
        Token t;      
        
while ((t = ts.next()) != null{      
           System.out.println(t.termText());      
        }
     
    }

    
public static void testStop(String testString) throws Exception{
        Analyzer analyzer 
= new StopAnalyzer();      
        Reader r 
= new StringReader(testString);      
        StopFilter sf 
= (StopFilter) analyzer.tokenStream("", r);
        System.err.println(
"=====stop analyzer====");  
        System.err.println(
"分析ҎQ空格及各种W号分割,L停止词,停止词包?nbsp;is,are,in,on,the{无实际意义的词");
        
//停止?/span>
        Token t;      
        
while ((t = sf.next()) != null{      
           System.out.println(t.termText());      
        }
     
    }

    
public static void testStandard(String testString) throws Exception{
        Analyzer analyzer 
= new StandardAnalyzer();      
        Reader r 
= new StringReader(testString);      
        StopFilter sf 
= (StopFilter) analyzer.tokenStream("", r);
        System.err.println(
"=====standard analyzer====");
        System.err.println(
"分析ҎQ؜合分?包括了去掉停止词Q支持汉?/span>");
        Token t;      
        
while ((t = sf.next()) != null{      
            System.out.println(t.termText());      
        }
     
    }

    
public static void main(String[] args) throws Exception{
//        String testString = testString1;
        String testString = testString2;
        System.out.println(testString);
        testWhitespace(testString);
        testSimple(testString);
        testStop(testString);
        testStandard(testString);
    }


}



dreamstone 2007-06-20 16:46 发表评论
]]>
lucene建立索引时候的用到的一些文档和目录操作http://www.aygfsteel.com/dreamstone/archive/2007/06/20/125369.htmldreamstonedreamstoneWed, 20 Jun 2007 08:43:00 GMThttp://www.aygfsteel.com/dreamstone/archive/2007/06/20/125369.htmlhttp://www.aygfsteel.com/dreamstone/comments/125369.htmlhttp://www.aygfsteel.com/dreamstone/archive/2007/06/20/125369.html#Feedback0http://www.aygfsteel.com/dreamstone/comments/commentRss/125369.htmlhttp://www.aygfsteel.com/dreamstone/services/trackbacks/125369.html1,按照编号删除
public void deleteDoc(String indexDir) throws Exception{
        IndexReader reader 
= IndexReader.open(indexDir);
        reader.delete(
1);
        reader.close();
    }
2,根据term来删除<br>
public void deleteDocWithTerm(String indexDir) throws Exception{
        IndexReader reader 
= IndexReader.open(indexDir);
        reader.delete(
new Term("city","beijing"));
        reader.close();
    }
  3,取消删除
public void undeleteDoc(String indexDir) throws Exception{
        IndexReader reader 
= IndexReader.open(indexDir);
        reader.delete(
1);
        reader.undeleteAll();
        reader.close();
    }
4,删除后优化<br>
public void mergeDoc(String indexDir) throws Exception{
        IndexReader reader 
= IndexReader.open(indexDir);
        reader.delete(
new Term("city","beijing"));
        reader.close();
        
        IndexWriter writer 
= new IndexWriter(indexDir,new StandardAnalyzer(),true);
        writer.optimize();
        writer.close();
    }

5,把RAMDirectory中的索引合并到FSDirectory中<br>
public void indexOperator(String indexDir) throws Exception{
        FSDirectory fsDir 
= FSDirectory.getDirectory(indexDir,true);
        RAMDirectory ramDir 
= new RAMDirectory();
        IndexWriter fsWriter 
= new  IndexWriter(fsDir,new SimpleAnalyzer(),true);
        IndexWriter ramWriter 
= new IndexWriter(ramDir,new SimpleAnalyzer(),true);
        
//..ramWriter add doc
        fsWriter.addIndexes(new Directory[]{ramDir});
        ramWriter.close();
        
    }
6,把索引的内容直接读入内存
// Build an in-memory directory from the existing index located at indexDir.
RAMDirectory ramDir = new RAMDirectory(indexDir);


dreamstone 2007-06-20 16:43 发表评论
]]>
lucene 索引非txt文档 (pdf word rtf html xml)http://www.aygfsteel.com/dreamstone/archive/2007/06/14/124286.htmldreamstonedreamstoneThu, 14 Jun 2007 05:27:00 GMThttp://www.aygfsteel.com/dreamstone/archive/2007/06/14/124286.htmlhttp://www.aygfsteel.com/dreamstone/comments/124286.htmlhttp://www.aygfsteel.com/dreamstone/archive/2007/06/14/124286.html#Feedback0http://www.aygfsteel.com/dreamstone/comments/commentRss/124286.htmlhttp://www.aygfsteel.com/dreamstone/services/trackbacks/124286.html索引ҎQ就是先把各U文档先转化成纯文本再烦引,所以关键在转换上。幸好java世界中有太多的开源工E,很多都可以拿来直接用。下边分别介l一下:
写在所有之前:下边所有介l中的is参数都是inputStreamQ就是被索引的文件?br>word文档Q?br>把word文档转换成纯文本的开源工E可以用:POI 或者TextMining
POI的使用方法：
 WordDocument wd = new WordDocument(is);
      StringWriter docTextWriter 
= new StringWriter();
      wd.writeAllText(
new PrintWriter(docTextWriter));
      docTextWriter.close();
      bodyText 
= docTextWriter.toString();
TextMining的使用方法更简单：
bodyText = new WordExtractor().extractText(is);

PDF文档Q?br>转换PDF文档可以使用的类库是PDFbox
COSDocument cosDoc = null;
   PDFParser parser = new PDFParser(is);
    parser.parse();
cosDoc 
= parser.getDocument()
if (cosDoc.isEncrypted()) {
        DecryptDocument decryptor 
= new DecryptDocument(cosDoc);
        decryptor.decryptDocument(password);
 }
PDFTextStripper stripper 
= new PDFTextStripper();
String docText 
= stripper.getText(new PDDocument(cosDoc));
RTF文档Q?br>rtf的{换则在javax中就?br>
DefaultStyledDocument styledDoc = new DefaultStyledDocument();
new RTFEditorKit().read(is, styledDoc, 0);
      String bodyText 
= styledDoc.getText(0, styledDoc.getLength());
q样可以烦引各U格式的文本?br>
html和xml的处理方法同?br>不同的是html的可用类库是QJTidy
Xml可用的类库是SAX和digester

dreamstone 2007-06-14 13:27 发表评论
]]>
apache lucene 的核心类http://www.aygfsteel.com/dreamstone/archive/2007/06/12/123531.htmldreamstonedreamstoneTue, 12 Jun 2007 01:52:00 GMThttp://www.aygfsteel.com/dreamstone/archive/2007/06/12/123531.htmlhttp://www.aygfsteel.com/dreamstone/comments/123531.htmlhttp://www.aygfsteel.com/dreamstone/archive/2007/06/12/123531.html#Feedback0http://www.aygfsteel.com/dreamstone/comments/commentRss/123531.htmlhttp://www.aygfsteel.com/dreamstone/services/trackbacks/123531.html主要有两部分l成Q核心烦引类和核心搜索类Q顾名思意Q就是用来徏立烦引和用来搜烦的类?br>
IndexWriterQ可以对索引q行写操作,但不能读取或者搜索。是唯一能写索引的类?br>
DirectoryQDirectorycM表一个Lucene索引的位|。它是一个抽象类Q允许它的子c?其中的两个包含在Lucene?在合适时存储索引。在我们的IndexerCZ中,我们使用一个实际文件系l目录的路径传递给IndexWriter的构造函数来获得Directory的一个实例。IndexWriter然后使用Directory的一个具体实现FSDirectoryQƈ在文件系l的一个目录中创徏索引。在你的应用E序中,你可能较喜欢Lucene索引存储在磁盘上。这时可以用FSDirectoryQ一个包含文件系l真实文件列表的Driectory子类Q如同我们在Indexer中一栗另一个Directory的具体子cLRAMDirectory。尽它提供了与FSDirectory相同的接口,RAMDirectory它的所有数据加载到内存中。所以这个实现对较小索引很有用处Q可以全部加载到内存中ƈ在程序关闭时销毁。因为所有数据加载到快速存取的内存中而不是在慢速的盘上,RAMDirectory适合于你需要快速访问烦引的情况Q不是索引或搜索。做为实例,Lucene的开发者在所有他们的单元试中做了扩展用:当测试运行时Q快速的内存ȝ索引被创建搜索,当测试结束时Q烦引自动销毁,不会在磁盘上留下MD余。当Ӟ在将文g~存到内存的操作pȝ中用时RAMDirectory和FSDirectory之间的性能差别较小

Analyzer:分析文本内容Q提取关键字

Document:一个Document代表字段的集合。你可以把它惌Z后可获取的虚拟文档—一块数据,如一个网c一个邮件消息或一个文本文件。一个文档的字段代表q个文档或与q个文档相关的元数据

Field:在烦引中的每个Document含有一个或多个字段Q具体化为FieldcR每个字D늛应于数据的一个片D,在搜烦时查询或从烦引中重新获取?br>Lucene提供四个不同的字D늱型,你可以从中做出选择Q?br>
Keyword—不被分析,但是被烦引ƈ逐字存储到烦引中。这个类型适合于原始值需要保持原L字段Q如URL、文件系l\径、日期、个人名U、社会安全号码、电话号码等{。例如,我们在Indexer(列表1.1)中把文gpȝ路径作ؓKeyword字段?br>
UnIndexed—不被分析也不被索引Q但是它的值存储到索引中。这个类型适合于你需要和搜烦l果一hC的字段(如URL或数据库主键)Q但是你从不直接搜烦它的倹{因U类型字D늚原始值存储在索引中,q种cd不适合于存放比较巨大的|如果索引大小是个问题的话?br>
UnStored—和UnIndexed相反。这个字D늱型被分析q烦引但是不存储在烦引中。它适合于烦引大量的文本而不需要以原始形式重新获得它。例如网늚M或Q休其它类型的文本文档?br>
Text—被分析q烦引。这意味着q种cd的字D可以被搜烦Q但是要心字段大小。如果要索引的数据是一个StringQ它也被存储Q但如果数据(如我们的Indexer例子)是来自一个ReaderQ它׃会被存储。这通常是؜q来源Q所以在使用Field.Text时要注意q个区别?br>所有字D는名称和值组成。你要用哪U字D늱型取决于你要如何使用q个字段和它的倹{严格来_Lucene只有一个字D늱型:以各自特征来区分的字Dc有些是被分析的Q有些不是;有些是被索引Q然面有些被逐字地存储等{?br>注意 注意Field.Text(String, String)和Field.Text(String, Reader)之间的区别。String变量存储字段数据Q而Reader变量不存储。ؓ索引一个String而又不想存储它,可以用Field.UnStored(String, String)

下边是核心搜索类Q?br>IndexSearcherQIndexSearcher用来搜烦而IndexWriter用来索引Q暴露几个搜索方法的索引的主要链接。你可以把IndexSearcher惌Z只读方式打开索引的一个类。它提供几个搜烦ҎQ其中一些在抽象基类Searcher中实玎ͼ最单的接受单个Query对象做ؓ参数q返回一个Hits对象。这个方法的典型应用cMq样Q?br>
IndexSearcher is = new IndexSearcher(
FSDirectory.getDirectory(“
/tmp/index”, false));
Query q 
= new TermQuery(new Term(“contents”, “lucene”));
Hits hits 
= is.search(q);

Term:
Term是搜索的基本单元。与Field对象cMQ它׃对字W串元素l成Q字D늚名称和字D늚倹{注意Term对象也和索引q程有关。但是它们是由Lucene内部生成Q所以在索引时你
一般不必考虑它们。在搜烦Ӟ你可能创建Term对象qTermQuery同时使用?br>Query q = new TermQuery(new Term(“contents”, “lucene”));
Hits hits = is.search(q);
q段代码使Lucene扑և在contents字段中含有单词lucene的所有文档。因为TermQuery对象l承自它的抽象父cQueryQ你可以在等式的左边用Querycd?br>
Query
Lucene中包含一些Query的具体子cR到目前为止Q在本章中我们仅提到q最基本的Lucene QueryQTermQuery。其它Querycd有BooleanQueryQPhraseQuery, PrefixQuery, PhrasePrefixQuery, RangeQuery, FilteredQuery和SpanQuery

TermQuery
TermQuery是Lucene支持的最基本的查询类型,q且它也是最原始的查询类型之一。它用来匚w含有指定值的字段的文?

HitscL一个搜索结?匚wl定查询的文?文档队列指针的简单容器。基于性能考虑QHits的实例ƈ不从索引中加载所有匹配查询的所有文档,而是每次一部?

dreamstone 2007-06-12 09:52 发表评论
]]>
apache lucene 一个最单的实例http://www.aygfsteel.com/dreamstone/archive/2007/06/12/123528.htmldreamstonedreamstoneTue, 12 Jun 2007 01:46:00 GMThttp://www.aygfsteel.com/dreamstone/archive/2007/06/12/123528.htmlhttp://www.aygfsteel.com/dreamstone/comments/123528.htmlhttp://www.aygfsteel.com/dreamstone/archive/2007/06/12/123528.html#Feedback0http://www.aygfsteel.com/dreamstone/comments/commentRss/123528.htmlhttp://www.aygfsteel.com/dreamstone/services/trackbacks/123528.html阅读全文

dreamstone 2007-06-12 09:46 发表评论
]]>
apache lucene介绍http://www.aygfsteel.com/dreamstone/archive/2007/06/11/123317.htmldreamstonedreamstoneMon, 11 Jun 2007 02:42:00 GMThttp://www.aygfsteel.com/dreamstone/archive/2007/06/11/123317.htmlhttp://www.aygfsteel.com/dreamstone/comments/123317.htmlhttp://www.aygfsteel.com/dreamstone/archive/2007/06/11/123317.html#Feedback0http://www.aygfsteel.com/dreamstone/comments/commentRss/123317.htmlhttp://www.aygfsteel.com/dreamstone/services/trackbacks/123317.html 
什么是lucene
Apache Lucene是一个开放源E序的搜d引擎Q利用它可以LCؓJava软g加入全文搜寻功能。Lucene的最主要工作是替文g的每一个字作烦引,索引让搜ȝ效率比传l的逐字比较大大提高QLucen提供一l解读,qoQ分析文Ӟ~排和用烦引的APIQ它的强大之处除了高效和单外Q是最重要的是使用者可以随时应自已需要自订其功能?Lucene是apache软g基金会项目组的一个子目Q是一个开放源代码的全文检索引擎工具包Q即它不是一个完整的全文索引擎,而是一个全文检索引擎的架构Q提供了完整的查询引擎和索引引擎Q部分文本分析引擎。Lucene的目的是Y件开发h员提供一个简单易用的工具包,以方便的在目标系l中实现全文索的功能Q或者是以此为基建立起完整的全文索引擎?

Lucene的作者:
Lucene的原作者是Doug CuttingQ他是一位资深全文烦?索专Ӟ曄是V-Twin搜烦引擎的主要开发者,后在Excite担Q高pȝ架构设计师,目前从事于一些Internet底层架构的研I?/p>

Lucene的历Ԍ
早先发布在作者自qhttp://www.lucene.com/Q后来发布在SourceForgeQ?001q年底成为apache软g基金会jakarta的一个子目?现在则是apache的顶U项?a >http://lucene.apache.org/

Lucene应用:
apache软g基金会的|站使用了Lucene作ؓ全文索的引擎
IBM的开源Y件eclipse也采用了Lucene作ؓ帮助子系l的全文索引引擎Q?br>相应的IBM的商业Y件Web Sphere中也采用了Lucene?br>著名的Jive论坛使用了它
EyebrowsQEyeBrows是目前APACHE目的主要邮件列表归档系l)邮g列表HTML归档/览/查询pȝ
也用了?br>Lucene以其开放源代码的特性、优异的索引l构、良好的pȝ架构获得了越来越多的应用?br>Cocoon:ZXML的web发布框架Q全文检索部分用了Lucene
到现在lucene已经有C++、C#、Python和Perl的版?br>更多关于lucene的应用见q里Q?a >http://wiki.apache.org/lucene-java/PoweredBy

Lucene能做什?
Lucene使你可以Z的应用程序添加烦引和搜烦能力(q些功能在1.3节中描述)。Lucene可以索引q能使得可以转换成文本格式的M数据能够被搜索。在?.5可以看出QLuceneq不兛_数据的来源、格式甚臛_的语aQ只要你能将它{换ؓ文本。这意味着你可l烦引ƈ搜烦存放于文件中的数据:在远E服务器上的web面Q存于本地文件系l的文档Q简单的文本文gQ微软Word文档QHTML或PDF文g或Q何其它能够提取出文本信息的格式?

同样Q利用Lucene你可以烦引存放于数据库中的数据,提供l用户很多数据库没有提供?/p>

全文搜烦的能力。一旦你集成了LuceneQ你的应用程序的用户p够像q样来搜索:+George +Rice –eat –pudding, Apple –pie +Tiger, animal:monkey AND food:banana{等。利用LuceneQ你可以索引和搜索email邮gQ邮件列表档案,x聊天记录Q你的Wiki面……{等更多?

Lucene资料Q?br>Lucene主页Q?a >http://lucene.apache.org/

中文的lucene教程Q?http://www.chedong.com/tech/lucene.html#intro

写的很好的lucene书:lucene in action

luceneAPIQ?a >http://lucene.zones.apache.org:8080/hudson/job/Lucene-Nightly/javadoc/

lucene in action CZ代码:http://www.manning.com/hatcher2

lucene 的wikiQ?a >http://wiki.apache.org/lucene-java/FrontPage?action=show&redirect=FrontPageEN

Lucene的优点:
Q?Q烦引文件格式独立于应用q_。Lucene定义了一套以8位字节ؓ基础的烦引文件格式,使得兼容pȝ或者不同^台的应用能够׃n建立的烦引文件?
Q?Q在传统全文索引擎的倒排索引的基上,实现了分块烦引,能够针对新的文g建立文件烦引,提升索引速度。然后通过与原有烦引的合ƈQ达C化的目的?
Q?Q优U的面向对象的pȝ架构Q得对于Lucene扩展的学习难度降低,方便扩充新功能?
Q?Q设计了独立于语a和文件格式的文本分析接口Q烦引器通过接受Token完成烦引文件的创立Q用h展新的语a和文件格式,只需要实现文本分析的接口?
Q?Q已l默认实C一套强大的查询引擎Q用h需自己~写代码即ɾpȝ可获得强大的查询能力QLucene的查询实C默认实现了布操作、模p查询、分l查询等{?

Lucene的周?br>Nutch vs Lucene
Lucene 不是完整的应用程序,而是一个用于实现全文检索的软g库?br>Nutch 是一个应用程序,可以?Lucene 为基实现搜烦引擎应用?/p>

Nutch vs Larbin
"Larbin只是一个爬虫，也就是说larbin只抓取网页，至于如何parse的事情则由用户自己完成。另外，如何存储到数据库以及建立索引的事情larbin也不提供。"

Nutch 则还可以存储到数据库q徏立烦引?br>

搜烦的概?本段来自lucene in action原文)Q?/span>

索引和搜?span lang=EN-US>

所有搜索引擎的核心是索引的概念:原始数据处理成一个高效的交差引用的查扄构以便于快速的搜烦。让我们对烦引和搜烦q程做一ơ快速的高层ơ的览?span lang=EN-US>

 

什么是索引Qؓ什么它很重要?

惛_一下,你需要搜索大量的文gQƈ且你x出包含一个指定的词或短语的文件。你如何~写一个程序来做到q个Q一个幼E的Ҏ是针对给定的词或短语序扫描每个文g。这个方法有很多~点Q最明显的就是它不适合于大量的文g或者文仉常巨大的情况。这时就出现了烦引:Z快速搜索大量的文本Q你必须首先索引那个文本然后把它转化Z个可以让你快速搜索的格式Q除ȝ慢的序地扫描过E。这个{化过E称为烦引,它的输出UCؓ一条烦引。你可以把烦引理解ؓ一个可以让你快速随问存于其内部的词的数据结构。它隐含的概늱g一本书最后的索引Q可以让你快速找到讨论指定主题的面。在Lucene中,一个烦引是一个精心设计的数据l构Q在文gpȝ中存储ؓ一l烦引文件。我们在附录B中详l地说明了烦引文件的l构Q但是目前你只须认ؓLucene的烦引是一个能快速的词汇查找的工兗?

 

什么是搜烦Q?span lang=EN-US>

搜烦是在一个烦引中查找单词来找出它们所出现的文档的q程。一个搜索的质量用精度和召回率来描q。召回率衡量搜烦pȝ搜烦到相x档的能力Q精度衡量pȝqo不相x档的能力。然而,在考虑搜烦时你必须考虑其它一些因素。我们已l提到速度和快速搜索大量文本的能力。支持单个和多个词汇的查询,短语查询Q通配W,l果分和排序也是很重要的,在输入这些查询的时候也是友好的语法?/span>Lucene强大的Y件库提供了大量的搜烦特征?/span>bells?/span>whistlesQ?/span>




dreamstone 2007-06-11 10:42 发表评论
]]>
վ֩ģ壺 | ˼| ʡ| α| տ| ɽ| ɽ| ˮ| ɳƺ| º| | | | dz| Ϲ| | ƽ| | Ԫ| | | ̨| ʯ| ؼ| ԭ| | ʡ| | ̨| | ī| | Ȫ| е| | | | | ¡| ɽ| |