DANCE WITH JAVA

          開發出高質量的系統

          常用鏈接

          統計

          積分與排名

          好友之家

          最新評論

          比較lucene各種英文分析器Analyzer

          比較常用的幾種英文分析器,它們之間的區別見程序中的注釋。
          SimpleAnalyzer
          StandardAnalyzer
          WhitespaceAnalyzer
          StopAnalyzer
          package analyzer;

          import java.io.Reader;
          import java.io.StringReader;

          import org.apache.lucene.analysis.Analyzer;
          import org.apache.lucene.analysis.SimpleAnalyzer;
          import org.apache.lucene.analysis.StopAnalyzer;
          import org.apache.lucene.analysis.StopFilter;
          import org.apache.lucene.analysis.Token;
          import org.apache.lucene.analysis.Tokenizer;
          import org.apache.lucene.analysis.WhitespaceAnalyzer;
          import org.apache.lucene.analysis.standard.StandardAnalyzer;

          public class TestAnalyzer {
              
          private static String testString1 = "The quick brown fox jumped over the lazy dogs";
              
          private static String testString2 = "xy&z mail is - xyz@sohu.com";
              
          public static void testWhitespace(String testString) throws Exception{
                  Analyzer analyzer 
          = new WhitespaceAnalyzer();      
                  Reader r 
          = new StringReader(testString);      
                  Tokenizer ts 
          = (Tokenizer) analyzer.tokenStream("", r);      
                  System.err.println(
          "=====Whitespace analyzer====");
                  System.err.println(
          "分析方法:空格分割");
                  Token t;      
                  
          while ((t = ts.next()) != null{      
                     System.out.println(t.termText());      
                  }
               
              }

              
          public static void testSimple(String testString) throws Exception{
                  Analyzer analyzer 
          = new SimpleAnalyzer();      
                  Reader r 
          = new StringReader(testString);      
                  Tokenizer ts 
          = (Tokenizer) analyzer.tokenStream("", r);      
                  System.err.println(
          "=====Simple analyzer====");
                  System.err.println(
          "分析方法:空格及各種符號分割");
                  Token t;      
                  
          while ((t = ts.next()) != null{      
                     System.out.println(t.termText());      
                  }
               
              }

              
          public static void testStop(String testString) throws Exception{
                  Analyzer analyzer 
          = new StopAnalyzer();      
                  Reader r 
          = new StringReader(testString);      
                  StopFilter sf 
          = (StopFilter) analyzer.tokenStream("", r);
                  System.err.println(
          "=====stop analyzer====");  
                  System.err.println(
          "分析方法:空格及各種符號分割,去掉停止詞,停止詞包括 is,are,in,on,the等無實際意義的詞");
                  
          //停止詞
                  Token t;      
                  
          while ((t = sf.next()) != null{      
                     System.out.println(t.termText());      
                  }
               
              }

              
          public static void testStandard(String testString) throws Exception{
                  Analyzer analyzer 
          = new StandardAnalyzer();      
                  Reader r 
          = new StringReader(testString);      
                  StopFilter sf 
          = (StopFilter) analyzer.tokenStream("", r);
                  System.err.println(
          "=====standard analyzer====");
                  System.err.println(
          "分析方法:混合分割,包括了去掉停止詞,支持漢語");
                  Token t;      
                  
          while ((t = sf.next()) != null{      
                      System.out.println(t.termText());      
                  }
               
              }

              
          public static void main(String[] args) throws Exception{
          //        String testString = testString1;
                  String testString = testString2;
                  System.out.println(testString);
                  testWhitespace(testString);
                  testSimple(testString);
                  testStop(testString);
                  testStandard(testString);
              }


          }

          posted on 2007-06-20 16:46 dreamstone 閱讀(4024) 評論(2)  編輯  收藏 所屬分類: 搜索引擎lucene

          評論

          # re: 比較lucene各種英文分析器Analyzer 2007-06-20 18:02 good

          不錯  回復  更多評論   

          # re: 比較lucene各種英文分析器Analyzer 2008-06-21 18:03 美女

          Me with the floorshow
          Kickin' with your torso
          Boys getting high
          And the girls even more so
          Wave your hands if your not with the man
          Can I kick it?
          (Yes you can)
          I got
          (Funk)
          You got
          (Soul)
          We got everybody
          I've got the gift
          Gonna stick it in the goal
          It's time to move your body
          Babylon back in business
          Can I get a witness?
          Every girl, every man
          Houston, can you hear me?
          Ground control, can you feel me?
          Need permission to land
          I don't wanna rock, DJ
          But your making me feel so nice
          When's it gonna stop, DJ?
          Cos you're keepin' me up all night

          Singin' in the classes
          Music for your masses
          Give no head
          No backstage passes
          Have a proper giggle
          I'll be quite polite
          But when I rock the mic
          I rock the mic
          (Right)
          You got no love, then you're with the wrong man
          It's time to move your body
          If you can't get a girl
          But your best friend can
          It's time to move your body
          I don't wanna be sleazy
          Baby just tease me
          Got no family planned
          Houston, can you hear me?
          Need permission to land
          But you're making me feel so nice
          When's it gonna stop, DJ?
          Cos you're keeping me up all night
          I don't wanna rock, DJ
          But you're making me feel so nice
          When's it gonna stop, DJ?
          Cos your keeping me up all night
          Pimpin' aint easy
          Most of them fleece me
          Every night
          Pimpin' ain't easy
          But if you're sellin' it
          It's alright
          Come on
          I don't wanna rock, DJ
          But you're making me feel so nice
          When's it gonna stop, DJ?
          Cos you're keeping me up all night
          I don't wanna rock, DJ
          But you're making me feel so nice
          When's it gonna stop, DJ?
          Cos you're keeping me up all night
          I don't wanna rock, DJ
          But you're making me feel so nice
          When's it gonna stop, DJ?
          Cos you're keeping me up all night
          I don't wanna rock, DJ
          But you're making me feel so nice
          When's it gonna stop, DJ?
          Cos you're keeping me up all night   回復  更多評論   

          主站蜘蛛池模板: 石楼县| 全南县| 榆林市| 顺平县| 延长县| 宜章县| 渭南市| 嘉定区| 宿迁市| 淮滨县| 鹿邑县| 临邑县| 东兴市| 德江县| 华蓥市| 隆林| 囊谦县| 温泉县| 元谋县| 措美县| 临桂县| 黑河市| 博湖县| 和平县| 平泉县| 淳化县| 沅江市| 依兰县| 宾川县| 廊坊市| 西宁市| 东海县| 江门市| 新巴尔虎左旗| 雷州市| 武定县| 玉屏| 四平市| 潍坊市| 夏河县| 罗江县|