隨筆-295  評論-26  文章-1  trackbacks-0
          字符串切分
          ?package demo.analysis;
          ?
          ?import java.io.IOException;
          ?
          ?import jeasy.analysis.MMAnalyzer;
          ?
          ?public class Segment?
          ?{
          ????????
          ???? public static void main(String[] args)?
          ???? {
          ???????? String text = "據(jù)路透社報(bào)道,印度尼西亞社會(huì)事務(wù)部一官員星期二(29日)表示,"
          ??????????????? + "日惹市附近當(dāng)?shù)貢r(shí)間27日晨5時(shí)53分發(fā)生的里氏6.2級(jí)地震已經(jīng)造成至少5427人死亡,"
          ??????????????? + "20000余人受傷,近20萬人無家可歸。";?
          ?????????
          ???????? MMAnalyzer analyzer = new MMAnalyzer();
          ???????? try?
          ???????? {
          ???????????????? System.out.println(analyzer.segment(text, " | "));
          ???????? }?
          ???????? catch (IOException e)?
          ???????? {
          ???????????????? e.printStackTrace();
          ???????? }
          ???? }
          ?}
          ?
          生成效果:
          據(jù) | 路透社 | 報(bào)道 | 印度尼西亞 | 社會(huì) | 事務(wù) | 部 | 官員 | 星期二 | 29日 | 表示 | 日惹 | 市 |
          附近 | 當(dāng)?shù)貢r(shí)間 | 27日 | 晨 | 5時(shí) | 53分 | 發(fā)生 | 里氏 | 6.2級(jí) | 地震 | 已經(jīng) | 造成 | 至少 |
          5427人 | 死亡 | 20000 | 余人 | 受傷 | 近 | 20萬人 | 無家可歸 |
          ?
          Lucene搜索
          ?package demo.analysis;
          ?
          ?import jeasy.analysis.MMAnalyzer;
          ?
          ?import org.apache.lucene.analysis.Analyzer;
          ?import org.apache.lucene.document.Document;
          ?import org.apache.lucene.document.Field;
          ?import org.apache.lucene.index.IndexWriter;
          ?import org.apache.lucene.queryParser.QueryParser;
          ?import org.apache.lucene.search.Hits;
          ?import org.apache.lucene.search.IndexSearcher;
          ?import org.apache.lucene.search.Query;
          ?import org.apache.lucene.store.Directory;
          ?import org.apache.lucene.store.RAMDirectory;
          ?
          ?public class Segment?
          ?{
          ?????
          ???? public static void main(String[] args)?
          ???? {
          ???????? String fieldName = "text";
          ???????? String text = "據(jù)路透社報(bào)道,印度尼西亞社會(huì)事務(wù)部一官員星期二(29日)表示,"
          ???????????? + "日惹市附近當(dāng)?shù)貢r(shí)間27日晨5時(shí)53分發(fā)生的里氏6.2級(jí)地震已經(jīng)造成至少5427人死亡,"
          ???????????? + "20000余人受傷,近20萬人無家可歸。"; //檢索內(nèi)容
          ?
          ???????? //采用正向最大匹配的中文分詞算法
          ???????? Analyzer analyzer = new MMAnalyzer();
          ?
          ???????? Directory directory = new RAMDirectory();
          ???????? //Directory directory = FSDirectory.getDirectory("/tmp/testindex", true);
          ?
          ???????? try
          ????????? {
          ???????????? IndexWriter iwriter = new IndexWriter(directory, analyzer, true);
          ???????????? iwriter.setMaxFieldLength(25000);
          ???????????? Document doc = new Document();
          ???????????? doc.add(new Field(fieldName, text, Field.Store.YES, Field.Index.TOKENIZED));
          ???????????? iwriter.addDocument(doc);
          ???????????? iwriter.close();
          ?????????????
          ???????????? IndexSearcher isearcher = new IndexSearcher(directory);
          ???????????? QueryParser parser = new QueryParser(fieldName, analyzer);
          ???????????? Query query = parser.parse("印度尼西亞 6.2級(jí)地震");//檢索詞
          ???????????? Hits hits = isearcher.search(query);
          ???????????? System.out.println("命中:" + hits.length());
          ?
          ???????????? for (int i = 0; i < hits.length(); i++)?
          ???????????? {
          ???????????????? Document hitDoc = hits.doc(i);
          ???????????????? System.out.println("內(nèi)容:" + hitDoc.get(fieldName));
          ???????????? }
          ?
          ???????????? isearcher.close();
          ???????????? directory.close();
          ???????? }?
          ???????? catch (Exception e)?
          ???????? {
          ???????????? e.printStackTrace();
          ???????? }???
          ???? }
          ?????
          ?}
          ?
          生成效果:
          命中:1
          內(nèi)容:據(jù)路透社報(bào)道,印度尼西亞社會(huì)事務(wù)部一官員星期二(29日)表示,日惹市附近當(dāng)?shù)貢r(shí)間27日晨5時(shí)53分發(fā)生的
          里氏6.2級(jí)地震已經(jīng)造成至少5427人死亡,20000余人受傷,近20萬人無家可歸。
          搜索詞加亮
          ?package demo.analysis;
          ?
          ?import jeasy.analysis.MMAnalyzer;
          ?
          ?import org.apache.lucene.analysis.Analyzer;
          ?import org.apache.lucene.analysis.TokenStream;
          ?import org.apache.lucene.document.Document;
          ?import org.apache.lucene.document.Field;
          ?import org.apache.lucene.index.IndexReader;
          ?import org.apache.lucene.index.IndexWriter;
          ?import org.apache.lucene.index.TermPositionVector;
          ?import org.apache.lucene.queryParser.QueryParser;
          ?import org.apache.lucene.search.Hits;
          ?import org.apache.lucene.search.IndexSearcher;
          ?import org.apache.lucene.search.Query;
          ?import org.apache.lucene.search.highlight.Highlighter;
          ?import org.apache.lucene.search.highlight.QueryScorer;
          ?import org.apache.lucene.search.highlight.TokenSources;
          ?import org.apache.lucene.store.Directory;
          ?import org.apache.lucene.store.RAMDirectory;
          ?
          ?public class Segment
          ?{
          ?
          ???? public static void main(String[] args)
          ???? {
          ???????? String fieldName = "text";
          ???????? String text = "據(jù)路透社報(bào)道,印度尼西亞社會(huì)事務(wù)部一官員星期二(29日)表示,"
          ???????????? + "日惹市附近當(dāng)?shù)貢r(shí)間27日晨5時(shí)53分發(fā)生的里氏6.2級(jí)地震已經(jīng)造成至少5427人死亡,"
          ???????????? + "20000余人受傷,近20萬人無家可歸。"; //檢索內(nèi)容
          ?
          ???????? //采用正向最大匹配的中文分詞算法
          ???????? Analyzer analyzer = new MMAnalyzer();
          ?
          ???????? Directory directory = new RAMDirectory();
          ???????? //Directory directory = FSDirectory.getDirectory("/tmp/testindex", true);
          ?
          ???????? try
          ???????? {
          ???????????? IndexWriter iwriter = new IndexWriter(directory, analyzer, true);
          ???????????? iwriter.setMaxFieldLength(25000);
          ???????????? Document doc = new Document();
          ???????????? doc.add(new Field(fieldName, text, Field.Store.YES,
          ???????????????????? Field.Index.TOKENIZED,
          ???????????????????? Field.TermVector.WITH_POSITIONS_OFFSETS));
          ???????????? iwriter.addDocument(doc);
          ???????????? iwriter.close();
          ?
          ???????????? IndexSearcher isearcher = new IndexSearcher(directory);
          ???????????? QueryParser parser = new QueryParser(fieldName, analyzer);
          ???????????? Query query = parser.parse("印度尼西亞 6.2級(jí)地震");//檢索詞
          ???????????? Hits hits = isearcher.search(query);
          ???????????? System.out.println("命中:" + hits.length());
          ?
          ???????????? Highlighter highlighter = new Highlighter(new QueryScorer(query));
          ???????????? for (int i = 0; i < hits.length(); i++)
          ???????????? {
          ???????????????? text = hits.doc(i).get(fieldName);
          ???????????????? TermPositionVector tpv = (TermPositionVector) IndexReader.open(
          ???????????????????? directory).getTermFreqVector(hits.id(i), fieldName);
          ???????????????? TokenStream tokenStream = TokenSources.getTokenStream(tpv);
          ???????????????? String result = highlighter.getBestFragments(tokenStream, text, 3, "...");
          ???????????????? System.out.println("內(nèi)容:" + result);
          ???????????? }
          ?
          ???????????? isearcher.close();
          ???????????? directory.close();
          ???????? }
          ???????? catch (Exception e)
          ???????? {
          ???????????? e.printStackTrace();
          ???????? }
          ???? }
          ?
          ?}
          ?
          生成效果:
          命中:1
          內(nèi)容:據(jù)路透社報(bào)道,<B>印度尼西亞</B>社會(huì)事務(wù)部一官員星期二(29日)表示,日惹市附近當(dāng)?shù)貢r(shí)間27日晨
          5時(shí)53分發(fā)生的里氏<B>6.2級(jí)</B><B>地震</B>已經(jīng)造成至少5427人死亡,20000余人受傷,近20萬人無家可歸


          大盤預測 國富論
          posted on 2008-10-29 10:21 華夢行 閱讀(906) 評論(0)  編輯  收藏

          只有注冊用戶登錄后才能發表評論。


          網站導航:
           
          主站蜘蛛池模板: 酒泉市| 商洛市| 社会| 通许县| 张北县| 元氏县| 扶沟县| 秦安县| 饶阳县| 平武县| 林甸县| 沐川县| 余庆县| 旺苍县| 涿州市| 乌鲁木齐市| 广河县| 赤峰市| 班玛县| 泸西县| 忻城县| 奉化市| 客服| 萨迦县| 嘉鱼县| 乾安县| 体育| 龙门县| 满洲里市| 大田县| 木里| 浦东新区| 河北省| 张家港市| 平乐县| 中阳县| 庆安县| 漳浦县| 汝阳县| 舞阳县| 醴陵市|