package com.infosys.lucene.code;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Set;
import org.apache.lucene.analysis.*;
public class JavaSourceCodeAnalyzer extends Analyzer {
??????private Set javaStopSet;
??????private Set englishStopSet;
??????private static final String[] JAVA_STOP_WORDS = {
???????? "public","private","protected","interface",
????????????"abstract","implements","extends","null""new",
?? ????????"switch","case", "default" ,"synchronized" ,
????????????"do", "if", "else", "break","continue","this",
?? ????????"assert" ,"for","instanceof", "transient",
????????????"final", "static" ,"void","catch","try",
????????????"throws","throw","class", "finally","return",
????????????"const" , "native", "super","while", "import",
????????????"package" ,"true", "false" };
???? private static final String[] ENGLISH_STOP_WORDS ={
????????????"a", "an", "and", "are","as","at","be" "but",
????????????"by", "for", "if", "in", "into", "is", "it",
????????????"no", "not", "of", "on", "or", "s", "such",
????????????"that", "the", "their", "then", "there","these",
????????????"they", "this", "to", "was", "will", "with" };
???? public SourceCodeAnalyzer(){
????????????super();
????????????javaStopSet = StopFilter.makeStopSet(JAVA_STOP_WORDS);
????????????englishStopSet = StopFilter.makeStopSet(ENGLISH_STOP_WORDS);
???? }
???? public TokenStream tokenStream(String fieldName, Reader reader) {
????????????if (fieldName.equals("comment"))
???????????????????? return?? new PorterStemFilter(new StopFilter(
????????????????????????new LowerCaseTokenizer(reader),englishStopSet));
????????????else
???????????????????? return?? new StopFilter(
?????????????????? new LowerCaseTokenizer(reader),javaStopSet);
???? }
}
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import com.infosys.lucene.code.JavaParser.*;
public class JavaSourceCodeIndexer {
????private static JavaParser parser = new JavaParser();
????????private static final String IMPLEMENTS = "implements";
????????private static final String IMPORT = "import";
????????...
????????public static void main(String[] args) {
????????????????File indexDir = new File("C:\\Lucene\\Java");
????????????????File dataDir = new File("C:\\JavaSourceCode ");
????????????????IndexWriter writer = new IndexWriter(indexDir,
????????????????????new JavaSourceCodeAnalyzer(), true);
????????????????indexDirectory(writer, dataDir);
????????????????writer.close();
????????}
????????public static void indexDirectory(IndexWriter writer, File dir){
????????????File[] files = dir.listFiles();
????????????for (int i = 0; i < files.length; i++) {
????????????????????File f = files[i];
????????????????// Create a Lucene Document
????????????????Document doc = new Document();
????????????????//??Use JavaParser to parse file
????????????????parser.setSource(f);
????????????????addImportDeclarations(doc, parser);
?????? ???????? ????????addComments(doc, parser);
???????? ????????// Extract Class elements Using Parser
????????????????JClass cls = parser.getDeclaredClass();
????????????????addClass(doc, cls);
???????? ????????// Add field to the Lucene Document
?????? ????????????????doc.add(Field.UnIndexed(FILENAME, f.getName()));
????????????????writer.addDocument(doc);
?? ???????? ????????}
????????}
????????private static void addClass(Document doc, JClass cls) {
?? ????????????????//For each class add Class Name field
????????????doc.add(Field.Text(CLASS, cls.className));
????????????String superCls = cls.superClass;
????????????if (superCls != null)
?? ????????????????//Add the class it extends as extends field
????????doc.add(Field.Text(EXTENDS, superCls));
????????????// Add interfaces it implements
????????????ArrayList interfaces = cls.interfaces;
????????????for (int i = 0; i < interfaces.size(); i++)
????????????????doc.add(Field.Text(IMPLEMENTS, (String) interfaces.get(i)));
?? ???????? ????????//Add details??on methods declared
????????????addMethods(cls, doc);
????????????ArrayList innerCls = cls.innerClasses;
?? ????????????????for (int i = 0; i < innerCls.size(); i++)
????????????????addClass(doc, (JClass) innerCls.get(i));
????????}
????????private static void addMethods(JClass cls, Document doc) {
????????????ArrayList methods = cls.methodDeclarations;
????????????for (int i = 0; i < methods.size(); i++) {
?????? ????????????????JMethod method = (JMethod) methods.get(i);
????????????????// Add method name field
????????????????doc.add(Field.Text(METHOD, method.methodName));
????????????????// Add return type field
????????????????doc.add(Field.Text(RETURN, method.returnType));
????????????????ArrayList params = method.parameters;
????????????????for (int k = 0; k < params.size(); k++)
????????????????// For each method add parameter types
????????????????????doc.add(Field.Text(PARAMETER, (String)params.get(k)));
????????????????String code = method.codeBlock;
????????????????if (code != null)
????????????????//add the method code block
????????????????????doc.add(Field.UnStored(CODE, code));
????????????}
????????}
????????private static void addImportDeclarations(Document doc, JavaParser parser) {
?? ????????????????ArrayList imports = parser.getImportDeclarations();
????????????if (imports == null)???? return;
????????????for (int i = 0; i < imports.size(); i++)
????????????????????//add import declarations as keyword
????????????????doc.add(Field.Keyword(IMPORT, (String) imports.get(i)));
????????}
}
public class JavaCodeSearch {
public static void main(String[] args) throws Exception{
????File indexDir = new File(args[0]);
????String q =??args[1]; //parameter:JGraph code:insert
????Directory fsDir = FSDirectory.getDirectory(indexDir,false);
????IndexSearcher is = new IndexSearcher(fsDir);
????PerFieldAnalyzerWrapper analyzer = new
????????PerFieldAnalyzerWrapper( new
????????????????JavaSourceCodeAnalyzer());
????analyzer.addAnalyzer("import", new KeywordAnalyzer());
????Query query = QueryParser.parse(q, "code", analyzer);
????long start = System.currentTimeMillis();
????Hits hits = is.search(query);
????long end = System.currentTimeMillis();
????System.err.println("Found " + hits.length() +
????????????????" docs in " + (end-start) + " millisec");
????for(int i = 0; i < hits.length(); i++){
????Document doc = hits.doc(i);
????????System.out.println(doc.get("filename")
????????????????+ " with a score of " + hits.score(i));
????}
????is.close();
}
}
只有注冊用戶登錄后才能發表評論。 | ||
![]() |
||
網站導航:
博客園
IT新聞
Chat2DB
C++博客
博問
管理
|
||
相關文章:
|
||