具体的实现方法见下面的测试代码（只做示例用）：<br />
1
/** *//**
2
*
3
*/
4
package edu.tju.ikse.mi.util;
5
6
import java.io.File;
7
import java.io.IOException;
8
import java.util.Random;
9
10
import weka.attributeSelection.ASEvaluation;
11
import weka.attributeSelection.ASSearch;
12
import weka.attributeSelection.AttributeSelection;
13
import weka.attributeSelection.BestFirst;
14
import weka.attributeSelection.CfsSubsetEval;
15
import weka.core.Instances;
16
import weka.core.converters.ArffLoader;
17
18
/** *//**
19
* @author Jia Yu
20
* @date 2010-11-23
21
*/
22
public class WekaSelector
{
23
24
private ArffLoader loader;
25
private Instances dataSet;
26
private File arffFile;
27
private int sizeOfDataset;
28
private int numOfOldAttributes;
29
private int numOfNewAttributes;
30
private int classIndex;
31
private int[] selectedAttributes;
32
33
public WekaSelector(File file) throws IOException
{
34
loader = new ArffLoader();
35
arffFile = file;
36
loader.setFile(arffFile);
37
dataSet = loader.getDataSet();
38
sizeOfDataset = dataSet.numInstances();
39
numOfOldAttributes = dataSet.numAttributes();
40
classIndex = numOfOldAttributes - 1;
41
dataSet.setClassIndex(classIndex);
42
}
43
44
public void select() throws Exception
{
45
ASEvaluation evaluator = new CfsSubsetEval();
46
ASSearch search = new BestFirst();
47
AttributeSelection eval = null;
48
49
eval = new AttributeSelection();
50
eval.setEvaluator(evaluator);
51
eval.setSearch(search);
52
53
eval.SelectAttributes(dataSet);
54
numOfNewAttributes = eval.numberAttributesSelected();
55
selectedAttributes = eval.selectedAttributes();
56
System.out.println("result is "+eval.toResultsString());
57
/**//*
58
Random random = new Random(seed);
59
dataSet.randomize(random);
60
if (dataSet.attribute(classIndex).isNominal()) {
61
dataSet.stratify(numFolds);
62
}
63
for (int fold = 0; fold < numFolds; fold++) {
64
Instances train = dataSet.trainCV(numFolds, fold, random);
65
eval.selectAttributesCVSplit(train);
66
}
67
System.out.println("result is "+eval.CVResultsString());
68
*/
69
System.out.println("old number of Attributes is "+numOfOldAttributes);
70
System.out.println("new number of Attributes is "+numOfNewAttributes);
71
for(int i=0;i<selectedAttributes.length;i++)
{
72
System.out.println(selectedAttributes[i]);
73
}
74
}
75
76
/** *//**
77
* @param args
78
*/
79
public static void main(String[] args)
{
80
// TODO Auto-generated method stub
81
File file = new File("iris.arff");
82
try
{
83
WekaSelector ws = new WekaSelector(file);
84
ws.select();
85
86
} catch (IOException e)
{
87
// TODO Auto-generated catch block
88
e.printStackTrace();
89
} catch (Exception e)
{
90
// TODO Auto-generated catch block
91
e.printStackTrace();
92
}
93
94
}
95
96
}
97
其中的注释部分是使用交叉验证的部分。默认是十折交叉验证，当然这个可以通过set方法设置。具体的使用或者用到reduce dimensionality的方法大家可以参看源代码。毕竟weka开源很是方便。源代码涉及到的类主要是查看weka.attributeSelection.AttributeSelection类就可以了。当然如何调用和选择可以看看weka.gui.explorer.AttributeSelectionPanel类。<br />
上面代码的实验结果如下：
result is
=== Attribute Selection on all input data ===
Search Method:
Best first.
Start set: no attributes
Search direction: forward
Stale search after 5 node expansions
Total number of subsets evaluated: 12
Merit of best subset found: 0.887
Attribute Subset Evaluator (supervised, Class (nominal): 5 class):
CFS Subset Evaluator
Including locally predictive attributes
Selected attributes: 3,4 : 2
petallength
petalwidth
old number of Attributes is 5
new number of Attributes is 2
2
3
4
原来的iris数据集中共有4个属性（包含一个分类的类标所以一共5维），经过特征选择后，只有第3和第4两个维度的特征保留，所以新特征子集有两个维度（不包含类标，有点绕，不好意思，我总这样）。<br />
最后的2，3，4是属性数组的下标，表示经过特征选择保留的属性子集是第3，4，5个属性。<br />

]]>