weka的java使用(2)——分類
書接上文,既然寫了聚類,再把我用到的分類的相關代碼奉上。
/**
2
*
3
*/
4
package edu.tju.ikse.mi.util;
5
6
import java.io.File;
7
import java.io.IOException;
8
9
import weka.classifiers.Classifier;
10
import weka.core.Attribute;
11
import weka.core.Instances;
12
import weka.core.converters.ArffLoader;
13
14
/**
15
* @author Jia Yu
16
* @date 2010-6-9
17
*/
18
public class WekaClassifier {
19
20
/**
21
* @param args
22
*/
23
private ArffLoader loader;
24
private Instances dataSet;
25
private File arffFile;
26
private int sizeOfDataset;
27
private Classifier classifier;
28
private int sizeOfAttribute;
29
private String resultClass;
30
private double[] distributions;
31
32
public WekaClassifier(File file) throws Exception {
33
this.arffFile = file;
34
loadTrainSet();
35
loadClassifier();
36
buildClassifierModel();
37
}
38
39
private void buildClassifierModel() throws Exception {
40
this.classifier.buildClassifier(dataSet);
41
}
42
43
private void loadClassifier() {
44
this.classifier = new weka.classifiers.meta.RandomSubSpace();
45
}
46
47
private void loadTrainSet() throws IOException {
48
loader = new ArffLoader();
49
loader.setFile(this.arffFile);
50
dataSet = loader.getDataSet();
51
setSizeOfDataset(dataSet.numInstances());
52
setSizeOfAttribute(dataSet.numAttributes());
53
dataSet.setClassIndex(this.sizeOfAttribute - 1);
54
}
55
56
public void classifyInstance(weka.core.Instance instance) throws Exception {
57
double tNum = this.classifier.classifyInstance(instance);
58
setDistributions(this.classifier.distributionForInstance(instance));
59
Attribute attr = dataSet.attribute(dataSet.classIndex());
60
int classIndex = (int) tNum;
61
setResultClass(attr.value(classIndex));
62
}
63
64
public static void main(String[] args) {
65
File file = new File(
66
"iris.arff");
67
try {
68
WekaClassifier wc = new WekaClassifier(file);
69
double[] feature = { 5.1,3.5,1.4,0.2 };
70
weka.core.Instance ins = new weka.core.Instance(wc
71
.getSizeOfAttribute());
72
ins.setDataset(wc.getDataSet());
73
for (int i = 0; i < ins.numAttributes() - 1; i++) {
74
ins.setValue(i, feature[i]);
75
// System.out.println(ins.attribute(i).getLowerNumericBound());
76
}
77
ins.setValue(ins.numAttributes() - 1, "Iris-setosa");
78
System.out.println("original class is "
79
+ ins.stringValue(ins.numAttributes() - 1));
80
wc.classifyInstance(ins);
81
System.out.println("classify it to class "
82
+ wc.getResultClass());
83
} catch (Exception e) {
84
e.printStackTrace();
85
}
86
}
87
88
public int getSizeOfAttribute() {
89
return sizeOfAttribute;
90
}
91
92
public void setSizeOfAttribute(int sizeOfAttribute) {
93
this.sizeOfAttribute = sizeOfAttribute;
94
}
95
96
public Instances getDataSet() {
97
return dataSet;
98
}
99
100
public void setDataSet(Instances dataSet) {
101
this.dataSet = dataSet;
102
}
103
104
public String getResultClass() {
105
return resultClass;
106
}
107
108
public void setResultClass(String resultClass) {
109
this.resultClass = resultClass;
110
}
111
112
public void setDistributions(double[] distributions) {
113
this.distributions = distributions;
114
}
115
116
public double[] getDistributions() {
117
return distributions;
118
}
119
120
public void setSizeOfDataset(int sizeOfDataset) {
121
this.sizeOfDataset = sizeOfDataset;
122
}
123
124
public int getSizeOfDataset() {
125
return sizeOfDataset;
126
}
127
}
128

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

其中的iris數據集也是數據挖掘領域的標準數據集,這個程序的運行結果如下:
original class is Iris-setosa
classify it to class Iris-setosa
感興趣的朋友可以和我一起研究~~~郵件聯繫哦。
posted on 2010-11-04 09:51 changedi 閱讀(4064) 評論(2) 編輯 收藏 所屬分類: 機器學習