今天又將算法看了一遍,終于理解其精髓
下面是主要的實現代碼,獻丑了
首先是初始化一個boolean數組,存放各個字符是否匹配的:
public static boolean[][] getCompareBooleanArray(String source,
String compareTo) {
boolean[][] comps;
if (compareTo == null || source == null) {
throw new IllegalArgumentException("必須設置完兩個文本后再進行初始化");
}
comps = new boolean[compareTo.length()][source.length()];
for (int i = 0; i < compareTo.length(); i++) {
for (int j = 0; j < source.length(); j++) {
comps[i][j] = compareTo.charAt(i) == source.charAt(j);
// System.out.print(comps[i][j] + "\t");
}
// System.out.println();
}
return comps;
}
這個實現基本上沒什么說的,就這樣String compareTo) {
boolean[][] comps;
if (compareTo == null || source == null) {
throw new IllegalArgumentException("必須設置完兩個文本后再進行初始化");
}
comps = new boolean[compareTo.length()][source.length()];
for (int i = 0; i < compareTo.length(); i++) {
for (int j = 0; j < source.length(); j++) {
comps[i][j] = compareTo.charAt(i) == source.charAt(j);
// System.out.print(comps[i][j] + "\t");
}
// System.out.println();
}
return comps;
}
接著是一個根據這個boolean數組初始化一個int矩陣的函數,這個函數就對應尋找最大匹配路徑的函數
public static int[][] getSetpPathArr(boolean[][] barr) {
int[][] iarr = new int[barr.length][barr[0].length];
for (int i = barr.length - 1; i >= 0; i--) {
for (int j = barr[i].length - 1; j >= 0; j--) {
int v_i_j = barr[i][j] ? 1 : 0;
int n_i_1_j_1 = i + 1 >= iarr.length || j + 1 >= iarr[i].length ? 0
: iarr[i + 1][j + 1];
int n_i_1_j = i + 1 >= iarr.length ? 0 : iarr[i + 1][j];
int n_i_j_1 = j + 1 >= iarr[i].length ? 0 : iarr[i][j + 1];
int v_n_1_1 = v_i_j + n_i_1_j_1;
iarr[i][j] = v_n_1_1 > n_i_1_j ? v_n_1_1 > n_i_j_1 ? v_n_1_1
: n_i_j_1 : n_i_1_j > n_i_j_1 ? n_i_1_j : n_i_j_1;
}
}
return iarr;
}
根據算法的解釋,從下往上,從右向左計算每個格子int[][] iarr = new int[barr.length][barr[0].length];
for (int i = barr.length - 1; i >= 0; i--) {
for (int j = barr[i].length - 1; j >= 0; j--) {
int v_i_j = barr[i][j] ? 1 : 0;
int n_i_1_j_1 = i + 1 >= iarr.length || j + 1 >= iarr[i].length ? 0
: iarr[i + 1][j + 1];
int n_i_1_j = i + 1 >= iarr.length ? 0 : iarr[i + 1][j];
int n_i_j_1 = j + 1 >= iarr[i].length ? 0 : iarr[i][j + 1];
int v_n_1_1 = v_i_j + n_i_1_j_1;
iarr[i][j] = v_n_1_1 > n_i_1_j ? v_n_1_1 > n_i_j_1 ? v_n_1_1
: n_i_j_1 : n_i_1_j > n_i_j_1 ? n_i_1_j : n_i_j_1;
}
}
return iarr;
}
對應尋找最優路徑的函數,也是初始化一個int[][]
public static int[][] getMinSetpArr(int[][] miarr, boolean[][] barr) {
int[][] iarr = new int[miarr.length][miarr[0].length];
for (int i = iarr.length - 1; i >= 0; i--) {
for (int j = iarr[i].length - 1; j >= 0; j--) {
if (barr[i][j]) {
iarr[i][j] = getV(iarr, i + 1, j + 1) + 1;
} else if (getV(miarr, i, j + 1) >= getV(miarr, i + 1, j)) {
iarr[i][j] = getV(iarr, i, j + 1);
} else {
iarr[i][j] = getV(iarr, i + 1, j) + 1;
}
}
}
return iarr;
}
這樣就相應對應了算法中講的那三個矩陣。int[][] iarr = new int[miarr.length][miarr[0].length];
for (int i = iarr.length - 1; i >= 0; i--) {
for (int j = iarr[i].length - 1; j >= 0; j--) {
if (barr[i][j]) {
iarr[i][j] = getV(iarr, i + 1, j + 1) + 1;
} else if (getV(miarr, i, j + 1) >= getV(miarr, i + 1, j)) {
iarr[i][j] = getV(iarr, i, j + 1);
} else {
iarr[i][j] = getV(iarr, i + 1, j) + 1;
}
}
}
return iarr;
}
當然了,上面有調用到一個叫getV的方法,方法很簡單
private static int getV(int[][] arr, int i, int j) {
if (i >= arr.length || j >= arr[i].length) {
return 0;
}
return arr[i][j];
}
分別初始化完這三個數組后對結果進行計算。if (i >= arr.length || j >= arr[i].length) {
return 0;
}
return arr[i][j];
}
boolean[][] barr = getCompareBooleanArray(source, compareTo);
int[][] miarr = getSetpPathArr(barr);
int[][] diarr = getMinSetpArr(miarr, barr);
先找出從頂點出發,下一步該走的那幾個點:int[][] miarr = getSetpPathArr(barr);
int[][] diarr = getMinSetpArr(miarr, barr);
List<Point> points = new ArrayList<Point>();
int maxJ = -1;
for (int i = 0; i < barr.length; i++) {
int tempMaxJ = -1;
for (int j = 0; j < barr[i].length; j++) {
if (j > maxJ && maxJ != -1) {
break;
}
if (barr[i][j]) {
if (tempMaxJ == -1) {
tempMaxJ = j;
}
points.add(new Point(i, j));
}
}
if (tempMaxJ != -1) {
maxJ = tempMaxJ;
}
}
找出可能的最大最優匹配路徑,這個可能會有幾條路徑,核心代碼:int maxJ = -1;
for (int i = 0; i < barr.length; i++) {
int tempMaxJ = -1;
for (int j = 0; j < barr[i].length; j++) {
if (j > maxJ && maxJ != -1) {
break;
}
if (barr[i][j]) {
if (tempMaxJ == -1) {
tempMaxJ = j;
}
points.add(new Point(i, j));
}
}
if (tempMaxJ != -1) {
maxJ = tempMaxJ;
}
}
int max = 0;
int minSetp = -1;
for (Point point : points) {
if (miarr[point.getX()][point.getY()] > max) {
max = miarr[point.getX()][point.getY()];
}
}
for (Point point : points) {
if (minSetp == -1) {
minSetp = diarr[point.getX()][point.getY()];
} else if (miarr[point.getX()][point.getY()] >= max
&& diarr[point.getX()][point.getY()] < minSetp) {
minSetp = diarr[point.getX()][point.getY()];
}
}
List<Point> willRemove = new ArrayList<Point>();
for (Point point : points) {
if (miarr[point.getX()][point.getY()] < max
|| diarr[point.getX()][point.getY()] > minSetp) {
willRemove.add(point);
}
}
points.removeAll(willRemove);
willRemove.clear();
找到這里,最大匹配并且最優匹配的路徑就找到了int minSetp = -1;
for (Point point : points) {
if (miarr[point.getX()][point.getY()] > max) {
max = miarr[point.getX()][point.getY()];
}
}
for (Point point : points) {
if (minSetp == -1) {
minSetp = diarr[point.getX()][point.getY()];
} else if (miarr[point.getX()][point.getY()] >= max
&& diarr[point.getX()][point.getY()] < minSetp) {
minSetp = diarr[point.getX()][point.getY()];
}
}
List<Point> willRemove = new ArrayList<Point>();
for (Point point : points) {
if (miarr[point.getX()][point.getY()] < max
|| diarr[point.getX()][point.getY()] > minSetp) {
willRemove.add(point);
}
}
points.removeAll(willRemove);
willRemove.clear();
下面就是要計算差異了,基本和我上次寫的計算差異的方法差不多
List<TextDiff> diffs = new ArrayList<TextDiff>();
if (points.size() >= 1) {
Point startPoint = points.get(0);
points.clear();
if (startPoint.getX() != 0) {
diffs.add(new TextDiff(TextDiff.TYPE_INSERTED, 0, startPoint
.getX()));
}
if (startPoint.getY() != 0) {
diffs.add(new TextDiff(TextDiff.TYPE_DELETED, 0, startPoint
.getY()));
}
Point next = getNext(startPoint, miarr, diarr, barr);
while (next != null) {
if (!barr[startPoint.getX()][startPoint.getY()]) {
startPoint = new Point(startPoint.getX() + 1, startPoint
.getY() + 1);
}
if (startPoint.getX() != next.getX() - 1
|| startPoint.getY() != next.getY() - 1) {
diffs.add(new TextDiff(TextDiff.TYPE_INSERTED, startPoint
.getX(), next.getX() - startPoint.getX()));
diffs.add(new TextDiff(TextDiff.TYPE_DELETED, startPoint
.getY(), next.getY() - startPoint.getY()));
}
startPoint = next;
next = getNext(startPoint, miarr, diarr, barr);
}
if (startPoint.getX() != barr.length) {
diffs.add(new TextDiff(TextDiff.TYPE_INSERTED, startPoint
.getX(), barr.length - startPoint.getX()));
}
if (startPoint.getY() != barr[0].length) {
diffs.add(new TextDiff(TextDiff.TYPE_DELETED,
startPoint.getY(), barr[0].length - startPoint.getY()));
}
}
if (points.size() >= 1) {
Point startPoint = points.get(0);
points.clear();
if (startPoint.getX() != 0) {
diffs.add(new TextDiff(TextDiff.TYPE_INSERTED, 0, startPoint
.getX()));
}
if (startPoint.getY() != 0) {
diffs.add(new TextDiff(TextDiff.TYPE_DELETED, 0, startPoint
.getY()));
}
Point next = getNext(startPoint, miarr, diarr, barr);
while (next != null) {
if (!barr[startPoint.getX()][startPoint.getY()]) {
startPoint = new Point(startPoint.getX() + 1, startPoint
.getY() + 1);
}
if (startPoint.getX() != next.getX() - 1
|| startPoint.getY() != next.getY() - 1) {
diffs.add(new TextDiff(TextDiff.TYPE_INSERTED, startPoint
.getX(), next.getX() - startPoint.getX()));
diffs.add(new TextDiff(TextDiff.TYPE_DELETED, startPoint
.getY(), next.getY() - startPoint.getY()));
}
startPoint = next;
next = getNext(startPoint, miarr, diarr, barr);
}
if (startPoint.getX() != barr.length) {
diffs.add(new TextDiff(TextDiff.TYPE_INSERTED, startPoint
.getX(), barr.length - startPoint.getX()));
}
if (startPoint.getY() != barr[0].length) {
diffs.add(new TextDiff(TextDiff.TYPE_DELETED,
startPoint.getY(), barr[0].length - startPoint.getY()));
}
}
大功告成。。。。。
源碼:http://www.aygfsteel.com/Files/phyeas/src.zip