最簡Trie的實現
無附加功能,主要提供insert,search和prefixsearch。具體原理請參見http://en.wikipedia.org/wiki/Trie
先來看下trie樹的樣子:
圖片來源wikipedia
1: public class Node {
2: char content;
3: boolean marker;
4: Collection<Node> child;
5: boolean visited;
6:
7: public Node(char c) {
8: child = new LinkedList<Node>();
9: marker = false;
10: content = c;
11: visited = false;
12: }
13:
14: public Node subNode(char c) {
15: if (child != null) {
16: for (Node eachChild : child) {
17: if (eachChild.content == c) {
18: return eachChild;
19: }
20: }
21: }
22: return null;
23: }
24: }
Node是一個trie樹的節點,其中content表示節點對應的字符,marker表示是否該節點是否是一個單詞的完結節點。child顧名思義是節點的所有子節點集合,visited在遍歷時需要,表示是否訪問過。
具體trie:
1: public class Trie {
2: private Node root;
3:
4: public Trie() {
5: root = new Node(' ');
6: }
7:
8: public void insert(String s) {
9: Node current = root;
10: if (s.length() == 0) // For an empty character
11: current.marker = true;
12: for (int i = 0; i < s.length(); i++) {
13: Node child = current.subNode(s.charAt(i));
14: if (child != null) {
15: current = child;
16: } else {
17: current.child.add(new Node(s.charAt(i)));
18: current = current.subNode(s.charAt(i));
19: }
20: // Set marker to indicate end of the word
21: if (i == s.length() - 1)
22: current.marker = true;
23: }
24: }
25:
26: public boolean search(String s) {
27: Node current = root;
28: while (current != null) {
29: for (int i = 0; i < s.length(); i++) {
30: if (current.subNode(s.charAt(i)) == null)
31: return false;
32: else
33: current = current.subNode(s.charAt(i));
34: }/*
35: * This means that a string exists, but make sure its a word by
36: * checking its 'marker' flag
37: */
38: if (current.marker == true)
39: return true;
40: else
41: return false;
42: }
43: return false;
44: }
45:
46: public List<String> searchPrefix(String s) {
47:
48: Node current = root;
49: if (current == null) {
50: return null;
51: }
52: List<String> list = new ArrayList<String>();
53: Node endNode = null;
54: StringBuilder endSB = new StringBuilder();
55: for (int i = 0; i < s.length(); i++) {
56: if (current.subNode(s.charAt(i)) == null) {
57: endNode = current;
58: break;
59: } else {
60: current = current.subNode(s.charAt(i));
61: endNode = current;
62: endSB.append(endNode.content);
63: }
64: }
65: if (endNode == null) {
66: return null;
67: }
68: if (endNode.marker == true) {// found as a word
69: list.add(endSB.toString());
70: }
71: // depth-first search the sub branch
72: Stack<Node> stack = new Stack<Node>();
73: stack.push(endNode);
74: while (!stack.isEmpty()) {
75: Node cur = stack.peek();
76: int needCount = cur.child.size();
77: for (Node node : cur.child) {
78: if (!node.visited) {
79: node.visited = true;
80: stack.push(node);
81: endSB.append(node.content);
82: if (node.marker) {
83: list.add(endSB.toString());
84: }
85: break;
86: } else {
87: needCount--;
88: }
89: }
90: if (needCount == 0) {
91: stack.pop();
92: endSB.deleteCharAt(endSB.length()-1);
93: }
94: }
95:
96: return list;
97: }
98:
99: public static void main(String[] args) {
100: Trie trie = new Trie();
101: trie.insert("ball");
102: trie.insert("bat");
103: trie.insert("dead");
104: trie.insert("do");
105: trie.insert("doll");
106: trie.insert("dork");
107: trie.insert("dorm");
108: trie.insert("send");
109: trie.insert("sense");
110: List<String> list = trie.searchPrefix("d");
111: for (String s : list) {
112: System.out.println(s);
113: }
114: }
115: }
其中構建樹的時候需要不斷的將新詞insert到結構中,search可以接受一個詞作為輸入,返回這個詞是否在詞典中。如果只是search來判斷是否存在,那么hashmap更好的解決這個問題。trie樹的最大強項是利用前綴檢索,比如想知道一個詞典中以固定前綴開始的詞有多少,那么prefixsearch可以滿足需求。
這棵樹可擴展的地方太多太多,現有代碼只是一個鋪墊,對于任何處理字符的需求,擴展屬于自己的特定業務場景的trie是個很不錯的選擇。
本文的實現是基于http://www.technicalypto.com/2010/04/trie-in-java.html這里實現的,trie樹截圖也來源于此,在此對作者表示感激。
posted on 2012-08-18 10:10 changedi 閱讀(1730) 評論(1) 編輯 收藏 所屬分類: 算法