/
BKTree.cs
127 lines (106 loc) · 3.78 KB
/
BKTree.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
/// <summary>
/// A BK tree (Burkhard-Keller tree) that indexes words by Levenshtein distance so that
/// every stored word within a given edit distance of a query can be retrieved without
/// comparing the query against the whole dictionary.
/// </summary>
/// <remarks>
/// Each node stores one word. A child attached under edge <c>d</c> is at Levenshtein
/// distance <c>d</c> from its parent's word, and each node has at most one child per edge.
/// <para>
/// These articles were referenced to write the BK Tree below.
/// No code has been directly copied.
/// https://en.wikipedia.org/wiki/Levenshtein_distance
/// https://www.csharpstar.com/csharp-string-distance-algorithm
/// https://www.geeksforgeeks.org/bk-tree-introduction-implementation/
/// http://blog.notdot.net/2007/4/Damn-Cool-Algorithms-Part-1-BK-Trees
/// https://nullwords.wordpress.com/2013/03/13/the-bk-tree-a-data-structure-for-spell-checking/
/// </para>
/// </remarks>
public class BkTree
{
    private BkNode _root;

    /// <summary>
    /// Add a word to the tree's dictionary. Adding a word that is already present is a no-op.
    /// </summary>
    /// <param name="value">The word to store.</param>
    /// <exception cref="ArgumentNullException"><paramref name="value"/> is <c>null</c>.</exception>
    public void Add(string value)
    {
        if (value == null)
        {
            throw new ArgumentNullException(nameof(value));
        }

        if (_root == null)
        {
            _root = new BkNode(value);
            return;
        }

        var currentNode = _root;
        while (true)
        {
            // The edge must always be measured against the node we are currently standing on;
            // reusing the distance to an ancestor would file the word under the wrong edge.
            var distance = LevenshteinDistance(currentNode.Value, value);
            if (distance == 0)
            {
                // The word is already in the tree.
                return;
            }

            if (!currentNode.TryGetChild(distance, out var child))
            {
                currentNode.AddChild(distance, value);
                return;
            }

            currentNode = child;
        }
    }

    /// <summary>
    /// Search the tree's dictionary for every word within a given edit distance of a query.
    /// </summary>
    /// <param name="value">The word to look up.</param>
    /// <param name="distanceTolerance">
    /// The maximum number of insertions, deletions, and substitutions allowed between
    /// <paramref name="value"/> and a returned word. A negative tolerance matches nothing.
    /// </param>
    /// <returns>The stored words whose distance to <paramref name="value"/> is within the tolerance.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="value"/> is <c>null</c> or empty.</exception>
    /// <exception cref="InvalidOperationException">No words have been added to the tree.</exception>
    public List<string> Search(string value, int distanceTolerance)
    {
        if (string.IsNullOrEmpty(value))
        {
            throw new ArgumentNullException(nameof(value));
        }

        if (_root == null)
        {
            throw new InvalidOperationException("Cannot search when no words have been added to the BK Tree.");
        }

        var result = new List<string>();
        Collect(_root, result, value, distanceTolerance);
        return result;

        static void Collect(BkNode node, List<string> matches, string query, int tolerance)
        {
            var currentDistance = LevenshteinDistance(node.Value, query);
            if (currentDistance <= tolerance)
            {
                matches.Add(node.Value);
            }

            // By the triangle inequality only children whose edge lies within
            // [distance - tolerance, distance + tolerance] can hold a match.
            // The bounds are widened to long so extreme tolerances cannot overflow.
            var minDistance = (long)currentDistance - tolerance;
            var maxDistance = (long)currentDistance + tolerance;

            foreach (var child in node.Children)
            {
                if (minDistance <= child.Key && child.Key <= maxDistance)
                {
                    Collect(child.Value, matches, query, tolerance);
                }
            }
        }
    }

    /// <summary>
    /// Determine the number of insertions, deletions, and substitutions that
    /// would occur to `source` to match `target`.
    /// </summary>
    /// <remarks>Strings are compared one <see cref="char"/> (UTF-16 code unit) at a time.</remarks>
    /// <param name="source">The string to transform.</param>
    /// <param name="target">The string to transform <paramref name="source"/> into.</param>
    /// <returns>The Levenshtein distance between the two strings.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="source"/> or <paramref name="target"/> is <c>null</c>.
    /// </exception>
    public static int LevenshteinDistance(string source, string target)
    {
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        if (target == null)
        {
            throw new ArgumentNullException(nameof(target));
        }

        if (source.Length == 0)
        {
            return target.Length;
        }

        if (target.Length == 0)
        {
            return source.Length;
        }

        // Two-row dynamic programme: previousRow[j] is the distance between the first i
        // characters of source and the first j characters of target.
        var previousRow = new int[target.Length + 1];
        var currentRow = new int[target.Length + 1];

        // Turning an empty prefix of source into the first j characters of target costs j insertions.
        for (var j = 0; j <= target.Length; j++)
        {
            previousRow[j] = j;
        }

        for (var i = 0; i < source.Length; i++)
        {
            // Turning the first i + 1 characters of source into an empty string costs i + 1 deletions.
            currentRow[0] = i + 1;

            for (var j = 0; j < target.Length; j++)
            {
                var deletion = previousRow[j + 1] + 1;
                var insertion = currentRow[j] + 1;
                var substitution = previousRow[j] + (source[i] == target[j] ? 0 : 1);

                currentRow[j + 1] = Math.Min(Math.Min(deletion, insertion), substitution);
            }

            // The row just computed becomes the previous row; the old one is reused as scratch.
            (previousRow, currentRow) = (currentRow, previousRow);
        }

        return previousRow[target.Length];
    }

    /// <summary>
    /// A single word in the tree together with its children, keyed by their distance to this word.
    /// </summary>
    private sealed class BkNode
    {
        private readonly Dictionary<int, BkNode> _children = new Dictionary<int, BkNode>();

        public BkNode(string value) => Value = value;

        /// <summary>The word stored at this node.</summary>
        public string Value { get; }

        /// <summary>The child nodes paired with the edge (distance) they are attached under.</summary>
        public IEnumerable<KeyValuePair<int, BkNode>> Children => _children;

        /// <summary>Look up the child attached under <paramref name="edge"/>, if there is one.</summary>
        public bool TryGetChild(int edge, out BkNode child) => _children.TryGetValue(edge, out child);

        /// <summary>Attach a new node holding <paramref name="value"/> under <paramref name="edge"/>.</summary>
        public void AddChild(int edge, string value) => _children[edge] = new BkNode(value);
    }
}