/// <summary>
/// Collects the strings stored under <paramref name="bkTree"/> that lie within
/// edit distance <paramref name="n"/> of <paramref name="s"/>.
/// </summary>
/// <param name="s">Query string.</param>
/// <param name="bkTree">Root of the (sub)tree to search; <c>null</c> yields an empty list.</param>
/// <param name="n">Largest edit distance that still counts as a match.</param>
/// <returns>
/// A new list: the node's own string first (when it matches), then the contents of the
/// child slots that are taken wholesale, then the matches found by recursing into the
/// remaining candidate slots.
/// </returns>
/// <remarks>
/// The pruning assumes that child slot <c>i</c> only holds strings whose distance to this
/// node's string is exactly <c>i</c> (which is how CreateBKTree fills the slots) and that
/// EditDistanceEngine.Compute obeys the triangle inequality - confirm for the engine in use.
/// </remarks>
public static List<string> EditDistanceAtMostN(string s, BKTree bkTree, int n)
{
    List<string> matches = new List<string>();
    if (bkTree == null)
    {
        return matches;
    }

    int distance = EditDistanceEngine.Compute(s, bkTree.StringValue);
    int lastSlot = bkTree.Children.Length - 1;

    if (distance <= n)
    {
        matches.Add(bkTree.StringValue);

        // Anything in slot i is at most distance + i away from s, so slots up to
        // n - distance can be accepted without computing any further distances.
        int wholesaleEnd = System.Math.Min(n - distance, lastSlot);
        for (int slot = 0; slot <= wholesaleEnd; slot++)
        {
            matches.AddRange(AllChildren(bkTree.Children[slot]));
        }
    }

    // Remaining candidates live in slots [distance - n, distance + n], minus the
    // slots that were already taken wholesale above (those end at n - distance).
    int firstSlot = System.Math.Max(System.Math.Max(0, distance - n), n - distance + 1);
    int finalSlot = System.Math.Min(distance + n, lastSlot);
    for (int slot = firstSlot; slot <= finalSlot; slot++)
    {
        matches.AddRange(EditDistanceAtMostN(s, bkTree.Children[slot], n));
    }

    return matches;
}
/// <summary>
/// Fills <paramref name="parent"/> (and, recursively, its descendants) from
/// <paramref name="fin"/>.
/// </summary>
/// <param name="parent">Already-allocated node whose fields are overwritten.</param>
/// <param name="fin">Reader positioned at the start of this node's record.</param>
/// <remarks>
/// Record layout as consumed here: Int32 index, string value, Int32 child-slot count,
/// one Boolean per slot (true = a child exists in that slot), followed by the records of
/// the existing children in slot order.
/// </remarks>
private static void RecursivelyDeserialize(BKTree parent, BinaryReader fin)
{
    // Read the whole header before touching the node.
    int nodeIndex = fin.ReadInt32();
    string nodeString = fin.ReadString();
    int slotCount = fin.ReadInt32();

    parent.Index = nodeIndex;
    parent.StringValue = nodeString;
    parent.Children = new BKTree[slotCount];

    // First pass: the presence flags for every slot come before any child record.
    for (int slot = 0; slot < slotCount; slot++)
    {
        bool present = fin.ReadBoolean();
        if (present)
        {
            parent.Children[slot] = new BKTree();
        }
    }

    // Second pass: child records follow, only for the slots flagged as present.
    for (int slot = 0; slot < slotCount; slot++)
    {
        BKTree child = parent.Children[slot];
        if (child == null)
        {
            continue;
        }

        RecursivelyDeserialize(child, fin);
    }
}
/// <summary>
/// Writes <paramref name="tree"/> in binary form to <paramref name="outputPath"/>.
/// </summary>
/// <param name="tree">Root of the tree to write; a <c>null</c> root writes nothing.</param>
/// <param name="outputPath">Destination file; created, or overwritten if it exists.</param>
public static void SerializeTo(BKTree tree, string outputPath)
{
    using (FileStream output = File.Create(outputPath))
    using (BinaryWriter writer = new BinaryWriter(output))
    {
        RecursivelySerialize(tree, writer);
    }
}
/// <summary>
/// Builds a BK-tree over <paramref name="strings"/>: a randomly chosen string becomes the
/// root and every other string is placed in the child slot whose index equals its edit
/// distance to the root; each slot is then built recursively the same way.
/// </summary>
/// <param name="strings">Strings to insert; an empty list yields <c>null</c>.</param>
/// <param name="inserted">Running count of strings placed so far (only used by the disabled progress output).</param>
/// <param name="originalStringCount">Total number of strings in the overall build (only used by the disabled progress output).</param>
/// <returns>The root of the new tree, or <c>null</c> when there is nothing to insert.</returns>
private static BKTree CreateBKTree(List<string> strings, int inserted, int originalStringCount)
{
    // One generator for the whole build. Creating a Random per recursive call is wasteful
    // and, on runtimes that seed from the clock, makes the root choices correlated.
    return CreateBKTreeCore(strings, inserted, originalStringCount, new Random());
}

/// <summary>
/// Recursive worker for <see cref="CreateBKTree"/>; <paramref name="rng"/> is shared by the
/// entire build.
/// </summary>
private static BKTree CreateBKTreeCore(List<string> strings, int inserted, int originalStringCount, Random rng)
{
    if (strings.Count == 0)
    {
        return null;
    }

    // One bucket per possible distance to the root. The size presumes Compute never returns
    // more than twice the longest string length - TODO confirm against EditDistanceEngine.
    List<string>[] buckets = new List<string>[2 * strings.Max(s => s.Length) + 1];
    for (int i = 0; i < buckets.Length; i++)
    {
        buckets[i] = new List<string>();
    }

    int rootIndex = rng.Next(strings.Count);
    string rootString = strings[rootIndex];
    BKTree toReturn = new BKTree();
    toReturn.StringValue = rootString;
    // Disabled progress output:
    //Console.Write($"\r{++inserted}/{originalStringCount} strings added to BK tree");

    for (int i = 0; i < strings.Count; i++)
    {
        if (i == rootIndex)
        {
            continue;
        }

        string currentString = strings[i];
        int d = EditDistanceEngine.Compute(rootString, currentString);
        buckets[d].Add(currentString);
    }

    // Trim trailing empty buckets so the child array is no longer than needed;
    // maxIndex ends at -1 (zero children) when the root was the only string.
    int maxIndex = buckets.Length - 1;
    while (maxIndex >= 0 && buckets[maxIndex].Count == 0)
    {
        maxIndex--;
    }

    toReturn.Children = new BKTree[maxIndex + 1];
    for (int i = 0; i <= maxIndex; i++)
    {
        // An empty bucket produces a null child slot.
        toReturn.Children[i] = CreateBKTreeCore(buckets[i], inserted, originalStringCount, rng);
        inserted += buckets[i].Count;
    }

    return toReturn;
}
/// <summary>
/// Loads a tree from the binary file at <paramref name="file"/>.
/// </summary>
/// <param name="file">Path of the file to read.</param>
/// <returns>A newly allocated root node populated from the file contents.</returns>
public static BKTree DeserializeFrom(string file)
{
    BKTree root = new BKTree();

    using (FileStream input = File.OpenRead(file))
    using (BinaryReader reader = new BinaryReader(input))
    {
        RecursivelyDeserialize(root, reader);
    }

    return root;
}
/// <summary>
/// Returns every string stored in the subtree rooted at <paramref name="tree"/>, in
/// pre-order: a node's own string first, then its children's subtrees in slot order.
/// </summary>
/// <param name="tree">Root of the subtree; <c>null</c> yields an empty list.</param>
/// <returns>A new list containing the subtree's strings.</returns>
/// <remarks>
/// Implemented iteratively with an explicit stack so that each string is appended exactly
/// once (no per-node temporary lists that get re-copied at every level) and so that very
/// deep trees cannot exhaust the call stack.
/// </remarks>
public static List<string> AllChildren(BKTree tree)
{
    List<string> collected = new List<string>();
    if (tree == null)
    {
        return collected;
    }

    Stack<BKTree> pending = new Stack<BKTree>();
    pending.Push(tree);

    while (pending.Count > 0)
    {
        BKTree node = pending.Pop();
        collected.Add(node.StringValue);

        // Push in reverse slot order so the lowest slot is popped (visited) first,
        // matching a recursive pre-order walk. Empty slots are skipped.
        for (int slot = node.Children.Length - 1; slot >= 0; slot--)
        {
            BKTree child = node.Children[slot];
            if (child != null)
            {
                pending.Push(child);
            }
        }
    }

    return collected;
}
/// <summary>
/// Searches the subtree rooted at <paramref name="bkTree"/> for the strings closest to
/// <paramref name="s"/>, tightening <paramref name="bound"/> as closer strings are found.
/// </summary>
/// <param name="s">Query string.</param>
/// <param name="bkTree">Root of the (sub)tree to search; <c>null</c> yields an empty list and leaves <paramref name="bound"/> untouched.</param>
/// <param name="bound">
/// On entry, the largest distance still worth reporting. On exit, lowered to the smallest
/// distance encountered if that was smaller; otherwise unchanged.
/// </param>
/// <returns>
/// The strings collected at the final value of <paramref name="bound"/>; whenever a
/// subtree lowers the bound, everything gathered before is discarded in favour of that
/// subtree's result.
/// </returns>
/// <remarks>
/// Only child slots in [distance - bound, distance + bound] are visited. That pruning
/// presumes slot <c>i</c> holds strings at distance <c>i</c> from this node and that the
/// distance function satisfies the triangle inequality - confirm for the engine in use.
/// </remarks>
public static List<string> LeastEditDistance(string s, BKTree bkTree, ref int bound)
{
    List<string> best = new List<string>();
    if (bkTree == null)
    {
        return best;
    }

    int distance = EditDistanceEngine.Compute(s, bkTree.StringValue);
    if (distance <= bound)
    {
        // This node is at least as close as anything seen so far.
        bound = distance;
        best.Add(bkTree.StringValue);
    }

    int lastSlot = bkTree.Children.Length - 1;
    int slot = System.Math.Max(0, distance - bound);
    int finalSlot = System.Math.Min(distance + bound, lastSlot);

    while (slot <= finalSlot)
    {
        int boundBefore = bound;
        List<string> fromSubtree = LeastEditDistance(s, bkTree.Children[slot], ref bound);

        if (bound != boundBefore)
        {
            // The subtree found something strictly closer: its result supersedes ours,
            // and the window of slots worth visiting shrinks around the new bound.
            best = fromSubtree;
            slot = System.Math.Max(slot, distance - bound);
            finalSlot = System.Math.Min(distance + bound, lastSlot);
        }
        else
        {
            // Same bound as before: the subtree's strings (if any) join the current set.
            best.AddRange(fromSubtree);
        }

        slot++;
    }

    return best;
}
/// <summary>
/// Writes <paramref name="parentNode"/> and all of its descendants to
/// <paramref name="fout"/>. A <c>null</c> node writes nothing.
/// </summary>
/// <param name="parentNode">Node to write.</param>
/// <param name="fout">Destination writer.</param>
/// <remarks>
/// Record layout: the node's Index, its StringValue, the number of child slots, one
/// Boolean per slot (true = slot is occupied), then the records of the occupied slots in
/// slot order.
/// </remarks>
private static void RecursivelySerialize(BKTree parentNode, BinaryWriter fout)
{
    if (parentNode == null)
    {
        return;
    }

    fout.Write(parentNode.Index);
    fout.Write(parentNode.StringValue);

    int slotCount = parentNode.Children.Length;
    fout.Write(slotCount);

    // All presence flags go out before any child record.
    foreach (BKTree child in parentNode.Children)
    {
        fout.Write(child != null);
    }

    foreach (BKTree child in parentNode.Children)
    {
        if (child != null)
        {
            RecursivelySerialize(child, fout);
        }
    }
}
/// <summary>
/// Runs <see cref="LeastEditDistance"/> with no initial limit and reports the distance it
/// settled on.
/// </summary>
/// <param name="s">Query string.</param>
/// <param name="tree">Root of the tree to search.</param>
/// <param name="distance">
/// Receives the final search bound; stays at <see cref="int.MaxValue"/> when
/// <paramref name="tree"/> is <c>null</c>.
/// </param>
/// <returns>The list produced by the underlying search.</returns>
public static List<string> LeastEditDistanceWithDistance(string s, BKTree tree, out int distance)
{
    // Start with the loosest possible bound so the first node visited always qualifies.
    distance = int.MaxValue;

    List<string> nearest = LeastEditDistance(s, tree, ref distance);
    return nearest;
}