/// <summary>
/// Records a match between entries <paramref name="i"/> and <paramref name="j"/> in both directions.
/// </summary>
/// <remarks>
/// The match is registered through <c>AddDirectedMatch</c> (once per direction) and an
/// <c>IndexDistancePair</c> is additionally appended to the <c>MatchArray</c> slot of each endpoint.
/// NOTE(review): if <c>AddDirectedMatch</c> itself appends to <c>MatchArray</c>, every direction ends up
/// stored twice here - confirm against its implementation.
/// </remarks>
/// <param name="matches">The match container to update.</param>
/// <param name="i">Index of the first entry.</param>
/// <param name="j">Index of the second entry.</param>
/// <param name="distance">Distance to store for the pair.</param>
public static void AddMatch(this Matches matches, int i, int j, int distance)
{
    // The array reference is read before any directed match is added, as in the original ordering.
    var adjacency = matches.MatchArray;

    matches.AddDirectedMatch(i, j, distance);
    matches.AddDirectedMatch(j, i, distance);

    // Entry stored under i points at j ...
    var towardJ = new IndexDistancePair { Index = j, Distance = distance };
    adjacency[i].Add(towardJ);

    // ... and the mirrored entry stored under j points back at i.
    var towardI = new IndexDistancePair { Index = i, Distance = distance };
    adjacency[j].Add(towardI);
}
/// <summary>
/// Collects, for every string in <paramref name="T"/>, the strings of <paramref name="S"/> returned by a
/// BK-tree query with bound <paramref name="n"/>, and records each hit as a directed match
/// (index in S, index in T, edit distance).
/// </summary>
/// <remarks>
/// The queries over <paramref name="T"/> run in parallel via <c>Parallel.For</c>; progress is written to the
/// console as <c>done/total</c> on a single, carriage-return-refreshed line.
/// When <paramref name="S"/> contains the same string more than once, only the last index of that string is
/// kept in the lookup table, so matches are attributed to that index.
/// </remarks>
/// <param name="S">Strings used to build the BK-tree; match source indices refer to this array.</param>
/// <param name="T">Strings that are looked up in the tree; match target indices refer to this array.</param>
/// <param name="n">Bound passed to the BK-tree query (presumably the maximum edit distance - confirm against BKTreeEngine).</param>
/// <returns>The matches container created with <c>MatchesEngine.NewMatches(S.Length)</c> and filled with the hits.</returns>
public static Matches EditDistanceAtMostN(string[] S, string[] T, int n)
{
    Matches toReturn = MatchesEngine.NewMatches(S.Length);

    // Create BKTree
    var bkTree = BKTreeEngine.CreateBKTree(S.ToList());

    // Create lookup table (string -> index in S; later duplicates overwrite earlier ones)
    Dictionary<string, int> stringToInt = new Dictionary<string, int>();
    for (int i = 0; i < S.Length; i++)
    {
        stringToInt[S[i]] = i;
    }

    int c = 0;
    object cLock = new object();     // guards the progress counter and its console output
    object matchLock = new object(); // guards mutation of toReturn

    Parallel.For(0, T.Length, j =>
    {
        lock (cLock)
        {
            Console.Write($"\r{c++}/{T.Length}");
        }

        var neighbors = BKTreeEngine.EditDistanceAtMostN(T[j], bkTree, n);
        foreach (var neighbor in neighbors)
        {
            int i = stringToInt[neighbor];

            // Compute the distance before taking the lock so that only the shared-state mutation is
            // serialized. NOTE(review): assumes EditDistanceEngine.Compute is safe to call concurrently
            // (the tree query above already runs unlocked in parallel) - confirm.
            int distance = EditDistanceEngine.Compute(T[j], neighbor);

            // Use the dedicated match lock: previously the progress lock was reused here, so progress
            // reporting and match insertion blocked each other and matchLock was never used.
            lock (matchLock)
            {
                toReturn.AddDirectedMatch(i, j, distance);
            }
        }
    });

    return toReturn;
}
/// <summary>
/// Finds pairs (element of <paramref name="S"/>, element of <paramref name="T"/>) whose edit distance is at most
/// <paramref name="n"/>. In every recorded match the first index comes from the S part and the second from the T part.
/// </summary>
/// <remarks>
/// Candidates are produced by calling <c>DeleteN</c> on every string of both arrays, grouping the results by their
/// <c>Substring</c>, and comparing only S-side and T-side members that fall in the same group. Progress is written to
/// the console. The result is passed through <c>Clean()</c> before being returned (presumably to tidy up pairs that
/// were recorded from more than one group - TODO confirm).
/// NOTE(review): earlier documentation stated that indices for elements of T are offset by S.Length; this method hands
/// the plain array position to <c>DeleteN</c> for both arrays, so any offset would have to be applied elsewhere - confirm.
/// </remarks>
/// <param name="S">Strings forming the first part.</param>
/// <param name="T">Strings forming the second part.</param>
/// <param name="n">Largest edit distance that still counts as a match.</param>
/// <returns>The matches found between the two parts.</returns>
public static Matches EditDistanceAtMostN(string[] S, string[] T, int n)
{
    Matches result = MatchesEngine.NewMatches(S.Length);

    Console.WriteLine("Creating the neighborhoods");
    var candidates = new List<BipartiteEditDistanceMatchObject>();

    // Part 0: neighborhoods of the strings in S.
    int progress = 0;
    for (int sIdx = 0; sIdx < S.Length; sIdx++)
    {
        Console.Write($"\r{progress++}/{S.Length} S neighborhoods computed");
        foreach (var variant in DeleteN(S[sIdx], sIdx, n))
        {
            candidates.Add(new BipartiteEditDistanceMatchObject { EditDistanceMatchObject = variant, Part = 0 });
        }
    }

    // Part 1: neighborhoods of the strings in T.
    progress = 0;
    for (int tIdx = 0; tIdx < T.Length; tIdx++)
    {
        Console.Write($"\r{progress++}/{T.Length} T neighborhoods computed");
        foreach (var variant in DeleteN(T[tIdx], tIdx, n))
        {
            candidates.Add(new BipartiteEditDistanceMatchObject { EditDistanceMatchObject = variant, Part = 1 });
        }
    }

    Console.WriteLine();
    Console.WriteLine("Grouping by neighborhood");
    var buckets = candidates.GroupBy(entry => entry.EditDistanceMatchObject.Substring).ToArray();

    Console.WriteLine("Checking edit distance");
    progress = 0;
    foreach (var bucket in buckets)
    {
        // Split the bucket by part; only cross-part pairs are compared.
        var fromS = (from member in bucket
                     where member.Part == 0
                     select member.EditDistanceMatchObject).ToArray();
        var fromT = (from member in bucket
                     where member.Part == 1
                     select member.EditDistanceMatchObject).ToArray();

        Console.Write($"\r{progress++}/{buckets.Length} edit distance groups checked");

        foreach (var left in fromS)
        {
            foreach (var right in fromT)
            {
                int distance = EditDistance(left, right);
                if (distance > n)
                {
                    continue;
                }

                result.AddDirectedMatch(left.Index, right.Index, distance);
            }
        }
    }

    Console.WriteLine();
    Console.WriteLine("Cleaning string match object");
    result.Clean();

    return result;
}