Exemplo n.º 1
0
        /// <summary>
        /// Records a match between <paramref name="i"/> and <paramref name="j"/> in both directions:
        /// first through <c>AddDirectedMatch</c> (i to j, then j to i), and then by appending an
        /// <c>IndexDistancePair</c> to <c>MatchArray[i]</c> and <c>MatchArray[j]</c>.
        /// </summary>
        /// <param name="matches">The match container being extended.</param>
        /// <param name="i">Index of the first element of the pair.</param>
        /// <param name="j">Index of the second element of the pair.</param>
        /// <param name="distance">Distance stored with the match in both directions.</param>
        public static void AddMatch(this Matches matches, int i, int j, int distance)
        {
            // Captured before the directed inserts, exactly like the original.
            var adjacency = matches.MatchArray;

            matches.AddDirectedMatch(i, j, distance);
            matches.AddDirectedMatch(j, i, distance);

            // NOTE(review): if AddDirectedMatch already appends to MatchArray, the two
            // inserts below store every pair a second time - confirm this is intended.
            var towardsJ = new IndexDistancePair { Index = j, Distance = distance };
            adjacency[i].Add(towardsJ);

            var towardsI = new IndexDistancePair { Index = i, Distance = distance };
            adjacency[j].Add(towardsI);
        }
Exemplo n.º 2
0
        /// <summary>
        /// For every string in <paramref name="T"/>, queries a BK-tree built over <paramref name="S"/>
        /// with bound <paramref name="n"/> and records each reported neighbor as a directed match
        /// (index in S, index in T, edit distance).
        /// </summary>
        /// <remarks>
        /// The lookup from string to S index keeps the last index written for a given string, so
        /// duplicate strings in <paramref name="S"/> all resolve to the index of their last occurrence.
        /// Progress is written to the console while the queries run in parallel.
        /// </remarks>
        /// <param name="S">Strings indexed by the BK-tree; their positions are the first index of each match.</param>
        /// <param name="T">Query strings; their positions are the second index of each match.</param>
        /// <param name="n">Distance bound handed to the BK-tree query.</param>
        /// <returns>The <c>Matches</c> instance, created via <c>MatchesEngine.NewMatches(S.Length)</c>, holding all recorded matches.</returns>
        public static Matches EditDistanceAtMostN(string[] S, string[] T, int n)
        {
            Matches toReturn = MatchesEngine.NewMatches(S.Length);

            // Create BKTree
            var bkTree = BKTreeEngine.CreateBKTree(S.ToList());

            // Create lookup table (string -> index in S; later duplicates overwrite earlier ones)
            Dictionary<string, int> stringToInt = new Dictionary<string, int>();

            for (int i = 0; i < S.Length; i++)
            {
                stringToInt[S[i]] = i;
            }

            int    c         = 0;
            object cLock     = new object();
            object matchLock = new object();

            Parallel.For(0, T.Length, j =>
            {
                // cLock only protects the progress counter and its console line.
                lock (cLock)
                {
                    Console.Write($"\r{c++}/{T.Length}");
                }

                var neighbors = BKTreeEngine.EditDistanceAtMostN(T[j], bkTree, n);
                foreach (var neighbor in neighbors)
                {
                    int i = stringToInt[neighbor];

                    // Result writes get their own lock so they no longer contend with progress
                    // output; the original declared matchLock but locked on cLock here.
                    // NOTE(review): Compute stays inside the lock because its thread-safety is
                    // not visible from here; hoist it out if it is confirmed to be pure.
                    lock (matchLock)
                    {
                        toReturn.AddDirectedMatch(i, j, EditDistanceEngine.Compute(T[j], neighbor));
                    }
                }
            });

            return toReturn;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Finds pairs whose first element comes from <paramref name="S"/> and whose second comes from
        /// <paramref name="T"/>, using deletion neighborhoods: every string contributes the entries
        /// produced by <c>DeleteN</c>, entries are grouped by their substring, and within each group
        /// every S/T combination whose <c>EditDistance</c> is at most <paramref name="n"/> is recorded
        /// as a directed match. The result is passed through <c>Clean()</c> before being returned.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the previous documentation stated that indices for elements of T are offset by
        /// S.Length. No offset is applied in this method (matches are recorded with the indices handed
        /// to <c>DeleteN</c>) - confirm whether that statement still holds elsewhere.
        /// </remarks>
        /// <param name="S">Strings forming the first part (tagged as part 0).</param>
        /// <param name="T">Strings forming the second part (tagged as part 1).</param>
        /// <param name="n">Maximum edit distance for a pair to be recorded; also passed to <c>DeleteN</c>.</param>
        /// <returns>The cleaned <c>Matches</c> instance created via <c>MatchesEngine.NewMatches(S.Length)</c>.</returns>
        public static Matches EditDistanceAtMostN(string[] S, string[] T, int n)
        {
            Matches result = MatchesEngine.NewMatches(S.Length);

            Console.WriteLine("Creating the neighborhoods");
            var candidates = new List<BipartiteEditDistanceMatchObject>();

            // Part 0: neighborhoods of the S strings. The loop index doubles as the progress counter.
            for (int sIndex = 0; sIndex < S.Length; sIndex++)
            {
                Console.Write($"\r{sIndex}/{S.Length} S neighborhoods computed");
                foreach (var entry in DeleteN(S[sIndex], sIndex, n))
                {
                    var tagged = new BipartiteEditDistanceMatchObject {
                        EditDistanceMatchObject = entry, Part = 0
                    };
                    candidates.Add(tagged);
                }
            }

            // Part 1: neighborhoods of the T strings.
            for (int tIndex = 0; tIndex < T.Length; tIndex++)
            {
                Console.Write($"\r{tIndex}/{T.Length} T neighborhoods computed");
                foreach (var entry in DeleteN(T[tIndex], tIndex, n))
                {
                    var tagged = new BipartiteEditDistanceMatchObject {
                        EditDistanceMatchObject = entry, Part = 1
                    };
                    candidates.Add(tagged);
                }
            }

            Console.WriteLine();

            Console.WriteLine("Grouping by neighborhood");
            var buckets = candidates.GroupBy(member => member.EditDistanceMatchObject.Substring).ToArray();

            Console.WriteLine("Checking edit distance");
            for (int b = 0; b < buckets.Length; b++)
            {
                var bucket = buckets[b];

                // Split the bucket by originating part; only cross-part pairs are compared.
                var fromS = (from member in bucket
                             where member.Part == 0
                             select member.EditDistanceMatchObject).ToArray();
                var fromT = (from member in bucket
                             where member.Part == 1
                             select member.EditDistanceMatchObject).ToArray();

                Console.Write($"\r{b}/{buckets.Length} edit distance groups checked");
                foreach (var left in fromS)
                {
                    foreach (var right in fromT)
                    {
                        int distance = EditDistance(left, right);
                        if (distance > n)
                        {
                            continue;
                        }

                        result.AddDirectedMatch(left.Index, right.Index, distance);
                    }
                }
            }
            Console.WriteLine();

            Console.WriteLine("Cleaning string match object");
            result.Clean();

            return result;
        }