Ejemplo n.º 1
0
        /// <summary>
        /// Collects the strings stored in <paramref name="bkTree"/> that lie within
        /// <paramref name="n"/> of <paramref name="s"/>, as measured by
        /// <c>EditDistanceEngine.Compute</c>.
        /// </summary>
        /// <param name="s">The query string.</param>
        /// <param name="bkTree">Root of the (sub)tree to search; <c>null</c> yields an empty list.</param>
        /// <param name="n">Largest distance that still counts as a hit.</param>
        /// <returns>A new list with every hit found in this subtree.</returns>
        public static List <string> EditDistanceAtMostN(string s, BKTree bkTree, int n)
        {
            var hits = new List<string>();

            if (bkTree == null)
            {
                return hits;
            }

            int rootDistance = EditDistanceEngine.Compute(s, bkTree.StringValue);
            int lastChild    = bkTree.Children.Length - 1;

            if (rootDistance <= n)
            {
                hits.Add(bkTree.StringValue);

                // Children whose index is at most (n - rootDistance) are taken wholesale.
                // NOTE(review): presumably Children[k] holds strings at distance k from
                // this node, so the triangle inequality keeps them within n - confirm
                // against the tree builder.
                int wholesaleLimit = System.Math.Min(n - rootDistance, lastChild);
                int near           = 0;
                while (near <= wholesaleLimit)
                {
                    hits.AddRange(AllChildren(bkTree.Children[near]));
                    near++;
                }
            }

            // Remaining candidate children: indices in [rootDistance - n, rootDistance + n],
            // clipped to the array, and starting past the ones already taken wholesale.
            int clippedLow = System.Math.Max(0, rootDistance - n);
            int first      = System.Math.Max(clippedLow, n - rootDistance + 1);
            int last       = System.Math.Min(rootDistance + n, lastChild);

            for (int idx = first; idx <= last; idx++)
            {
                List<string> fromChild = EditDistanceAtMostN(s, bkTree.Children[idx], n);
                hits.AddRange(fromChild);
            }

            return hits;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Returns <c>true</c> when <paramref name="a"/> and <paramref name="b"/> differ
        /// numerically by less than 2, or when <c>EditDistanceEngine.Compute</c> reports
        /// a distance of at most 1 between their <c>ToString()</c> forms.
        /// </summary>
        /// <param name="a">First number.</param>
        /// <param name="b">Second number.</param>
        /// <returns><c>true</c> if either closeness test passes; otherwise <c>false</c>.</returns>
        public static bool OneOrOneDigit(int a, int b)
        {
            // Subtract in 64-bit: a 32-bit "a - b" can wrap around (producing a bogus
            // small difference) and Math.Abs(int.MinValue) throws OverflowException.
            long difference = (long)a - b;

            if (System.Math.Abs(difference) < 2)
            {
                return true;
            }

            return EditDistanceEngine.Compute(a.ToString(), b.ToString()) <= 1;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Distance between <paramref name="a"/> and <paramref name="b"/> as reported by
        /// <c>EditDistanceEngine.Compute</c>, capped at <paramref name="max"/> + 1.
        /// </summary>
        /// <param name="a">First string.</param>
        /// <param name="b">Second string.</param>
        /// <param name="max">Largest distance reported exactly.</param>
        /// <returns>
        /// -1 if either string is empty; the computed distance if it is at most
        /// <paramref name="max"/>; otherwise <paramref name="max"/> + 1.
        /// </returns>
        static int EditDistanceUpTo(string a, string b, int max)
        {
            bool eitherEmpty = a == "" || b == "";
            if (eitherEmpty)
            {
                return -1;
            }

            int distance = EditDistanceEngine.Compute(a, b);
            if (distance > max)
            {
                return max + 1;
            }

            return distance;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Recursively builds a tree from <paramref name="strings"/>: a randomly chosen
        /// element becomes the node value, the remaining strings are grouped by their
        /// <c>EditDistanceEngine.Compute</c> distance to it, and each group becomes the
        /// child at the index equal to that distance.
        /// </summary>
        /// <param name="strings">Strings to place in this subtree.</param>
        /// <param name="inserted">Running count passed down the recursion (only used by disabled progress output).</param>
        /// <param name="originalStringCount">Total string count, forwarded unchanged to recursive calls.</param>
        /// <returns>The subtree root, or <c>null</c> when <paramref name="strings"/> is empty.</returns>
        private static BKTree CreateBKTree(List <string> strings, int inserted, int originalStringCount)
        {
            if (!strings.Any())
            {
                return null;
            }

            // One slot per possible distance value; sized from the longest input string.
            int slotCount  = 2 * strings.Max(candidate => candidate.Length) + 1;
            var byDistance = new List<string>[slotCount];
            for (int slot = 0; slot < slotCount; slot++)
            {
                byDistance[slot] = new List<string>();
            }

            Random rng        = new Random();
            int    pivotIndex = rng.Next(strings.Count);
            string pivot      = strings[pivotIndex];

            BKTree node = new BKTree();
            node.StringValue = pivot;
            // (Progress reporting to the console was disabled here in the original code.)

            // Group every non-pivot string by its distance to the pivot.
            int position = 0;
            foreach (string candidate in strings)
            {
                bool isPivot = position == pivotIndex;
                position++;
                if (isPivot)
                {
                    continue;
                }

                int distance = EditDistanceEngine.Compute(pivot, candidate);
                byDistance[distance].Add(candidate);
            }

            // Find the highest non-empty slot; ends at -1 when every slot is empty.
            int lastUsed = byDistance.Length - 1;
            while (lastUsed >= 0 && byDistance[lastUsed].Count == 0)
            {
                lastUsed--;
            }

            node.Children = new BKTree[lastUsed + 1];

            for (int distance = 0; distance <= lastUsed; distance++)
            {
                List<string> group = byDistance[distance];
                node.Children[distance] = CreateBKTree(group, inserted, originalStringCount);
                inserted += group.Count;
            }

            return node;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Compares the <c>ADDRESS1</c> values of two rows with a tolerance of one edit.
        /// </summary>
        /// <param name="a">First row.</param>
        /// <param name="b">Second row.</param>
        /// <returns>
        /// <c>false</c> if either address is the empty string; otherwise <c>true</c> when
        /// the addresses are equal or <c>EditDistanceEngine.Compute</c> returns at most 1.
        /// </returns>
        public static bool FuzzyAddressMatchEditDistance(Row a, Row b)
        {
            bool eitherBlank = a.ADDRESS1 == "" || b.ADDRESS1 == "";
            if (eitherBlank)
            {
                return false;
            }

            // Exact equality short-circuits the distance computation.
            return a.ADDRESS1 == b.ADDRESS1
                   || EditDistanceEngine.Compute(a.ADDRESS1, b.ADDRESS1) <= 1;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Counts how many of seven field comparisons (LAST, FIRST, SSN, ADDRESS1, DOB,
        /// PHONE, MRN) agree between two rows under loose matching rules.
        /// </summary>
        /// <param name="ri">First row.</param>
        /// <param name="rj">Second row.</param>
        /// <returns>The number of agreeing fields, from 0 to 7.</returns>
        public static int EasiestAgreementCount(Row ri, Row rj)
        {
            const int MaxTextEdits = 2;   // tolerance for the string fields
            const int MrnWindow    = 500; // MRNs must differ by less than this

            int agreeing = 0;

            bool lastClose = ri.LAST != "" && rj.LAST != ""
                             && EditDistanceEngine.Compute(ri.LAST, rj.LAST) <= MaxTextEdits;
            agreeing += lastClose ? 1 : 0;

            bool firstClose = ri.FIRST != "" && rj.FIRST != ""
                              && EditDistanceEngine.Compute(ri.FIRST, rj.FIRST) <= MaxTextEdits;
            agreeing += firstClose ? 1 : 0;

            // The original author flagged this SSN check as recently changed.
            bool ssnClose = MatchingManager.FuzzierSSNMatch(ri.SSN, rj.SSN);
            agreeing += ssnClose ? 1 : 0;

            bool addressClose = ri.ADDRESS1 != "" && rj.ADDRESS1 != ""
                                && EditDistanceEngine.Compute(ri.ADDRESS1, rj.ADDRESS1) <= MaxTextEdits;
            agreeing += addressClose ? 1 : 0;

            // The original author flagged this DOB check as recently changed.
            bool dobClose = MatchingManager.FuzzyDateEquals(ri.DOB, rj.DOB);
            agreeing += dobClose ? 1 : 0;

            bool phoneClose = MatchingManager.FuzzyPhoneMatch(ri.PHONE, rj.PHONE);
            agreeing += phoneClose ? 1 : 0;

            bool mrnClose = System.Math.Abs(ri.MRN - rj.MRN) < MrnWindow;
            agreeing += mrnClose ? 1 : 0;

            return agreeing;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// For every string in <paramref name="T"/>, finds the strings of
        /// <paramref name="S"/> returned by <c>BKTreeEngine.EditDistanceAtMostN</c> for
        /// tolerance <paramref name="n"/> and records each pair as a directed match.
        /// </summary>
        /// <param name="S">Strings indexed in the tree. If a value occurs more than once, only its last index is kept in the lookup table.</param>
        /// <param name="T">Query strings, processed in parallel.</param>
        /// <param name="n">Distance tolerance handed to the tree query.</param>
        /// <returns>The <c>Matches</c> object populated with (index in S, index in T, distance) entries.</returns>
        /// <remarks>Writes a <c>\r</c>-prefixed progress counter to the console for each query.</remarks>
        public static Matches EditDistanceAtMostN(string[] S, string[] T, int n)
        {
            Matches toReturn = MatchesEngine.NewMatches(S.Length);

            // Create BKTree
            var bkTree = BKTreeEngine.CreateBKTree(S.ToList());

            // Create lookup table from string value back to its index in S
            Dictionary <string, int> stringToInt = new Dictionary <string, int>();

            for (int i = 0; i < S.Length; i++)
            {
                stringToInt[S[i]] = i;
            }

            int    c         = 0;
            object cLock     = new object(); // guards the progress counter and console output
            object matchLock = new object(); // guards mutation of toReturn

            Parallel.For(0, T.Length, j =>
            {
                lock (cLock)
                {
                    Console.Write($"\r{c++}/{T.Length}");
                }

                var neighbors = BKTreeEngine.EditDistanceAtMostN(T[j], bkTree, n);
                foreach (var neighbor in neighbors)
                {
                    int i = stringToInt[neighbor];

                    // Compute the distance before taking the lock so that only the
                    // shared-state mutation is serialized, and use the dedicated match
                    // lock so match recording does not contend with progress output.
                    int distance = EditDistanceEngine.Compute(T[j], neighbor);

                    lock (matchLock)
                    {
                        toReturn.AddDirectedMatch(i, j, distance);
                    }
                }
            }
                         );

            return(toReturn);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Searches the subtree rooted at <paramref name="bkTree"/> for the stored strings
        /// closest to <paramref name="s"/> (distance from <c>EditDistanceEngine.Compute</c>),
        /// tightening <paramref name="bound"/> as closer strings are found.
        /// </summary>
        /// <param name="s">The query string.</param>
        /// <param name="bkTree">Root of the (sub)tree to search; <c>null</c> yields an empty list.</param>
        /// <param name="bound">
        /// On entry, the largest distance still of interest; on exit, lowered to the
        /// smallest distance found in this subtree if one at or below the incoming
        /// value was found (otherwise unchanged).
        /// </param>
        /// <returns>
        /// The strings collected from this subtree. Whenever the bound is lowered, the
        /// previously collected strings are discarded, so the list appears to hold only
        /// strings at the final value of <paramref name="bound"/>.
        /// </returns>
        public static List <string> LeastEditDistance(string s, BKTree bkTree, ref int bound)
        {
            List <string> toReturn = new List <string>();

            if (bkTree == null)
            {
                return(toReturn);
            }

            int d = EditDistanceEngine.Compute(s, bkTree.StringValue);

            // This node qualifies: tighten the bound to its distance and keep its value.
            if (d <= bound)
            {
                bound = d;
                toReturn.Add(bkTree.StringValue);
            }

            // Only child indices in [d - bound, d + bound] (clipped to the array) are visited.
            // NOTE(review): presumably Children[i] holds strings at distance i from this
            // node, which is what makes this window sufficient - confirm against the builder.
            int start = System.Math.Max(0, d - bound);
            int end   = System.Math.Min(d + bound, bkTree.Children.Length - 1);

            for (int i = start; i <= end; i++)
            {
                int oldBound    = bound;
                var subtreeList = LeastEditDistance(s, bkTree.Children[i], ref bound);
                if (oldBound == bound)
                {
                    // Bound unchanged: the child's results join what has been collected so far.
                    toReturn.AddRange(subtreeList);
                }
                else
                {
                    // The child lowered the bound: earlier results are replaced by the
                    // child's list, and the window is recomputed for the smaller bound.
                    // The loop's i++ still runs after this, so the next child visited is
                    // the one after the (possibly advanced) index.
                    toReturn = subtreeList;
                    i        = System.Math.Max(i, d - bound);
                    end      = System.Math.Min(d + bound, bkTree.Children.Length - 1);
                }
            }

            return(toReturn);
        }
Ejemplo n.º 9
0
 /// <summary>
 /// Loose SSN comparison: both values must pass <c>IsSSNValid</c>, and
 /// <c>EditDistanceEngine.Compute</c> on their <c>ToString()</c> forms must be at most 2.
 /// </summary>
 /// <param name="a">First SSN.</param>
 /// <param name="b">Second SSN.</param>
 /// <returns><c>true</c> when both are valid and within the tolerance; otherwise <c>false</c>.</returns>
 public static bool FuzzierSSNMatch(int a, int b)
 {
     // Validity is checked first, a before b, so the distance is only computed
     // for two valid values.
     if (!IsSSNValid(a))
     {
         return false;
     }

     if (!IsSSNValid(b))
     {
         return false;
     }

     string left  = a.ToString();
     string right = b.ToString();

     return EditDistanceEngine.Compute(left, right) <= 2;
 }