/// <summary>
/// Computes a word-by-word fuzzy distance between two space-separated strings,
/// scaled by the character length of the shorter input.
/// </summary>
/// <remarks>
/// Both inputs are split on single spaces and the list with fewer words is aligned
/// against the list with more words. Each compared pair is passed through the
/// <c>Arabic()</c> string extension (defined elsewhere; presumably a normalization
/// step - confirm) and scored with <c>EditDistance.Compute</c>. A pair whose score
/// exceeds half the length of its shorter word is treated as a mismatch: the word
/// from the longer list is skipped and the same word of the shorter list is retried
/// against the next one. Every score, matched or not, is added to the running total.
/// Words left over in either list once the other is exhausted add no penalty.
/// </remarks>
/// <param name="a">First space-separated string. Must not be null.</param>
/// <param name="b">Second space-separated string. Must not be null.</param>
/// <param name="limit">
/// Early-exit threshold on the accumulated (unscaled) distance; once the total
/// exceeds it, comparison stops and the total so far is scaled and returned.
/// </param>
/// <returns>
/// The accumulated distance divided by <c>Math.Min(a.Length, b.Length)</c>. When that
/// length is zero the result is 0 if nothing differed, otherwise positive infinity.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="a"/> or <paramref name="b"/> is null.
/// </exception>
public static float FuzzyCompute(string a, string b, int limit)
        {
            if (a == null)
            {
                throw new ArgumentNullException(nameof(a));
            }
            if (b == null)
            {
                throw new ArgumentNullException(nameof(b));
            }

            string[] shorter = a.Split(' ');
            string[] longer  = b.Split(' ');

            // Always walk the list with fewer words against the one with more.
            if (shorter.Length > longer.Length)
            {
                string[] swap = shorter;
                shorter = longer;
                longer  = swap;
            }

            int   i    = 0;   // cursor into shorter
            int   j    = 0;   // cursor into longer
            float dist = 0;

            // Explicit two-cursor loop: the previous for-loop incremented i in its header
            // AND again on a match, so every word following a match was never compared.
            while (i < shorter.Length && j < longer.Length)
            {
                string si = shorter[i].Arabic();
                string sj = longer[j].Arabic();

                // Score the same pair the threshold below is computed from
                // (previously the score used longer[i] while the threshold used longer[j]).
                int x = EditDistance.Compute(si, sj);

                dist += x;
                if (dist > limit)
                {
                    break;
                }

                // The word from the longer list is consumed either way; the word from the
                // shorter list is consumed only when the pair is close enough to be a match.
                j++;
                if (x <= 0.5 * Math.Min(si.Length, sj.Length))
                {
                    i++;
                }
            }

            int shortestLength = Math.Min(a.Length, b.Length);
            if (shortestLength == 0)
            {
                // Avoid 0/0 (NaN): with nothing to scale by, report "identical" when no
                // difference was accumulated and "infinitely far" otherwise.
                return dist == 0 ? 0f : float.PositiveInfinity;
            }
            return dist / shortestLength;
        }
Beispiel #2
0
        /// <summary>
        /// Pairs every entry read from the first workbook with the closest not-yet-claimed
        /// entry read from the second workbook, using <c>EditDistance.FuzzyCompute</c>.
        /// </summary>
        /// <remarks>
        /// For each entry of the first slice, all unclaimed entries of the second slice are
        /// scored and the lowest score wins (the earliest entry wins ties). A score of 0 is
        /// classified as <c>Exact</c>, a score above 0.5 as <c>NoMatch</c>, anything else as
        /// <c>SemiMatch</c>. An entry of the second slice that was paired as <c>Exact</c> or
        /// <c>SemiMatch</c> is claimed and is not offered to later entries.
        /// </remarks>
        /// <param name="stud">Value passed to <c>ExcelUtil.openexcel</c> for the first workbook.</param>
        /// <param name="mis">Value passed to <c>ExcelUtil.openexcel</c> for the second workbook.</param>
        /// <param name="id">
        /// Value passed to <c>Slice</c> on the first workbook's <c>valueArray</c>
        /// (presumably a column index - confirm against <c>Slice</c>).
        /// </param>
        /// <param name="id2">
        /// Value passed to <c>Slice</c> on the second workbook's <c>valueArray</c>
        /// (presumably a column index - confirm against <c>Slice</c>).
        /// </param>
        /// <returns>
        /// One <c>Match</c> per entry of the first slice, in slice order. When no candidate
        /// was available, <c>B</c> is empty, <c>match</c> is <c>int.MaxValue</c> and the
        /// type is <c>NoMatch</c>.
        /// </returns>
        public static List<Match> Matches(string stud, string mis, int id = 3, int id2 = 2)
        {
            const int    distanceLimit      = 10;    // early-exit limit handed to FuzzyCompute
            const double semiMatchThreshold = 0.5;   // scores above this are NoMatch

            HashSet<int> matched = new HashSet<int>();
            List<Match>  results = new List<Match>();

            // NOTE(review): neither ExcelUtil instance is closed or disposed here; whether
            // that is required cannot be determined from this method - confirm.
            ExcelUtil e = new ExcelUtil();
            ExcelUtil f = new ExcelUtil();

            e.openexcel(stud);
            f.openexcel(mis);

            var n1 = e.valueArray.Slice(id);
            var n2 = f.valueArray.Slice(id2);

            // Candidate text and its Arabic() form do not depend on the outer loop, so each
            // is computed the first time the candidate is visited and reused afterwards.
            string[] candidateText = new string[n2.Length];
            string[] candidateKey  = new string[n2.Length];

            for (int i = 0; i < n1.Length; i++)
            {
                string s1    = ((string)n1[i]).Trim().Space();
                string s1Key = s1.Arabic();

                float  best      = int.MaxValue;
                int    bestIndex = -1;
                string bestText  = "";

                for (int j = 0; j < n2.Length; j++)
                {
                    if (matched.Contains(j))
                    {
                        continue;
                    }

                    if (candidateText[j] == null)
                    {
                        candidateText[j] = ((string)n2[j]).Trim().Space();
                        candidateKey[j]  = candidateText[j].Arabic();
                    }

                    float score = EditDistance.FuzzyCompute(s1Key, candidateKey[j], distanceLimit);
                    if (score < best)
                    {
                        best      = score;
                        bestText  = candidateText[j];
                        bestIndex = j;
                    }
                }

                // NOTE(review): the previous code additionally compared the raw strings with
                // EditDistance.Compute when the score was 0, but both outcomes assigned Exact,
                // so that comparison had no effect and was dropped. If a separate category was
                // intended for "equal only after Arabic()", reintroduce it here.
                matchtype mt;
                if (best == 0)
                {
                    mt = matchtype.Exact;
                }
                else if (best > semiMatchThreshold)
                {
                    mt = matchtype.NoMatch;
                }
                else
                {
                    mt = matchtype.SemiMatch;
                }

                // Claim the candidate for every accepted pairing. Previously a score of
                // exactly 0.5 was labelled SemiMatch yet the candidate stayed unclaimed.
                if (mt != matchtype.NoMatch)
                {
                    matched.Add(bestIndex);
                }

                results.Add(new Match {
                    A = s1, B = bestText, match = best, type = mt
                });
            }

            return results;
        }