예제 #1
0
        /// <summary>
        /// Scores a query that occurs verbatim (ignoring case) inside the subject.
        /// </summary>
        /// <param name="subject">Subject string in its original casing.</param>
        /// <param name="subjectLw">Lower-cased counterpart of <paramref name="subject"/>; used to look for a second occurrence.</param>
        /// <param name="query">Query string in its original casing.</param>
        /// <param name="queryLw">Lower-cased counterpart of <paramref name="query"/>.</param>
        /// <param name="pos">Index in <paramref name="subjectLw"/> at which the caller found <paramref name="queryLw"/>.</param>
        /// <param name="n">Number of matched characters (the query length as seen by the caller).</param>
        /// <param name="m">Value forwarded to <c>IsWordEnd</c> and <c>ScoreExact</c> as the subject length.</param>
        /// <returns>
        /// <c>ScoreExact</c> applied to the <c>ScorePattern</c> of the match, using the occurrence
        /// that was finally selected.
        /// </returns>
        public static double ScoreExactMatch(string subject, string subjectLw, string query, string queryLw, int pos, int n, int m)
        {
            var start = ScorerUtil.IsWordStart(pos, subject, subjectLw);

            // When the first occurrence is not at a word start, probe exactly one more
            // occurrence and prefer it only if it does begin a word.
            if (!start)
            {
                // Ordinal search: the strings are already lower-cased and every later step
                // indexes them character by character, so a culture-aware match would not
                // be guaranteed to line up with those indices.
                var pos2 = subjectLw.IndexOf(queryLw, pos + 1, System.StringComparison.Ordinal);
                if (pos2 > -1 && ScorerUtil.IsWordStart(pos2, subject, subjectLw))
                {
                    start = true;
                    pos   = pos2;
                }
            }

            // Count characters whose original casing agrees. The match occupies
            // subject[pos .. pos + n), so query[i] lines up with subject[pos + i].
            var sameCase = 0;

            for (var i = 0; i < n; i++)
            {
                var subjectIndex = pos + i;

                // Bounds are re-checked defensively in case n exceeds either string.
                if (i < query.Length && subjectIndex < subject.Length && query[i] == subject[subjectIndex])
                {
                    sameCase++;
                }
            }

            var end = ScorerUtil.IsWordEnd(pos + n - 1, subject, subjectLw, m);

            return ScoreExact(n, m, ScorePattern(n, n, sameCase, start, end), pos);
        }
예제 #2
0
        /// <summary>
        /// Measures the run of consecutive case-insensitive matches that starts at
        /// <c>subject[i]</c> / <c>query[j]</c> and converts it into a pattern score.
        /// </summary>
        /// <param name="subject">Subject string in its original casing.</param>
        /// <param name="subjectLw">Lower-cased counterpart of <paramref name="subject"/>.</param>
        /// <param name="query">Query string in its original casing.</param>
        /// <param name="queryLw">Lower-cased counterpart of <paramref name="query"/>.</param>
        /// <param name="i">Index of the first matched character in the subject.</param>
        /// <param name="j">Index of the first matched character in the query.</param>
        /// <param name="startOfWord">Forwarded unchanged to <c>ScorePattern</c>.</param>
        /// <returns>
        /// <c>ScorePattern</c> for the run length, the query length, the number of
        /// exact-case matches, <paramref name="startOfWord"/> and whether the last
        /// matched subject index is a word end.
        /// </returns>
        public static double ScoreConsecutives(string subject, string subjectLw, string query, string queryLw, int i,
                                               int j, bool startOfWord)
        {
            var subjectLength = subject.Length;
            var queryLength   = query.Length;

            // The run cannot extend past the end of either string.
            var maxRun = subjectLength - i;
            if (queryLength - j < maxRun)
            {
                maxRun = queryLength - j;
            }

            // The first pair is taken as matching without a lower-case comparison;
            // only its exact casing is checked here.
            var sameCase = query[j] == subject[i] ? 1 : 0;

            var runLength    = 1;
            var queryCursor  = j;
            var subjectCursor = i;

            // Extend the run while the next lower-cased characters agree. The cursors
            // always rest on the last matched pair.
            while (runLength < maxRun)
            {
                if (queryLw[queryCursor + 1] != subjectLw[subjectCursor + 1])
                {
                    break;
                }

                queryCursor++;
                subjectCursor++;

                if (query[queryCursor] == subject[subjectCursor])
                {
                    sameCase++;
                }

                runLength++;
            }

            var endOfWord = ScorerUtil.IsWordEnd(subjectCursor, subject, subjectLw, subjectLength);

            return ScorePattern(runLength, queryLength, sameCase, startOfWord, endOfWord);
        }
예제 #3
0
        /// <summary>
        /// Blends a full-path score with a score computed on the trailing path
        /// segments only, optionally applying an extension bonus.
        /// </summary>
        /// <param name="subject">Path being scored, in its original casing.</param>
        /// <param name="subjectLw">Lower-cased counterpart of <paramref name="subject"/>.</param>
        /// <param name="score">Score already computed for the whole path.</param>
        /// <param name="options">
        /// Supplies the path separator, the prepared query (its <c>Ext</c> and <c>Depth</c>
        /// are read here) and the <c>UseExtensionBonus</c> switch.
        /// </param>
        /// <returns>
        /// 0 when <paramref name="score"/> is 0; <paramref name="score"/> unchanged when the
        /// subject contains nothing but separators; the (possibly extension-adjusted) score
        /// when the subject has no separator before its last segment; otherwise a weighted
        /// mix of the base-path score and the size-penalised full-path score.
        /// </returns>
        private double ScorePath(string subject, string subjectLw, double score, StringScorerOptions options)
        {
            if (score == 0)
            {
                return 0;
            }

            var pathSeparator = Convert.ToChar(options.PathSeparator);
            var preparedQuery = options.PreparedQuery;

            // Skip trailing separators. The bound check keeps an empty or
            // separator-only subject from indexing below zero.
            var end = subject.Length - 1;

            while (end >= 0 && subject[end] == pathSeparator)
            {
                end--;
            }

            if (end < 0)
            {
                // Nothing but separators: there is no last segment to analyse.
                return score;
            }

            var basePos    = subject.LastIndexOf(pathSeparator, end);
            var fileLength = end - basePos;
            var extAdjust  = 1.0;

            if (options.UseExtensionBonus)
            {
                extAdjust += GetExtensionScore(subjectLw, preparedQuery.Ext, basePos, end, 2);
                score     *= extAdjust;
            }

            // No separator before the last segment: nothing further to compute.
            if (basePos == -1)
            {
                return score;
            }

            // Walk back one additional separator per unit of query depth.
            var depth = preparedQuery.Depth;

            while (basePos > -1 && depth-- > 0)
            {
                // LastIndexOf rejects a negative start index, and no separator can
                // precede index 0, so a separator at position 0 ends the walk.
                basePos = basePos > 0 ? subject.LastIndexOf(pathSeparator, basePos - 1) : -1;
            }

            // When the walk consumed the whole string the base path equals the full
            // path, so its score is reused instead of being recomputed.
            double basePathScore;

            if (basePos == -1)
            {
                basePathScore = score;
            }
            else
            {
                var from = basePos + 1;
                var to   = end + 1;

                basePathScore = extAdjust * Scorer.ComputeScore(subject.Slice(from, to), subjectLw.Slice(from, to), preparedQuery);
            }

            // alpha shrinks as the directory count returned by CountDir grows, shifting
            // weight from the base-path score towards the full-path score.
            var alpha = 0.5 * CONST_TAU_DEPTH / (CONST_TAU_DEPTH + ScorerUtil.CountDir(subject, end + 1, pathSeparator));

            return alpha * basePathScore + (1 - alpha) * score * Scorer.ScoreSize(0, CONST_FILE_COEFF * fileLength);
        }
예제 #4
0
        /// <summary>
        /// Computes the match score of a prepared query against a subject string.
        /// </summary>
        /// <remarks>
        /// Three strategies are tried in order: (1) if the acronym scorer accounts for every
        /// query character, its score is used; (2) if the lower-cased query occurs verbatim in
        /// the lower-cased subject, <c>ScoreExactMatch</c> is used; (3) otherwise a
        /// row-by-row dynamic-programming pass aligns query characters against subject
        /// characters, giving up early once too many consecutive candidate matches are rejected.
        /// </remarks>
        /// <param name="subject">Subject string in its original casing.</param>
        /// <param name="subjectLw">Lower-cased counterpart of <paramref name="subject"/>.</param>
        /// <param name="preparedQuery">
        /// Provides <c>Query</c>, <c>QueryLw</c> and <c>CharCodes</c> (the set used to skip
        /// subject characters that do not occur in the query).
        /// </param>
        /// <returns>The score of the best alignment found, scaled by <c>ScoreSize(n, m)</c> in the third strategy.</returns>
        public static double ComputeScore(string subject, string subjectLw, StringScorerQuery preparedQuery)
        {
            var query   = preparedQuery.Query;
            var queryLw = preparedQuery.QueryLw;

            var m = subject.Length;
            var n = query.Length;

            // 1) The whole query is matched as an acronym.
            var acro      = AcronymResult.ScoreAcronyms(subject, subjectLw, query, queryLw);
            var acroScore = acro.Score;

            if (acro.Count == n)
            {
                return Scorer.ScoreExact(n, m, acroScore, acro.Pos);
            }

            // 2) The query occurs verbatim. The search is ordinal because everything
            // downstream indexes both strings character by character; a culture-aware
            // match is not guaranteed to align with those indices.
            var pos = subjectLw.IndexOf(queryLw, StringComparison.Ordinal);

            if (pos > -1)
            {
                return ScoreExactMatch(subject, subjectLw, query, queryLw, pos, n, m);
            }

            // 3) Character-by-character alignment.
            // scoreRow[j]: best score so far for the query prefix ending at j.
            // cscRow[j]:   consecutive-run score recorded for column j on the previous row.
            // Both arrays start zero-filled, as C# guarantees for new numeric arrays.
            var scoreRow = new double[n];
            var cscRow   = new double[n];
            var sz       = Scorer.ScoreSize(n, m);

            var missBudget       = Math.Ceiling(CONST_MISS_COEFF * n) + 5;
            var missLeft         = missBudget;
            var cscShouldRebuild = true;

            for (var i = 0; i < m; i++)
            {
                var siLW = subjectLw[i];

                if (!preparedQuery.CharCodes.Contains(siLW))
                {
                    // A subject character absent from the query breaks every consecutive
                    // run; clear the run scores once and skip repeated clears until the
                    // next row that actually writes to cscRow.
                    if (cscShouldRebuild)
                    {
                        Array.Clear(cscRow, 0, n);
                        cscShouldRebuild = false;
                    }

                    continue;
                }

                var score      = 0.0;
                var scoreDiag  = 0.0;
                var cscDiag    = 0.0;
                var recordMiss = true; // at most one miss is charged per subject character
                cscShouldRebuild = true;

                for (var j = 0; j < n; j++)
                {
                    // Carry forward the better of "skip this subject character" (scoreUp)
                    // and the running best from earlier columns of this row (score).
                    var scoreUp = scoreRow[j];
                    if (scoreUp > score)
                    {
                        score = scoreUp;
                    }

                    var cscScore = 0.0;

                    if (queryLw[j] == siLW)
                    {
                        var start = ScorerUtil.IsWordStart(i, subject, subjectLw);

                        // Reuse the run score inherited from the diagonal when there is
                        // one; otherwise measure a new run starting here.
                        cscScore = cscDiag > 0
                            ? cscDiag
                            : Scorer.ScoreConsecutives(subject, subjectLw, query, queryLw, i, j, start);

                        var align = scoreDiag + Scorer.ScoreCharacter(i, j, start, acroScore, cscScore);

                        if (align > score)
                        {
                            score    = align;
                            missLeft = missBudget; // an accepted match restores the budget
                        }
                        else if (recordMiss && --missLeft <= 0)
                        {
                            // Budget exhausted: return the best score known so far.
                            return Math.Max(score, scoreRow[n - 1]) * sz;
                        }
                        else
                        {
                            recordMiss = false;
                        }
                    }

                    // Shift the previous-row values into the diagonal slots before
                    // overwriting this column.
                    scoreDiag   = scoreUp;
                    cscDiag     = cscRow[j];
                    cscRow[j]   = cscScore;
                    scoreRow[j] = score;
                }
            }

            // The last column holds the score for the complete query.
            return scoreRow[n - 1] * sz;
        }