Beispiel #1
0
        /// <summary>
        /// Builds a prepared query, caching the values derived from the raw query text.
        /// </summary>
        /// <param name="query">Raw query text; must be neither null nor empty.</param>
        /// <param name="charRegex">Pattern handed to <c>GetCoreChars</c>; <c>CONST_OPT_CHAR_RGX</c> is used when this is null.</param>
        /// <param name="pathSeparator">Path separator; it is converted to a single <see cref="char"/> for the directory count.</param>
        /// <exception cref="StringScorerException">Thrown when <paramref name="query"/> is null or empty.</exception>
        /// <exception cref="ArgumentNullException">Raised by <c>Convert.ToChar</c> when <paramref name="pathSeparator"/> is null.</exception>
        /// <exception cref="FormatException">Raised by <c>Convert.ToChar</c> when <paramref name="pathSeparator"/> is not exactly one character long.</exception>
        public StringScorerQuery(string query, string charRegex, string pathSeparator)
        {
            if (query == null || query.Length == 0)
            {
                throw new StringScorerException("Query cannot be null or empty");
            }

            // Plain inputs first; _query is stored before QueryLw is read below.
            _query = query;
            _charRegex = charRegex ?? CONST_OPT_CHAR_RGX;
            _pathSeparator = pathSeparator;

            // Values derived from the inputs stored above.
            _coreChars = GetCoreChars(query, _charRegex);
            _ext = ScorerUtil.GetExtension(QueryLw);
            _charCode = GetCharCodes(QueryLw);

            // The separator is only accepted as a one-character string; Convert.ToChar enforces that.
            var separatorChar = Convert.ToChar(_pathSeparator);
            _depth = ScorerUtil.CountDir(query, query.Length, separatorChar);
        }
        /// <summary>
        /// Checks that <paramref name="subject"/> contains no more word starts than the
        /// number of acronym characters matched by the query.
        /// </summary>
        /// <param name="subject">Candidate text.</param>
        /// <param name="subjectLw">Companion string passed to <c>ScorerUtil.IsWordStart</c> together with <paramref name="subject"/>.</param>
        /// <param name="query">Query text; only its length is used here.</param>
        /// <param name="nbAcronymInQuery">Maximum number of word starts the subject may contain.</param>
        /// <returns>
        /// <c>false</c> when the subject is more than 12 times longer than the query or has more
        /// word starts than <paramref name="nbAcronymInQuery"/>; otherwise <c>true</c>.
        /// </returns>
        private static bool IsAcronymFullWord(string subject, string subjectLw, string query, int nbAcronymInQuery)
        {
            // Length cut-off: very long subjects are rejected without being scanned.
            if (subject.Length > 12 * query.Length)
            {
                return false;
            }

            var wordStarts = 0;

            for (var index = 0; index < subject.Length; index++)
            {
                if (!ScorerUtil.IsWordStart(index, subject, subjectLw))
                {
                    continue;
                }

                wordStarts++;

                // One word start too many: stop scanning immediately.
                if (wordStarts > nbAcronymInQuery)
                {
                    return false;
                }
            }

            return true;
        }
Beispiel #3
0
        /// <summary>
        /// Computes the positions in <paramref name="subject"/> that align with the characters
        /// of the prepared query.
        /// </summary>
        /// <remarks>
        /// A score table is filled row by row (one row per subject character, one column per
        /// query character) while the move that produced each cell is recorded. The recorded
        /// moves are then walked back from the last cell, and every diagonal move is reported
        /// as a matched position.
        /// </remarks>
        /// <param name="subject">Candidate text.</param>
        /// <param name="subject_lw">
        /// Text compared character by character against the query's <c>QueryLw</c>;
        /// presumably the lower-cased subject (confirm with callers).
        /// </param>
        /// <param name="preparedQuery">Prepared query supplying <c>Query</c> and <c>QueryLw</c>.</param>
        /// <param name="offset">Value added to every reported index.</param>
        /// <returns>The matched subject indices (each plus <paramref name="offset"/>) in ascending order.</returns>
        private static int[] ComputeMatch(string subject, string subject_lw, StringScorerQuery preparedQuery, int offset = 0)
        {
            var query     = preparedQuery.Query;
            var queryLw   = preparedQuery.QueryLw;
            var m         = subject.Length;
            var n         = query.Length;
            var acroScore = AcronymResult.ScoreAcronyms(subject, subject_lw, query, queryLw).Score;

            // Newly allocated arrays are zero-initialised, so no explicit reset loop is required.
            var scoreRow = new double[n];
            var cscRow   = new double[n];

            // One recorded move per (subject index, query index) cell, stored row-major.
            var trace = new Moves[m * n];
            var pos   = -1;

            for (var i = 0; i < m; i++)
            {
                var score   = 0.0;
                var scoreUp = 0.0;
                var cscDiag = 0.0;
                var siLw    = subject_lw[i];

                for (var j = 0; j < n; j++)
                {
                    var cscScore  = 0.0;
                    var align     = 0.0;
                    var scoreDiag = scoreUp;

                    // Tentative alignment of subject[i] with query[j].
                    if (queryLw[j] == siLw)
                    {
                        var start = ScorerUtil.IsWordStart(i, subject, subject_lw);

                        // Reuse the consecutive score carried along the diagonal when there is one.
                        cscScore = cscDiag > 0 ? cscDiag : Scorer.ScoreConsecutives(subject, subject_lw, query, queryLw, i, j, start);
                        align    = scoreDiag + Scorer.ScoreCharacter(i, j, start, acroScore, cscScore);
                    }

                    // Values of the previous row at this column become "diagonal" for column j + 1.
                    scoreUp = scoreRow[j];
                    cscDiag = cscRow[j];

                    Moves move;

                    // On a tie UP is chosen over LEFT.
                    if (score > scoreUp)
                    {
                        move = Moves.LEFT;
                    }
                    else
                    {
                        score = scoreUp;
                        move  = Moves.UP;
                    }

                    // The alignment is taken only when it is strictly better than skipping.
                    if (align > score)
                    {
                        score = align;
                        move  = Moves.DIAGONAL;
                    }
                    else
                    {
                        // Character not taken: the consecutive run is broken.
                        cscScore = 0;
                    }

                    scoreRow[j]  = score;
                    cscRow[j]    = cscScore;
                    trace[++pos] = score > 0 ? move : Moves.STOP;
                }
            }

            // Walk the recorded moves back from the bottom-right cell.
            var row = m - 1;
            var col = n - 1;
            pos = row * n + col;

            var backtrack = true;
            var matches   = new List<int>();

            while (backtrack && row >= 0 && col >= 0)
            {
                switch (trace[pos])
                {
                case Moves.UP:
                    row--;
                    pos -= n;
                    break;

                case Moves.LEFT:
                    col--;
                    pos--;
                    break;

                case Moves.DIAGONAL:
                    matches.Add(row + offset);
                    col--;
                    row--;
                    pos -= n + 1;
                    break;

                default:
                    // STOP (or any unexpected value) ends the walk.
                    backtrack = false;
                    break;
                }
            }

            // Positions were collected from last to first.
            matches.Reverse();

            return matches.ToArray();
        }
        /// <summary>
        /// Scores how well the query matches the word-start characters of the subject.
        /// </summary>
        /// <param name="subject">Candidate text.</param>
        /// <param name="subjectLw">
        /// Text searched and compared against <paramref name="queryLw"/>; presumably the
        /// lower-cased subject (confirm with callers).
        /// </param>
        /// <param name="query">Query text.</param>
        /// <param name="queryLw">
        /// Text whose characters drive the scan; presumably the lower-cased query (confirm with callers).
        /// </param>
        /// <returns>
        /// The result of <c>CreateEmptyAcronymResult()</c> when either input has at most one
        /// character or fewer than two word-start characters matched; otherwise a result built
        /// from the pattern score, the average matched position, and the number of matched
        /// characters plus matched separators.
        /// </returns>
        public static AcronymResult ScoreAcronyms(string subject, string subjectLw, string query, string queryLw)
        {
            var m = subject.Length;
            var n = query.Length;

            // A single character cannot form an acronym, so stop as soon as either side is that short.
            if (m <= 1 || n <= 1)
            {
                return CreateEmptyAcronymResult();
            }

            var count    = 0;
            var sepCount = 0;
            var sumPos   = 0;
            var sameCase = 0;

            // Cursor into the subject; it only ever moves forward.
            var i = -1;

            for (var j = 0; j < n; j++)
            {
                var qjLw = queryLw[j];

                if (ScorerUtil.IsSeparator(qjLw))
                {
                    // A separator earns no point; it must exist after the cursor or the scan ends.
                    i = subjectLw.IndexOf(qjLw, i + 1);

                    if (i < 0)
                    {
                        break;
                    }

                    sepCount++;
                    continue;
                }

                // Advance to the next word start carrying this query character.
                while (++i < m)
                {
                    if (qjLw == subjectLw[i] && ScorerUtil.IsWordStart(i, subject, subjectLw))
                    {
                        if (query[j] == subject[i])
                        {
                            sameCase++;
                        }

                        sumPos += i;
                        count++;
                        break;
                    }
                }

                // Subject exhausted: the remaining query characters cannot match.
                if (i == m)
                {
                    break;
                }
            }

            // Fewer than two hits is not an acronym; this also keeps the division below safe.
            if (count < 2)
            {
                return CreateEmptyAcronymResult();
            }

            // The full-word check runs only when every query character was matched as an acronym character.
            var isFullWord = count == n && IsAcronymFullWord(subject, subjectLw, query, count);

            double score = Scorer.ScorePattern(count, n, sameCase, true, isFullWord);

            return new AcronymResult(score, (double)sumPos / count, count + sepCount);
        }