Ejemplo n.º 1
0
            /// <summary>
            /// Decides whether this score should be preferred over <paramref name="s1"/>.
            /// </summary>
            /// <remarks>
            /// The criteria are evaluated in order and the first one that differs decides:
            /// 1. larger sum of letters-in-word and letters-in-token counts,
            /// 2. larger letters-in-word count,
            /// 3. smaller total split count,
            /// 4. larger identified split count.
            /// When every criterion ties, this score is still reported as better.
            /// </remarks>
            /// <param name="s1">Score to compare against; null is always beaten.</param>
            /// <returns>True if this score wins (or ties), false otherwise.</returns>
            internal bool IsBetterThan(SplitSetScore s1)
            {
                if (s1 == null)
                {
                    return true;
                }

                // 1. total number of letters covered by words and tokens: more is better
                int ownCoveredLetters   = _lettersInWordCount + _lettersInTokenCount;
                int otherCoveredLetters = s1._lettersInWordCount + s1._lettersInTokenCount;
                int comparison          = ownCoveredLetters.CompareTo(otherCoveredLetters);
                if (comparison != 0)
                {
                    return comparison > 0;
                }

                // 2. letters covered by words only: more is better
                comparison = _lettersInWordCount.CompareTo(s1._lettersInWordCount);
                if (comparison != 0)
                {
                    return comparison > 0;
                }

                // 3. number of splits: fewer is better
                comparison = _totalSplitsCount.CompareTo(s1._totalSplitsCount);
                if (comparison != 0)
                {
                    return comparison < 0;
                }

                // 4. number of identified splits: more is better
                comparison = _identifiedSplitCount.CompareTo(s1._identifiedSplitCount);
                if (comparison != 0)
                {
                    return comparison > 0;
                }

                // complete tie: the current instance wins
                return true;
            }
Ejemplo n.º 2
0
        /// <summary>
        /// Performs split specific to this splitter: lower-cases the identifier, computes the
        /// best split set score starting at index 0 and converts it into a list of splits.
        /// </summary>
        /// <param name="identifier">Identifier to split.</param>
        /// <returns>
        /// List of splits; an empty list when <paramref name="identifier"/> is empty or when
        /// no split set score is available for it.
        /// </returns>
        private List <SplitWithIdentification> BestSuffixSplit(string identifier)
        {
            // reset the per-call working state
            _term           = identifier.ToLowerInvariant().ToCharArray();
            _splitSetScores = new SplitSetScore[_term.Length];

            // an empty term has nothing to split; do not ask for a score at index 0 of an empty array
            if (_term.Length == 0)
            {
                return(new List <SplitWithIdentification>());
            }

            // find best split for whole word
            SplitSetScore wholeWordSplitSetScore = GetBestSplitSetScore(0);

            // defensive: avoid dereferencing a missing score
            if (wholeWordSplitSetScore == null)
            {
                return(new List <SplitWithIdentification>());
            }

            return(wholeWordSplitSetScore.GetSplitWithIdentifications(_term));
        }
Ejemplo n.º 3
0
            /// <summary>
            /// Walks the chain of scores that starts at this instance and produces one split per score.
            /// </summary>
            /// <param name="term">Characters from which each split's text is cut, using the inclusive start and end indexes stored on every score.</param>
            /// <returns>Splits in chain order, beginning with the split for this score.</returns>
            internal List <SplitWithIdentification> GetSplitWithIdentifications(char[] term)
            {
                List <SplitWithIdentification> splits = new List <SplitWithIdentification>();

                // the chain starts at this instance, so at least one split is always produced
                for (SplitSetScore node = this; node != null; node = node._nextSplitSetScore)
                {
                    // both indexes are inclusive, hence the + 1
                    int    segmentLength = node._endIndex - node._startIndex + 1;
                    string segmentText   = new string(term, node._startIndex, segmentLength);

                    splits.Add(new SplitWithIdentification(segmentText, node._identification));
                }

                return splits;
            }
Ejemplo n.º 4
0
            /// <summary>
            /// Links <paramref name="s1"/> as the next score in the chain and folds its counters into this one.
            /// </summary>
            /// <param name="s1">Score that follows this one; may be null, in which case only the link is set.</param>
            private void AddScore(SplitSetScore s1)
            {
                _nextSplitSetScore = s1;

                // Counters are accumulated here instead of being recomputed by walking the chain,
                // because they are read very often. Start and end indexes are not merged; they
                // are read from the next score itself.
                if (s1 != null)
                {
                    _totalSplitsCount     += s1._totalSplitsCount;
                    _identifiedSplitCount += s1._identifiedSplitCount;
                    _lettersInWordCount   += s1._lettersInWordCount;
                    _lettersInTokenCount  += s1._lettersInTokenCount;
                }
            }
Ejemplo n.º 5
0
 /// <summary>
 /// Fills the score array for every start position from <paramref name="endIndex"/> down to
 /// <paramref name="startIndex"/> and returns the entry stored for <paramref name="startIndex"/>.
 /// </summary>
 /// <param name="startIndex">Start index of term to calculate split set score</param>
 /// <param name="endIndex">End index of term to calculate split set score</param>
 /// <returns>Split Set Score stored at <paramref name="startIndex"/></returns>
 private SplitSetScore PopulateBestSplitSetScore(int startIndex, int endIndex)
 {
     // positions are processed from the end of the range towards its start
     for (int segmentStart = endIndex; segmentStart >= startIndex; segmentStart--)
     {
         List <SplitPositionWithIdentification> knownEndPositions = TokenDictionary.GetPossibleEndIndexesList(_term, segmentStart, endIndex);
         SplitSetScore winner = null;

         // try every possible end of the first segment that begins at segmentStart
         for (int segmentEnd = segmentStart; segmentEnd <= endIndex; segmentEnd++)
         {
             // use the dictionary's entry for this end position, or fall back to an unidentified split
             SplitPositionWithIdentification endPosition =
                 knownEndPositions.FirstOrDefault(x => x.Position == segmentEnd)
                 ?? new SplitPositionWithIdentification(segmentEnd, SplitIdentification.Unidentified);

             // score of this segment followed by the best score for the remainder of the term
             SplitSetScore remainder = GetBestSplitSetScore(segmentEnd + 1);
             SplitSetScore candidate = new SplitSetScore(endPosition, remainder, segmentStart);

             if (candidate.IsBetterThan(winner))
             {
                 winner = candidate;
             }
         }

         _splitSetScores[segmentStart] = winner;
     }

     return _splitSetScores[startIndex];
 }
Ejemplo n.º 6
0
            /// <summary>
            /// Creates the score for a single split spanning <paramref name="startIndex"/> to the
            /// position of <paramref name="splitPositionWithIdentification"/> (both inclusive) and
            /// chains <paramref name="nextSplitSetScore"/> after it.
            /// </summary>
            /// <param name="splitPositionWithIdentification">End position of the split and its identification.</param>
            /// <param name="nextSplitSetScore">Score that follows this split; may be null.</param>
            /// <param name="startIndex">Index at which the split starts.</param>
            internal SplitSetScore(SplitPositionWithIdentification splitPositionWithIdentification, SplitSetScore nextSplitSetScore, int startIndex)
            {
                _startIndex       = startIndex;
                _endIndex         = splitPositionWithIdentification.Position;
                _totalSplitsCount = 1;

                SplitIdentification identification = splitPositionWithIdentification.SplitIdentification;
                if (SplitterUtility.IsNotUnidentified(identification))
                {
                    _identification       = identification;
                    _identifiedSplitCount = 1;

                    // inclusive range, hence the + 1
                    int coveredLetters = _endIndex - _startIndex + 1;

                    // letters count as "word" letters for Identified, as "token" letters for any other identification
                    if (identification == SplitIdentification.Identified)
                    {
                        _lettersInWordCount = coveredLetters;
                    }
                    else
                    {
                        _lettersInTokenCount = coveredLetters;
                    }
                }

                // must run after this split's own counters are set: it adds the next score's counters on top
                AddScore(nextSplitSetScore);
            }