Beispiel #1
0
        // ================================================= Helper Methods ================================================

        /// <summary>
        /// Decides whether moving from a character of type <paramref name="lastType"/> to a character of type
        /// <paramref name="type"/> marks a subword boundary.
        /// </summary>
        /// <param name="lastType"> Type of the previous character </param>
        /// <param name="type"> Type of the character being examined </param>
        /// <returns>
        /// <c>false</c> if the two types share a bit or the transition is one of the non-splitting cases below,
        /// <c>true</c> otherwise
        /// </returns>
        private bool IsBreak(int lastType, int type)
        {
            // Any bit in common between the two types: same kind of character, no break.
            if ((lastType & type) != 0)
            {
                return false;
            }

            // ALPHA->ALPHA: never a break when case changes are not being split on.
            if (!splitOnCaseChange && WordDelimiterFilter.IsAlpha(lastType) && WordDelimiterFilter.IsAlpha(type))
            {
                return false;
            }

            // UPPER->letter: keep together.
            if (WordDelimiterFilter.IsUpper(lastType) && WordDelimiterFilter.IsAlpha(type))
            {
                return false;
            }

            // From here on, only a letter/digit transition with numeric splitting disabled can prevent a break.
            if (splitOnNumerics)
            {
                return true;
            }

            // ALPHA->NUMERIC: keep together.
            if (WordDelimiterFilter.IsAlpha(lastType) && WordDelimiterFilter.IsDigit(type))
            {
                return false;
            }

            // NUMERIC->ALPHA: keep together; anything else is a break.
            return !WordDelimiterFilter.IsDigit(lastType) || !WordDelimiterFilter.IsAlpha(type);
        }
Beispiel #2
0
 /// <summary>
 /// Checks whether the two characters immediately before <paramref name="pos"/> form an English possessive
 /// suffix (<c>'s</c> or <c>'S</c>) that should be removed.
 /// </summary>
 /// <param name="pos"> Position in the text just past the candidate possessive suffix </param>
 /// <returns>
 /// <c>true</c> if possessive stemming is enabled, the suffix follows an alphabetic character, and
 /// <paramref name="pos"/> is either the end bound or a subword delimiter; <c>false</c> otherwise
 /// </returns>
 private bool EndsWithPossessive(int pos)
 {
     // Possessive stemming must be on, and there must be room for "<letter>'s" before pos.
     if (!stemEnglishPossessive || pos <= 2)
     {
         return false;
     }

     // The apostrophe is checked first so the following character is only read when needed.
     if (text[pos - 2] != '\'')
     {
         return false;
     }

     var suffixLetter = text[pos - 1];
     if (suffixLetter != 's' && suffixLetter != 'S')
     {
         return false;
     }

     // The apostrophe has to be attached to a letter.
     if (!WordDelimiterFilter.IsAlpha(CharType(text[pos - 3])))
     {
         return false;
     }

     // The suffix must end the bounded text or be followed by a delimiter.
     return pos == endBounds || WordDelimiterFilter.IsSubwordDelim(CharType(text[pos]));
 }
Beispiel #3
0
        /// <summary>
        /// Narrows <c>startBounds</c> and <c>endBounds</c> so that leading and trailing subword delimiters are excluded,
        /// then positions <c>current</c> at the new start. A trailing possessive is only flagged via
        /// <c>hasFinalPossessive</c> here; it is not removed.
        /// </summary>
        private void SetBounds()
        {
            // Skip delimiters at the front.
            while (startBounds < length)
            {
                if (!WordDelimiterFilter.IsSubwordDelim(CharType(text[startBounds])))
                {
                    break;
                }
                startBounds++;
            }

            // Skip delimiters at the back, never crossing the start bound.
            while (endBounds > startBounds)
            {
                if (!WordDelimiterFilter.IsSubwordDelim(CharType(text[endBounds - 1])))
                {
                    break;
                }
                endBounds--;
            }

            // Only ever set the flag; an existing true value is left untouched.
            bool possessiveAtEnd = EndsWithPossessive(endBounds);
            if (possessiveAtEnd)
            {
                hasFinalPossessive = true;
            }

            current = startBounds;
        }
Beispiel #4
0
        /// <summary>
        /// Moves on to the next subword in the text. After a successful call, <c>current</c> is the index of the
        /// subword's first character and <c>end</c> is the index just past its last character.
        /// </summary>
        /// <returns> the new value of <c>end</c>, or <see cref="DONE"/> if all subwords have been returned </returns>
        internal int Next()
        {
            // Resume where the previous subword stopped.
            current = end;
            if (current == DONE)
            {
                return DONE;
            }

            // The previous call found a possessive suffix right after its subword: step over those two characters.
            if (skipPossessive)
            {
                skipPossessive = false;
                current += 2;
            }

            // Skip delimiters; previousType ends up holding the type of the first non-delimiter character.
            int previousType = 0;
            while (current < endBounds)
            {
                previousType = CharType(text[current]);
                if (!WordDelimiterFilter.IsSubwordDelim(previousType))
                {
                    break;
                }
                current++;
            }

            // Nothing but delimiters remained.
            if (current >= endBounds)
            {
                end = DONE;
                return DONE;
            }

            // Extend the subword until a type transition counts as a break or the end bound is reached.
            end = current + 1;
            while (end < endBounds)
            {
                int currentType = CharType(text[end]);
                if (IsBreak(previousType, currentType))
                {
                    break;
                }
                previousType = currentType;
                end++;
            }

            // Remember to skip a possessive suffix that directly follows this subword.
            bool possessiveFollows = end < endBounds - 1 && EndsWithPossessive(end + 2);
            if (possessiveFollows)
            {
                skipPossessive = true;
            }

            return end;
        }
Beispiel #5
0
 /// <summary>
 /// Creates a new instance and stores the given filter in <c>outerInstance</c>.
 /// </summary>
 /// <param name="outerInstance"> The <see cref="WordDelimiterFilter"/> to keep a reference to </param>
 public WordDelimiterConcatenation(WordDelimiterFilter outerInstance)
     => this.outerInstance = outerInstance;
Beispiel #6
0
 /// <summary>
 /// Creates a new instance and stores the given filter in <c>outerInstance</c>.
 /// </summary>
 /// <param name="outerInstance"> The <see cref="WordDelimiterFilter"/> to keep a reference to </param>
 public OffsetSorter(WordDelimiterFilter outerInstance)
     => this.outerInstance = outerInstance;