// ================================================= Helper Methods ================================================

/// <summary>
/// Decides whether moving from a character of type <paramref name="lastType"/> to a character of
/// type <paramref name="type"/> marks a subword boundary.
/// </summary>
/// <param name="lastType"> Type of the previous subword character </param>
/// <param name="type"> Type of the current subword character </param>
/// <returns> <c>true</c> when the transition is a break, <c>false</c> when it is not </returns>
private bool IsBreak(int lastType, int type)
{
    // Types that share at least one bit never produce a break.
    if ((lastType & type) != 0)
    {
        return false;
    }

    // ALPHA->ALPHA: a case change only matters when splitOnCaseChange is enabled.
    if (!splitOnCaseChange && WordDelimiterFilter.IsAlpha(lastType) && WordDelimiterFilter.IsAlpha(type))
    {
        return false;
    }

    // UPPER->letter: keep the letters together.
    if (WordDelimiterFilter.IsUpper(lastType) && WordDelimiterFilter.IsAlpha(type))
    {
        return false;
    }

    // ALPHA->NUMERIC and NUMERIC->ALPHA: only a break when splitOnNumerics is enabled.
    if (!splitOnNumerics)
    {
        bool alphaThenDigit = WordDelimiterFilter.IsAlpha(lastType) && WordDelimiterFilter.IsDigit(type);
        if (alphaThenDigit || (WordDelimiterFilter.IsDigit(lastType) && WordDelimiterFilter.IsAlpha(type)))
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Checks whether the two characters immediately before <paramref name="pos"/> form an English
/// possessive suffix (<c>'s</c> or <c>'S</c>) that should be removed.
/// </summary>
/// <param name="pos"> Position in the text just past the candidate possessive suffix </param>
/// <returns> <c>true</c> if the text ending at the position is an English possessive, <c>false</c> otherwise </returns>
private bool EndsWithPossessive(int pos)
{
    // Feature switched off, or not enough room for "<letter>'s" before pos.
    if (!stemEnglishPossessive || pos <= 2)
    {
        return false;
    }

    // The suffix must be an apostrophe followed by 's' or 'S'.
    if (text[pos - 2] != '\'')
    {
        return false;
    }

    char suffixLetter = text[pos - 1];
    if (suffixLetter != 's' && suffixLetter != 'S')
    {
        return false;
    }

    // The apostrophe must directly follow an alphabetic character.
    if (!WordDelimiterFilter.IsAlpha(CharType(text[pos - 3])))
    {
        return false;
    }

    // The suffix must close the word: either pos is the end bound or a subword delimiter sits at pos.
    return pos == endBounds || WordDelimiterFilter.IsSubwordDelim(CharType(text[pos]));
}
/// <summary>
/// Narrows <c>startBounds</c> and <c>endBounds</c> so that leading and trailing subword delimiters
/// are excluded. A trailing possessive is only recorded in <c>hasFinalPossessive</c>; it is not
/// removed here.
/// </summary>
private void SetBounds()
{
    // Move the start bound forward past leading delimiters.
    for (; startBounds < length; startBounds++)
    {
        if (!WordDelimiterFilter.IsSubwordDelim(CharType(text[startBounds])))
        {
            break;
        }
    }

    // Move the end bound backward past trailing delimiters, never crossing the start bound.
    for (; endBounds > startBounds; endBounds--)
    {
        if (!WordDelimiterFilter.IsSubwordDelim(CharType(text[endBounds - 1])))
        {
            break;
        }
    }

    // Note the possessive; the flag is never cleared by this method.
    if (EndsWithPossessive(endBounds))
    {
        hasFinalPossessive = true;
    }

    current = startBounds;
}
/// <summary>
/// Moves the iterator to the next subword in the text, updating <c>current</c> (start) and
/// <c>end</c> (one past the last character) of that subword.
/// </summary>
/// <returns> the end index of the next subword, or <see cref="DONE"/> if all subwords have been returned </returns>
internal int Next()
{
    // Resume where the previous subword stopped.
    current = end;
    if (current == DONE)
    {
        return DONE;
    }

    // Step over the "'s" that was flagged after the previous subword.
    if (skipPossessive)
    {
        skipPossessive = false;
        current += 2;
    }

    // Skip delimiters; previousType ends up holding the type of the first subword character.
    int previousType = 0;
    for (; current < endBounds; current++)
    {
        previousType = CharType(text[current]);
        if (!WordDelimiterFilter.IsSubwordDelim(previousType))
        {
            break;
        }
    }

    // Nothing but delimiters remained.
    if (current >= endBounds)
    {
        end = DONE;
        return end;
    }

    // Extend the subword until a type transition counts as a break.
    end = current + 1;
    while (end < endBounds)
    {
        int currentType = CharType(text[end]);
        if (IsBreak(previousType, currentType))
        {
            break;
        }

        previousType = currentType;
        end++;
    }

    // If a possessive suffix directly follows this subword, arrange to skip it on the next call.
    bool hasRoomForSuffix = end < endBounds - 1;
    if (hasRoomForSuffix && EndsWithPossessive(end + 2))
    {
        skipPossessive = true;
    }

    return end;
}
/// <summary>
/// Creates a concatenation that keeps a reference to the given <see cref="WordDelimiterFilter"/>.
/// </summary>
/// <param name="outerInstance"> Filter instance stored in the <c>outerInstance</c> field </param>
public WordDelimiterConcatenation(WordDelimiterFilter outerInstance)
    => this.outerInstance = outerInstance;
/// <summary>
/// Creates a sorter that keeps a reference to the given <see cref="WordDelimiterFilter"/>.
/// </summary>
/// <param name="outerInstance"> Filter instance stored in the <c>outerInstance</c> field </param>
public OffsetSorter(WordDelimiterFilter outerInstance)
    => this.outerInstance = outerInstance;