static private isSubwordDelim ( int type ) : bool | ||
type | int | Word type to check |
return | bool |
/// <summary>
/// Shrinks the working window [startBounds, endBounds) so that it neither starts nor ends
/// on a subword delimiter, then positions <c>current</c> at the new start.
/// A trailing possessive is only recorded in <c>hasFinalPossessive</c>; the bounds are
/// not adjusted for it here.
/// </summary>
private void setBounds()
{
    // Advance the left edge past every leading delimiter character.
    for (; startBounds < length; startBounds++)
    {
        if (!WordDelimiterFilter.isSubwordDelim(charType(text[startBounds])))
        {
            break;
        }
    }

    // Pull the right edge back over every trailing delimiter character,
    // never crossing the left edge.
    for (; endBounds > startBounds; endBounds--)
    {
        if (!WordDelimiterFilter.isSubwordDelim(charType(text[endBounds - 1])))
        {
            break;
        }
    }

    // Only note the possessive; the flag is never cleared by this method.
    if (endsWithPossessive(endBounds))
    {
        hasFinalPossessive = true;
    }

    current = startBounds;
}
/// <summary>
/// Moves the iterator to the following subword. On return, <c>current</c> is the start
/// of that subword and <c>end</c> is one past its last character.
/// </summary>
/// <returns>
/// The end index of the next subword, or <see cref="DONE"/> once every subword has
/// already been returned.
/// </returns>
internal int next()
{
    // The new subword starts where the previous one stopped.
    current = end;
    if (current == DONE)
    {
        return DONE;
    }

    // A possessive flagged by the previous call is stepped over (two characters).
    if (skipPossessive)
    {
        skipPossessive = false;
        current += 2;
    }

    // Skip delimiter characters, remembering the type of the last character inspected
    // so that it seeds the break detection below.
    int previousType = 0;
    while (current < endBounds)
    {
        previousType = charType(text[current]);
        if (!WordDelimiterFilter.isSubwordDelim(previousType))
        {
            break;
        }
        current++;
    }

    // Nothing but delimiters were left: iteration is finished.
    if (current >= endBounds)
    {
        end = DONE;
        return DONE;
    }

    // Extend the subword until a type transition counts as a break or the bounds end.
    end = current + 1;
    while (end < endBounds)
    {
        int currentType = charType(text[end]);
        if (isBreak(previousType, currentType))
        {
            break;
        }
        previousType = currentType;
        end++;
    }

    // If at least two characters remain and they form a possessive suffix,
    // arrange for the next call to skip them.
    if (end < endBounds - 1 && endsWithPossessive(end + 2))
    {
        skipPossessive = true;
    }

    return end;
}
/// <summary>
/// Tests whether the two characters immediately before <paramref name="pos"/> form an
/// English possessive suffix (apostrophe followed by 's' or 'S') that should be removed.
/// </summary>
/// <param name="pos"> Position in the text just past the candidate possessive suffix </param>
/// <returns>
/// <c>true</c> if possessive stemming is enabled, the suffix is preceded by an alphabetic
/// character, and the suffix is followed by the end bound or a subword delimiter;
/// <c>false</c> otherwise.
/// </returns>
private bool endsWithPossessive(int pos)
{
    // Feature switched off, or not enough room for "<letter>'s" before pos.
    if (!stemEnglishPossessive || pos <= 2)
    {
        return false;
    }

    // The suffix must be an apostrophe followed by 's' or 'S'.
    if (text[pos - 2] != '\'')
    {
        return false;
    }
    var suffixLetter = text[pos - 1];
    if (suffixLetter != 's' && suffixLetter != 'S')
    {
        return false;
    }

    // The character in front of the apostrophe must be alphabetic.
    if (!WordDelimiterFilter.isAlpha(charType(text[pos - 3])))
    {
        return false;
    }

    // The suffix must close the subword: either nothing follows within the bounds
    // (text[pos] is not read in that case) or the next character is a delimiter.
    return pos == endBounds || WordDelimiterFilter.isSubwordDelim(charType(text[pos]));
}