/// <summary>
/// Returns the next, stemmed, input Token.
/// </summary>
/// <returns>
/// The token read from <c>input</c>, with its term replaced by the stemmed form
/// when stemming changes it; the unchanged token otherwise; or <c>null</c> when
/// <c>input</c> has no more tokens.
/// </returns>
/// <exception cref="System.IO.IOException">
/// Declared by the original documentation; presumably propagated from the
/// underlying input stream (not verifiable from this method alone).
/// </exception>
public override Token Next()
{
    Token token = input.Next();
    if (token == null)
    {
        return null;
    }

    string original = token.TermText();
    string stemmed = stemmer.stem(original);

    // Value comparison: when the stemmer leaves the term untouched the
    // incoming token is passed through as-is, without allocating a new one.
    if (stemmed.Equals(original))
    {
        return token;
    }

    Token result = new Token(stemmed, token.StartOffset(), token.EndOffset(), token.Type());
    // Carry the position increment over so that replacing the token does not
    // reset a non-default increment set by an upstream filter.
    result.SetPositionIncrement(token.GetPositionIncrement());
    return result;
}
/// <summary>
/// Returns the next token in the stream, or null at EOS.
/// <para>Tokens typed <c>APOSTROPHE_TYPE</c> lose a trailing <c>'s</c> / <c>'S</c>.</para>
/// <para>Tokens typed <c>ACRONYM_TYPE</c> have every dot removed.</para>
/// </summary>
/// <param name="reusableToken">Token instance handed to <c>input</c> for reuse; must not be null.</param>
/// <returns>The (possibly shortened) token produced by <c>input</c>, or null at EOS.</returns>
public override Token Next(/* in */ Token reusableToken)
{
    System.Diagnostics.Debug.Assert(reusableToken != null);

    Token current = input.Next(reusableToken);
    if (current == null)
    {
        return null;
    }

    char[] chars = current.TermBuffer();
    int length = current.TermLength();
    System.String kind = current.Type();

    // The length check must precede the indexing so short terms are never read out of range.
    bool hasPossessiveSuffix =
        kind == APOSTROPHE_TYPE
        && length >= 2
        && chars[length - 2] == '\''
        && (chars[length - 1] == 's' || chars[length - 1] == 'S');

    if (hasPossessiveSuffix)
    {
        // Drop the apostrophe and the following 's'.
        current.SetTermLength(length - 2);
    }
    else if (kind == ACRONYM_TYPE)
    {
        // Compact the buffer in place, skipping every dot.
        int write = 0;
        int read = 0;
        while (read < length)
        {
            char ch = chars[read];
            read++;
            if (ch == '.')
            {
                continue;
            }
            chars[write] = ch;
            write++;
        }
        current.SetTermLength(write);
    }

    return current;
}
/// <summary>
/// Returns the next token in the stream, or null at EOS.
/// <para>Strips a trailing <c>'s</c> / <c>'S</c> from tokens typed <c>APOSTROPHE_TYPE</c>.</para>
/// <para>Strips all dots from tokens typed <c>ACRONYM_TYPE</c>.</para>
/// </summary>
/// <param name="result">Token instance passed through to <c>input.Next</c>.</param>
/// <returns>The token returned by <c>input</c> after filtering, or null at EOS.</returns>
public override Token Next(Token result)
{
    Token fetched = input.Next(result);
    if (fetched == null)
    {
        return null;
    }

    char[] term = fetched.TermBuffer();
    int termLength = fetched.TermLength();
    System.String tokenType = fetched.Type();

    if (tokenType == APOSTROPHE_TYPE
        && termLength >= 2
        && term[termLength - 2] == '\''
        && (term[termLength - 1] == 's' || term[termLength - 1] == 'S'))
    {
        // Possessive suffix: shorten the term by the two trailing characters.
        int shortened = termLength - 2;
        fetched.SetTermLength(shortened);
        return fetched;
    }

    if (tokenType == ACRONYM_TYPE)
    {
        // Shift every non-dot character left over the dots, then truncate.
        int keep = 0;
        for (int pos = 0; pos < termLength; pos++)
        {
            char current = term[pos];
            if (current == '.')
            {
                continue;
            }
            term[keep] = current;
            keep++;
        }
        fetched.SetTermLength(keep);
    }

    return fetched;
}
/// <summary>
/// Returns the next input Token, after being stemmed.
/// </summary>
/// <returns>
/// A new token holding the stemmer's output, with the start offset, end offset,
/// type and position increment of the token read from <c>input</c>; or
/// <c>null</c> when <c>input</c> has no more tokens.
/// </returns>
/// <exception cref="System.SystemException">
/// Thrown when invoking <c>stemMethod</c> on <c>stemmer</c> fails. The message is
/// the string form of the underlying exception, which is also available as
/// <c>InnerException</c>.
/// </exception>
public override Token Next()
{
    Token token = input.Next();
    if (token == null)
    {
        return null;
    }

    stemmer.SetCurrent(token.TermText());
    try
    {
        // The stem method is held as a reflected member and takes no arguments.
        stemMethod.Invoke(stemmer, (System.Object[])EMPTY_ARGS);
    }
    catch (System.Exception e)
    {
        // Same exception type and message as before, but the original exception
        // is now chained so its stack trace is not lost.
        throw new System.SystemException(e.ToString(), e);
    }

    Token newToken = new Token(stemmer.GetCurrent(), token.StartOffset(), token.EndOffset(), token.Type());
    newToken.SetPositionIncrement(token.GetPositionIncrement());
    return newToken;
}