/// <returns> Returns the next token in the stream, or null at EOS
/// </returns>
public override Token Next()
{
    if ((token = input.Next()) == null)
    {
        return null;
    }
    else
    {
        System.String s = stemmer.Stem(token.TermText());
        if (!s.Equals(token.TermText()))
        {
            return new Token(s, token.StartOffset(), token.EndOffset(), token.Type());
        }
        return token;
    }
}
// Tokenizes the fields of a document into Postings.
private void InvertDocument(Document doc)
{
    foreach (Field field in doc.Fields())
    {
        System.String fieldName = field.Name();
        int fieldNumber = fieldInfos.FieldNumber(fieldName);

        int length = fieldLengths[fieldNumber];     // length of Field
        int position = fieldPositions[fieldNumber]; // position in Field

        if (field.IsIndexed())
        {
            if (!field.IsTokenized())
            {
                // un-tokenized Field
                AddPosition(fieldName, field.StringValue(), position++);
                length++;
            }
            else
            {
                System.IO.TextReader reader; // find or make Reader
                if (field.ReaderValue() != null)
                {
                    reader = field.ReaderValue();
                }
                else if (field.StringValue() != null)
                {
                    reader = new System.IO.StringReader(field.StringValue());
                }
                else
                {
                    throw new System.ArgumentException("Field must have either String or Reader value");
                }

                // Tokenize Field and add to postingTable
                TokenStream stream = analyzer.TokenStream(fieldName, reader);
                try
                {
                    for (Token t = stream.Next(); t != null; t = stream.Next())
                    {
                        position += (t.GetPositionIncrement() - 1);
                        AddPosition(fieldName, t.TermText(), position++);
                        if (++length > maxFieldLength)
                        {
                            break;
                        }
                    }
                }
                finally
                {
                    stream.Close();
                }
            }

            fieldLengths[fieldNumber] = length;     // save Field length
            fieldPositions[fieldNumber] = position; // save Field position
            fieldBoosts[fieldNumber] *= field.GetBoost();
        }
    }
}
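// A minimal sketch of the same position bookkeeping outside DocumentWriter, assuming
// Lucene.Net.Analysis.Standard.StandardAnalyzer is available; the method name
// DumpPositions is hypothetical and not part of the library. It walks a TokenStream
// exactly as InvertDocument does, so a token whose position increment is greater than
// one (for example, after tokens were removed upstream) shows up as a gap in the
// printed positions.
public static void DumpPositions(System.String text)
{
    Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
    Lucene.Net.Analysis.TokenStream stream = analyzer.TokenStream("body", new System.IO.StringReader(text));
    int position = 0;
    try
    {
        for (Lucene.Net.Analysis.Token t = stream.Next(); t != null; t = stream.Next())
        {
            position += (t.GetPositionIncrement() - 1); // honor gaps reported by filters
            System.Console.WriteLine(t.TermText() + " @ " + position);
            position++;
        }
    }
    finally
    {
        stream.Close();
    }
}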
/// <returns> Returns the next token in the stream, or null at EOS
/// </returns>
public override Token Next()
{
    if ((token = input.Next()) == null)
    {
        return null;
    }
    else
    {
        System.String s = stemmer.Stem(token.TermText());
        if (!s.Equals(token.TermText()))
        {
            return new Token(s, token.StartOffset(), token.EndOffset(), token.Type());
        }
        return token;
    }
}
/// <returns> Returns the next token in the stream, or null at EOS
/// </returns>
public override Token Next()
{
    if ((token = input.Next()) == null)
    {
        return null;
    }
    // Check the exclusion table
    else if (exclusionSet != null && exclusionSet.Contains(token.TermText()))
    {
        return token;
    }
    else
    {
        System.String s = stemmer.Stem(token.TermText());
        // If not stemmed, don't waste the time creating a new token
        if (!s.Equals(token.TermText()))
        {
            return new Token(s, token.StartOffset(), token.EndOffset(), token.Type());
        }
        return token;
    }
}
/// <returns> Returns the next token in the stream, or null at EOS
/// </returns>
public override Token Next()
{
    if ((token = input.Next()) == null)
    {
        return null;
    }
    // Check the exclusion table
    else if (exclusionSet != null && exclusionSet.Contains(token.TermText()))
    {
        return token;
    }
    else
    {
        System.String s = stemmer.Stem(token.TermText());
        // If not stemmed, don't waste the time creating a new token
        if (!s.Equals(token.TermText()))
        {
            return new Token(s, token.StartOffset(), token.EndOffset(), token.Type());
        }
        return token;
    }
}
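// A minimal sketch of how a stemming TokenFilter like the ones above is typically
// composed, assuming Lucene.Net.Analysis.LowerCaseTokenizer is available; MyStemFilter
// is a hypothetical name standing in for whichever concrete stem filter declares the
// Next() override above. The filter wraps another TokenStream, so stemming runs last in
// the chain and only sees already lowercased terms.
public static Lucene.Net.Analysis.TokenStream BuildStemmingChain(System.IO.TextReader reader)
{
    Lucene.Net.Analysis.TokenStream result = new Lucene.Net.Analysis.LowerCaseTokenizer(reader);
    result = new MyStemFilter(result); // hypothetical stem filter wrapping the tokenizer
    return result;
}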
public QueryTermVector(System.String queryString, Analyzer analyzer)
{
    if (analyzer != null)
    {
        TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(queryString));
        if (stream != null)
        {
            Token next = null;
            System.Collections.ArrayList terms = new System.Collections.ArrayList();
            try
            {
                while ((next = stream.Next()) != null)
                {
                    terms.Add(next.TermText());
                }
                ProcessTerms((System.String[]) terms.ToArray(typeof(System.String)));
            }
            catch (System.IO.IOException)
            {
            }
        }
    }
}
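// A minimal usage sketch, assuming QueryTermVector exposes the TermFreqVector accessors
// GetTerms() and GetTermFrequencies() and that Lucene.Net.Analysis.WhitespaceAnalyzer is
// available. ProcessTerms folds duplicate query terms together, so each distinct term is
// reported once together with its frequency in the query string.
QueryTermVector vector = new QueryTermVector("foo bar foo", new Lucene.Net.Analysis.WhitespaceAnalyzer());
System.String[] terms = vector.GetTerms();         // distinct terms from the query
int[] freqs = vector.GetTermFrequencies();         // frequency of each corresponding term
for (int i = 0; i < terms.Length; i++)
{
    System.Console.WriteLine(terms[i] + ": " + freqs[i]);
}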
public override Token Next()
{
    Token t = input.Next();
    if (t == null)
    {
        return null;
    }

    System.String txt = t.TermText();

    char[] chArray = txt.ToCharArray();
    for (int i = 0; i < chArray.Length; i++)
    {
        chArray[i] = RussianCharsets.ToLowerCase(chArray[i], charset);
    }

    System.String newTxt = new System.String(chArray);
    // create new token
    Token newToken = new Token(newTxt, t.StartOffset(), t.EndOffset());
    return newToken;
}
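// A minimal chaining sketch for the filter above, assuming the contrib classes
// RussianLetterTokenizer and RussianCharsets (with a predefined table such as
// RussianCharsets.UnicodeRussian) are available; those member names and constructor
// signatures are assumptions, not confirmed API. Lowercasing is charset-aware, so the
// same filter handles KOI8 or CP1251 input as long as the matching table is passed to
// both the tokenizer and the filter.
public static Lucene.Net.Analysis.TokenStream BuildRussianChain(System.IO.TextReader reader)
{
    char[] charset = RussianCharsets.UnicodeRussian;             // assumed predefined charset table
    Lucene.Net.Analysis.TokenStream stream = new RussianLetterTokenizer(reader, charset);
    stream = new RussianLowerCaseFilter(stream, charset);        // the filter whose Next() is shown above
    return stream;
}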