/// <summary>
/// Final touch of a shingle token before it is passed on to the consumer
/// from <c>Next(Token)</c>: sets type, flags, position increment,
/// start/end offsets and weight.
/// </summary>
/// <param name="token">Shingle Token</param>
/// <param name="shingle">Tokens used to produce the shingle token.</param>
/// <param name="currentPermutationStartOffset">Start offset in parameter currentPermutationTokens</param>
/// <param name="currentPermutationRows">index to Matrix.Column.Row from the position of tokens in parameter currentPermutationTokens</param>
/// <param name="currentPermuationTokens">tokens of the current permutation of rows in the matrix.</param>
public void UpdateToken(Token token, List<Token> shingle, int currentPermutationStartOffset, List<Row> currentPermutationRows, List<Token> currentPermuationTokens)
{
    // The shingle's type is the filter's own type name.
    token.SetType(typeof(ShingleMatrixFilter).Name);
    token.SetFlags(0);
    token.SetPositionIncrement(1);

    // The shingle spans from the first constituent token to the last one.
    Token firstToken = shingle[0];
    Token lastToken = shingle[shingle.Count - 1];
    token.SetStartOffset(firstToken.StartOffset());
    token.SetEndOffset(lastToken.EndOffset());

    var weight = CalculateShingleWeight(token, shingle, currentPermutationStartOffset, currentPermutationRows, currentPermuationTokens);
    _settingsCodec.SetWeight(token, weight);
}
/// <summary>
/// Returns the next token from the underlying reader, reusing the supplied
/// token instance. Characters for which <c>IsTokenChar</c> is true are
/// accumulated (after <c>Normalize</c>); any other character, end of input,
/// or reaching <c>MAX_WORD_LEN</c> terminates the token.
/// Returns null when the input is exhausted and no token is pending.
/// </summary>
public override Token Next(/* in */ Token reusableToken)
{
    System.Diagnostics.Debug.Assert(reusableToken != null);
    reusableToken.Clear();

    int tokenLength = 0;
    int tokenStart = bufferIndex;
    char[] termBuffer = reusableToken.TermBuffer();

    while (true)
    {
        if (bufferIndex >= dataLen)
        {
            // I/O buffer consumed: record how far we have read, then refill.
            offset += dataLen;
            if (input is Lucene.Net.Index.ReusableStringReader)
            {
                dataLen = ((Lucene.Net.Index.ReusableStringReader) input).Read(ioBuffer);
            }
            else
            {
                dataLen = input.Read((System.Char[]) ioBuffer, 0, ioBuffer.Length);
            }
            if (dataLen <= 0)
            {
                if (tokenLength == 0)
                {
                    return null; // end of input, nothing accumulated
                }
                break; // flush the token accumulated so far
            }
            bufferIndex = 0;
        }

        char ch = ioBuffer[bufferIndex++];
        if (!IsTokenChar(ch))
        {
            if (tokenLength > 0)
            {
                break; // a delimiter ends the pending token
            }
            continue; // skip leading delimiters
        }

        if (tokenLength == 0)
        {
            // First char of the token: remember its absolute start offset.
            tokenStart = offset + bufferIndex - 1;
        }
        else if (tokenLength == termBuffer.Length)
        {
            termBuffer = reusableToken.ResizeTermBuffer(1 + tokenLength);
        }
        termBuffer[tokenLength++] = Normalize(ch); // buffer it, normalized
        if (tokenLength == MAX_WORD_LEN)
        {
            break; // term length limit reached — emit what we have
        }
    }

    reusableToken.SetTermLength(tokenLength);
    reusableToken.SetStartOffset(tokenStart);
    reusableToken.SetEndOffset(tokenStart + tokenLength);
    return reusableToken;
}
/// <summary>
/// The default implementation adds last prefix token end offset to the suffix token start and end offsets.
/// </summary>
/// <param name="suffixToken">a token from the suffix stream</param>
/// <param name="lastPrefixToken">the last token from the prefix stream</param>
/// <returns>consumer token</returns>
public virtual Token UpdateSuffixToken(Token suffixToken, Token lastPrefixToken)
{
    // Shift the suffix token's offsets past the end of the prefix stream.
    int prefixEnd = lastPrefixToken.EndOffset();
    suffixToken.SetStartOffset(prefixEnd + suffixToken.StartOffset());
    suffixToken.SetEndOffset(prefixEnd + suffixToken.EndOffset());
    return suffixToken;
}
/// <summary>
/// Returns the next token from the input reader, filling the supplied
/// reusable token. Consecutive characters accepted by <c>IsTokenChar</c>
/// form one token (normalized via <c>Normalize</c>); the token is cut at a
/// delimiter, at end of input, or at <c>MAX_WORD_LEN</c> characters.
/// Returns null once the input is exhausted with no pending token.
/// </summary>
public override Token Next(/* in */ Token reusableToken)
{
    System.Diagnostics.Debug.Assert(reusableToken != null);
    reusableToken.Clear();

    int len = 0;
    int start = bufferIndex;
    char[] buf = reusableToken.TermBuffer();

    while (true)
    {
        if (bufferIndex >= dataLen)
        {
            // Buffer exhausted: advance the cumulative offset and refill.
            offset += dataLen;
            dataLen = input is Lucene.Net.Index.ReusableStringReader
                ? ((Lucene.Net.Index.ReusableStringReader) input).Read(ioBuffer)
                : input.Read((System.Char[]) ioBuffer, 0, ioBuffer.Length);
            if (dataLen <= 0)
            {
                if (len == 0)
                {
                    return null; // exhausted input with nothing buffered
                }
                break;
            }
            bufferIndex = 0;
        }

        char current = ioBuffer[bufferIndex++];
        if (IsTokenChar(current))
        {
            if (len == 0)
            {
                // Remember where this token begins in the overall stream.
                start = offset + bufferIndex - 1;
            }
            else if (len == buf.Length)
            {
                buf = reusableToken.ResizeTermBuffer(1 + len);
            }
            buf[len++] = Normalize(current); // buffer it, normalized
            if (len == MAX_WORD_LEN)
            {
                break; // term buffer limit reached
            }
        }
        else if (len > 0)
        {
            break; // delimiter ends the current token
        }
    }

    reusableToken.SetTermLength(len);
    reusableToken.SetStartOffset(start);
    reusableToken.SetEndOffset(start + len);
    return reusableToken;
}
/// <summary>
/// Shifts the suffix token's start and end offsets by the end offset of the
/// last input token, so its offsets are relative to the combined stream.
/// </summary>
/// <param name="suffixToken">a token from the suffix stream</param>
/// <param name="lastInputToken">the last token from the input stream</param>
/// <returns>the updated suffix token</returns>
public Token UpdateSuffixToken(Token suffixToken, Token lastInputToken)
{
    int inputEnd = lastInputToken.EndOffset();
    suffixToken.SetStartOffset(inputEnd + suffixToken.StartOffset());
    suffixToken.SetEndOffset(inputEnd + suffixToken.EndOffset());
    return suffixToken;
}
/// <summary>
/// Shifts the input token's start and end offsets by the end offset of the
/// last prefix token, so its offsets are relative to the combined stream.
/// </summary>
/// <param name="inputToken">a token from the input stream</param>
/// <param name="lastPrefixToken">the last token from the prefix stream</param>
/// <returns>the updated input token</returns>
public Token UpdateInputToken(Token inputToken, Token lastPrefixToken)
{
    int prefixEnd = lastPrefixToken.EndOffset();
    inputToken.SetStartOffset(prefixEnd + inputToken.StartOffset());
    inputToken.SetEndOffset(prefixEnd + inputToken.EndOffset());
    return inputToken;
}