/// <summary>
        /// Final touch of a shingle token before it is passed on to the consumer from method {@link #next(org.apache.lucene.analysis.Token)}.
        /// 
        /// Calculates and sets type, flags, position increment, start/end offsets and weight.
        /// </summary>
        /// <param name="token">Shingle Token</param>
        /// <param name="shingle">Tokens used to produce the shingle token.</param>
        /// <param name="currentPermutationStartOffset">Start offset in parameter currentPermutationTokens</param>
        /// <param name="currentPermutationRows">index to Matrix.Column.Row from the position of tokens in parameter currentPermutationTokens</param>
        /// <param name="currentPermuationTokens">tokens of the current permutation of rows in the matrix. </param>
        public void UpdateToken(Token token, List<Token> shingle, int currentPermutationStartOffset, List<Row> currentPermutationRows, List<Token> currentPermuationTokens)
        {
            token.SetType(typeof(ShingleMatrixFilter).Name);
            token.SetFlags(0);
            token.SetPositionIncrement(1);
            token.SetStartOffset((shingle[0]).StartOffset());
            token.SetEndOffset(shingle[shingle.Count - 1].EndOffset());

            _settingsCodec.SetWeight(
                token, 
                CalculateShingleWeight(token, shingle, currentPermutationStartOffset, currentPermutationRows, currentPermuationTokens)
                );
        }
Ejemplo n.º 2
0
        public override Token Next(/* in */ Token reusableToken)
        {
            System.Diagnostics.Debug.Assert(reusableToken != null);
            reusableToken.Clear();
            int length = 0;
            int start  = bufferIndex;

            char[] buffer = reusableToken.TermBuffer();
            while (true)
            {
                if (bufferIndex >= dataLen)
                {
                    offset += dataLen;
                    dataLen = input is Lucene.Net.Index.ReusableStringReader ? ((Lucene.Net.Index.ReusableStringReader)input).Read(ioBuffer) : input.Read((System.Char[])ioBuffer, 0, ioBuffer.Length);
                    if (dataLen <= 0)
                    {
                        if (length > 0)
                        {
                            break;
                        }
                        else
                        {
                            return(null);
                        }
                    }
                    bufferIndex = 0;
                }

                char c = ioBuffer[bufferIndex++];

                if (IsTokenChar(c))
                {
                    // if it's a token char

                    if (length == 0)
                    {
                        // start of token
                        start = offset + bufferIndex - 1;
                    }
                    else if (length == buffer.Length)
                    {
                        buffer = reusableToken.ResizeTermBuffer(1 + length);
                    }

                    buffer[length++] = Normalize(c); // buffer it, normalized

                    if (length == MAX_WORD_LEN)
                    {
                        // buffer overflow!
                        break;
                    }
                }
                else if (length > 0)
                {
                    // at non-Letter w/ chars
                    break; // return 'em
                }
            }

            reusableToken.SetTermLength(length);
            reusableToken.SetStartOffset(start);
            reusableToken.SetEndOffset(start + length);
            return(reusableToken);
        }
 /// <summary>
 /// The default implementation adds last prefix token end offset to the suffix token start and end offsets.
 /// </summary>
 /// <param name="suffixToken">a token from the suffix stream</param>
 /// <param name="lastPrefixToken">the last token from the prefix stream</param>
 /// <returns>consumer token</returns>
 public virtual Token UpdateSuffixToken(Token suffixToken, Token lastPrefixToken)
 {
     suffixToken.SetStartOffset(lastPrefixToken.EndOffset() + suffixToken.StartOffset());
     suffixToken.SetEndOffset(lastPrefixToken.EndOffset() + suffixToken.EndOffset());
     return suffixToken;
 }
Ejemplo n.º 4
0
        public override Token Next(/* in */ Token reusableToken)
        {
            System.Diagnostics.Debug.Assert(reusableToken != null);
            reusableToken.Clear();
            int length = 0;
            int start = bufferIndex;
            char[] buffer = reusableToken.TermBuffer();
            while (true)
            {

                if (bufferIndex >= dataLen)
                {
                    offset += dataLen;
                    dataLen = input is Lucene.Net.Index.ReusableStringReader ? ((Lucene.Net.Index.ReusableStringReader) input).Read(ioBuffer) : input.Read((System.Char[]) ioBuffer, 0, ioBuffer.Length);
                    if (dataLen <= 0)
                    {
                        if (length > 0)
                            break;
                        else
                            return null;
                    }
                    bufferIndex = 0;
                }

                char c = ioBuffer[bufferIndex++];

                if (IsTokenChar(c))
                {
                    // if it's a token char

                    if (length == 0)
                        // start of token
                        start = offset + bufferIndex - 1;
                    else if (length == buffer.Length)
                        buffer = reusableToken.ResizeTermBuffer(1 + length);

                    buffer[length++] = Normalize(c); // buffer it, normalized

                    if (length == MAX_WORD_LEN)
                        // buffer overflow!
                        break;
                }
                else if (length > 0)
                    // at non-Letter w/ chars
                    break; // return 'em
            }

            reusableToken.SetTermLength(length);
            reusableToken.SetStartOffset(start);
            reusableToken.SetEndOffset(start + length);
            return reusableToken;
        }
 public Token UpdateSuffixToken(Token suffixToken, Token lastInputToken)
 {
     suffixToken.SetStartOffset(lastInputToken.EndOffset() + suffixToken.StartOffset());
     suffixToken.SetEndOffset(lastInputToken.EndOffset() + suffixToken.EndOffset());
     return suffixToken;
 }
 public Token UpdateInputToken(Token inputToken, Token lastPrefixToken)
 {
     inputToken.SetStartOffset(lastPrefixToken.EndOffset() + inputToken.StartOffset());
     inputToken.SetEndOffset(lastPrefixToken.EndOffset() + inputToken.EndOffset());
     return inputToken;
 }