예제 #1
0
        // Exercises each Token constructor overload used below and checks the
        // term text, type, flags and ToString() output that results.
        public virtual void TestCtor()
        {
            char[] content = "hello".ToCharArray();

            // Default constructor.
            Token t = new Token();
            t.SetTermBuffer(content, 0, content.Length);
            // The point of this check is that the token holds its own array
            // rather than the caller's, so compare references. A value
            // comparison between two arrays would depend on how the test
            // framework compares array contents and lengths.
            Assert.AreNotSame(content, t.TermBuffer());
            Assert.AreEqual("hello", t.Term);
            Assert.AreEqual("word", t.Type);
            Assert.AreEqual(0, t.Flags);

            // (start, end) constructor.
            t = new Token(6, 22);
            t.SetTermBuffer(content, 0, content.Length);
            Assert.AreEqual("hello", t.Term);
            Assert.AreEqual("(hello,6,22)", t.ToString());
            Assert.AreEqual("word", t.Type);
            Assert.AreEqual(0, t.Flags);

            // (start, end, flags) constructor.
            t = new Token(6, 22, 7);
            t.SetTermBuffer(content, 0, content.Length);
            Assert.AreEqual("hello", t.Term);
            Assert.AreEqual("(hello,6,22)", t.ToString());
            Assert.AreEqual(7, t.Flags);

            // (start, end, type) constructor.
            t = new Token(6, 22, "junk");
            t.SetTermBuffer(content, 0, content.Length);
            Assert.AreEqual("hello", t.Term);
            Assert.AreEqual("(hello,6,22,type=junk)", t.ToString());
            Assert.AreEqual(0, t.Flags);
        }
예제 #2
0
 // Exercises each Token constructor overload used below and checks the
 // term text, type, flags and ToString() output that results.
 public virtual void TestCtor()
 {
     char[] content = "hello".ToCharArray();

     // Default constructor.
     Token t = new Token();
     t.SetTermBuffer(content, 0, content.Length);
     // The point of this check is that the token holds its own array rather
     // than the caller's, so compare references. A value comparison between
     // two arrays would depend on how the test framework compares arrays.
     Assert.AreNotSame(content, t.TermBuffer());
     Assert.AreEqual("hello", t.Term);
     Assert.AreEqual("word", t.Type);
     Assert.AreEqual(0, t.Flags);

     // (start, end) constructor.
     t = new Token(6, 22);
     t.SetTermBuffer(content, 0, content.Length);
     Assert.AreEqual("hello", t.Term);
     Assert.AreEqual("(hello,6,22)", t.ToString());
     Assert.AreEqual("word", t.Type);
     Assert.AreEqual(0, t.Flags);

     // (start, end, flags) constructor.
     t = new Token(6, 22, 7);
     t.SetTermBuffer(content, 0, content.Length);
     Assert.AreEqual("hello", t.Term);
     Assert.AreEqual("(hello,6,22)", t.ToString());
     Assert.AreEqual(7, t.Flags);

     // (start, end, type) constructor.
     t = new Token(6, 22, "junk");
     t.SetTermBuffer(content, 0, content.Length);
     Assert.AreEqual("hello", t.Term);
     Assert.AreEqual("(hello,6,22,type=junk)", t.ToString());
     Assert.AreEqual(0, t.Flags);
 }
 /// <summary>
 /// On the first call, reads everything available from <c>input</c> into the
 /// term buffer of <c>reusableToken</c> and returns it as one token. Every
 /// later call returns null.
 /// </summary>
 /// <param name="reusableToken">Token that is cleared, filled and returned; must not be null.</param>
 /// <returns><c>reusableToken</c> on the first call, null afterwards.</returns>
 public override Token Next(/* in */ Token reusableToken)
 {
     System.Diagnostics.Debug.Assert(reusableToken != null);
     if (done)
     {
         return null;
     }
     done = true;

     reusableToken.Clear();
     char[] termChars = reusableToken.TermBuffer();
     int filled = 0;
     int read;
     while ((read = input.Read(termChars, filled, termChars.Length - filled)) > 0)
     {
         filled += read;
         // Buffer is full: ask the token for a larger one before reading on.
         if (filled == termChars.Length)
         {
             termChars = reusableToken.ResizeTermBuffer(1 + termChars.Length);
         }
     }
     reusableToken.SetTermLength(filled);
     return reusableToken;
 }
예제 #4
0
 /// <summary>
 /// Fetches the next token from <c>input</c>. If any of its term characters
 /// lies in U+00C0..U+0178, the term is replaced with the result that
 /// <c>RemoveAccents</c> leaves in <c>output</c>/<c>outputPos</c>.
 /// </summary>
 /// <param name="result">Token handed to <c>input.Next</c> for reuse.</param>
 /// <returns>The (possibly rewritten) token, or null when <c>input</c> is exhausted.</returns>
 public override Token Next(Token result)
 {
     result = input.Next(result);
     if (result == null)
     {
         return null;
     }

     char[] termChars = result.TermBuffer();
     int termLen = result.TermLength();
     // Tokens without any character in the rewrite range are returned untouched.
     int pos = 0;
     while (pos < termLen)
     {
         char ch = termChars[pos];
         if ('\u00c0' <= ch && ch <= '\u0178')
         {
             RemoveAccents(termChars, termLen);
             result.SetTermBuffer(output, 0, outputPos);
             break;
         }
         pos++;
     }
     return result;
 }
예제 #5
0
 /// <summary>
 /// On the first call, reads everything available from <c>input</c> into the
 /// term buffer of <c>result</c> and returns it as one token. Every later
 /// call returns null.
 /// </summary>
 /// <param name="result">Token that is cleared, filled and returned.</param>
 /// <returns><c>result</c> on the first call, null afterwards.</returns>
 public override Token Next(Token result)
 {
     if (done)
     {
         return null;
     }
     done = true;

     result.Clear();
     char[] termChars = result.TermBuffer();
     int filled = 0;
     for (;;)
     {
         int read = input.Read(termChars, filled, termChars.Length - filled);
         if (read <= 0)
         {
             break;
         }
         filled += read;
         // Buffer is full: ask the token for a larger one before reading on.
         if (filled == termChars.Length)
         {
             termChars = result.ResizeTermBuffer(1 + termChars.Length);
         }
     }
     result.termLength = filled;
     return result;
 }
예제 #6
0
        /// <summary>
        /// Fetches the next token from <c>input</c>. If any of its term characters
        /// lies in U+00C0..U+FB06, the term is replaced with the result that
        /// <c>RemoveAccents</c> leaves in <c>output</c>/<c>outputPos</c>.
        /// </summary>
        /// <param name="reusableToken">Token handed to <c>input.Next</c> for reuse; must not be null.</param>
        /// <returns>The (possibly rewritten) token, or null when <c>input</c> is exhausted.</returns>
        public override Token Next(/* in */ Token reusableToken)
        {
            System.Diagnostics.Debug.Assert(reusableToken != null);

            Token current = input.Next(reusableToken);
            if (current == null)
            {
                return null;
            }

            char[] termChars = current.TermBuffer();
            int termLen = current.TermLength();
            // Tokens without any character in the rewrite range are returned untouched.
            int pos = 0;
            while (pos < termLen)
            {
                char ch = termChars[pos];
                if ('\u00c0' <= ch && ch <= '\ufb06')
                {
                    RemoveAccents(termChars, termLen);
                    current.SetTermBuffer(output, 0, outputPos);
                    break;
                }
                pos++;
            }
            return current;
        }
예제 #7
0
		// Checks that the string-based and char[]-based term accessors of Token
		// stay in sync when they are used interchangeably.
		public virtual void TestMixedStringArray()
		{
			// Assertions take the expected value first so that a failure
			// message reports expected/actual the right way round.
			Token t = new Token("hello", 0, 5);
			Assert.AreEqual("hello", t.TermText());
			Assert.AreEqual(5, t.TermLength());
			Assert.AreEqual("hello", new System.String(t.TermBuffer(), 0, 5));
			t.SetTermText("hello2");
			Assert.AreEqual(6, t.TermLength());
			Assert.AreEqual("hello2", new System.String(t.TermBuffer(), 0, 6));
			t.SetTermBuffer("hello3".ToCharArray(), 0, 6);
			Assert.AreEqual("hello3", t.TermText());

			// Make sure if we get the buffer and change a character
			// that TermText() reflects the change
			char[] buffer = t.TermBuffer();
			buffer[1] = 'o';
			Assert.AreEqual("hollo3", t.TermText());
		}
 /// <summary>
 /// Builds a token from a slice of <c>oriToken</c>'s term buffer and enqueues it
 /// on <c>tokenQueue</c>. Offsets are relative to <c>oriToken.StartOffset</c>.
 /// </summary>
 /// <param name="oriToken">Token whose term buffer and start offset are used.</param>
 /// <param name="termBufferOffset">Index of the first character of the slice.</param>
 /// <param name="termBufferLength">Number of characters in the slice.</param>
 /// <param name="type">Category value; DecimalDigitNumber selects the digit type, anything else the letter type.</param>
 void AddToken(Token oriToken, int termBufferOffset, int termBufferLength, byte type)
 {
     int sliceStart = oriToken.StartOffset + termBufferOffset;
     int sliceEnd = oriToken.StartOffset + termBufferOffset + termBufferLength;
     Token slice = new Token(oriToken.TermBuffer(), termBufferOffset, termBufferLength, sliceStart, sliceEnd);

     bool isDigit = type == (byte)UnicodeCategory.DecimalDigitNumber;
     if (!isDigit)
     {
         slice.Type = Word.TYPE_LETTER;
     }
     else
     {
         slice.Type = Word.TYPE_DIGIT;
     }

     tokenQueue.Enqueue(slice);
 }
예제 #9
0
 /// <summary>
 /// Fetches the next token from <c>input</c> and, when <c>stemmer.Stem</c>
 /// reports a change, replaces its term with the stemmer's result buffer.
 /// </summary>
 /// <param name="result">Token handed to <c>input.Next</c> for reuse.</param>
 /// <returns>The token, or null when <c>input</c> is exhausted.</returns>
 public override Token Next(Token result)
 {
     result = input.Next(result);
     if (result == null)
     {
         return null;
     }

     bool changed = stemmer.Stem(result.TermBuffer(), 0, result.termLength);
     if (changed)
     {
         result.SetTermBuffer(stemmer.GetResultBuffer(), 0, stemmer.GetResultLength());
     }
     return result;
 }
예제 #10
0
 // Requests term buffer sizes 0..1999 and checks after each request that the
 // buffer is at least that large and the term text is still "hello".
 public virtual void TestResize()
 {
     char[] source = "hello".ToCharArray();
     Token token = new Token();
     token.SetTermBuffer(source, 0, source.Length);

     int requested = 0;
     while (requested < 2000)
     {
         token.ResizeTermBuffer(requested);
         Assert.IsTrue(requested <= token.TermBuffer().Length);
         Assert.AreEqual("hello", token.Term);
         requested++;
     }
 }
예제 #11
0
        /// <summary>
        /// Collects the next run of consecutive characters accepted by
        /// <c>IsTokenChar</c> into <c>reusableToken</c>, passing each character
        /// through <c>Normalize</c>. Characters are pulled from <c>input</c>
        /// through <c>ioBuffer</c>, which is refilled whenever it is used up.
        /// </summary>
        /// <param name="reusableToken">Token that is cleared, filled and returned; checked with Debug.Assert to be non-null.</param>
        /// <returns>
        /// <c>reusableToken</c> with its term length, start offset and end offset set,
        /// or null when the input ends before any token character was collected.
        /// </returns>
        public override Token Next(/* in */ Token reusableToken)
        {
            System.Diagnostics.Debug.Assert(reusableToken != null);
            reusableToken.Clear();
            // Number of characters collected for the current token so far.
            int length = 0;
            // Placeholder; overwritten when the first token character is seen.
            int start = bufferIndex;
            char[] buffer = reusableToken.TermBuffer();
            while (true)
            {

                // ioBuffer is used up: account for it in offset and read the next chunk.
                if (bufferIndex >= dataLen)
                {
                    offset += dataLen;
                    dataLen = input is Lucene.Net.Index.ReusableStringReader ? ((Lucene.Net.Index.ReusableStringReader) input).Read(ioBuffer) : input.Read((System.Char[]) ioBuffer, 0, ioBuffer.Length);
                    if (dataLen <= 0)
                    {
                        // Nothing more to read: emit a pending token if there is one.
                        if (length > 0)
                            break;
                        else
                            return null;
                    }
                    bufferIndex = 0;
                }

                char c = ioBuffer[bufferIndex++];

                if (IsTokenChar(c))
                {
                    // if it's a token char

                    if (length == 0)
                        // start of token: offset counts the chunks already consumed,
                        // bufferIndex - 1 is the index of c within the current chunk
                        start = offset + bufferIndex - 1;
                    else if (length == buffer.Length)
                        // term buffer is full: get a larger one before storing c
                        buffer = reusableToken.ResizeTermBuffer(1 + length);

                    buffer[length++] = Normalize(c); // buffer it, normalized

                    if (length == MAX_WORD_LEN)
                        // reached the maximum token length: emit what we have
                        break;
                }
                else if (length > 0)
                    // at a non-token char with chars collected
                    break; // return 'em
            }

            reusableToken.SetTermLength(length);
            reusableToken.SetStartOffset(start);
            reusableToken.SetEndOffset(start + length);
            return reusableToken;
        }
예제 #12
0
        // Requests term buffer sizes 0..1999 and checks after each request that the
        // buffer is at least that large and the term text is still "hello".
        public virtual void TestResize()
        {
            char[] source = "hello".ToCharArray();
            Token token = new Token();
            token.SetTermBuffer(source, 0, source.Length);

            int requested = 0;
            while (requested < 2000)
            {
                token.ResizeTermBuffer(requested);
                Assert.IsTrue(requested <= token.TermBuffer().Length);
                Assert.AreEqual("hello", token.Term);
                requested++;
            }
        }
예제 #13
0
        /// <summary>
        /// Collects the next run of consecutive characters accepted by
        /// <c>IsTokenChar</c> into <c>token</c>, passing each character through
        /// <c>Normalize</c>. Characters are pulled from <c>input</c> through
        /// <c>ioBuffer</c>, which is refilled whenever it is used up.
        /// </summary>
        /// <param name="token">Token that is cleared, filled and returned.</param>
        /// <returns>
        /// <c>token</c> with its termLength, startOffset and endOffset fields set,
        /// or null when the input ends before any token character was collected.
        /// </returns>
        public override Token Next(Token token)
        {
            token.Clear();
            // Number of characters collected for the current token so far.
            int length = 0;
            // Placeholder; overwritten when the first token character is seen.
            int start = bufferIndex;
            char[] buffer = token.TermBuffer();
            while (true)
            {

                // ioBuffer is used up: account for it in offset and read the next chunk.
                if (bufferIndex >= dataLen)
                {
                    offset += dataLen;
                    dataLen = input is Lucene.Net.Index.DocumentsWriter.ReusableStringReader ? ((Lucene.Net.Index.DocumentsWriter.ReusableStringReader) input).Read(ioBuffer) : input.Read((System.Char[]) ioBuffer, 0, ioBuffer.Length);
                    if (dataLen <= 0)
                    {
                        // Nothing more to read: emit a pending token if there is one.
                        if (length > 0)
                            break;
                        else
                            return null;
                    }
                    bufferIndex = 0;
                }

                char c = ioBuffer[bufferIndex++];

                if (IsTokenChar(c))
                {
                    // if it's a token char

                    if (length == 0)
                        // start of token: offset counts the chunks already consumed,
                        // bufferIndex - 1 is the index of c within the current chunk
                        start = offset + bufferIndex - 1;
                    else if (length == buffer.Length)
                        // term buffer is full: get a larger one before storing c
                        buffer = token.ResizeTermBuffer(1 + length);

                    buffer[length++] = Normalize(c); // buffer it, normalized

                    if (length == MAX_WORD_LEN)
                        // reached the maximum token length: emit what we have
                        break;
                }
                else if (length > 0)
                    // at a non-token char with chars collected
                    break; // return 'em
            }

            token.termLength = length;
            token.startOffset = start;
            token.endOffset = start + length;
            return token;
        }
예제 #14
0
        // Checks that the string-based and char[]-based term setters of Token
        // stay in sync with Term/TermLength when used interchangeably.
        public virtual void TestMixedStringArray()
        {
            // Assertions take the expected value first so that a failure
            // message reports expected/actual the right way round.
            Token t = new Token("hello", 0, 5);

            Assert.AreEqual(5, t.TermLength());
            Assert.AreEqual("hello", t.Term);
            t.SetTermBuffer("hello2");
            Assert.AreEqual(6, t.TermLength());
            Assert.AreEqual("hello2", t.Term);
            t.SetTermBuffer("hello3".ToCharArray(), 0, 6);
            Assert.AreEqual("hello3", t.Term);

            // Writing through the array returned by TermBuffer() must be
            // visible through Term.
            char[] buffer = t.TermBuffer();
            buffer[1] = 'o';
            Assert.AreEqual("hollo3", t.Term);
        }
예제 #15
0
 /// <summary>
 /// Fetches the next token from <c>input</c> and, when <c>stemmer.Stem</c>
 /// reports a change, replaces its term with the stemmer's result buffer.
 /// </summary>
 /// <param name="result">Token handed to <c>input.Next</c> for reuse.</param>
 /// <returns>The token, or null when <c>input</c> is exhausted.</returns>
 public override Token Next(Token result)
 {
     result = input.Next(result);
     if (result != null && stemmer.Stem(result.TermBuffer(), 0, result.termLength))
     {
         result.SetTermBuffer(stemmer.GetResultBuffer(), 0, stemmer.GetResultLength());
     }
     // result is null here exactly when input had no further token.
     return result;
 }
예제 #16
0
        /// <summary>
        /// Returns the next token from <c>input</c> with its term characters
        /// lower-cased in place.
        /// </summary>
        /// <remarks>
        /// Lower-casing uses the invariant culture so that the produced terms do not
        /// depend on the culture of the thread running the analysis (for example the
        /// Turkish mapping of 'I' to dotless 'ı').
        /// </remarks>
        /// <param name="result">Token handed to <c>input.Next</c> for reuse.</param>
        /// <returns>The lower-cased token, or null when <c>input</c> is exhausted.</returns>
        public override Token Next(Token result)
        {
            result = input.Next(result);
            if (result == null)
            {
                return null;
            }

            char[] buffer = result.TermBuffer();
            int length = result.termLength;
            for (int i = 0; i < length; i++)
            {
                buffer[i] = System.Char.ToLowerInvariant(buffer[i]);
            }

            return result;
        }
예제 #17
0
        /// <summary>
        /// Fetches the next token from <c>input</c> and, when <c>stemmer.Stem</c>
        /// reports a change, replaces its term with the stemmer's result buffer.
        /// </summary>
        /// <param name="reusableToken">Token handed to <c>input.Next</c> for reuse; must not be null.</param>
        /// <returns>The token, or null when <c>input</c> is exhausted.</returns>
        public override Token Next(/* in */ Token reusableToken)
        {
            System.Diagnostics.Debug.Assert(reusableToken != null);

            Token current = input.Next(reusableToken);
            if (current != null)
            {
                bool changed = stemmer.Stem(current.TermBuffer(), 0, current.TermLength());
                if (changed)
                {
                    current.SetTermBuffer(stemmer.GetResultBuffer(), 0, stemmer.GetResultLength());
                }
            }

            // current is null here exactly when input had no further token.
            return current;
        }
예제 #18
0
        /// <summary>
        /// Returns the next token from <c>input</c> with its term characters
        /// lower-cased in place.
        /// </summary>
        /// <remarks>
        /// Lower-casing uses the invariant culture so that the produced terms do not
        /// depend on the culture of the thread running the analysis (for example the
        /// Turkish mapping of 'I' to dotless 'ı').
        /// </remarks>
        /// <param name="result">Token handed to <c>input.Next</c> for reuse.</param>
        /// <returns>The lower-cased token, or null when <c>input</c> is exhausted.</returns>
        public override Token Next(Token result)
        {
            result = input.Next(result);
            if (result == null)
            {
                return null;
            }

            char[] buffer = result.TermBuffer();
            int length = result.termLength;
            for (int i = 0; i < length; i++)
            {
                buffer[i] = System.Char.ToLowerInvariant(buffer[i]);
            }

            return result;
        }
예제 #19
0
        // Checks that the string-based and char[]-based term accessors of Token
        // stay in sync when they are used interchangeably.
        public virtual void TestMixedStringArray()
        {
            // Assertions take the expected value first so that a failure
            // message reports expected/actual the right way round.
            Token t = new Token("hello", 0, 5);

            Assert.AreEqual("hello", t.TermText());
            Assert.AreEqual(5, t.TermLength());
            Assert.AreEqual("hello", t.Term());
            t.SetTermText("hello2");
            Assert.AreEqual(6, t.TermLength());
            Assert.AreEqual("hello2", t.Term());
            t.SetTermBuffer("hello3".ToCharArray(), 0, 6);
            Assert.AreEqual("hello3", t.TermText());

            // Make sure if we get the buffer and change a character
            // that TermText() reflects the change
            char[] buffer = t.TermBuffer();
            buffer[1] = 'o';
            Assert.AreEqual("hollo3", t.TermText());
        }
예제 #20
0
        // Clones a Token (first without, then with a payload) through
        // TestSimpleAttributeImpls.AssertCloneIsEqual and checks that the clone
        // has equal content but its own term buffer and payload instances.
        public virtual void TestClone()
        {
            char[] content = "hello".ToCharArray();
            Token original = new Token(0, 5);
            original.SetTermBuffer(content, 0, 5);
            char[] originalBuffer = original.TermBuffer();

            Token clone = (Token)TestSimpleAttributeImpls.AssertCloneIsEqual(original);
            Assert.AreEqual(original.Term, clone.Term);
            Assert.AreNotSame(originalBuffer, clone.TermBuffer());

            Payload payload = new Payload(new byte[] { 1, 2, 3, 4 });
            original.Payload = payload;

            clone = (Token)TestSimpleAttributeImpls.AssertCloneIsEqual(original);
            Assert.AreEqual(payload, clone.Payload);
            Assert.AreNotSame(payload, clone.Payload);
        }
예제 #21
0
        /// <summary>
        /// Returns the next token from <c>input</c> with its term characters
        /// lower-cased in place.
        /// </summary>
        /// <remarks>
        /// Lower-casing uses the invariant culture so that the produced terms do not
        /// depend on the culture of the thread running the analysis (for example the
        /// Turkish mapping of 'I' to dotless 'ı').
        /// </remarks>
        /// <param name="reusableToken">Token handed to <c>input.Next</c> for reuse.</param>
        /// <returns>The lower-cased token, or null when <c>input</c> is exhausted.</returns>
        public override Token Next(Token reusableToken)
        {
            Token nextToken = input.Next(reusableToken);
            if (nextToken == null)
            {
                return null;
            }

            char[] buffer = nextToken.TermBuffer();
            int length = nextToken.TermLength();
            for (int i = 0; i < length; i++)
            {
                buffer[i] = System.Char.ToLowerInvariant(buffer[i]);
            }

            return nextToken;
        }
예제 #22
0
        /// <summary>
        /// Returns the first token from <c>input</c> whose term is not contained in
        /// <c>stopWords</c>. When <c>enablePositionIncrements</c> is set, the position
        /// increments of the skipped tokens are added to the returned token's increment.
        /// </summary>
        /// <param name="result">Token handed to <c>input.Next</c> for reuse.</param>
        /// <returns>The next non-stop-word token, or null at end of stream.</returns>
        public override Token Next(Token result)
        {
            // Sum of the position increments of the stop words skipped so far.
            int skipped = 0;

            for (result = input.Next(result); result != null; result = input.Next(result))
            {
                if (stopWords.Contains(result.TermBuffer(), 0, result.termLength))
                {
                    skipped += result.GetPositionIncrement();
                    continue;
                }

                if (enablePositionIncrements)
                {
                    result.SetPositionIncrement(result.GetPositionIncrement() + skipped);
                }
                return result;
            }

            // End of stream reached without finding a non-stop word.
            return null;
        }
예제 #23
0
        /// <summary>
        /// Returns the first token from <c>input</c> whose term is not contained in
        /// <c>stopWords</c>. When <c>enablePositionIncrements</c> is set, the position
        /// increments of the skipped tokens are added to the returned token's increment.
        /// </summary>
        /// <param name="reusableToken">Token handed to <c>input.Next</c> for reuse; must not be null.</param>
        /// <returns>The next non-stop-word token, or null at end of stream.</returns>
        public override Token Next(/* in */ Token reusableToken)
        {
            System.Diagnostics.Debug.Assert(reusableToken != null);

            // Sum of the position increments of the stop words skipped so far.
            int skipped = 0;

            Token candidate = input.Next(reusableToken);
            while (candidate != null)
            {
                bool isStopWord = stopWords.Contains(candidate.TermBuffer(), 0, candidate.TermLength());
                if (!isStopWord)
                {
                    if (enablePositionIncrements)
                    {
                        candidate.SetPositionIncrement(candidate.GetPositionIncrement() + skipped);
                    }
                    return candidate;
                }

                skipped += candidate.GetPositionIncrement();
                candidate = input.Next(reusableToken);
            }

            // End of stream reached without finding a non-stop word.
            return null;
        }
예제 #24
0
 /// <summary>
 /// On the first call, reads everything available from <c>input</c> into the
 /// term buffer of <c>result</c> and returns it as one token. Every later
 /// call returns null.
 /// </summary>
 /// <param name="result">Token that is cleared, filled and returned.</param>
 /// <returns><c>result</c> on the first call, null afterwards.</returns>
 public override Token Next(Token result)
 {
     if (done)
     {
         return null;
     }
     done = true;

     result.Clear();
     char[] termChars = result.TermBuffer();
     int filled = 0;
     int read;
     while ((read = input.Read(termChars, filled, termChars.Length - filled)) > 0)
     {
         filled += read;
         // Buffer is full: ask the token for a larger one before reading on.
         if (filled == termChars.Length)
         {
             termChars = result.ResizeTermBuffer(1 + termChars.Length);
         }
     }
     result.termLength = filled;
     return result;
 }
예제 #25
0
 /// <summary>
 /// On the first call, reads everything available from <c>input</c> into the
 /// term buffer of <c>reusableToken</c> and returns it as one token. Every
 /// later call returns null.
 /// </summary>
 /// <param name="reusableToken">Token that is cleared, filled and returned; must not be null.</param>
 /// <returns><c>reusableToken</c> on the first call, null afterwards.</returns>
 public override Token Next(/* in */ Token reusableToken)
 {
     System.Diagnostics.Debug.Assert(reusableToken != null);
     if (done)
     {
         return null;
     }
     done = true;

     reusableToken.Clear();
     char[] termChars = reusableToken.TermBuffer();
     int filled = 0;
     for (;;)
     {
         int read = input.Read(termChars, filled, termChars.Length - filled);
         if (read <= 0)
         {
             break;
         }
         filled += read;
         // Buffer is full: ask the token for a larger one before reading on.
         if (filled == termChars.Length)
         {
             termChars = reusableToken.ResizeTermBuffer(1 + termChars.Length);
         }
     }
     reusableToken.SetTermLength(filled);
     return reusableToken;
 }
        /// <summary>
        /// Fetches the next token from <c>input</c>. If any of its term characters
        /// lies in U+00C0..U+0178, the term is replaced with the result that
        /// <c>RemoveAccents</c> leaves in <c>output</c>/<c>outputPos</c>.
        /// </summary>
        /// <param name="result">Token handed to <c>input.Next</c> for reuse.</param>
        /// <returns>The (possibly rewritten) token, or null when <c>input</c> is exhausted.</returns>
        public override Token Next(Token result)
        {
            result = input.Next(result);
            if (result == null)
            {
                return null;
            }

            char[] termChars = result.TermBuffer();
            int termLen = result.TermLength();
            // Tokens without any character in the rewrite range are returned untouched.
            int pos = 0;
            while (pos < termLen)
            {
                char ch = termChars[pos];
                if ('\u00c0' <= ch && ch <= '\u0178')
                {
                    RemoveAccents(termChars, termLen);
                    result.SetTermBuffer(output, 0, outputPos);
                    break;
                }
                pos++;
            }
            return result;
        }
예제 #27
0
		/// <summary>
		/// Returns the first token from <c>input</c> whose term is not contained in
		/// <c>stopWords</c>. When <c>enablePositionIncrements</c> is set, the position
		/// increments of the skipped tokens are added to the returned token's increment.
		/// </summary>
		/// <param name="result">Token handed to <c>input.Next</c> for reuse.</param>
		/// <returns>The next non-stop-word token, or null at end of stream.</returns>
		public override Token Next(Token result)
		{
			// Sum of the position increments of the stop words skipped so far.
			int skipped = 0;
			for (result = input.Next(result); result != null; result = input.Next(result))
			{
				if (stopWords.Contains(result.TermBuffer(), 0, result.termLength))
				{
					skipped += result.GetPositionIncrement();
					continue;
				}

				if (enablePositionIncrements)
				{
					result.SetPositionIncrement(result.GetPositionIncrement() + skipped);
				}
				return result;
			}
			// End of stream reached without finding a non-stop word.
			return null;
		}
예제 #28
0
        // Feeds progressively longer terms into a Token through each SetTermBuffer
        // overload and checks the term length, term text and final buffer capacity.
        public virtual void TestGrow()
        {
            // Doubling growth, term supplied as char[].
            Token token = new Token();
            System.Text.StringBuilder text = new System.Text.StringBuilder("ab");
            for (int round = 0; round < 20; round++)
            {
                char[] chars = text.ToString().ToCharArray();
                token.SetTermBuffer(chars, 0, chars.Length);
                Assert.AreEqual(text.Length, token.TermLength());
                Assert.AreEqual(text.ToString(), token.Term);
                text.Append(text.ToString());
            }
            Assert.AreEqual(1048576, token.TermLength());
            Assert.AreEqual(1048576, token.TermBuffer().Length);

            // Doubling growth, term supplied as string with offset and length.
            token = new Token();
            text = new System.Text.StringBuilder("ab");
            for (int round = 0; round < 20; round++)
            {
                string term = text.ToString();
                token.SetTermBuffer(term, 0, term.Length);
                Assert.AreEqual(term.Length, token.TermLength());
                Assert.AreEqual(term, token.Term);
                text.Append(term);
            }
            Assert.AreEqual(1048576, token.TermLength());
            Assert.AreEqual(1048576, token.TermBuffer().Length);

            // Doubling growth, term supplied as a whole string.
            token = new Token();
            text = new System.Text.StringBuilder("ab");
            for (int round = 0; round < 20; round++)
            {
                string term = text.ToString();
                token.SetTermBuffer(term);
                Assert.AreEqual(term.Length, token.TermLength());
                Assert.AreEqual(term, token.Term);
                text.Append(term);
            }
            Assert.AreEqual(1048576, token.TermLength());
            Assert.AreEqual(1048576, token.TermBuffer().Length);

            // Slow growth: one extra character per step.
            token = new Token();
            text = new System.Text.StringBuilder("a");
            for (int step = 0; step < 20000; step++)
            {
                string term = text.ToString();
                token.SetTermBuffer(term);
                Assert.AreEqual(term.Length, token.TermLength());
                Assert.AreEqual(term, token.Term);
                text.Append("a");
            }
            Assert.AreEqual(20000, token.TermLength());
            Assert.AreEqual(32768, token.TermBuffer().Length);

            // Slow growth once more, on a fresh token.
            token = new Token();
            text = new System.Text.StringBuilder("a");
            for (int step = 0; step < 20000; step++)
            {
                string term = text.ToString();
                token.SetTermBuffer(term);
                Assert.AreEqual(term.Length, token.TermLength());
                Assert.AreEqual(term, token.Term);
                text.Append("a");
            }
            Assert.AreEqual(20000, token.TermLength());
            Assert.AreEqual(32768, token.TermBuffer().Length);
        }
 /// <summary>
 /// Clears the attributes and then copies the term, position increment, flags,
 /// offsets, type and payload of <paramref name="token"/> into them.
 /// Does nothing when <paramref name="token"/> is null.
 /// </summary>
 /// <param name="token">Token to copy from; may be null.</param>
 private void SetCurrentToken(Token token)
 {
     if (token != null)
     {
         ClearAttributes();
         _termAtt.SetTermBuffer(token.TermBuffer(), 0, token.TermLength());
         _posIncrAtt.SetPositionIncrement(token.GetPositionIncrement());
         _flagsAtt.SetFlags(token.GetFlags());
         _offsetAtt.SetOffset(token.StartOffset(), token.EndOffset());
         _typeAtt.SetType(token.Type());
         _payloadAtt.SetPayload(token.GetPayload());
     }
 }
예제 #30
0
 /// <summary>Forwards to <c>delegate_Renamed.TermBuffer()</c> and returns its result.</summary>
 public char[] TermBuffer()
 {
     char[] delegateBuffer = delegate_Renamed.TermBuffer();
     return delegateBuffer;
 }
예제 #31
0
 // Feeds progressively longer terms into a Token through each SetTermBuffer
 // overload and checks the term length, term text and final buffer capacity.
 public virtual void TestGrow()
 {
     // Doubling growth, term supplied as char[].
     Token token = new Token();
     System.Text.StringBuilder text = new System.Text.StringBuilder("ab");
     for (int round = 0; round < 20; round++)
     {
         char[] chars = text.ToString().ToCharArray();
         token.SetTermBuffer(chars, 0, chars.Length);
         Assert.AreEqual(text.Length, token.TermLength());
         Assert.AreEqual(text.ToString(), token.Term);
         text.Append(text.ToString());
     }
     Assert.AreEqual(1048576, token.TermLength());
     Assert.AreEqual(1179654, token.TermBuffer().Length);

     // Doubling growth, term supplied as string with offset and length.
     token = new Token();
     text = new System.Text.StringBuilder("ab");
     for (int round = 0; round < 20; round++)
     {
         string term = text.ToString();
         token.SetTermBuffer(term, 0, term.Length);
         Assert.AreEqual(term.Length, token.TermLength());
         Assert.AreEqual(term, token.Term);
         text.Append(term);
     }
     Assert.AreEqual(1048576, token.TermLength());
     Assert.AreEqual(1179654, token.TermBuffer().Length);

     // Doubling growth, term supplied as a whole string.
     token = new Token();
     text = new System.Text.StringBuilder("ab");
     for (int round = 0; round < 20; round++)
     {
         string term = text.ToString();
         token.SetTermBuffer(term);
         Assert.AreEqual(term.Length, token.TermLength());
         Assert.AreEqual(term, token.Term);
         text.Append(term);
     }
     Assert.AreEqual(1048576, token.TermLength());
     Assert.AreEqual(1179654, token.TermBuffer().Length);

     // Slow growth: one extra character per step.
     token = new Token();
     text = new System.Text.StringBuilder("a");
     for (int step = 0; step < 20000; step++)
     {
         string term = text.ToString();
         token.SetTermBuffer(term);
         Assert.AreEqual(term.Length, token.TermLength());
         Assert.AreEqual(term, token.Term);
         text.Append("a");
     }
     Assert.AreEqual(20000, token.TermLength());
     Assert.AreEqual(20167, token.TermBuffer().Length);

     // Slow growth once more, on a fresh token.
     token = new Token();
     text = new System.Text.StringBuilder("a");
     for (int step = 0; step < 20000; step++)
     {
         string term = text.ToString();
         token.SetTermBuffer(term);
         Assert.AreEqual(term.Length, token.TermLength());
         Assert.AreEqual(term, token.Term);
         text.Append("a");
     }
     Assert.AreEqual(20000, token.TermLength());
     Assert.AreEqual(20167, token.TermBuffer().Length);
 }
예제 #32
0
        /// <summary>
        /// Collects the next run of consecutive characters accepted by
        /// <c>IsTokenChar</c> into <c>reusableToken</c>, passing each character
        /// through <c>Normalize</c>. Characters are pulled from <c>input</c>
        /// through <c>ioBuffer</c>, which is refilled whenever it is used up.
        /// </summary>
        /// <param name="reusableToken">Token that is cleared, filled and returned; checked with Debug.Assert to be non-null.</param>
        /// <returns>
        /// <c>reusableToken</c> with its term length, start offset and end offset set,
        /// or null when the input ends before any token character was collected.
        /// </returns>
        public override Token Next(/* in */ Token reusableToken)
        {
            System.Diagnostics.Debug.Assert(reusableToken != null);
            reusableToken.Clear();
            // Number of characters collected for the current token so far.
            int length = 0;
            // Placeholder; overwritten when the first token character is seen.
            int start  = bufferIndex;

            char[] buffer = reusableToken.TermBuffer();
            while (true)
            {
                // ioBuffer is used up: account for it in offset and read the next chunk.
                if (bufferIndex >= dataLen)
                {
                    offset += dataLen;
                    dataLen = input is Lucene.Net.Index.ReusableStringReader ? ((Lucene.Net.Index.ReusableStringReader)input).Read(ioBuffer) : input.Read((System.Char[])ioBuffer, 0, ioBuffer.Length);
                    if (dataLen <= 0)
                    {
                        // Nothing more to read: emit a pending token if there is one.
                        if (length > 0)
                        {
                            break;
                        }
                        else
                        {
                            return(null);
                        }
                    }
                    bufferIndex = 0;
                }

                char c = ioBuffer[bufferIndex++];

                if (IsTokenChar(c))
                {
                    // if it's a token char

                    if (length == 0)
                    {
                        // start of token: offset counts the chunks already consumed,
                        // bufferIndex - 1 is the index of c within the current chunk
                        start = offset + bufferIndex - 1;
                    }
                    else if (length == buffer.Length)
                    {
                        // term buffer is full: get a larger one before storing c
                        buffer = reusableToken.ResizeTermBuffer(1 + length);
                    }

                    buffer[length++] = Normalize(c); // buffer it, normalized

                    if (length == MAX_WORD_LEN)
                    {
                        // reached the maximum token length: emit what we have
                        break;
                    }
                }
                else if (length > 0)
                {
                    // at a non-token char with chars collected
                    break; // return 'em
                }
            }

            reusableToken.SetTermLength(length);
            reusableToken.SetStartOffset(start);
            reusableToken.SetEndOffset(start + length);
            return(reusableToken);
        }
예제 #33
0
        /// <summary>
        /// Collects the next run of consecutive characters accepted by
        /// <c>IsTokenChar</c> into <c>token</c>, passing each character through
        /// <c>Normalize</c>. Characters are pulled from <c>input</c> through
        /// <c>ioBuffer</c>, which is refilled whenever it is used up.
        /// </summary>
        /// <param name="token">Token that is cleared, filled and returned.</param>
        /// <returns>
        /// <c>token</c> with its termLength, startOffset and endOffset fields set,
        /// or null when the input ends before any token character was collected.
        /// </returns>
        public override Token Next(Token token)
        {
            token.Clear();
            // Number of characters collected for the current token so far.
            int length = 0;
            // Placeholder; overwritten when the first token character is seen.
            int start  = bufferIndex;

            char[] buffer = token.TermBuffer();
            while (true)
            {
                // ioBuffer is used up: account for it in offset and read the next chunk.
                if (bufferIndex >= dataLen)
                {
                    offset += dataLen;
                    dataLen = input is Lucene.Net.Index.DocumentsWriter.ReusableStringReader ? ((Lucene.Net.Index.DocumentsWriter.ReusableStringReader)input).Read(ioBuffer) : input.Read((System.Char[])ioBuffer, 0, ioBuffer.Length);
                    if (dataLen <= 0)
                    {
                        // Nothing more to read: emit a pending token if there is one.
                        if (length > 0)
                        {
                            break;
                        }
                        else
                        {
                            return(null);
                        }
                    }
                    bufferIndex = 0;
                }

                char c = ioBuffer[bufferIndex++];

                if (IsTokenChar(c))
                {
                    // if it's a token char

                    if (length == 0)
                    {
                        // start of token: offset counts the chunks already consumed,
                        // bufferIndex - 1 is the index of c within the current chunk
                        start = offset + bufferIndex - 1;
                    }
                    else if (length == buffer.Length)
                    {
                        // term buffer is full: get a larger one before storing c
                        buffer = token.ResizeTermBuffer(1 + length);
                    }

                    buffer[length++] = Normalize(c); // buffer it, normalized

                    if (length == MAX_WORD_LEN)
                    {
                        // reached the maximum token length: emit what we have
                        break;
                    }
                }
                else if (length > 0)
                {
                    // at a non-token char with chars collected
                    break; // return 'em
                }
            }

            token.termLength  = length;
            token.startOffset = start;
            token.endOffset   = start + length;
            return(token);
        }
예제 #34
0
 // Copies a Token (empty, with a term, then with a payload) through
 // TestSimpleAttributeImpls.AssertCopyIsEqual and checks that the copy has
 // equal content but its own term buffer and payload instances.
 public virtual void TestCopyTo()
 {
     // Empty token.
     Token source = new Token();
     Token duplicate = (Token) TestSimpleAttributeImpls.AssertCopyIsEqual(source);
     Assert.AreEqual("", source.Term);
     Assert.AreEqual("", duplicate.Term);

     // Token with a term.
     char[] content = "hello".ToCharArray();
     source = new Token(0, 5);
     source.SetTermBuffer(content, 0, 5);
     char[] sourceBuffer = source.TermBuffer();
     duplicate = (Token) TestSimpleAttributeImpls.AssertCopyIsEqual(source);
     Assert.AreEqual(source.Term, duplicate.Term);
     Assert.AreNotSame(sourceBuffer, duplicate.TermBuffer());

     // Token with a payload.
     Payload payload = new Payload(new byte[]{1, 2, 3, 4});
     source.Payload = payload;
     duplicate = (Token) TestSimpleAttributeImpls.AssertCopyIsEqual(source);
     Assert.AreEqual(payload, duplicate.Payload);
     Assert.AreNotSame(payload, duplicate.Payload);
 }
예제 #35
0
 // Checks that the string-based and char[]-based term setters of Token
 // stay in sync with Term/TermLength when used interchangeably.
 public virtual void TestMixedStringArray()
 {
     // Assertions take the expected value first so that a failure
     // message reports expected/actual the right way round.
     Token t = new Token("hello", 0, 5);
     Assert.AreEqual(5, t.TermLength());
     Assert.AreEqual("hello", t.Term);
     t.SetTermBuffer("hello2");
     Assert.AreEqual(6, t.TermLength());
     Assert.AreEqual("hello2", t.Term);
     t.SetTermBuffer("hello3".ToCharArray(), 0, 6);
     Assert.AreEqual("hello3", t.Term);

     // Writing through the array returned by TermBuffer() must be
     // visible through Term.
     char[] buffer = t.TermBuffer();
     buffer[1] = 'o';
     Assert.AreEqual("hollo3", t.Term);
 }