Beispiel #1
0
        /// <summary>
        /// Sets a token's term alternately from a string and from a char array and
        /// checks that <c>TermLength()</c> and <c>Term</c> report each new value,
        /// then checks that writing into the array returned by <c>TermBuffer()</c>
        /// is visible through <c>Term</c>.
        /// </summary>
        public virtual void  TestMixedStringArray()
        {
            Token t = new Token("hello", 0, 5);

            // Assert.AreEqual takes (expected, actual); keeping that order makes
            // failure messages label the two values correctly.
            Assert.AreEqual(5, t.TermLength());
            Assert.AreEqual("hello", t.Term);
            t.SetTermBuffer("hello2");
            Assert.AreEqual(6, t.TermLength());
            Assert.AreEqual("hello2", t.Term);
            t.SetTermBuffer("hello3".ToCharArray(), 0, 6);
            Assert.AreEqual("hello3", t.Term);

            // Change one character through the buffer and expect Term to show it.
            char[] buffer = t.TermBuffer();
            buffer[1] = 'o';
            Assert.AreEqual("hollo3", t.Term);
        }
Beispiel #2
0
        /// <summary>
        /// Pulls the next token from <c>input</c>. If its term contains a character
        /// in the range U+00C0..U+FB06, <c>RemoveAccents</c> is run over the term and
        /// the term is replaced with the first <c>outputPos</c> chars of <c>output</c>.
        /// </summary>
        /// <param name="reusableToken">Token handed to <c>input.Next</c>; asserted non-null in debug builds.</param>
        /// <returns>The token obtained from <c>input</c>, or null when <c>input</c> returned null.</returns>
        public override Token Next(/* in */ Token reusableToken)
        {
            System.Diagnostics.Debug.Assert(reusableToken != null);

            Token current = input.Next(reusableToken);
            if (current == null)
            {
                return null;
            }

            char[] termChars = current.TermBuffer();
            int termLen = current.TermLength();

            // Look for the first character that may need rewriting; a token with
            // no such character is handed back untouched.
            int pos = 0;
            while (pos < termLen)
            {
                char ch = termChars[pos];
                if (ch >= '\u00c0' && ch <= '\ufb06')
                {
                    RemoveAccents(termChars, termLen);
                    current.SetTermBuffer(output, 0, outputPos);
                    break;
                }
                pos++;
            }

            return current;
        }
 /// <summary>
 /// Fetches the next token from <c>input</c> and, when its term contains a
 /// character in the range U+00C0..U+0178, replaces the term with the first
 /// <c>outputPos</c> chars of <c>output</c> after calling <c>RemoveAccents</c>.
 /// </summary>
 /// <param name="result">Token passed to <c>input.Next</c>.</param>
 /// <returns>The token obtained from <c>input</c>, or null when <c>input</c> returned null.</returns>
 public override Token Next(Token result)
 {
     result = input.Next(result);
     if (result == null)
     {
         return null;
     }

     char[] termChars = result.TermBuffer();
     int termLen = result.TermLength();

     // Only rewrite when at least one character falls in the accented range;
     // otherwise the token goes back unchanged.
     int pos = 0;
     while (pos < termLen)
     {
         char ch = termChars[pos];
         if (ch >= '\u00c0' && ch <= '\u0178')
         {
             RemoveAccents(termChars, termLen);
             result.SetTermBuffer(output, 0, outputPos);
             break;
         }
         pos++;
     }

     return result;
 }
Beispiel #4
0
		/// <summary>
		/// Sets a token's term alternately through <c>SetTermText</c> and
		/// <c>SetTermBuffer</c> and checks that <c>TermText()</c>, <c>TermLength()</c>
		/// and <c>TermBuffer()</c> report each new value, then checks that writing
		/// into the returned buffer is visible through <c>TermText()</c>.
		/// </summary>
		public virtual void  TestMixedStringArray()
		{
			Token t = new Token("hello", 0, 5);

			// Assert.AreEqual takes (expected, actual); keeping that order makes
			// failure messages label the two values correctly.
			Assert.AreEqual("hello", t.TermText());
			Assert.AreEqual(5, t.TermLength());
			Assert.AreEqual("hello", new System.String(t.TermBuffer(), 0, 5));
			t.SetTermText("hello2");
			Assert.AreEqual(6, t.TermLength());
			Assert.AreEqual("hello2", new System.String(t.TermBuffer(), 0, 6));
			t.SetTermBuffer("hello3".ToCharArray(), 0, 6);
			Assert.AreEqual("hello3", t.TermText());
			
			// Make sure if we get the buffer and change a character
			// that TermText() reflects the change
			char[] buffer = t.TermBuffer();
			buffer[1] = 'o';
			Assert.AreEqual("hollo3", t.TermText());
		}
Beispiel #5
0
        /// <summary>
        /// Sets a token's term alternately through <c>SetTermText</c> and
        /// <c>SetTermBuffer</c> and checks that <c>TermText()</c>, <c>TermLength()</c>
        /// and <c>Term()</c> report each new value, then checks that writing into
        /// the array returned by <c>TermBuffer()</c> is visible through <c>TermText()</c>.
        /// </summary>
        public virtual void  TestMixedStringArray()
        {
            Token t = new Token("hello", 0, 5);

            // Assert.AreEqual takes (expected, actual); keeping that order makes
            // failure messages label the two values correctly.
            Assert.AreEqual("hello", t.TermText());
            Assert.AreEqual(5, t.TermLength());
            Assert.AreEqual("hello", t.Term());
            t.SetTermText("hello2");
            Assert.AreEqual(6, t.TermLength());
            Assert.AreEqual("hello2", t.Term());
            t.SetTermBuffer("hello3".ToCharArray(), 0, 6);
            Assert.AreEqual("hello3", t.TermText());

            // Make sure if we get the buffer and change a character
            // that TermText() reflects the change
            char[] buffer = t.TermBuffer();
            buffer[1] = 'o';
            Assert.AreEqual("hollo3", t.TermText());
        }
 /// <summary>
 /// Returns the next token from <c>input</c> whose term length lies between
 /// <c>min</c> and <c>max</c>, both inclusive. Tokens outside that range are skipped.
 /// </summary>
 /// <param name="reusableToken">Token handed to <c>input.Next</c>; asserted non-null in debug builds.</param>
 /// <returns>The first token within the length range, or null once <c>input</c> returns null.</returns>
 public override Token Next(/* in */ Token reusableToken)
 {
     System.Diagnostics.Debug.Assert(reusableToken != null);

     Token candidate;
     while ((candidate = input.Next(reusableToken)) != null)
     {
         int termLen = candidate.TermLength();
         if (termLen < min || termLen > max)
         {
             // Out of range: drop this token and pull the next one.
             // Open question carried over from the original: should each
             // part of a skipped token be indexed instead?
             continue;
         }
         return candidate;
     }

     // input is exhausted
     return null;
 }
Beispiel #7
0
        /// <summary>
        /// Pulls the next token from <c>input</c> and runs <c>stemmer.Stem</c> over
        /// its term. When <c>Stem</c> returns true, the term is replaced with the
        /// stemmer's result buffer.
        /// </summary>
        /// <param name="reusableToken">Token handed to <c>input.Next</c>; asserted non-null in debug builds.</param>
        /// <returns>The token obtained from <c>input</c>, or null when <c>input</c> returned null.</returns>
        public override Token Next(/* in */ Token reusableToken)
        {
            System.Diagnostics.Debug.Assert(reusableToken != null);

            Token current = input.Next(reusableToken);
            if (current != null)
            {
                bool stemmed = stemmer.Stem(current.TermBuffer(), 0, current.TermLength());
                if (stemmed)
                {
                    // Copy the stemmer's output back into the token.
                    current.SetTermBuffer(stemmer.GetResultBuffer(), 0, stemmer.GetResultLength());
                }
            }

            return current;
        }
        /// <summary>
        /// Pulls the next token from <c>input</c> and lower-cases every character of
        /// its term in place, directly in the array returned by <c>TermBuffer()</c>.
        /// </summary>
        /// <param name="reusableToken">Token handed to <c>input.Next</c>.</param>
        /// <returns>The lower-cased token, or null when <c>input</c> returned null.</returns>
        public override Token Next(Token reusableToken)
        {
            Token nextToken = input.Next(reusableToken);

            if (nextToken == null)
            {
                return null;
            }

            char[] buffer = nextToken.TermBuffer();
            int length = nextToken.TermLength();
            for (int i = 0; i < length; i++)
            {
                // Char.ToLower(char) uses the current thread culture, so the same
                // text could yield different terms on differently configured
                // machines (e.g. 'I' under tr-TR). The invariant mapping keeps
                // the produced terms culture-independent.
                buffer[i] = System.Char.ToLowerInvariant(buffer[i]);
            }

            return nextToken;
        }
        /// <summary>
        /// Returns the next token from <c>input</c> whose term is not contained in
        /// <c>stopWords</c>. When <c>enablePositionIncrements</c> is set, the position
        /// increments of the skipped tokens are added to the returned token's increment.
        /// </summary>
        /// <param name="reusableToken">Token handed to <c>input.Next</c>; asserted non-null in debug builds.</param>
        /// <returns>The first token that is not a stop word, or null once <c>input</c> returns null.</returns>
        public override Token Next(/* in */ Token reusableToken)
        {
            System.Diagnostics.Debug.Assert(reusableToken != null);

            // Sum of the position increments of the stop words skipped so far.
            int skipped = 0;

            Token candidate = input.Next(reusableToken);
            while (candidate != null)
            {
                bool isStopWord = stopWords.Contains(candidate.TermBuffer(), 0, candidate.TermLength());
                if (isStopWord)
                {
                    skipped += candidate.GetPositionIncrement();
                    candidate = input.Next(reusableToken);
                    continue;
                }

                if (enablePositionIncrements)
                {
                    candidate.SetPositionIncrement(candidate.GetPositionIncrement() + skipped);
                }
                return candidate;
            }

            // input is exhausted
            return null;
        }
        /// <summary>
        /// Fetches the next token from <c>input</c>. If any character of its term is
        /// in the range U+00C0..U+0178, <c>RemoveAccents</c> is called and the term is
        /// replaced with the first <c>outputPos</c> chars of <c>output</c>.
        /// </summary>
        /// <param name="result">Token passed to <c>input.Next</c>.</param>
        /// <returns>The token obtained from <c>input</c>, or null when <c>input</c> returned null.</returns>
        public override Token Next(Token result)
        {
            result = input.Next(result);
            if (result == null)
            {
                return null;
            }

            char[] chars = result.TermBuffer();
            int len = result.TermLength();

            // Stop scanning at the first character inside the accented range;
            // a term without any is returned as-is.
            bool needsRewrite = false;
            for (int idx = 0; idx < len && !needsRewrite; idx++)
            {
                char ch = chars[idx];
                needsRewrite = ch >= '\u00c0' && ch <= '\u0178';
            }

            if (needsRewrite)
            {
                RemoveAccents(chars, len);
                result.SetTermBuffer(output, 0, outputPos);
            }

            return result;
        }
Beispiel #11
0
        /// <summary>
        /// Repeatedly assigns ever longer terms to a token through each
        /// <c>SetTermBuffer</c> overload and checks <c>TermLength()</c>, <c>Term</c>
        /// and the final <c>TermBuffer().Length</c> after each growth pattern.
        /// </summary>
        public virtual void TestGrow()
        {
            // Doubling growth, term supplied as a char array.
            Token token = new Token();
            System.Text.StringBuilder builder = new System.Text.StringBuilder("ab");
            for (int round = 0; round < 20; round++)
            {
                string snapshot = builder.ToString();
                char[] chars = snapshot.ToCharArray();
                token.SetTermBuffer(chars, 0, chars.Length);
                Assert.AreEqual(builder.Length, token.TermLength());
                Assert.AreEqual(snapshot, token.Term);
                builder.Append(snapshot);
            }
            Assert.AreEqual(1048576, token.TermLength());
            Assert.AreEqual(1048576, token.TermBuffer().Length);

            // Doubling growth, term supplied as string with offset and length.
            token = new Token();
            builder = new System.Text.StringBuilder("ab");
            for (int round = 0; round < 20; round++)
            {
                string text = builder.ToString();
                token.SetTermBuffer(text, 0, text.Length);
                Assert.AreEqual(text.Length, token.TermLength());
                Assert.AreEqual(text, token.Term);
                builder.Append(text);
            }
            Assert.AreEqual(1048576, token.TermLength());
            Assert.AreEqual(1048576, token.TermBuffer().Length);

            // Doubling growth, term supplied as a whole string.
            token = new Token();
            builder = new System.Text.StringBuilder("ab");
            for (int round = 0; round < 20; round++)
            {
                string text = builder.ToString();
                token.SetTermBuffer(text);
                Assert.AreEqual(text.Length, token.TermLength());
                Assert.AreEqual(text, token.Term);
                builder.Append(text);
            }
            Assert.AreEqual(1048576, token.TermLength());
            Assert.AreEqual(1048576, token.TermBuffer().Length);

            // Slow growth: one extra character per iteration.
            token = new Token();
            builder = new System.Text.StringBuilder("a");
            for (int step = 0; step < 20000; step++)
            {
                string text = builder.ToString();
                token.SetTermBuffer(text);
                Assert.AreEqual(text.Length, token.TermLength());
                Assert.AreEqual(text, token.Term);
                builder.Append("a");
            }
            Assert.AreEqual(20000, token.TermLength());
            Assert.AreEqual(32768, token.TermBuffer().Length);

            // Slow growth again, on a fresh token (same scenario as above).
            token = new Token();
            builder = new System.Text.StringBuilder("a");
            for (int step = 0; step < 20000; step++)
            {
                string text = builder.ToString();
                token.SetTermBuffer(text);
                Assert.AreEqual(text.Length, token.TermLength());
                Assert.AreEqual(text, token.Term);
                builder.Append("a");
            }
            Assert.AreEqual(20000, token.TermLength());
            Assert.AreEqual(32768, token.TermBuffer().Length);
        }
Beispiel #12
0
 /// <summary> Returns the term length reported by <c>delegate_Renamed</c>.</summary>
 public int TermLength()
 {
     int length = delegate_Renamed.TermLength();
     return length;
 }
Beispiel #13
0
 /// <summary>
 /// Feeds a token progressively longer terms through each <c>SetTermBuffer</c>
 /// overload and checks <c>TermLength()</c>, <c>Term</c> and the resulting
 /// <c>TermBuffer().Length</c> for every growth pattern.
 /// </summary>
 public virtual void  TestGrow()
 {
     // Term doubles each round; supplied as a char array.
     Token token = new Token();
     System.Text.StringBuilder builder = new System.Text.StringBuilder("ab");
     for (int round = 0; round < 20; round++)
     {
         string snapshot = builder.ToString();
         char[] chars = snapshot.ToCharArray();
         token.SetTermBuffer(chars, 0, chars.Length);
         Assert.AreEqual(builder.Length, token.TermLength());
         Assert.AreEqual(snapshot, token.Term);
         builder.Append(snapshot);
     }
     Assert.AreEqual(1048576, token.TermLength());
     Assert.AreEqual(1179654, token.TermBuffer().Length);

     // Term doubles each round; supplied as string with offset and length.
     token = new Token();
     builder = new System.Text.StringBuilder("ab");
     for (int round = 0; round < 20; round++)
     {
         string text = builder.ToString();
         token.SetTermBuffer(text, 0, text.Length);
         Assert.AreEqual(text.Length, token.TermLength());
         Assert.AreEqual(text, token.Term);
         builder.Append(text);
     }
     Assert.AreEqual(1048576, token.TermLength());
     Assert.AreEqual(1179654, token.TermBuffer().Length);

     // Term doubles each round; supplied as a whole string.
     token = new Token();
     builder = new System.Text.StringBuilder("ab");
     for (int round = 0; round < 20; round++)
     {
         string text = builder.ToString();
         token.SetTermBuffer(text);
         Assert.AreEqual(text.Length, token.TermLength());
         Assert.AreEqual(text, token.Term);
         builder.Append(text);
     }
     Assert.AreEqual(1048576, token.TermLength());
     Assert.AreEqual(1179654, token.TermBuffer().Length);

     // Term grows by a single character per step.
     token = new Token();
     builder = new System.Text.StringBuilder("a");
     for (int step = 0; step < 20000; step++)
     {
         string text = builder.ToString();
         token.SetTermBuffer(text);
         Assert.AreEqual(text.Length, token.TermLength());
         Assert.AreEqual(text, token.Term);
         builder.Append("a");
     }
     Assert.AreEqual(20000, token.TermLength());
     Assert.AreEqual(20167, token.TermBuffer().Length);

     // Same single-character growth, repeated on a fresh token.
     token = new Token();
     builder = new System.Text.StringBuilder("a");
     for (int step = 0; step < 20000; step++)
     {
         string text = builder.ToString();
         token.SetTermBuffer(text);
         Assert.AreEqual(text.Length, token.TermLength());
         Assert.AreEqual(text, token.Term);
         builder.Append("a");
     }
     Assert.AreEqual(20000, token.TermLength());
     Assert.AreEqual(20167, token.TermBuffer().Length);
 }
Beispiel #14
0
 /// <summary>
 /// Sets a token's term alternately from a string and from a char array and
 /// checks that <c>TermLength()</c> and <c>Term</c> report each new value,
 /// then checks that writing into the array returned by <c>TermBuffer()</c>
 /// is visible through <c>Term</c>.
 /// </summary>
 public virtual void  TestMixedStringArray()
 {
     Token t = new Token("hello", 0, 5);

     // Assert.AreEqual takes (expected, actual); keeping that order makes
     // failure messages label the two values correctly.
     Assert.AreEqual(5, t.TermLength());
     Assert.AreEqual("hello", t.Term);
     t.SetTermBuffer("hello2");
     Assert.AreEqual(6, t.TermLength());
     Assert.AreEqual("hello2", t.Term);
     t.SetTermBuffer("hello3".ToCharArray(), 0, 6);
     Assert.AreEqual("hello3", t.Term);

     // Change one character through the buffer and expect Term to show it.
     char[] buffer = t.TermBuffer();
     buffer[1] = 'o';
     Assert.AreEqual("hollo3", t.Term);
 }
 /// <summary>
 /// Copies term, position increment, flags, offsets, type and payload from
 /// <paramref name="token"/> into the corresponding attribute fields, after
 /// calling <c>ClearAttributes()</c>. Does nothing when the token is null.
 /// </summary>
 /// <param name="token">Source token; may be null.</param>
 private void SetCurrentToken(Token token)
 {
     if (token != null)
     {
         ClearAttributes();

         char[] termChars = token.TermBuffer();
         int termLen = token.TermLength();
         _termAtt.SetTermBuffer(termChars, 0, termLen);

         int increment = token.GetPositionIncrement();
         _posIncrAtt.SetPositionIncrement(increment);

         _flagsAtt.SetFlags(token.GetFlags());
         _offsetAtt.SetOffset(token.StartOffset(), token.EndOffset());
         _typeAtt.SetType(token.Type());
         _payloadAtt.SetPayload(token.GetPayload());
     }
 }