IncrementToken() public method

Returns the next input Token whose term() is not a stop word.
public IncrementToken ( ) : bool
return bool
Beispiel #1
0
		/// <summary>
		/// Feeds "Now is The Time" through a whitespace tokenizer wrapped in a
		/// <c>StopFilter</c> built from the stop words "is", "the" and "Time", and
		/// checks that "Now" is the only token that comes out.
		/// </summary>
		/// <remarks>
		/// NOTE(review): the trailing <c>true</c> constructor argument is presumably the
		/// ignore-case switch (going by the test name) - confirm against StopFilter.
		/// </remarks>
		public virtual void TestIgnoreCase()
		{
			System.String[] ignoredTerms = { "is", "the", "Time" };
			System.IO.StringReader input = new System.IO.StringReader("Now is The Time");
			TokenStream filtered = new StopFilter(false, new WhitespaceTokenizer(input), ignoredTerms, true);
			TermAttribute term = (TermAttribute) filtered.GetAttribute(typeof(TermAttribute));

			// The first (and only) surviving token must be "Now".
			bool hasFirst = filtered.IncrementToken();
			Assert.IsTrue(hasFirst);
			Assert.AreEqual("Now", term.Term());

			// Nothing may follow it.
			bool hasSecond = filtered.IncrementToken();
			Assert.IsFalse(hasSecond);
		}
        /// <summary>
        /// Checks that a <c>StopFilter</c> over the text "Now is The Time", configured
        /// with the stop words "is", "the" and "Time", yields exactly one token: "Now".
        /// </summary>
        /// <remarks>
        /// NOTE(review): the last constructor argument (<c>true</c>) presumably turns on
        /// case-insensitive stop-word matching, as the test name suggests - confirm.
        /// </remarks>
        public virtual void TestIgnoreCase()
        {
            // Build the whole analysis chain in one expression.
            TokenStream stream = new StopFilter(
                false,
                new WhitespaceTokenizer(new System.IO.StringReader("Now is The Time")),
                new System.String[] { "is", "the", "Time" },
                true);
            TermAttribute termAtt = (TermAttribute)stream.GetAttribute(typeof(TermAttribute));

            // Exactly one token is expected before the stream is exhausted.
            Assert.IsTrue(stream.IncrementToken());
            Assert.AreEqual("Now", termAtt.Term());

            Assert.IsFalse(stream.IncrementToken());
        }
		/// <summary>
		/// Puts "is", "the" and "Time" into a set obtained from
		/// <c>SetFactory.GetSet</c>, filters "Now is The Time" with it, and checks
		/// that "Now" is the only token produced.
		/// </summary>
		/// <remarks>
		/// NOTE(review): the final <c>true</c> passed to <c>StopFilter</c> is presumably
		/// the ignore-case flag (per the test name) - confirm against the constructor.
		/// </remarks>
		public virtual void TestIgnoreCase()
		{
			var ignoredTerms = Support.Compatibility.SetFactory.GetSet<string>();
			ignoredTerms.UnionWith(new string[] { "is", "the", "Time" });

			System.IO.StringReader input = new System.IO.StringReader("Now is The Time");
			WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(input);
			TokenStream filtered = new StopFilter(false, tokenizer, ignoredTerms, true);
			ITermAttribute term = filtered.GetAttribute<ITermAttribute>();

			// Only "Now" survives the filter.
			Assert.IsTrue(filtered.IncrementToken());
			Assert.AreEqual("Now", term.Term);

			// The stream must be exhausted afterwards.
			Assert.IsFalse(filtered.IncrementToken());
		}
		/// <summary>
		/// Filters "Now is The Time" with a stop set made from "is", "the" and "Time"
		/// via <c>StopFilter.MakeStopSet</c>, and checks that the tokens "Now" and
		/// "The" come out, in that order, followed by end of stream.
		/// </summary>
		public virtual void TestStopFilt()
		{
			System.String[] ignoredTerms = { "is", "the", "Time" };
			var ignoredSet = StopFilter.MakeStopSet(ignoredTerms);

			System.IO.StringReader input = new System.IO.StringReader("Now is The Time");
			TokenStream filtered = new StopFilter(false, new WhitespaceTokenizer(input), ignoredSet);
			ITermAttribute term = filtered.GetAttribute<ITermAttribute>();

			// Tokens expected to pass through the filter, in order.
			System.String[] survivors = { "Now", "The" };
			foreach (System.String expected in survivors)
			{
				Assert.IsTrue(filtered.IncrementToken());
				Assert.AreEqual(expected, term.Term);
			}

			Assert.IsFalse(filtered.IncrementToken());
		}
Beispiel #5
0
        /// <summary>
        /// Runs "Now is The Time" through a <c>StopFilter</c> whose stop set is built by
        /// <c>StopFilter.MakeStopSet</c> from "is", "the" and "Time". The test expects
        /// "Now", then "The", then no further tokens.
        /// </summary>
        public virtual void TestStopFilt()
        {
            var stopSet = StopFilter.MakeStopSet(new System.String[] { "is", "the", "Time" });
            WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(new System.IO.StringReader("Now is The Time"));
            TokenStream stream = new StopFilter(false, tokenizer, stopSet);
            ITermAttribute termAtt = stream.GetAttribute<ITermAttribute>();

            // First surviving token.
            bool advanced = stream.IncrementToken();
            Assert.IsTrue(advanced);
            Assert.AreEqual("Now", termAtt.Term);

            // Second surviving token.
            advanced = stream.IncrementToken();
            Assert.IsTrue(advanced);
            Assert.AreEqual("The", termAtt.Term);

            // End of stream.
            advanced = stream.IncrementToken();
            Assert.IsFalse(advanced);
        }
Beispiel #6
0
        /// <summary>
        /// Fills a set from <c>SetFactory.CreateHashSet</c> with "is", "the" and "Time",
        /// filters "Now is The Time" with it, and checks that "Now" is the only token
        /// the filter lets through.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the last <c>StopFilter</c> argument (<c>true</c>) presumably
        /// enables case-insensitive matching, judging by the test name - confirm.
        /// </remarks>
        public virtual void TestIgnoreCase()
        {
            var ignoredTerms = Support.Compatibility.SetFactory.CreateHashSet<string>();
            ignoredTerms.UnionWith(new string[] { "is", "the", "Time" });

            TokenStream filtered = new StopFilter(
                false,
                new WhitespaceTokenizer(new System.IO.StringReader("Now is The Time")),
                ignoredTerms,
                true);
            ITermAttribute term = filtered.GetAttribute<ITermAttribute>();

            // One token ("Now"), then end of stream.
            bool hasToken = filtered.IncrementToken();
            Assert.IsTrue(hasToken);
            Assert.AreEqual("Now", term.Term);

            hasToken = filtered.IncrementToken();
            Assert.IsFalse(hasToken);
        }
Beispiel #7
0
        /// <summary>
        /// Consumes <paramref name="stpf"/> and checks that it yields the English
        /// spellings (via <c>English.IntToEnglish</c>) of 0, 3, 6, ... 18 and then ends.
        /// </summary>
        /// <param name="stpf">Filter under test; its <c>EnablePositionIncrements</c> is overwritten here.</param>
        /// <param name="enableIcrements">
        /// Value assigned to <c>EnablePositionIncrements</c>. When true, every token except
        /// the first must report a position increment of 3; otherwise every token must report 1.
        /// </param>
        /// <remarks>
        /// NOTE(review): presumably the caller supplies a stream of consecutive numbers in
        /// which the two numbers between each expected token are stop words - confirm at the call site.
        /// </remarks>
        private void DoTestStopPositons(StopFilter stpf, bool enableIcrements)
        {
            string mode = enableIcrements ? "enabled" : "disabled";
            Log("---> test with enable-increments-" + mode);

            stpf.EnablePositionIncrements = enableIcrements;
            ITermAttribute term = stpf.GetAttribute<ITermAttribute>();
            IPositionIncrementAttribute increment = stpf.GetAttribute<IPositionIncrementAttribute>();

            for (int number = 0; number < 20; number += 3)
            {
                bool advanced = stpf.IncrementToken();
                Assert.IsTrue(advanced);
                Log("Token " + number + ": " + stpf);

                System.String word = English.IntToEnglish(number).Trim();
                Assert.AreEqual(word, term.Term, "expecting token " + number + " to be " + word);

                // Only the first token, or any token when increments are disabled, advances by 1.
                int expectedIncrement = 1;
                if (enableIcrements && number != 0)
                {
                    expectedIncrement = 3;
                }
                Assert.AreEqual(expectedIncrement, increment.PositionIncrement, "all but first token must have position increment of 3");
            }

            Assert.IsFalse(stpf.IncrementToken());
        }
 /// <summary>
 /// Builds a chain WhitespaceTokenizer -> AnonymousClassTokenFilter -> LowerCaseFilter
 /// -> StopFilter and drains it. The test expects draining to throw
 /// <see cref="System.NotSupportedException"/> whose message ends with
 /// "does not implement any of incrementToken(), next(Token), next().".
 /// </summary>
 public virtual void TestOverridesAny()
 {
     try
     {
         TokenStream stream = new WhitespaceTokenizer(new System.IO.StringReader(doc));
         stream = new AnonymousClassTokenFilter(this, stream);
         stream = new LowerCaseFilter(stream);
         stream = new StopFilter(stream, stopwords);
         while (stream.IncrementToken())
         {
             // Drain the stream; the exception is expected while consuming it.
         }
         Assert.Fail("One TokenFilter does not override any of the required methods, so it should fail.");
     }
     catch (System.NotSupportedException uoe)
     {
         // Ordinal comparison: the suffix is a fixed diagnostic string, so the check
         // must not depend on the current thread culture.
         Assert.IsTrue(uoe.Message.EndsWith("does not implement any of incrementToken(), next(Token), next().", System.StringComparison.Ordinal));
     }
 }
		/// <summary>
		/// Wraps a whitespace tokenizer over <c>doc</c> in AnonymousClassTokenFilter,
		/// LowerCaseFilter and StopFilter, then consumes the stream. Consumption is
		/// expected to throw <see cref="System.NotSupportedException"/> with a message
		/// ending in "does not implement any of incrementToken(), next(Token), next().".
		/// </summary>
		public virtual void TestOverridesAny()
		{
			try
			{
				TokenStream stream = new WhitespaceTokenizer(new System.IO.StringReader(doc));
				stream = new AnonymousClassTokenFilter(this, stream);
				stream = new LowerCaseFilter(stream);
				stream = new StopFilter(stream, stopwords);
				while (stream.IncrementToken())
				{
					// Intentionally empty: only the expected exception matters.
				}
				Assert.Fail("One TokenFilter does not override any of the required methods, so it should fail.");
			}
			catch (System.NotSupportedException uoe)
			{
				// Compare ordinally so the suffix check is independent of the current culture.
				Assert.IsTrue(uoe.Message.EndsWith("does not implement any of incrementToken(), next(Token), next().", System.StringComparison.Ordinal));
			}
		}
		/// <summary>
		/// Exercises token streams with <c>TokenStream.SetOnlyUseNewAPI(true)</c>:
		/// <list type="number">
		/// <item>a chain containing PartOfSpeechTaggingFilter must throw NotSupportedException when consumed;</item>
		/// <item>a chain of WhitespaceTokenizer, LowerCaseFilter and StopFilter must be consumable, and its attributes must be the dedicated *Impl classes;</item>
		/// <item>both <c>Next(Token)</c> and <c>Next()</c> must throw NotSupportedException;</item>
		/// <item>after switching the flag back to false, a fresh tokenizer's standard attributes must be TokenWrapper instances.</item>
		/// </list>
		/// The flag is reset to false in a finally block regardless of the outcome.
		/// </summary>
		public virtual void TestOnlyNewAPI()
		{
			TokenStream.SetOnlyUseNewAPI(true);
			try
			{
				
				// this should fail with UOE
				try
				{
					TokenStream stream = new WhitespaceTokenizer(new System.IO.StringReader(doc));
					stream = new PartOfSpeechTaggingFilter(stream); // <-- this one is evil!
					stream = new LowerCaseFilter(stream);
					stream = new StopFilter(stream, stopwords);
					while (stream.IncrementToken())
					{
						// drain; the exception is expected while consuming
					}
					Assert.Fail("If only the new API is allowed, this should fail with an UOE");
				}
				catch (System.NotSupportedException uoe)
				{
					// AreEqual (rather than IsTrue(Equals)) reports the actual message on failure.
					Assert.AreEqual(typeof(PartOfSpeechTaggingFilter).FullName + " does not implement incrementToken() which is needed for onlyUseNewAPI.", uoe.Message);
				}
				
				// this should pass, as all core token streams support the new API
				TokenStream stream2 = new WhitespaceTokenizer(new System.IO.StringReader(doc));
				stream2 = new LowerCaseFilter(stream2);
				stream2 = new StopFilter(stream2, stopwords);
				while (stream2.IncrementToken())
				{
					// drain
				}
				
				// Test, if all attributes are implemented by their implementation, not Token/TokenWrapper
				Assert.IsTrue(stream2.AddAttribute(typeof(TermAttribute)) is TermAttributeImpl, "TermAttribute is implemented by TermAttributeImpl");
				Assert.IsTrue(stream2.AddAttribute(typeof(OffsetAttribute)) is OffsetAttributeImpl, "OffsetAttribute is implemented by OffsetAttributeImpl");
				Assert.IsTrue(stream2.AddAttribute(typeof(Lucene.Net.Analysis.Tokenattributes.FlagsAttribute)) is FlagsAttributeImpl, "FlagsAttribute is implemented by FlagsAttributeImpl");
				Assert.IsTrue(stream2.AddAttribute(typeof(PayloadAttribute)) is PayloadAttributeImpl, "PayloadAttribute is implemented by PayloadAttributeImpl");
				Assert.IsTrue(stream2.AddAttribute(typeof(PositionIncrementAttribute)) is PositionIncrementAttributeImpl, "PositionIncrementAttribute is implemented by PositionIncrementAttributeImpl");
				Assert.IsTrue(stream2.AddAttribute(typeof(TypeAttribute)) is TypeAttributeImpl, "TypeAttribute is implemented by TypeAttributeImpl");
				Assert.IsTrue(stream2.AddAttribute(typeof(SenselessAttribute)) is SenselessAttributeImpl, "SenselessAttribute is not implemented by SenselessAttributeImpl");
				
				// try to call old API, this should fail
				try
				{
					stream2.Reset();
					Token reusableToken = new Token();
					while ((reusableToken = stream2.Next(reusableToken)) != null)
					{
						// drain
					}
					Assert.Fail("If only the new API is allowed, this should fail with an UOE");
				}
				catch (System.NotSupportedException uoe)
				{
					Assert.AreEqual("This TokenStream only supports the new Attributes API.", uoe.Message);
				}
				try
				{
					stream2.Reset();
					while (stream2.Next() != null)
					{
						// drain
					}
					Assert.Fail("If only the new API is allowed, this should fail with an UOE");
				}
				catch (System.NotSupportedException uoe)
				{
					Assert.AreEqual("This TokenStream only supports the new Attributes API.", uoe.Message);
				}
				
				// Test if the wrapper API (onlyUseNewAPI==false) uses TokenWrapper
				// as attribute instance.
				// TokenWrapper encapsulates a Token instance that can be exchanged
				// by another Token instance without changing the AttributeImpl instance
				// itself.
				TokenStream.SetOnlyUseNewAPI(false);
				stream2 = new WhitespaceTokenizer(new System.IO.StringReader(doc));
				Assert.IsTrue(stream2.AddAttribute(typeof(TermAttribute)) is TokenWrapper, "TermAttribute is implemented by TokenWrapper");
				Assert.IsTrue(stream2.AddAttribute(typeof(OffsetAttribute)) is TokenWrapper, "OffsetAttribute is implemented by TokenWrapper");
				Assert.IsTrue(stream2.AddAttribute(typeof(Lucene.Net.Analysis.Tokenattributes.FlagsAttribute)) is TokenWrapper, "FlagsAttribute is implemented by TokenWrapper");
				Assert.IsTrue(stream2.AddAttribute(typeof(PayloadAttribute)) is TokenWrapper, "PayloadAttribute is implemented by TokenWrapper");
				Assert.IsTrue(stream2.AddAttribute(typeof(PositionIncrementAttribute)) is TokenWrapper, "PositionIncrementAttribute is implemented by TokenWrapper");
				Assert.IsTrue(stream2.AddAttribute(typeof(TypeAttribute)) is TokenWrapper, "TypeAttribute is implemented by TokenWrapper");
				// This one is not implemented by TokenWrapper:
				Assert.IsTrue(stream2.AddAttribute(typeof(SenselessAttribute)) is SenselessAttributeImpl, "SenselessAttribute is not implemented by SenselessAttributeImpl");
			}
			finally
			{
				// Always restore the flag so later tests are not affected.
				TokenStream.SetOnlyUseNewAPI(false);
			}
		}
        /// <summary>
        /// Verifies behavior while <c>TokenStream.SetOnlyUseNewAPI(true)</c> is in effect:
        /// a chain containing PartOfSpeechTaggingFilter must throw NotSupportedException when
        /// consumed; a chain of WhitespaceTokenizer, LowerCaseFilter and StopFilter must be
        /// consumable and expose the dedicated *Impl attribute classes; and the old
        /// <c>Next(Token)</c> / <c>Next()</c> calls must throw NotSupportedException.
        /// It then switches the flag to false and checks that a fresh tokenizer's standard
        /// attributes are TokenWrapper instances. The flag is reset to false in a finally block.
        /// </summary>
        public virtual void TestOnlyNewAPI()
        {
            TokenStream.SetOnlyUseNewAPI(true);
            try
            {
                // this should fail with UOE
                try
                {
                    TokenStream stream = new WhitespaceTokenizer(new System.IO.StringReader(doc));
                    stream = new PartOfSpeechTaggingFilter(stream);                     // <-- this one is evil!
                    stream = new LowerCaseFilter(stream);
                    stream = new StopFilter(stream, stopwords);
                    while (stream.IncrementToken())
                    {
                        // drain; the exception is expected while consuming
                    }
                    Assert.Fail("If only the new API is allowed, this should fail with an UOE");
                }
                catch (System.NotSupportedException uoe)
                {
                    // AreEqual shows expected vs. actual message on failure, unlike IsTrue(Equals).
                    Assert.AreEqual(typeof(PartOfSpeechTaggingFilter).FullName + " does not implement incrementToken() which is needed for onlyUseNewAPI.", uoe.Message);
                }

                // this should pass, as all core token streams support the new API
                TokenStream stream2 = new WhitespaceTokenizer(new System.IO.StringReader(doc));
                stream2 = new LowerCaseFilter(stream2);
                stream2 = new StopFilter(stream2, stopwords);
                while (stream2.IncrementToken())
                {
                    // drain
                }

                // Test, if all attributes are implemented by their implementation, not Token/TokenWrapper
                Assert.IsTrue(stream2.AddAttribute(typeof(TermAttribute)) is TermAttributeImpl, "TermAttribute is implemented by TermAttributeImpl");
                Assert.IsTrue(stream2.AddAttribute(typeof(OffsetAttribute)) is OffsetAttributeImpl, "OffsetAttribute is implemented by OffsetAttributeImpl");
                Assert.IsTrue(stream2.AddAttribute(typeof(Lucene.Net.Analysis.Tokenattributes.FlagsAttribute)) is FlagsAttributeImpl, "FlagsAttribute is implemented by FlagsAttributeImpl");
                Assert.IsTrue(stream2.AddAttribute(typeof(PayloadAttribute)) is PayloadAttributeImpl, "PayloadAttribute is implemented by PayloadAttributeImpl");
                Assert.IsTrue(stream2.AddAttribute(typeof(PositionIncrementAttribute)) is PositionIncrementAttributeImpl, "PositionIncrementAttribute is implemented by PositionIncrementAttributeImpl");
                Assert.IsTrue(stream2.AddAttribute(typeof(TypeAttribute)) is TypeAttributeImpl, "TypeAttribute is implemented by TypeAttributeImpl");
                Assert.IsTrue(stream2.AddAttribute(typeof(SenselessAttribute)) is SenselessAttributeImpl, "SenselessAttribute is not implemented by SenselessAttributeImpl");

                // try to call old API, this should fail
                try
                {
                    stream2.Reset();
                    Token reusableToken = new Token();
                    while ((reusableToken = stream2.Next(reusableToken)) != null)
                    {
                        // drain
                    }
                    Assert.Fail("If only the new API is allowed, this should fail with an UOE");
                }
                catch (System.NotSupportedException uoe)
                {
                    Assert.AreEqual("This TokenStream only supports the new Attributes API.", uoe.Message);
                }
                try
                {
                    stream2.Reset();
                    while (stream2.Next() != null)
                    {
                        // drain
                    }
                    Assert.Fail("If only the new API is allowed, this should fail with an UOE");
                }
                catch (System.NotSupportedException uoe)
                {
                    Assert.AreEqual("This TokenStream only supports the new Attributes API.", uoe.Message);
                }

                // Test if the wrapper API (onlyUseNewAPI==false) uses TokenWrapper
                // as attribute instance.
                // TokenWrapper encapsulates a Token instance that can be exchanged
                // by another Token instance without changing the AttributeImpl instance
                // itself.
                TokenStream.SetOnlyUseNewAPI(false);
                stream2 = new WhitespaceTokenizer(new System.IO.StringReader(doc));
                Assert.IsTrue(stream2.AddAttribute(typeof(TermAttribute)) is TokenWrapper, "TermAttribute is implemented by TokenWrapper");
                Assert.IsTrue(stream2.AddAttribute(typeof(OffsetAttribute)) is TokenWrapper, "OffsetAttribute is implemented by TokenWrapper");
                Assert.IsTrue(stream2.AddAttribute(typeof(Lucene.Net.Analysis.Tokenattributes.FlagsAttribute)) is TokenWrapper, "FlagsAttribute is implemented by TokenWrapper");
                Assert.IsTrue(stream2.AddAttribute(typeof(PayloadAttribute)) is TokenWrapper, "PayloadAttribute is implemented by TokenWrapper");
                Assert.IsTrue(stream2.AddAttribute(typeof(PositionIncrementAttribute)) is TokenWrapper, "PositionIncrementAttribute is implemented by TokenWrapper");
                Assert.IsTrue(stream2.AddAttribute(typeof(TypeAttribute)) is TokenWrapper, "TypeAttribute is implemented by TokenWrapper");
                // This one is not implemented by TokenWrapper:
                Assert.IsTrue(stream2.AddAttribute(typeof(SenselessAttribute)) is SenselessAttributeImpl, "SenselessAttribute is not implemented by SenselessAttributeImpl");
            }
            finally
            {
                // Always restore the flag so later tests are not affected.
                TokenStream.SetOnlyUseNewAPI(false);
            }
        }
Beispiel #12
0
		/// <summary>
		/// Drains <paramref name="stpf"/>, expecting the English names (from
		/// <c>English.IntToEnglish</c>) of 0, 3, 6, ... 18 followed by end of stream.
		/// </summary>
		/// <param name="stpf">Filter under test; <c>EnablePositionIncrements</c> is set on it here.</param>
		/// <param name="enableIcrements">
		/// Value written to <c>EnablePositionIncrements</c>. If true, each token after the
		/// first must have a position increment of 3; in all other cases the expected increment is 1.
		/// </param>
		/// <remarks>
		/// NOTE(review): presumably the underlying stream holds consecutive numbers and the
		/// skipped ones are stop words - confirm against the caller.
		/// </remarks>
		private void DoTestStopPositons(StopFilter stpf, bool enableIcrements)
		{
			string mode = enableIcrements ? "enabled" : "disabled";
			Log("---> test with enable-increments-" + mode);

			stpf.EnablePositionIncrements = enableIcrements;
			ITermAttribute term = stpf.GetAttribute<ITermAttribute>();
			IPositionIncrementAttribute increment = stpf.GetAttribute<IPositionIncrementAttribute>();

			int number = 0;
			while (number < 20)
			{
				Assert.IsTrue(stpf.IncrementToken());
				Log("Token " + number + ": " + stpf);

				System.String word = English.IntToEnglish(number).Trim();
				Assert.AreEqual(word, term.Term, "expecting token " + number + " to be " + word);

				// An increment of 3 is expected only for non-first tokens when increments are enabled.
				int expectedIncrement = (enableIcrements && number != 0) ? 3 : 1;
				Assert.AreEqual(expectedIncrement, increment.PositionIncrement, "all but first token must have position increment of 3");

				number += 3;
			}

			Assert.IsFalse(stpf.IncrementToken());
		}