// Feeds "Now is The Time" through a StopFilter whose stop words are
// "is", "the" and "Time", and expects "Now" to be the only token emitted.
// NOTE(review): the trailing `true` constructor argument is presumably the
// ignore-case switch (which would explain why "The" is dropped) -- confirm
// against the StopFilter constructor.
public virtual void TestIgnoreCase()
{
    System.String[] stopWords = { "is", "the", "Time" };
    System.IO.StringReader input = new System.IO.StringReader("Now is The Time");
    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(input);

    TokenStream filtered = new StopFilter(false, tokenizer, stopWords, true);
    TermAttribute term = (TermAttribute) filtered.GetAttribute(typeof(TermAttribute));

    // First token must exist and be "Now".
    bool hasFirst = filtered.IncrementToken();
    Assert.IsTrue(hasFirst);
    Assert.AreEqual("Now", term.Term());

    // Nothing else may come out of the filter.
    bool hasSecond = filtered.IncrementToken();
    Assert.IsFalse(hasSecond);
}
// Runs the text "Now is The Time" through a whitespace tokenizer wrapped in a
// StopFilter (stop words: "is", "the", "Time") and asserts that exactly one
// token, "Now", is produced.
// NOTE(review): the last constructor argument (`true`) looks like the
// ignore-case flag given the test name -- verify against StopFilter.
public virtual void TestIgnoreCase()
{
    System.IO.StringReader text = new System.IO.StringReader("Now is The Time");
    System.String[] stopWords = { "is", "the", "Time" };

    WhitespaceTokenizer source = new WhitespaceTokenizer(text);
    TokenStream stopFiltered = new StopFilter(false, source, stopWords, true);
    TermAttribute currentTerm = (TermAttribute) stopFiltered.GetAttribute(typeof(TermAttribute));

    // Exactly one surviving token is expected.
    bool advanced = stopFiltered.IncrementToken();
    Assert.IsTrue(advanced);
    Assert.AreEqual("Now", currentTerm.Term());

    advanced = stopFiltered.IncrementToken();
    Assert.IsFalse(advanced);
}
// Builds a stop-word set via SetFactory.GetSet, filters "Now is The Time"
// with it, and asserts that "Now" is the only token that gets through.
// NOTE(review): the final `true` passed to StopFilter is presumably the
// ignore-case flag (per the test name) -- confirm against the constructor.
public virtual void TestIgnoreCase()
{
    // Stop words are collected into a project-provided set implementation.
    var stopWords = Support.Compatibility.SetFactory.GetSet<string>();
    stopWords.UnionWith(new[] { "is", "the", "Time" });

    System.IO.StringReader input = new System.IO.StringReader("Now is The Time");
    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(input);
    TokenStream filtered = new StopFilter(false, tokenizer, stopWords, true);
    ITermAttribute term = filtered.GetAttribute<ITermAttribute>();

    bool hasFirst = filtered.IncrementToken();
    Assert.IsTrue(hasFirst);
    Assert.AreEqual("Now", term.Term);

    bool hasSecond = filtered.IncrementToken();
    Assert.IsFalse(hasSecond);
}
// Filters "Now is The Time" with a stop set made from "is", "the" and "Time"
// via StopFilter.MakeStopSet. The test expects "Now" and "The" to survive
// (in that order) and nothing else.
public virtual void TestStopFilt()
{
    System.String[] stopWords = { "is", "the", "Time" };
    var stopSet = StopFilter.MakeStopSet(stopWords);

    System.IO.StringReader input = new System.IO.StringReader("Now is The Time");
    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(input);
    TokenStream filtered = new StopFilter(false, tokenizer, stopSet);
    ITermAttribute term = filtered.GetAttribute<ITermAttribute>();

    // First surviving token.
    bool advanced = filtered.IncrementToken();
    Assert.IsTrue(advanced);
    Assert.AreEqual("Now", term.Term);

    // Second surviving token: capitalised "The" is expected to pass through.
    advanced = filtered.IncrementToken();
    Assert.IsTrue(advanced);
    Assert.AreEqual("The", term.Term);

    // The stream must now be exhausted.
    advanced = filtered.IncrementToken();
    Assert.IsFalse(advanced);
}
// Checks the StopFilter overload that takes a prebuilt stop set: with stop
// words "is", "the" and "Time" applied to "Now is The Time", the expected
// output is the two tokens "Now" and "The".
public virtual void TestStopFilt()
{
    System.IO.StringReader text = new System.IO.StringReader("Now is The Time");

    System.String[] stopWords = { "is", "the", "Time" };
    var stopSet = StopFilter.MakeStopSet(stopWords);

    WhitespaceTokenizer source = new WhitespaceTokenizer(text);
    TokenStream stopFiltered = new StopFilter(false, source, stopSet);
    ITermAttribute currentTerm = stopFiltered.GetAttribute<ITermAttribute>();

    bool hasToken = stopFiltered.IncrementToken();
    Assert.IsTrue(hasToken);
    Assert.AreEqual("Now", currentTerm.Term);

    hasToken = stopFiltered.IncrementToken();
    Assert.IsTrue(hasToken);
    Assert.AreEqual("The", currentTerm.Term);

    // No third token is expected.
    hasToken = stopFiltered.IncrementToken();
    Assert.IsFalse(hasToken);
}
// Builds a stop-word set via SetFactory.CreateHashSet, filters
// "Now is The Time" with it, and asserts that only "Now" is emitted.
// NOTE(review): the final `true` passed to StopFilter is presumably the
// ignore-case flag (per the test name) -- confirm against the constructor.
public virtual void TestIgnoreCase()
{
    var stopWords = Support.Compatibility.SetFactory.CreateHashSet<string>();
    stopWords.UnionWith(new[] { "is", "the", "Time" });

    System.IO.StringReader text = new System.IO.StringReader("Now is The Time");
    WhitespaceTokenizer source = new WhitespaceTokenizer(text);
    TokenStream stopFiltered = new StopFilter(false, source, stopWords, true);
    ITermAttribute currentTerm = stopFiltered.GetAttribute<ITermAttribute>();

    // Exactly one token is expected to survive.
    bool advanced = stopFiltered.IncrementToken();
    Assert.IsTrue(advanced);
    Assert.AreEqual("Now", currentTerm.Term);

    advanced = stopFiltered.IncrementToken();
    Assert.IsFalse(advanced);
}
// Drives the given StopFilter and checks terms and position increments.
//
// stpf:            filter under test; its EnablePositionIncrements property is
//                  overwritten with enableIcrements.
// enableIcrements: when true, every token after the first is expected to carry
//                  a position increment of 3; otherwise every token is
//                  expected to carry 1.
//
// The filter is expected to emit exactly the English spellings (as produced by
// English.IntToEnglish, trimmed) of 0, 3, 6, ..., 18 and then end.
private void DoTestStopPositons(StopFilter stpf, bool enableIcrements)
{
    System.String mode;
    if (enableIcrements)
    {
        mode = "enabled";
    }
    else
    {
        mode = "disabled";
    }
    Log("---> test with enable-increments-" + mode);

    stpf.EnablePositionIncrements = enableIcrements;
    ITermAttribute termAtt = stpf.GetAttribute<ITermAttribute>();
    IPositionIncrementAttribute posIncrAtt = stpf.GetAttribute<IPositionIncrementAttribute>();

    int number = 0;
    while (number < 20)
    {
        Assert.IsTrue(stpf.IncrementToken());
        Log("Token " + number + ": " + stpf);

        System.String expectedTerm = English.IntToEnglish(number).Trim();
        Assert.AreEqual(expectedTerm, termAtt.Term, "expecting token " + number + " to be " + expectedTerm);

        // Only non-first tokens get the larger increment, and only when
        // position increments are enabled.
        int expectedIncrement = 1;
        if (enableIcrements && number != 0)
        {
            expectedIncrement = 3;
        }
        Assert.AreEqual(expectedIncrement, posIncrAtt.PositionIncrement, "all but first token must have position increment of 3");

        number += 3;
    }

    Assert.IsFalse(stpf.IncrementToken());
}
// Builds the chain WhitespaceTokenizer -> AnonymousClassTokenFilter ->
// LowerCaseFilter -> StopFilter over `doc` and drains it. The test passes only
// if a System.NotSupportedException is raised whose message ends with the
// "does not implement any of ..." text; reaching Assert.Fail means no
// exception was thrown.
// NOTE(review): presumably AnonymousClassTokenFilter is the filter that
// overrides none of the required methods -- confirm against its definition.
public virtual void TestOverridesAny()
{
    try
    {
        TokenStream stream = new WhitespaceTokenizer(new System.IO.StringReader(doc));
        stream = new AnonymousClassTokenFilter(this, stream);
        stream = new LowerCaseFilter(stream);
        stream = new StopFilter(stream, stopwords);

        while (stream.IncrementToken())
        {
            // Intentionally empty: only consuming the stream matters here.
        }

        Assert.Fail("One TokenFilter does not override any of the required methods, so it should fail.");
    }
    catch (System.NotSupportedException uoe)
    {
        // Ordinal comparison: the suffix is a fixed technical string, so the
        // check must not depend on the current thread culture.
        Assert.IsTrue(uoe.Message.EndsWith("does not implement any of incrementToken(), next(Token), next().", System.StringComparison.Ordinal));
    }
}
// Wraps a WhitespaceTokenizer over `doc` in AnonymousClassTokenFilter,
// LowerCaseFilter and StopFilter (in that order) and consumes the result.
// A System.NotSupportedException with a message ending in the
// "does not implement any of ..." text is the expected outcome; if the stream
// drains without throwing, the test fails explicitly.
// NOTE(review): presumably AnonymousClassTokenFilter is the filter that
// overrides none of the required methods -- confirm against its definition.
public virtual void TestOverridesAny()
{
    try
    {
        TokenStream stream = new WhitespaceTokenizer(new System.IO.StringReader(doc));
        stream = new AnonymousClassTokenFilter(this, stream);
        stream = new LowerCaseFilter(stream);
        stream = new StopFilter(stream, stopwords);

        while (stream.IncrementToken())
        {
            // Intentionally empty: the loop exists only to drain the stream.
        }

        Assert.Fail("One TokenFilter does not override any of the required methods, so it should fail.");
    }
    catch (System.NotSupportedException uoe)
    {
        // Compare ordinally so the result is independent of the current culture.
        Assert.IsTrue(uoe.Message.EndsWith("does not implement any of incrementToken(), next(Token), next().", System.StringComparison.Ordinal));
    }
}
// Exercises TokenStream.SetOnlyUseNewAPI in four phases:
//   1. with the flag on, a chain containing PartOfSpeechTaggingFilter must
//      throw System.NotSupportedException with a specific message;
//   2. with the flag on, a chain of WhitespaceTokenizer/LowerCaseFilter/
//      StopFilter must drain, and its attributes must be the dedicated
//      *AttributeImpl instances;
//   3. with the flag on, both old-style Next overloads must throw;
//   4. with the flag off, the standard attributes must be TokenWrapper
//      instances while SenselessAttribute still uses SenselessAttributeImpl.
// The flag is set back to false in the finally block.
public virtual void TestOnlyNewAPI()
{
    TokenStream.SetOnlyUseNewAPI(true);
    try
    {
        // this should fail with UOE
        try
        {
            TokenStream stream = new WhitespaceTokenizer(new System.IO.StringReader(doc));
            stream = new PartOfSpeechTaggingFilter(stream); // <-- this one is evil!
            stream = new LowerCaseFilter(stream);
            stream = new StopFilter(stream, stopwords);
            while (stream.IncrementToken())
            {
                // Intentionally empty: only consuming the stream matters.
            }
            Assert.Fail("If only the new API is allowed, this should fail with an UOE");
        }
        catch (System.NotSupportedException uoe)
        {
            // AreEqual (rather than IsTrue(x.Equals(y))) reports the actual message on failure.
            Assert.AreEqual(typeof(PartOfSpeechTaggingFilter).FullName + " does not implement incrementToken() which is needed for onlyUseNewAPI.", uoe.Message);
        }

        // this should pass, as all core token streams support the new API
        TokenStream stream2 = new WhitespaceTokenizer(new System.IO.StringReader(doc));
        stream2 = new LowerCaseFilter(stream2);
        stream2 = new StopFilter(stream2, stopwords);
        while (stream2.IncrementToken())
        {
            // Intentionally empty: only consuming the stream matters.
        }

        // Test, if all attributes are implemented by their implementation, not Token/TokenWrapper
        Assert.IsTrue(stream2.AddAttribute(typeof(TermAttribute)) is TermAttributeImpl, "TermAttribute is implemented by TermAttributeImpl");
        Assert.IsTrue(stream2.AddAttribute(typeof(OffsetAttribute)) is OffsetAttributeImpl, "OffsetAttribute is implemented by OffsetAttributeImpl");
        Assert.IsTrue(stream2.AddAttribute(typeof(Lucene.Net.Analysis.Tokenattributes.FlagsAttribute)) is FlagsAttributeImpl, "FlagsAttribute is implemented by FlagsAttributeImpl");
        Assert.IsTrue(stream2.AddAttribute(typeof(PayloadAttribute)) is PayloadAttributeImpl, "PayloadAttribute is implemented by PayloadAttributeImpl");
        Assert.IsTrue(stream2.AddAttribute(typeof(PositionIncrementAttribute)) is PositionIncrementAttributeImpl, "PositionIncrementAttribute is implemented by PositionIncrementAttributeImpl");
        Assert.IsTrue(stream2.AddAttribute(typeof(TypeAttribute)) is TypeAttributeImpl, "TypeAttribute is implemented by TypeAttributeImpl");
        Assert.IsTrue(stream2.AddAttribute(typeof(SenselessAttribute)) is SenselessAttributeImpl, "SenselessAttribute is not implemented by SenselessAttributeImpl");

        // try to call old API, this should fail
        try
        {
            stream2.Reset();
            Token reusableToken = new Token();
            while ((reusableToken = stream2.Next(reusableToken)) != null)
            {
                // Intentionally empty.
            }
            Assert.Fail("If only the new API is allowed, this should fail with an UOE");
        }
        catch (System.NotSupportedException uoe)
        {
            Assert.AreEqual("This TokenStream only supports the new Attributes API.", uoe.Message);
        }

        try
        {
            stream2.Reset();
            while (stream2.Next() != null)
            {
                // Intentionally empty.
            }
            Assert.Fail("If only the new API is allowed, this should fail with an UOE");
        }
        catch (System.NotSupportedException uoe)
        {
            Assert.AreEqual("This TokenStream only supports the new Attributes API.", uoe.Message);
        }

        // Test if the wrapper API (onlyUseNewAPI==false) uses TokenWrapper
        // as attribute instance.
        // TokenWrapper encapsulates a Token instance that can be exchanged
        // by another Token instance without changing the AttributeImpl instance
        // itself.
        TokenStream.SetOnlyUseNewAPI(false);
        stream2 = new WhitespaceTokenizer(new System.IO.StringReader(doc));

        Assert.IsTrue(stream2.AddAttribute(typeof(TermAttribute)) is TokenWrapper, "TermAttribute is implemented by TokenWrapper");
        Assert.IsTrue(stream2.AddAttribute(typeof(OffsetAttribute)) is TokenWrapper, "OffsetAttribute is implemented by TokenWrapper");
        Assert.IsTrue(stream2.AddAttribute(typeof(Lucene.Net.Analysis.Tokenattributes.FlagsAttribute)) is TokenWrapper, "FlagsAttribute is implemented by TokenWrapper");
        Assert.IsTrue(stream2.AddAttribute(typeof(PayloadAttribute)) is TokenWrapper, "PayloadAttribute is implemented by TokenWrapper");
        Assert.IsTrue(stream2.AddAttribute(typeof(PositionIncrementAttribute)) is TokenWrapper, "PositionIncrementAttribute is implemented by TokenWrapper");
        Assert.IsTrue(stream2.AddAttribute(typeof(TypeAttribute)) is TokenWrapper, "TypeAttribute is implemented by TokenWrapper");

        // This one is not implemented by TokenWrapper:
        Assert.IsTrue(stream2.AddAttribute(typeof(SenselessAttribute)) is SenselessAttributeImpl, "SenselessAttribute is not implemented by SenselessAttributeImpl");
    }
    finally
    {
        // Always leave the static switch off so later tests are not affected.
        TokenStream.SetOnlyUseNewAPI(false);
    }
}
// Verifies the behaviour toggled by TokenStream.SetOnlyUseNewAPI:
//   - flag on: a chain that includes PartOfSpeechTaggingFilter must throw
//     System.NotSupportedException with the expected message;
//   - flag on: a WhitespaceTokenizer/LowerCaseFilter/StopFilter chain drains
//     and exposes the dedicated *AttributeImpl attribute instances;
//   - flag on: calling either old-style Next overload throws;
//   - flag off: the standard attributes are TokenWrapper instances, while
//     SenselessAttribute is still backed by SenselessAttributeImpl.
// The finally block resets the flag to false.
public virtual void TestOnlyNewAPI()
{
    TokenStream.SetOnlyUseNewAPI(true);
    try
    {
        // this should fail with UOE
        try
        {
            TokenStream stream = new WhitespaceTokenizer(new System.IO.StringReader(doc));
            stream = new PartOfSpeechTaggingFilter(stream); // <-- this one is evil!
            stream = new LowerCaseFilter(stream);
            stream = new StopFilter(stream, stopwords);
            while (stream.IncrementToken())
            {
                // Intentionally empty: the loop only drains the stream.
            }
            Assert.Fail("If only the new API is allowed, this should fail with an UOE");
        }
        catch (System.NotSupportedException uoe)
        {
            // AreEqual shows expected vs. actual text when the message differs.
            Assert.AreEqual(typeof(PartOfSpeechTaggingFilter).FullName + " does not implement incrementToken() which is needed for onlyUseNewAPI.", uoe.Message);
        }

        // this should pass, as all core token streams support the new API
        TokenStream stream2 = new WhitespaceTokenizer(new System.IO.StringReader(doc));
        stream2 = new LowerCaseFilter(stream2);
        stream2 = new StopFilter(stream2, stopwords);
        while (stream2.IncrementToken())
        {
            // Intentionally empty: the loop only drains the stream.
        }

        // Test, if all attributes are implemented by their implementation, not Token/TokenWrapper
        Assert.IsTrue(stream2.AddAttribute(typeof(TermAttribute)) is TermAttributeImpl, "TermAttribute is implemented by TermAttributeImpl");
        Assert.IsTrue(stream2.AddAttribute(typeof(OffsetAttribute)) is OffsetAttributeImpl, "OffsetAttribute is implemented by OffsetAttributeImpl");
        Assert.IsTrue(stream2.AddAttribute(typeof(Lucene.Net.Analysis.Tokenattributes.FlagsAttribute)) is FlagsAttributeImpl, "FlagsAttribute is implemented by FlagsAttributeImpl");
        Assert.IsTrue(stream2.AddAttribute(typeof(PayloadAttribute)) is PayloadAttributeImpl, "PayloadAttribute is implemented by PayloadAttributeImpl");
        Assert.IsTrue(stream2.AddAttribute(typeof(PositionIncrementAttribute)) is PositionIncrementAttributeImpl, "PositionIncrementAttribute is implemented by PositionIncrementAttributeImpl");
        Assert.IsTrue(stream2.AddAttribute(typeof(TypeAttribute)) is TypeAttributeImpl, "TypeAttribute is implemented by TypeAttributeImpl");
        Assert.IsTrue(stream2.AddAttribute(typeof(SenselessAttribute)) is SenselessAttributeImpl, "SenselessAttribute is not implemented by SenselessAttributeImpl");

        // try to call old API, this should fail
        try
        {
            stream2.Reset();
            Token reusableToken = new Token();
            while ((reusableToken = stream2.Next(reusableToken)) != null)
            {
                // Intentionally empty.
            }
            Assert.Fail("If only the new API is allowed, this should fail with an UOE");
        }
        catch (System.NotSupportedException uoe)
        {
            Assert.AreEqual("This TokenStream only supports the new Attributes API.", uoe.Message);
        }

        try
        {
            stream2.Reset();
            while (stream2.Next() != null)
            {
                // Intentionally empty.
            }
            Assert.Fail("If only the new API is allowed, this should fail with an UOE");
        }
        catch (System.NotSupportedException uoe)
        {
            Assert.AreEqual("This TokenStream only supports the new Attributes API.", uoe.Message);
        }

        // Test if the wrapper API (onlyUseNewAPI==false) uses TokenWrapper
        // as attribute instance.
        // TokenWrapper encapsulates a Token instance that can be exchanged
        // by another Token instance without changing the AttributeImpl instance
        // itself.
        TokenStream.SetOnlyUseNewAPI(false);
        stream2 = new WhitespaceTokenizer(new System.IO.StringReader(doc));

        Assert.IsTrue(stream2.AddAttribute(typeof(TermAttribute)) is TokenWrapper, "TermAttribute is implemented by TokenWrapper");
        Assert.IsTrue(stream2.AddAttribute(typeof(OffsetAttribute)) is TokenWrapper, "OffsetAttribute is implemented by TokenWrapper");
        Assert.IsTrue(stream2.AddAttribute(typeof(Lucene.Net.Analysis.Tokenattributes.FlagsAttribute)) is TokenWrapper, "FlagsAttribute is implemented by TokenWrapper");
        Assert.IsTrue(stream2.AddAttribute(typeof(PayloadAttribute)) is TokenWrapper, "PayloadAttribute is implemented by TokenWrapper");
        Assert.IsTrue(stream2.AddAttribute(typeof(PositionIncrementAttribute)) is TokenWrapper, "PositionIncrementAttribute is implemented by TokenWrapper");
        Assert.IsTrue(stream2.AddAttribute(typeof(TypeAttribute)) is TokenWrapper, "TypeAttribute is implemented by TokenWrapper");

        // This one is not implemented by TokenWrapper:
        Assert.IsTrue(stream2.AddAttribute(typeof(SenselessAttribute)) is SenselessAttributeImpl, "SenselessAttribute is not implemented by SenselessAttributeImpl");
    }
    finally
    {
        // Always leave the static switch off so later tests are not affected.
        TokenStream.SetOnlyUseNewAPI(false);
    }
}
// Shared checker for the stop-position tests.
//
// stpf:            the StopFilter to pull tokens from; its
//                  EnablePositionIncrements property is set to enableIcrements.
// enableIcrements: selects the expected position increment -- 3 for every
//                  token but the first when true, 1 for all tokens when false.
//
// Expects the filter to yield the trimmed English.IntToEnglish spellings of
// 0, 3, 6, ..., 18 in order, followed by end of stream.
private void DoTestStopPositons(StopFilter stpf, bool enableIcrements)
{
    System.String state = "disabled";
    if (enableIcrements)
    {
        state = "enabled";
    }
    Log("---> test with enable-increments-" + state);

    stpf.EnablePositionIncrements = enableIcrements;
    ITermAttribute termAtt = stpf.GetAttribute<ITermAttribute>();
    IPositionIncrementAttribute posIncrAtt = stpf.GetAttribute<IPositionIncrementAttribute>();

    for (int value = 0; value < 20; value += 3)
    {
        bool advanced = stpf.IncrementToken();
        Assert.IsTrue(advanced);
        Log("Token " + value + ": " + stpf);

        System.String word = English.IntToEnglish(value).Trim();
        Assert.AreEqual(word, termAtt.Term, "expecting token " + value + " to be " + word);

        // The first token always has increment 1; later ones have 3 only
        // when position increments are enabled.
        int expectedIncrement;
        if (!enableIcrements || value == 0)
        {
            expectedIncrement = 1;
        }
        else
        {
            expectedIncrement = 3;
        }
        Assert.AreEqual(expectedIncrement, posIncrAtt.PositionIncrement, "all but first token must have position increment of 3");
    }

    bool hasMore = stpf.IncrementToken();
    Assert.IsFalse(hasMore);
}