/// <summary>
/// Filters the whitespace-tokenized text "Now is The Time" through a StopFilter
/// built from the stop words "is", "the" and "Time", and expects exactly the
/// tokens "Now" and "The" to come out. "The" must survive even though "the"
/// is a stop word, i.e. the comparison is expected to be case-exact.
/// </summary>
public virtual void TestExactCase()
{
    System.String[] stopList = new System.String[] { "is", "the", "Time" };
    System.IO.StringReader input = new System.IO.StringReader("Now is The Time");

    // NOTE(review): the leading 'false' is presumably the enable-position-increments flag -- confirm against StopFilter.
    TokenStream filtered = new StopFilter(false, new WhitespaceTokenizer(input), stopList);
    TermAttribute term = (TermAttribute)filtered.GetAttribute(typeof(TermAttribute));

    // Tokens expected to pass the filter, in stream order.
    System.String[] survivors = new System.String[] { "Now", "The" };
    foreach (System.String survivor in survivors)
    {
        Assert.IsTrue(filtered.IncrementToken());
        Assert.AreEqual(survivor, term.Term());
    }

    // Nothing may follow the last surviving token.
    Assert.IsFalse(filtered.IncrementToken());
}
/// <summary>
/// Runs a whitespace-tokenized sentence through a LengthFilter constructed with
/// the bounds 2 and 6 and expects only "short", "ab" and "foo" to be emitted.
/// The one-character token "a" and the longer tokens must be dropped, while the
/// two-character token "ab" must be kept.
/// </summary>
public virtual void TestFilter()
{
    System.IO.StringReader input = new System.IO.StringReader("short toolong evenmuchlongertext a ab toolong foo");
    TokenStream tokenizer = new WhitespaceTokenizer(input);
    LengthFilter filter = new LengthFilter(tokenizer, 2, 6);
    TermAttribute term = (TermAttribute)filter.GetAttribute(typeof(TermAttribute));

    // Tokens expected to pass the length bounds, in stream order.
    System.String[] kept = new System.String[] { "short", "ab", "foo" };
    foreach (System.String expected in kept)
    {
        Assert.IsTrue(filter.IncrementToken());
        Assert.AreEqual(expected, term.Term());
    }

    // The stream must be exhausted after the last kept token.
    Assert.IsFalse(filter.IncrementToken());
}
/// <summary>
/// With the StopFilter position-increment default switched on, analyzes a
/// sentence with a StopAnalyzer built from the stop words "good", "test" and
/// "analyzer". Checks that no emitted term is a stop word and that each emitted
/// token carries the position increment listed in the expected table.
/// The previous default is restored afterwards, even if an assertion fails.
/// </summary>
public virtual void TestStopListPositions()
{
    // Remember the process-wide default so it can be restored in the finally block.
    bool previousDefault = StopFilter.GetEnablePositionIncrementsDefault();
    StopFilter.SetEnablePositionIncrementsDefault(true);
    try
    {
        System.Collections.Hashtable stopWordsSet = new System.Collections.Hashtable();
        foreach (System.String word in new System.String[] { "good", "test", "analyzer" })
        {
            stopWordsSet.Add(word, word);
        }

        StopAnalyzer analyzer = new StopAnalyzer(stopWordsSet);
        System.IO.StringReader input = new System.IO.StringReader("This is a good test of the english stop analyzer with positions");

        // Expected increment of each emitted token; values above 1 mark gaps left by removed stop words.
        int[] expectedIncr = new int[] { 1, 1, 1, 3, 1, 1, 1, 2, 1 };

        TokenStream stream = analyzer.TokenStream("test", input);
        Assert.IsNotNull(stream);

        TermAttribute term = (TermAttribute)stream.GetAttribute(typeof(TermAttribute));
        PositionIncrementAttribute posIncr = (PositionIncrementAttribute)stream.AddAttribute(typeof(PositionIncrementAttribute));

        int index = 0;
        while (stream.IncrementToken())
        {
            System.String emitted = term.Term();
            Assert.IsFalse(stopWordsSet.Contains(emitted));
            Assert.AreEqual(expectedIncr[index], posIncr.GetPositionIncrement());
            index++;
        }
    }
    finally
    {
        StopFilter.SetEnablePositionIncrementsDefault(previousDefault);
    }
}
/// <summary>
/// Builds the term vector from the tokens that <paramref name="analyzer"/>
/// produces for <paramref name="queryString"/>.
/// </summary>
/// <param name="queryString">Text to tokenize; it is wrapped in a StringReader and handed to the analyzer.</param>
/// <param name="analyzer">
/// Analyzer used to tokenize the text. When it is null, or when it returns a
/// null token stream, nothing is processed.
/// </param>
public QueryTermVector(System.String queryString, Analyzer analyzer)
{
    if (analyzer == null)
    {
        return;
    }

    TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(queryString));
    if (stream == null)
    {
        return;
    }

    List<string> terms = new List<string>();
    try
    {
        stream.Reset();
        TermAttribute termAtt = (TermAttribute)stream.AddAttribute(typeof(TermAttribute));

        // Collect the text of every token the stream yields.
        while (stream.IncrementToken())
        {
            terms.Add(termAtt.Term());
        }

        ProcessTerms(terms.ToArray());
    }
    catch (System.IO.IOException)
    {
        // Deliberately best-effort: an I/O failure while tokenizing is not
        // propagated to the caller. Because ProcessTerms is called inside the
        // try block, a failure means no terms are processed at all.
    }
}
/// <summary>
/// Checks that a PerFieldAnalyzerWrapper uses its default analyzer
/// (WhitespaceAnalyzer) for an unregistered field and the analyzer registered
/// through AddAnalyzer (SimpleAnalyzer) for the field "special": the first token
/// of "Qwerty" must keep its case for "field" and be lower-cased for "special".
/// </summary>
public virtual void TestPerField()
{
    System.String text = "Qwerty";

    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer());
    wrapper.AddAnalyzer("special", new SimpleAnalyzer());

    // Field without a dedicated analyzer: falls back to the wrapper's default.
    TokenStream defaultStream = wrapper.TokenStream("field", new System.IO.StringReader(text));
    TermAttribute defaultTerm = (TermAttribute)defaultStream.GetAttribute(typeof(TermAttribute));
    Assert.IsTrue(defaultStream.IncrementToken());
    Assert.AreEqual("Qwerty", defaultTerm.Term(), "WhitespaceAnalyzer does not lowercase");

    // Field with its own analyzer registered above.
    TokenStream specialStream = wrapper.TokenStream("special", new System.IO.StringReader(text));
    TermAttribute specialTerm = (TermAttribute)specialStream.GetAttribute(typeof(TermAttribute));
    Assert.IsTrue(specialStream.IncrementToken());
    Assert.AreEqual("qwerty", specialTerm.Term(), "SimpleAnalyzer lowercases");
}
/// <summary>
/// Advances to the next token of the wrapped input that is not the term "the".
/// The term "quick" is emitted with a position increment of 2, every other
/// emitted term with a position increment of 1.
/// </summary>
/// <returns>true when a token was emitted; false once the input is exhausted.</returns>
public override bool IncrementToken()
{
    while (input.IncrementToken())
    {
        System.String current = termAtt.Term();

        // "the" is treated as a stop word: skip it and pull the next token.
        if (current.Equals("the"))
        {
            continue;
        }

        int increment = current.Equals("quick") ? 2 : 1;
        posIncrAtt.SetPositionIncrement(increment);
        return true;
    }

    return false;
}
/// <summary>
/// Analyzes a sample sentence with the fixture's <c>stop</c> analyzer and
/// checks that none of the emitted terms is contained in <c>inValidTokens</c>.
/// </summary>
public virtual void TestDefaults()
{
    // IsNotNull reports a clearer failure than IsTrue(x != null).
    Assert.IsNotNull(stop);

    System.IO.StringReader reader = new System.IO.StringReader("This is a test of the english stop analyzer");
    TokenStream stream = stop.TokenStream("test", reader);
    Assert.IsNotNull(stream);

    TermAttribute termAtt = (TermAttribute)stream.GetAttribute(typeof(TermAttribute));
    while (stream.IncrementToken())
    {
        Assert.IsFalse(inValidTokens.Contains(termAtt.Term()));
    }
}
/// <summary>
/// Feeds the fixture value <c>ivalue</c> into a NumericTokenStream and checks,
/// for every shift from 0 up to (but excluding) 32 in steps of
/// NumericUtils.PRECISION_STEP_DEFAULT, that a token is produced whose term is
/// the prefix-coded value for that shift and whose type is the full-precision
/// type for shift 0 and the lower-precision type otherwise. No token may follow.
/// </summary>
public virtual void TestIntStream()
{
    NumericTokenStream stream = new NumericTokenStream().SetIntValue(ivalue);

    // GetAttribute (not AddAttribute) is used on purpose: it verifies that the
    // stream really carries these attributes; per the original comment it throws if one is missing.
    TermAttribute term = (TermAttribute)stream.GetAttribute(typeof(TermAttribute));
    TypeAttribute type = (TypeAttribute)stream.GetAttribute(typeof(TypeAttribute));

    int shift = 0;
    while (shift < 32)
    {
        Assert.IsTrue(stream.IncrementToken(), "New token is available");

        System.String expectedTerm = NumericUtils.IntToPrefixCoded(ivalue, shift);
        Assert.AreEqual(expectedTerm, term.Term(), "Term is correctly encoded");

        // Only the first token (shift 0) is expected to carry the full-precision type.
        System.String expectedType;
        if (shift == 0)
        {
            expectedType = NumericTokenStream.TOKEN_TYPE_FULL_PREC;
        }
        else
        {
            expectedType = NumericTokenStream.TOKEN_TYPE_LOWER_PREC;
        }
        Assert.AreEqual(expectedType, type.Type(), "Type correct");

        shift += NumericUtils.PRECISION_STEP_DEFAULT;
    }

    Assert.IsFalse(stream.IncrementToken(), "No more tokens available");
}
/// <summary>
/// Asserts that <paramref name="stream"/> yields exactly the terms stored in the
/// fixture's <c>tokens</c> array: same order, same text, same count.
/// </summary>
/// <param name="stream">Token stream to drain and compare.</param>
private void checkTokens(TokenStream stream)
{
    TermAttribute term = (TermAttribute)stream.GetAttribute(typeof(TermAttribute));
    Assert.IsNotNull(term);

    int seen = 0;
    while (stream.IncrementToken())
    {
        // Guard the index first so an extra token fails as an assertion, not as an out-of-range access.
        Assert.IsTrue(seen < tokens.Length);
        Assert.AreEqual(tokens[seen], term.Term());
        seen += 1;
    }

    // The stream must not end early either.
    Assert.AreEqual(tokens.Length, seen);
}
/// <summary>
/// Drains <paramref name="stpf"/> and checks, for i = 0, 3, 6, ... below 20,
/// that the next token equals the trimmed result of English.IntToEnglish(i)
/// and carries the expected position increment: 1 for every token when
/// increments are disabled; 1 for the first token and 3 for the rest when
/// they are enabled. No further token may follow.
/// </summary>
/// <param name="stpf">Stop filter under test; its enable-position-increments flag is set here.</param>
/// <param name="enableIcrements">Value passed to SetEnablePositionIncrements and used to pick the expected increments.</param>
private void DoTestStopPositons(StopFilter stpf, bool enableIcrements)
{
    System.String mode = enableIcrements ? "enabled" : "disabled";
    Log("---> test with enable-increments-" + mode);
    stpf.SetEnablePositionIncrements(enableIcrements);

    TermAttribute term = (TermAttribute)stpf.GetAttribute(typeof(TermAttribute));
    PositionIncrementAttribute posIncr = (PositionIncrementAttribute)stpf.GetAttribute(typeof(PositionIncrementAttribute));

    int i = 0;
    while (i < 20)
    {
        Assert.IsTrue(stpf.IncrementToken());
        Log("Token " + i + ": " + stpf);

        System.String expectedWord = English.IntToEnglish(i).Trim();
        Assert.AreEqual(expectedWord, term.Term(), "expecting token " + i + " to be " + expectedWord);

        // Only a non-first token with increments enabled is expected to report a gap of 3.
        int expectedIncrement = 1;
        if (enableIcrements && i != 0)
        {
            expectedIncrement = 3;
        }
        Assert.AreEqual(expectedIncrement, posIncr.GetPositionIncrement(), "all but first token must have position increment of 3");

        i += 3;
    }

    Assert.IsFalse(stpf.IncrementToken());
}
/// <summary>
/// Analyzes a sample sentence with a StopAnalyzer built from the stop words
/// "good", "test" and "analyzer" and checks that no emitted term is one of
/// those stop words and that every emitted token has a position increment of 1.
/// </summary>
public virtual void TestStopList()
{
    System.Collections.Hashtable stopWordsSet = new System.Collections.Hashtable();
    foreach (System.String word in new System.String[] { "good", "test", "analyzer" })
    {
        stopWordsSet.Add(word, word);
    }

    StopAnalyzer analyzer = new StopAnalyzer(stopWordsSet);
    System.IO.StringReader input = new System.IO.StringReader("This is a good test of the english stop analyzer");

    TokenStream stream = analyzer.TokenStream("test", input);
    Assert.IsNotNull(stream);

    TermAttribute term = (TermAttribute)stream.GetAttribute(typeof(TermAttribute));
    PositionIncrementAttribute posIncr = (PositionIncrementAttribute)stream.AddAttribute(typeof(PositionIncrementAttribute));

    while (stream.IncrementToken())
    {
        System.String emitted = term.Term();
        Assert.IsFalse(stopWordsSet.Contains(emitted));

        // By default the stop tokenizer does not apply increments, so every token reports 1.
        Assert.AreEqual(1, posIncr.GetPositionIncrement());
    }
}
/// <summary>
/// Emits the tokens of the wrapped input and, after a token whose term is
/// "multi" or "triplemulti", injects one or two extra tokens respectively.
/// Each injected token has the term "multi" followed by the pending counter
/// plus one, reuses the offsets and type of the triggering token, and has a
/// position increment of 0.
/// </summary>
/// <returns>true when a token (real or injected) is available; false when the input is exhausted.</returns>
public override bool IncrementToken()
{
    // Pending injected tokens take priority over reading the input.
    if (Lucene.Net.QueryParsers.TestMultiAnalyzer.multiToken > 0)
    {
        termAtt.SetTermBuffer("multi" + (Lucene.Net.QueryParsers.TestMultiAnalyzer.multiToken + 1));
        offsetAtt.SetOffset(prevStartOffset, prevEndOffset);
        typeAtt.SetType(prevType);
        posIncrAtt.SetPositionIncrement(0);
        Lucene.Net.QueryParsers.TestMultiAnalyzer.multiToken--;
        return true;
    }

    if (!input.IncrementToken())
    {
        return false;
    }

    // Remember type and offsets so injected tokens can copy them.
    prevType = typeAtt.Type();
    prevStartOffset = offsetAtt.StartOffset();
    prevEndOffset = offsetAtt.EndOffset();

    System.String current = termAtt.Term();
    if (current.Equals("triplemulti"))
    {
        // Two more tokens will be injected on the following calls.
        Lucene.Net.QueryParsers.TestMultiAnalyzer.multiToken = 2;
    }
    else if (current.Equals("multi"))
    {
        // One more token will be injected on the following call.
        Lucene.Net.QueryParsers.TestMultiAnalyzer.multiToken = 1;
    }

    // The token just read from the input is always passed through.
    return true;
}
/// <summary>
/// Accepts the current state of <paramref name="a"/> when its term equals
/// "Dogs", ignoring case.
/// </summary>
/// <param name="a">Attribute source whose TermAttribute is inspected.</param>
/// <returns>true when the term matches "Dogs" case-insensitively; otherwise false.</returns>
public override bool Accept(AttributeSource a)
{
    TermAttribute termAtt = (TermAttribute)a.GetAttribute(typeof(TermAttribute));

    // Ordinal, culture-independent comparison: the result no longer depends on
    // the current thread culture and no upper-cased temporary strings are created.
    return System.String.Equals(termAtt.Term(), "Dogs", System.StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Advances <paramref name="stream"/> by one token and asserts that a token was
/// available and that <paramref name="termAtt"/> now holds the expected text.
/// </summary>
/// <param name="expected">Term text the next token must have.</param>
/// <param name="stream">Stream to advance.</param>
/// <param name="termAtt">Term attribute read after advancing; presumably obtained from the same stream by the caller.</param>
internal virtual void AssertTermEquals(System.String expected, TokenStream stream, TermAttribute termAtt)
{
    bool advanced = stream.IncrementToken();
    Assert.IsTrue(advanced);

    System.String actual = termAtt.Term();
    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Pulls the next token from <paramref name="stream"/>, failing if there is
/// none, and then checks that the term exposed by <paramref name="termAtt"/>
/// equals <paramref name="expected"/>.
/// </summary>
/// <param name="expected">Text the next term must equal.</param>
/// <param name="stream">Token stream that is advanced by exactly one token.</param>
/// <param name="termAtt">Attribute whose term is compared; presumably bound to <paramref name="stream"/> by the caller.</param>
internal virtual void AssertTermEquals(System.String expected, TokenStream stream, TermAttribute termAtt)
{
    // The stream must still have a token to offer.
    bool hasToken = stream.IncrementToken();
    Assert.IsTrue(hasToken);

    // Read the term only after the stream has been advanced.
    System.String current = termAtt.Term();
    Assert.AreEqual(expected, current);
}