/// <summary>
/// Feeds "Now is The Time" through a whitespace tokenizer wrapped in a
/// <c>StopFilter</c> built from the stop words "is", "the" and "Time", and
/// checks that exactly the tokens "Now" and "The" come out, in that order.
/// </summary>
/// <remarks>
/// The assertions expect "The" to survive even though "the" is in the stop
/// list, i.e. the stop set produced here is expected to match case-sensitively.
/// </remarks>
public virtual void TestStopFilt()
{
    var stopSet = StopFilter.MakeStopSet(new string[] { "is", "the", "Time" });
    var input = new System.IO.StringReader("Now is The Time");

    // NOTE(review): the leading 'false' is presumably the position-increment flag of StopFilter - confirm.
    TokenStream filtered = new StopFilter(false, new WhitespaceTokenizer(input), stopSet);
    ITermAttribute term = filtered.GetAttribute<ITermAttribute>();

    // Tokens that must pass the filter, in stream order.
    string[] survivors = new string[] { "Now", "The" };
    foreach (string expected in survivors)
    {
        Assert.IsTrue(filtered.IncrementToken());
        Assert.AreEqual(expected, term.Term);
    }

    // "is" and "Time" were dropped, so the stream must now be exhausted.
    Assert.IsFalse(filtered.IncrementToken());
}
/// <summary>
/// Builds an analyzer which removes the words contained in the provided array.
/// </summary>
/// <param name="stopWords">
/// Words to remove; converted to a stop set with <c>StopFilter.MakeStopSet</c>.
/// </param>
public StopAnalyzer(System.String[] stopWords)
{
    var stopSet = StopFilter.MakeStopSet(stopWords);
    this.stopWords = stopSet;
}
/// <summary>
/// Builds an analyzer which removes the words listed in ENGLISH_STOP_WORDS.
/// </summary>
public StopAnalyzer()
{
    var defaultStopSet = StopFilter.MakeStopSet(ENGLISH_STOP_WORDS);
    this.stopWords = defaultStopSet;
}
/// <summary>
/// Builds a text made of the English names of 0..19 and a stop list holding
/// every one of those names whose number is not a multiple of three, then
/// hands three differently configured <c>StopFilter</c> streams to
/// <c>DoTestStopPositons</c>: a single filter "with increments", a single
/// filter "without increments", and two chained filters that each hold one
/// half of the stop list.
/// </summary>
public virtual void TestStopPositons()
{
    var text = new System.Text.StringBuilder();
    var stopList = new System.Collections.Generic.List<string>();
    for (int n = 0; n < 20; n++)
    {
        string word = English.IntToEnglish(n).Trim();
        text.Append(word);
        text.Append(" ");
        if (n % 3 == 0)
        {
            // Multiples of three stay out of the stop list.
            continue;
        }
        stopList.Add(word);
    }

    string input = text.ToString();
    Log(input);

    string[] stopWords = stopList.ToArray();
    foreach (string stopWord in stopWords)
    {
        Log("Stop: " + stopWord);
    }
    var stopSet = StopFilter.MakeStopSet(stopWords);

    // with increments
    StopFilter singleFilter = new StopFilter(false, new WhitespaceTokenizer(new System.IO.StringReader(input)), stopSet);
    DoTestStopPositons(singleFilter, true);

    // without increments (fresh reader and filter over the same text)
    singleFilter = new StopFilter(false, new WhitespaceTokenizer(new System.IO.StringReader(input)), stopSet);
    DoTestStopPositons(singleFilter, false);

    // with increments, concatenating two stop filters:
    // entries at even indexes of the stop list go to the first set, odd ones to the second.
    var evenHalf = new System.Collections.Generic.List<string>();
    var oddHalf = new System.Collections.Generic.List<string>();
    for (int idx = 0; idx < stopList.Count; idx++)
    {
        var target = (idx % 2 == 0) ? evenHalf : oddHalf;
        target.Add(stopList[idx]);
    }

    string[] stopWords0 = evenHalf.ToArray();
    foreach (string stopWord in stopWords0)
    {
        Log("Stop0: " + stopWord);
    }

    string[] stopWords1 = oddHalf.ToArray();
    foreach (string stopWord in stopWords1)
    {
        Log("Stop1: " + stopWord);
    }

    var stopSet0 = StopFilter.MakeStopSet(stopWords0);
    var stopSet1 = StopFilter.MakeStopSet(stopWords1);

    // The inner filter removes the first part of the set and has position increments switched on.
    StopFilter innerFilter = new StopFilter(false, new WhitespaceTokenizer(new System.IO.StringReader(input)), stopSet0);
    innerFilter.EnablePositionIncrements = true;

    // The outer filter consumes the inner one: two stop filters concatenated.
    StopFilter chainedFilter = new StopFilter(false, innerFilter, stopSet1);
    DoTestStopPositons(chainedFilter, true);
}
/// <summary>
/// Builds an analyzer which removes the words contained in the provided array
/// and stores the caller's position-increment setting.
/// </summary>
/// <param name="stopWords">
/// Words to remove; converted to a stop set with <c>StopFilter.MakeStopSet</c>.
/// </param>
/// <param name="enablePositionIncrements">
/// Value copied into the <c>enablePositionIncrements</c> field.
/// </param>
public StopAnalyzer(System.String[] stopWords, bool enablePositionIncrements)
{
    var stopSet = StopFilter.MakeStopSet(stopWords);

    // NOTE(review): presumably tells the rest of the class not to fall back to a
    // default position-increment setting, since one was given explicitly - confirm.
    useDefaultStopPositionIncrement = false;
    this.enablePositionIncrements = enablePositionIncrements;
    this.stopWords = stopSet;
}