MakeStopSet() public static method

Builds a Set from an array of stop words, suitable for passing to the StopFilter constructor. This allows the stop-word set to be built once and cached when an Analyzer is constructed.
public static MakeStopSet ( ) : ISet
return ISet
Exemplo n.º 1
0
        /// <summary>
        /// Feeds "Now is The Time" through a StopFilter whose stop set is built
        /// from { "is", "the", "Time" } and checks that exactly the tokens
        /// "Now" and "The" come out, in that order.
        /// </summary>
        public virtual void  TestStopFilt()
        {
            string[] words = { "is", "the", "Time" };
            var wordSet = StopFilter.MakeStopSet(words);

            var input = new System.IO.StringReader("Now is The Time");
            TokenStream filtered = new StopFilter(false, new WhitespaceTokenizer(input), wordSet);
            ITermAttribute term = filtered.GetAttribute <ITermAttribute>();

            // "The" is expected to survive even though "the" is in the stop list.
            foreach (string expected in new string[] { "Now", "The" })
            {
                Assert.IsTrue(filtered.IncrementToken());
                Assert.AreEqual(expected, term.Term);
            }

            // Nothing else may be emitted once the surviving tokens are consumed.
            Assert.IsFalse(filtered.IncrementToken());
        }
Exemplo n.º 2
0
 /// <summary>
 /// Creates an analyzer whose stop set is built from the supplied words
 /// by <c>StopFilter.MakeStopSet</c>.
 /// </summary>
 /// <param name="stopWords">The words to turn into the analyzer's stop set.</param>
 public StopAnalyzer(System.String[] stopWords)
 {
     // Build the set once here and keep it in the stopWords field.
     var builtSet = StopFilter.MakeStopSet(stopWords);
     this.stopWords = builtSet;
 }
Exemplo n.º 3
0
 /// <summary>
 /// Creates an analyzer whose stop set is built from
 /// <c>ENGLISH_STOP_WORDS</c> by <c>StopFilter.MakeStopSet</c>.
 /// </summary>
 public StopAnalyzer()
 {
     // Build the set once here and keep it in the stopWords field.
     var defaultSet = StopFilter.MakeStopSet(ENGLISH_STOP_WORDS);
     this.stopWords = defaultSet;
 }
Exemplo n.º 4
0
        /// <summary>
        /// Builds a text from the English names of 0..19 (space separated) and a
        /// stop list holding every name whose index is not a multiple of three,
        /// then runs <c>DoTestStopPositons</c> three times: once with the flag
        /// set to true, once with it set to false, and once (true) over two
        /// chained StopFilters that each hold half of the stop list.
        /// </summary>
        public virtual void  TestStopPositons()
        {
            var textBuilder = new System.Text.StringBuilder();
            var stopList    = new System.Collections.Generic.List <string>();
            for (int n = 0; n < 20; n++)
            {
                string word = English.IntToEnglish(n).Trim();
                textBuilder.Append(word).Append(" ");
                if (n % 3 != 0)
                {
                    stopList.Add(word);
                }
            }

            // The text is complete at this point; materialise it once and reuse it.
            string text = textBuilder.ToString();
            Log(text);

            string[] stopWords = stopList.ToArray();
            foreach (string word in stopWords)
            {
                Log("Stop: " + word);
            }
            var stopSet = StopFilter.MakeStopSet(stopWords);

            // with increments
            var        source = new System.IO.StringReader(text);
            StopFilter filter = new StopFilter(false, new WhitespaceTokenizer(source), stopSet);
            DoTestStopPositons(filter, true);

            // without increments
            source = new System.IO.StringReader(text);
            filter = new StopFilter(false, new WhitespaceTokenizer(source), stopSet);
            DoTestStopPositons(filter, false);

            // with increments, concatenating two stop filters:
            // even-indexed stop words go to the first half, odd-indexed to the second.
            var firstHalf  = new System.Collections.Generic.List <string>();
            var secondHalf = new System.Collections.Generic.List <string>();
            for (int idx = 0; idx < stopList.Count; idx++)
            {
                var target = (idx % 2 == 0) ? firstHalf : secondHalf;
                target.Add(stopList[idx]);
            }

            string[] stopWords0 = firstHalf.ToArray();
            foreach (string word in stopWords0)
            {
                Log("Stop0: " + word);
            }

            string[] stopWords1 = secondHalf.ToArray();
            foreach (string word in stopWords1)
            {
                Log("Stop1: " + word);
            }

            var stopSet0 = StopFilter.MakeStopSet(stopWords0);
            var stopSet1 = StopFilter.MakeStopSet(stopWords1);

            source = new System.IO.StringReader(text);

            // first part of the set
            StopFilter inner = new StopFilter(false, new WhitespaceTokenizer(source), stopSet0);
            inner.EnablePositionIncrements = true;

            // two stop filters concatenated!
            StopFilter chained = new StopFilter(false, inner, stopSet1);

            DoTestStopPositons(chained, true);
        }
Exemplo n.º 5
0
 /// <summary>
 /// Creates an analyzer whose stop set is built from the supplied words by
 /// <c>StopFilter.MakeStopSet</c>, and records the caller's explicit
 /// position-increment choice.
 /// </summary>
 /// <param name="stopWords">The words to turn into the analyzer's stop set.</param>
 /// <param name="enablePositionIncrements">Value stored in the field of the same name.</param>
 public StopAnalyzer(System.String[] stopWords, bool enablePositionIncrements)
 {
     // Build the set before touching any field.
     var builtSet = StopFilter.MakeStopSet(stopWords);

     // The caller supplied an explicit value, so the "use default" flag is cleared.
     this.useDefaultStopPositionIncrement = false;
     this.enablePositionIncrements = enablePositionIncrements;
     this.stopWords = builtSet;
 }