A TokenFilter that keeps only those tokens whose text is contained in the set of required words. This filter behaves like the inverse of StopFilter. Available since Solr 1.3.
Inheritance: Lucene.Net.Analysis.Util.FilteringTokenFilter
Exemplo n.º 1
0
        /// <summary>
        /// Runs the same whitespace-tokenized input through <c>KeepWordFilter</c> four times:
        /// with a case-insensitive and a case-sensitive keep set, first through the current
        /// constructor and then through the legacy LUCENE_43 constructor.
        /// </summary>
        public virtual void TestStopAndGo()
        {
            // Only these words may pass through the filter.
            ISet<string> words = new HashSet<string> { "aaa", "bbb" };

            string input = "xxx yyy aaa zzz BBB ccc ddd EEE";

            // Current constructor, keep set ignores case: both "aaa" and "BBB" survive.
            TokenStream ignoreCaseSource = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
            TokenStream ignoreCaseKept = new KeepWordFilter(TEST_VERSION_CURRENT, ignoreCaseSource, new CharArraySet(TEST_VERSION_CURRENT, words, true));
            AssertTokenStreamContents(ignoreCaseKept, new string[] { "aaa", "BBB" }, new int[] { 3, 2 });

            // Current constructor, keep set is case-sensitive: "BBB" no longer matches "bbb".
            TokenStream exactCaseSource = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
            TokenStream exactCaseKept = new KeepWordFilter(TEST_VERSION_CURRENT, exactCaseSource, new CharArraySet(TEST_VERSION_CURRENT, words, false));
            AssertTokenStreamContents(exactCaseKept, new string[] { "aaa" }, new int[] { 3 });

            // The LUCENE_43 constructor taking an extra boolean is obsolete, hence the suppressed warnings.
#pragma warning disable 612, 618
            // Legacy constructor with the boolean flag off, keep set ignores case: expected ints are all 1.
            TokenStream legacyIgnoreCaseSource = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
            TokenStream legacyIgnoreCaseKept = new KeepWordFilter(LuceneVersion.LUCENE_43, false, legacyIgnoreCaseSource, new CharArraySet(TEST_VERSION_CURRENT, words, true));
            AssertTokenStreamContents(legacyIgnoreCaseKept, new string[] { "aaa", "BBB" }, new int[] { 1, 1 });

            // Legacy constructor with the boolean flag off, keep set is case-sensitive.
            TokenStream legacyExactCaseSource = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
            TokenStream legacyExactCaseKept = new KeepWordFilter(LuceneVersion.LUCENE_43, false, legacyExactCaseSource, new CharArraySet(TEST_VERSION_CURRENT, words, false));
#pragma warning restore 612, 618
            AssertTokenStreamContents(legacyExactCaseKept, new string[] { "aaa" }, new int[] { 1 });
        }
Exemplo n.º 2
0
            /// <summary>
            /// Builds the analysis chain: a whitespace <c>MockTokenizer</c> over <paramref name="reader"/>
            /// wrapped in a <c>KeepWordFilter</c> whose keep set is built from <c>words</c> with ignore-case enabled.
            /// </summary>
            protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

                // The filter is constructed inline; the tokenizer remains the source of the components.
                return new TokenStreamComponents(
                    source,
                    new KeepWordFilter(TEST_VERSION_CURRENT, source, new CharArraySet(TEST_VERSION_CURRENT, words, true)));
            }
        /// <summary>
        /// Checks which tokens <c>KeepWordFilter</c> lets through for a fixed input, varying the
        /// keep set's case sensitivity and the constructor (current versus legacy LUCENE_43).
        /// </summary>
        public virtual void TestStopAndGo()
        {
            ISet<string> words = new HashSet<string> { "aaa", "bbb" };
            string input = "xxx yyy aaa zzz BBB ccc ddd EEE";
            TokenStream stream;

            // Keep set ignores case: "aaa" and "BBB" are kept.
            stream = new KeepWordFilter(
                TEST_VERSION_CURRENT,
                new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
                new CharArraySet(TEST_VERSION_CURRENT, words, true));
            AssertTokenStreamContents(stream, new string[] { "aaa", "BBB" }, new int[] { 3, 2 });

            // Keep set is case-sensitive: only "aaa" is kept.
            stream = new KeepWordFilter(
                TEST_VERSION_CURRENT,
                new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
                new CharArraySet(TEST_VERSION_CURRENT, words, false));
            AssertTokenStreamContents(stream, new string[] { "aaa" }, new int[] { 3 });

            // Same two scenarios through the obsolete LUCENE_43 constructor with its boolean flag off.
#pragma warning disable 612, 618
            stream = new KeepWordFilter(
                LuceneVersion.LUCENE_43,
                false,
                new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
                new CharArraySet(TEST_VERSION_CURRENT, words, true));
            AssertTokenStreamContents(stream, new string[] { "aaa", "BBB" }, new int[] { 1, 1 });

            stream = new KeepWordFilter(
                LuceneVersion.LUCENE_43,
                false,
                new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false),
                new CharArraySet(TEST_VERSION_CURRENT, words, false));
#pragma warning restore 612, 618
            AssertTokenStreamContents(stream, new string[] { "aaa" }, new int[] { 1 });
        }
 /// <summary>
 /// Wraps <paramref name="input"/> in a <c>KeepWordFilter</c>, or returns it untouched when no word set is present.
 /// </summary>
 public override TokenStream Create(TokenStream input)
 {
     // A null word set stands for an empty one, so the stream is passed through unfiltered.
     return words == null
         ? input
         : new KeepWordFilter(luceneMatchVersion, enablePositionIncrements, input, words);
 }
Exemplo n.º 5
0
        /// <summary>
        /// Wraps <paramref name="input"/> in a <c>KeepWordFilter</c> built from the factory's
        /// match version, position-increment flag and word set; returns <paramref name="input"/>
        /// unchanged when the word set is null.
        /// </summary>
        public override TokenStream Create(TokenStream input)
        {
            // A null set means it was empty: there is nothing to filter against.
            if (words == null)
            {
                return input;
            }

            // This constructor overload is obsolete, so its warnings are silenced for the call.
#pragma warning disable 612, 618
            TokenStream keepWords = new KeepWordFilter(m_luceneMatchVersion, enablePositionIncrements, input, words);
#pragma warning restore 612, 618
            return keepWords;
        }
Exemplo n.º 6
0
        /// <summary>
        /// Feeds random data through an analyzer made of a whitespace <c>MockTokenizer</c>
        /// followed by a <c>KeepWordFilter</c> that keeps "a" and "b" (ignoring case).
        /// </summary>
        public virtual void TestRandomStrings()
        {
            ISet<string> keepWords = new JCG.HashSet<string> { "a", "b" };

            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
            {
                Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
                return new TokenStreamComponents(
                    source,
                    new KeepWordFilter(TEST_VERSION_CURRENT, source, new CharArraySet(TEST_VERSION_CURRENT, keepWords, true)));
            });

            CheckRandomData(Random, analyzer, 1000 * RandomMultiplier);
        }
Exemplo n.º 7
0
 /// <summary>
 /// Returns <paramref name="input"/> filtered by a <c>KeepWordFilter</c>, or
 /// <paramref name="input"/> itself when the word set is null.
 /// </summary>
 public override TokenStream Create(TokenStream input)
 {
     TokenStream result = input;

     // A null set means it was empty; only wrap the stream when a set exists.
     if (words != null)
     {
         result = new KeepWordFilter(luceneMatchVersion, enablePositionIncrements, input, words);
     }

     return result;
 }
 /// <summary>
 /// Creates the components for this analyzer: a whitespace <c>MockTokenizer</c> reading from
 /// <paramref name="reader"/>, followed by a <c>KeepWordFilter</c> over <c>words</c> (ignore-case keep set).
 /// </summary>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     CharArraySet keepSet = new CharArraySet(TEST_VERSION_CURRENT, words, true);
     TokenStream kept = new KeepWordFilter(TEST_VERSION_CURRENT, source, keepSet);

     return new TokenStreamComponents(source, kept);
 }