public virtual void TestRandomStrings()
        {
            int numIters = AtLeast(10);

            for (int i = 0; i < numIters; i++)
            {
                SynonymMap.Builder b = new SynonymMap.Builder(Random.nextBoolean());
                int numEntries       = AtLeast(10);
                for (int j = 0; j < numEntries; j++)
                {
                    Add(b, RandomNonEmptyString(), RandomNonEmptyString(), Random.nextBoolean());
                }
                SynonymMap map        = b.Build();
                bool       ignoreCase = Random.nextBoolean();

                Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
                {
                    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
                    TokenStream stream  = new SynonymFilter(tokenizer, map, ignoreCase);
                    return(new TokenStreamComponents(tokenizer, new RemoveDuplicatesTokenFilter(stream)));
                });

                CheckRandomData(Random, analyzer, 200);
            }
        }
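        // The test above relies on two helpers that are not shown here: Add(...) and
        // RandomNonEmptyString(). A minimal sketch of what they might look like, assuming
        // the Lucene.NET test framework's TestUtil.RandomUnicodeString and the builder's
        // Add(CharsRef, CharsRef, bool) overload; exact implementations may differ.
        private void Add(SynonymMap.Builder b, string input, string output, bool keepOrig)
        {
            // Multi-word entries separate words with SynonymMap.WORD_SEPARATOR ('\u0000') inside a CharsRef.
            b.Add(new CharsRef(input.Replace(' ', SynonymMap.WORD_SEPARATOR)),
                  new CharsRef(output.Replace(' ', SynonymMap.WORD_SEPARATOR)),
                  keepOrig);
        }

        private string RandomNonEmptyString()
        {
            // Draw random unicode strings until a non-empty, trimmed one comes up.
            while (true)
            {
                string s = TestUtil.RandomUnicodeString(Random).Trim();
                if (s.Length != 0)
                {
                    return s;
                }
            }
        }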
        public virtual void TestMaxPosition3WithSynomyms()
        {
            foreach (bool consumeAll in new bool[] { true, false })
            {
                MockTokenizer tokenizer = new MockTokenizer(new StringReader("one two three four five"), MockTokenizer.WHITESPACE, false);
                // if we are consuming all tokens, we can use the checks, otherwise we can't
                tokenizer.EnableChecks = consumeAll;

                SynonymMap.Builder builder = new SynonymMap.Builder(true);
                builder.Add(new CharsRef("one"), new CharsRef("first"), true);
                builder.Add(new CharsRef("one"), new CharsRef("alpha"), true);
                builder.Add(new CharsRef("one"), new CharsRef("beguine"), true);
                CharsRef multiWordCharsRef = new CharsRef();
                SynonymMap.Builder.Join(new string[] { "and", "indubitably", "single", "only" }, multiWordCharsRef);
                builder.Add(new CharsRef("one"), multiWordCharsRef, true);
                SynonymMap.Builder.Join(new string[] { "dopple", "ganger" }, multiWordCharsRef);
                builder.Add(new CharsRef("two"), multiWordCharsRef, true);
                SynonymMap  synonymMap = builder.Build();
                TokenStream stream     = new SynonymFilter(tokenizer, synonymMap, true);
                stream = new LimitTokenPositionFilter(stream, 3, consumeAll);

                // "only", the 4th word of multi-word synonym "and indubitably single only" is not emitted, since its position is greater than 3.
                AssertTokenStreamContents(stream,
                    new string[] { "one", "first", "alpha", "beguine", "and", "two", "indubitably", "dopple", "three", "single", "ganger" },
                    new int[] { 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0 });
            }
        }
            protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer   tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
                TokenStream stream    = new SynonymFilter(tokenizer, map, ignoreCase);

                return(new TokenStreamComponents(tokenizer, new RemoveDuplicatesTokenFilter(stream)));
            }
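            // "map" and "ignoreCase" in the override above are fields of its enclosing
            // analyzer, which is not shown. A hypothetical wrapper is sketched below;
            // the class name and constructor are assumptions, not from the original.
            private sealed class SynonymTestAnalyzer : Analyzer
            {
                private readonly SynonymMap map;   // synonym map consulted by SynonymFilter
                private readonly bool ignoreCase;  // whether synonym matching ignores case

                public SynonymTestAnalyzer(SynonymMap map, bool ignoreCase)
                {
                    this.map = map;
                    this.ignoreCase = ignoreCase;
                }

                // ... the CreateComponents override shown above belongs here ...
            }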
        /// <summary>
        /// Builds the token stream for a field: standard tokenization, then standard
        /// filtering, lowercasing, stop-word removal (the default English set plus a
        /// custom stop table), and finally synonym injection.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed (not used here).</param>
        /// <param name="reader">Reader over the field's text.</param>
        /// <returns>The assembled <see cref="TokenStream"/>.</returns>
        public override TokenStream TokenStream(String fieldName, System.IO.TextReader reader)
        {
            TokenStream result = new StandardTokenizer(Lucene.Net.Util.Version.LUCENE_30, reader);

            result = new StandardFilter(result);
            result = new LowerCaseFilter(result);
            result = new StopFilter(true, result, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
            result = new StopFilter(true, result, stoptable);
            //result = new GermanStemFilter(result, excltable);
            //result = new PorterStemFilter(result);
            result = new SynonymFilter(result, SynonymEngine); // injects the synonyms.
            return(result);
        }
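        // "SynonymEngine" above is not a Lucene.NET type; it is a user-defined lookup
        // component consulted by the custom SynonymFilter. A minimal sketch of such a
        // class follows; the GetSynonyms member and the sample table are assumptions
        // (requires System.Collections.Generic).
        public class SynonymEngine
        {
            // Hard-coded table for illustration only; a real engine might read from a
            // file, a database, or a thesaurus dump.
            private static readonly IDictionary<string, string[]> map =
                new Dictionary<string, string[]>
                {
                    { "quick", new[] { "fast", "speedy" } },
                    { "jumps", new[] { "leaps", "hops" } }
                };

            // Returns the synonyms for a term, or null when there are none.
            public virtual string[] GetSynonyms(string term)
            {
                string[] synonyms;
                return map.TryGetValue(term, out synonyms) ? synonyms : null;
            }
        }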
    public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
    {
        // Build the chain inside-out: tokenize, standard-filter, lowercase,
        // remove English stop words, then inject synonyms via the engine.
        TokenStream result = new SynonymFilter(
            new StopFilter(true,
                new LowerCaseFilter(
                    new StandardFilter(
                        new StandardTokenizer(this.version, reader))),
                StopAnalyzer.ENGLISH_STOP_WORDS_SET),
            engine);

        return result;
    }
    public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
    {
        //create the tokenizer
        TokenStream result = new StandardTokenizer(reader);

        //add in filters
        // first normalize the StandardTokenizer
        result = new StandardFilter(result);
        // makes sure everything is lower case
        result = new LowerCaseFilter(result);
        // use the default list of Stop Words, provided by the StopAnalyzer class.
        result = new StopFilter(result, StopAnalyzer.ENGLISH_STOP_WORDS);
        // injects the synonyms.
        result = new SynonymFilter(result, SynonymEngine);
        //return the built token stream.
        return(result);
    }
 public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
 {
     TokenStream result = new StandardTokenizer(Lucene.Net.Util.Version.LUCENE_30, reader);
     result = new LowerCaseFilter(result);
     result = new SynonymFilter(result, new SynonymEngine());
     return result;
 }
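 // Hypothetical wiring of one of the synonym-injecting analyzers above into a
 // Lucene.NET 3.0 index. "SynonymAnalyzer" is an assumed name for a class wrapping
 // the last TokenStream override; the index path and field values are illustrative.
 // (Usings: Lucene.Net.Analysis, Lucene.Net.Documents, Lucene.Net.Index, Lucene.Net.Store.)
 public static void IndexWithSynonyms()
 {
     var directory = FSDirectory.Open(new System.IO.DirectoryInfo("synonym-index"));
     var analyzer = new SynonymAnalyzer(); // hypothetical wrapper around the override above
     using (var writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
     {
         var doc = new Document();
         doc.Add(new Field("body", "one two three", Field.Store.YES, Field.Index.ANALYZED));
         writer.AddDocument(doc); // synonyms are injected at index time by the analyzer
     }
 }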