示例#1
0
        /// <summary>
        /// Returns a cached-and-reset (or freshly built) token stream:
        /// StandardTokenizer -> StandardFilter -> LowerCaseFilter ->
        /// StopFilter -> DanishStemFilter.
        /// </summary>
        public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
        {
            // Honor subclasses that override TokenStream() but not this method.
            if (overridesTokenStreamMethod)
            {
                return TokenStream(fieldName, reader);
            }

            var streams = (SavedStreams)PreviousTokenStream;
            if (streams != null)
            {
                // Reuse the cached chain; only the tokenizer needs the new reader.
                streams.Source.Reset(reader);
                return streams.Result;
            }

            streams = new SavedStreams();
            streams.Source = new StandardTokenizer(MatchVersion, reader);
            streams.Result = new StandardFilter(streams.Source);
            streams.Result = new LowerCaseFilter(streams.Result);
            // TODO: Lucene.Net.Analysis.Compound.HyphenationCompoundWordTokenFilter
            streams.Result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(MatchVersion), streams.Result, StopTable);
            streams.Result = new DanishStemFilter(streams.Result, ExclusionTable);
            PreviousTokenStream = streams;
            return streams.Result;
        }
示例#2
0
        /* Returns a (possibly reused) StandardTokenizer filtered by a
         * StandardFilter, a LowerCaseFilter, an optional StopFilter,
         * and a SnowballFilter. */

        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
        {
            // LUCENE-1678: a subclass that overrides tokenStream but not
            // reusableTokenStream must get the non-reusable behavior.
            if (overridesTokenStreamMethod)
            {
                return TokenStream(fieldName, reader);
            }

            var streams = (SavedStreams)PreviousTokenStream;
            if (streams != null)
            {
                // Cached chain exists: just point the tokenizer at the new reader.
                streams.source.Reset(reader);
                return streams.result;
            }

            streams = new SavedStreams();
            streams.source = new StandardTokenizer(matchVersion, reader);
            streams.result = new StandardFilter(streams.source);
            streams.result = new LowerCaseFilter(streams.result);
            if (stopSet != null)
            {
                streams.result = new StopFilter(enableStopIncrements, streams.result, stopSet);
            }
            streams.result = new SnowballFilter(streams.result, name);
            PreviousTokenStream = streams;
            return streams.result;
        }
示例#3
0
        /// <summary>
        /// Returns a cached-and-reset (or freshly built) chain:
        /// StandardTokenizer -> StandardFilter -> LowerCaseFilter -> StopFilter.
        /// Tokenizer tuning (max token length, acronym handling) is re-applied
        /// on every call since those fields may change between uses.
        /// </summary>
        public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
        {
            // LUCENE-1678: fall back to tokenStream() when a subclass
            // overrides it without also overriding reusableTokenStream().
            if (overridesTokenStreamMethod)
            {
                return TokenStream(fieldName, reader);
            }

            var streams = (SavedStreams)PreviousTokenStream;
            if (streams != null)
            {
                streams.tokenStream.Reset(reader);
            }
            else
            {
                streams = new SavedStreams();
                PreviousTokenStream = streams;
                streams.tokenStream = new StandardTokenizer(matchVersion, reader);
                streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
                streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
                streams.filteredTokenStream = new StopFilter(enableStopPositionIncrements,
                                                             streams.filteredTokenStream, stopSet);
            }

            streams.tokenStream.MaxTokenLength = maxTokenLength;
            streams.tokenStream.SetReplaceInvalidAcronym(replaceInvalidAcronym);
            return streams.filteredTokenStream;
        }
示例#4
0
        /// <summary>
        /// Returns a cached-and-reset (or freshly built) Thai chain:
        /// StandardTokenizer -> StandardFilter -> ThaiWordFilter -> StopFilter.
        /// </summary>
        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
        {
            // LUCENE-1678: respect a subclass that overrides tokenStream()
            // but not reusableTokenStream().
            if (overridesTokenStreamMethod)
            {
                return TokenStream(fieldName, reader);
            }

            var streams = (SavedStreams)PreviousTokenStream;
            if (streams != null)
            {
                streams.source.Reset(reader);
                // ThaiWordFilter carries its own state, so the whole chain
                // must be reset too, not just the tokenizer.
                streams.result.Reset();
                return streams.result;
            }

            streams = new SavedStreams();
            streams.source = new StandardTokenizer(matchVersion, reader);
            streams.result = new StandardFilter(streams.source);
            streams.result = new ThaiWordFilter(streams.result);
            streams.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                            streams.result, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
            PreviousTokenStream = streams;
            return streams.result;
        }
示例#5
0
        /*
         * Returns a (possibly reused) TokenStream which tokenizes all the
         * text in the provided Reader.
         *
         * @return A TokenStream built from a StandardTokenizer filtered
         *   with StandardFilter, StopFilter, and DutchStemFilter
         */
        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
        {
            // LUCENE-1678: fall back to tokenStream() when a subclass
            // overrides it but not reusableTokenStream().
            if (overridesTokenStreamMethod)
            {
                return TokenStream(fieldName, reader);
            }

            var streams = (SavedStreams)PreviousTokenStream;
            if (streams != null)
            {
                // Cached chain: only the tokenizer needs the new reader.
                streams.source.Reset(reader);
                return streams.result;
            }

            streams = new SavedStreams();
            streams.source = new StandardTokenizer(matchVersion, reader);
            streams.result = new StandardFilter(streams.source);
            streams.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                            streams.result, stoptable);
            streams.result = new DutchStemFilter(streams.result, excltable, stemdict);
            PreviousTokenStream = streams;
            return streams.result;
        }
示例#6
0
        /* Returns a (possibly reused) StandardTokenizer filtered by a
         * StandardFilter, a LowerCaseFilter, an optional StopFilter,
         * a BestBetsWordFormsFilter, and a RemoveDuplicatesTokenFilter. */

        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
        {
            // LUCENE-1678: fall back to tokenStream() when a subclass
            // overrides it but not reusableTokenStream().
            if (overridesTokenStreamMethod)
            {
                return TokenStream(fieldName, reader);
            }

            var streams = (SavedStreams)PreviousTokenStream;
            if (streams != null)
            {
                streams.source.Reset(reader);
                return streams.result;
            }

            streams = new SavedStreams();
            streams.source = new StandardTokenizer(matchVersion, reader);
            streams.result = new StandardFilter(streams.source);
            streams.result = new LowerCaseFilter(streams.result);
            if (stopSet != null)
            {
                streams.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                                streams.result, stopSet);
            }
            streams.result = new BestBetsWordFormsFilter(streams.result);
            // This will remove duplicate keywords - bad for best bets/term count matching.
            streams.result = new RemoveDuplicatesTokenFilter(streams.result);
            PreviousTokenStream = streams;
            return streams.result;
        }
示例#7
0
 /// <summary>
 /// Consumes one async boolean stream and maintains an exponentially
 /// decayed score for it in the shared <c>SavedStreams</c> map
 /// (presumably a ConcurrentDictionary&lt;int, double&gt; — TODO confirm
 /// against its declaration).
 /// </summary>
 /// <param name="stream">Async sequence of on/off signals for one stream.</param>
 /// <param name="streamIndex">Key identifying this stream in SavedStreams.</param>
 private static async Task ReadAsyncStream(IAsyncEnumerable <bool> stream, int streamIndex)
 {
     try
     {
         await foreach (var input in stream)
         {
             // true -> 1, false -> 0.
             var value = Convert.ToInt32(input);
             //Console.WriteLine($"We got {value} for {streamIndex}");
             var isStreamSaved = SavedStreams.TryGetValue(streamIndex, out _);
             if (isStreamSaved)
             {
                 // Exponential decay: newScore = oldScore * (1 - C) + value.
                 // NOTE(review): the TryGetValue / AddOrUpdate / TryRemove
                 // sequence is not atomic as a whole; if two tasks ever share
                 // a streamIndex these calls can interleave — confirm each
                 // index is handled by exactly one reader task.
                 SavedStreams.AddOrUpdate(streamIndex, -1, (index, oldValue) => oldValue * (1 - C) + value);
                 SavedStreams.TryGetValue(streamIndex, out double updatedSum);
                 if (updatedSum < 0.5)
                 {
                     // Score decayed below threshold: stop tracking this stream.
                     SavedStreams.TryRemove(streamIndex, out _);
                 }
             }
             else if (value == 1)
             {
                 // First positive signal starts tracking with score 1.
                 SavedStreams.AddOrUpdate(streamIndex, 1, (index, oldValue) => 1);
             }
         }
     }
     catch (Exception ex)
     {
         // Best-effort: a failed stream just logs and ends its reader task.
         Console.WriteLine($"{streamIndex}, {ex.Message}");
     }
 }
示例#8
0
        /*
         * Returns a (possibly reused) TokenStream which tokenizes all the text
         * in the provided Reader.
         *
         * @return A TokenStream built from an ArabicLetterTokenizer filtered
         *         with LowerCaseFilter, ArabicNormalizationFilter,
         *         PersianNormalizationFilter and Persian stop words
         */
        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
        {
            var streams = (SavedStreams)PreviousTokenStream;
            if (streams != null)
            {
                // Cached chain: only the tokenizer needs the new reader.
                streams.source.Reset(reader);
                return streams.result;
            }

            streams = new SavedStreams();
            streams.source = new ArabicLetterTokenizer(reader);
            streams.result = new LowerCaseFilter(streams.source);
            streams.result = new ArabicNormalizationFilter(streams.result);
            // Additional Persian-specific normalization.
            streams.result = new PersianNormalizationFilter(streams.result);
            // Order matters: the stop word list is normalized with the
            // filters above, so StopFilter must come last.
            streams.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                            streams.result, stoptable);
            PreviousTokenStream = streams;
            return streams.result;
        }
示例#9
0
        /// <summary>
        /// Returns a cached-and-reset (or freshly built) Hebrew chain:
        /// HebrewTokenizer -> NiqqudFilter -> StopFilter -> LowerCaseFilter
        /// and, when suffix mappings are configured, an AddSuffixFilter.
        /// </summary>
        public override TokenStream ReusableTokenStream(string fieldName, System.IO.TextReader reader)
        {
            var streams = GetPreviousTokenStream() as SavedStreams;
            if (streams != null)
            {
                streams.source.Reset(reader);
                return streams.result;
            }

            streams = new SavedStreams();
            streams.source = new HebrewTokenizer(reader, PrefixTree);

            // Niqqud normalization.
            streams.result = new NiqqudFilter(streams.source);

            // TODO: should we ignoreCase in StopFilter?
            streams.result = new StopFilter(enableStopPositionIncrements, streams.result, STOP_WORDS_SET);

            // TODO: apply LowerCaseFilter to non-Hebrew tokens only.
            streams.result = new LowerCaseFilter(streams.result);

            if (suffixByTokenType != null && suffixByTokenType.Count > 0)
            {
                streams.result = new AddSuffixFilter(streams.result, suffixByTokenType);
            }

            SetPreviousTokenStream(streams);
            return streams.result;
        }
示例#10
0
        /// <summary>
        /// Returns a cached-and-reset (or freshly built) chain:
        /// StreamLemmasFilter -> StopFilter.
        /// </summary>
        public override TokenStream ReusableTokenStream(string fieldName, System.IO.TextReader reader)
        {
            // LUCENE-1678: respect a subclass that overrides tokenStream()
            // but not reusableTokenStream().
            if (overridesTokenStreamMethod)
            {
                return TokenStream(fieldName, reader);
            }

            var streams = PreviousTokenStream as SavedStreams;
            if (streams == null)
            {
                streams = new SavedStreams();
                PreviousTokenStream = streams;
                streams.source = new StreamLemmasFilter(reader, hebMorphLemmatizer,
                                                        lemmaFilter, alwaysSaveMarkedOriginal);
                // Temporary stop filter, until HebMorph is smart enough to
                // clear stop words all by itself.
                streams.result = new StopFilter(enableStopPositionIncrements, streams.source, STOP_WORDS_SET);
            }
            else
            {
                streams.source.Reset(reader);
            }
            return streams.result;
        }
示例#11
0
        /// <summary>
        /// Returns a cached-and-reset (or freshly built) Thai chain:
        /// StandardTokenizer -> StandardFilter -> ThaiWordFilter -> StopFilter.
        /// </summary>
        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
        {
            // LUCENE-1678: fall back to tokenStream() when a subclass
            // overrides it but not reusableTokenStream().
            if (overridesTokenStreamMethod)
            {
                return TokenStream(fieldName, reader);
            }

            var streams = (SavedStreams)PreviousTokenStream;
            if (streams != null)
            {
                streams.source.Reset(reader);
                // ThaiWordFilter keeps internal state, so reset the full chain.
                streams.result.Reset();
                return streams.result;
            }

            streams = new SavedStreams();
            streams.source = new StandardTokenizer(matchVersion, reader);
            streams.result = new StandardFilter(streams.source);
            streams.result = new ThaiWordFilter(streams.result);
            streams.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                            streams.result, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
            PreviousTokenStream = streams;
            return streams.result;
        }
示例#12
0
 /// <summary>
 /// Returns a cached-and-reset (or freshly built) chain:
 /// LowerCaseTokenizer -> StopFilter.
 /// </summary>
 public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
 {
     var streams = (SavedStreams)GetPreviousTokenStream();
     if (streams != null)
     {
         // Cached chain: only the tokenizer needs the new reader.
         streams.source.Reset(reader);
         return streams.result;
     }

     streams = new SavedStreams(this);
     streams.source = new LowerCaseTokenizer(reader);
     streams.result = new StopFilter(streams.source, stopWords);
     SetPreviousTokenStream(streams);
     return streams.result;
 }
            /// <summary>
            /// Returns a cached-and-reset (or freshly built) chain:
            /// StandardTokenizer -> LowerCaseFilter -> HunspellStemFilter.
            /// </summary>
            public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
            {
                var streams = (SavedStreams)PreviousTokenStream;
                if (streams != null)
                {
                    streams.Tokenizer.Reset(reader);
                    // The stem filter keeps state of its own, so reset it too.
                    streams.Filter.Reset();
                    return streams.Filter;
                }

                streams = new SavedStreams();
                streams.Tokenizer = new StandardTokenizer(LuceneVersion.LUCENE_29, reader);
                streams.Filter = new HunspellStemFilter(new LowerCaseFilter(streams.Tokenizer), _dictionary);
                PreviousTokenStream = streams;
                return streams.Filter;
            }
示例#14
0
 /// <summary>
 /// Returns a cached-and-reset (or freshly built) chain:
 /// KeywordTokenizer -> CollationKeyFilter for the configured culture.
 /// </summary>
 public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
 {
     var streams = (SavedStreams)PreviousTokenStream;
     if (streams != null)
     {
         streams.source.Reset(reader);
         return streams.result;
     }

     streams = new SavedStreams();
     streams.source = new KeywordTokenizer(reader);
     streams.result = new CollationKeyFilter(streams.source, cultureInfo);
     PreviousTokenStream = streams;
     return streams.result;
 }
示例#15
0
        /// <summary>
        /// Returns a cached-and-reset (or freshly built) chain:
        /// LowerCaseTokenizer -> StopFilter.
        /// </summary>
        public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
        {
            var streams = (SavedStreams)GetPreviousTokenStream();
            if (streams != null)
            {
                // Cached chain: only the tokenizer needs the new reader.
                streams.source.Reset(reader);
                return streams.result;
            }

            streams = new SavedStreams(this);
            streams.source = new LowerCaseTokenizer(reader);
            streams.result = new StopFilter(streams.source, stopWords);
            SetPreviousTokenStream(streams);
            return streams.result;
        }
    /// <summary>
    /// Returns a cached-and-reset (or freshly built) chain:
    /// WhiteSpaceTokenizer -> LowerCaseFilter.
    /// </summary>
    public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
    {
        var streams = (SavedStreams)GetPreviousTokenStream();
        if (streams != null)
        {
            // Cached chain: only the tokenizer needs the new reader.
            streams.tokenStream.Reset(reader);
            return streams.filteredTokenStream;
        }

        streams = new SavedStreams();
        SetPreviousTokenStream(streams);
        streams.tokenStream = new WhiteSpaceTokenizer(reader);
        streams.filteredTokenStream = new LowerCaseFilter(streams.tokenStream);
        return streams.filteredTokenStream;
    }
示例#17
0
 /// <summary>
 /// Returns a (possibly reused) <see cref="TokenStream"/> over all the text
 /// in the provided <see cref="TextReader"/>.
 /// </summary>
 /// <returns>
 ///   A <see cref="TokenStream"/> built from a <see cref="ChineseTokenizer"/>
 ///   filtered with <see cref="ChineseFilter"/>.
 /// </returns>
 public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
 {
     // tokenStream() is final, so there is no back-compat issue here.
     var streams = (SavedStreams)PreviousTokenStream;
     if (streams != null)
     {
         // Cached chain: only the tokenizer needs the new reader.
         streams.source.Reset(reader);
         return streams.result;
     }

     streams = new SavedStreams();
     streams.source = new ChineseTokenizer(reader);
     streams.result = new ChineseFilter(streams.source);
     PreviousTokenStream = streams;
     return streams.result;
 }
示例#18
0
        /// <summary>
        /// Returns a cached-and-reset (or freshly built) chain:
        /// KeywordTokenizer -> CollationKeyFilter for the configured culture.
        /// </summary>
        public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
        {
            var streams = (SavedStreams)PreviousTokenStream;
            if (streams != null)
            {
                streams.Source.Reset(reader);
                return streams.Result;
            }

            streams = new SavedStreams();
            streams.Source = new KeywordTokenizer(reader);
            streams.Result = new CollationKeyFilter(streams.Source, _cultureInfo);
            PreviousTokenStream = streams;
            return streams.Result;
        }
示例#19
0
        /// <summary>
        /// Returns a (possibly reused) <see cref="TokenStream"/> over all the text
        /// in the provided <see cref="TextReader"/>.
        /// </summary>
        /// <returns>
        ///   A <see cref="TokenStream"/> built from a <see cref="ChineseTokenizer"/>
        ///   filtered with <see cref="ChineseFilter"/>.
        /// </returns>
        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
        {
            // tokenStream() is final, so there is no back-compat issue here.
            var streams = (SavedStreams)PreviousTokenStream;
            if (streams != null)
            {
                streams.source.Reset(reader);
                return streams.result;
            }

            streams = new SavedStreams();
            streams.source = new ChineseTokenizer(reader);
            streams.result = new ChineseFilter(streams.source);
            PreviousTokenStream = streams;
            return streams.result;
        }
示例#20
0
    /// <summary>
    /// Returns a cached-and-reset (or freshly built) chain:
    /// WhitespaceTokenizer -> StandardFilter -> LowerCaseFilter.
    /// </summary>
    public override TokenStream ReusableTokenStream([NotNull] string fieldName, [NotNull] TextReader reader)
    {
      var cached = (SavedStreams)this.PreviousTokenStream;
      if (cached != null)
      {
        // Cached chain: only the tokenizer needs the new reader.
        cached.TokenStream.Reset(reader);
        return cached.FilteredTokenStream;
      }

      cached = new SavedStreams();
      this.PreviousTokenStream = cached;
      cached.TokenStream = new WhitespaceTokenizer(reader);
      cached.FilteredTokenStream = new StandardFilter(cached.TokenStream);
      cached.FilteredTokenStream = new LowerCaseFilter(cached.FilteredTokenStream);
      return cached.FilteredTokenStream;
    }
        /// <summary>
        /// Returns a cached-and-reset (or freshly built) chain:
        /// LetterOrDigitTokenizer -> LowerCaseFilter -> ASCIIFoldingFilter.
        /// </summary>
        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
        {
            var streams = (SavedStreams)PreviousTokenStream;
            if (streams != null)
            {
                streams.tokenStream.Reset(reader);
                return streams.filteredTokenStream;
            }

            streams = new SavedStreams();
            PreviousTokenStream = streams;
            streams.tokenStream = new LetterOrDigitTokenizer(reader);
            streams.filteredTokenStream = new LowerCaseFilter(streams.tokenStream);
            streams.filteredTokenStream = new ASCIIFoldingFilter(streams.filteredTokenStream);
            return streams.filteredTokenStream;
        }
            /// <summary>
            /// Returns a cached-and-reset (or freshly built) chain:
            /// StandardTokenizer -> LowerCaseFilter -> HunspellStemFilter.
            /// </summary>
            public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
            {
                var streams = (SavedStreams)PreviousTokenStream;
                if (streams != null)
                {
                    streams.Tokenizer.Reset(reader);
                    // The stem filter keeps state of its own, so reset it too.
                    streams.Filter.Reset();
                    return streams.Filter;
                }

                streams = new SavedStreams();
                streams.Tokenizer = new StandardTokenizer(LuceneVersion.LUCENE_29, reader);
                streams.Filter = new HunspellStemFilter(new LowerCaseFilter(streams.Tokenizer), _dictionary);
                PreviousTokenStream = streams;
                return streams.Filter;
            }
示例#23
0
        /*
         * Returns a (possibly reused) TokenStream which tokenizes all the text
         * in the provided Reader.
         *
         * @return A TokenStream built from a StandardTokenizer filtered with
         *         GreekLowerCaseFilter and StopFilter
         */
        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
        {
            var streams = (SavedStreams)PreviousTokenStream;
            if (streams != null)
            {
                // Cached chain: only the tokenizer needs the new reader.
                streams.source.Reset(reader);
                return streams.result;
            }

            streams = new SavedStreams();
            streams.source = new StandardTokenizer(matchVersion, reader);
            streams.result = new GreekLowerCaseFilter(streams.source);
            streams.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                            streams.result, stopSet);
            PreviousTokenStream = streams;
            return streams.result;
        }
示例#24
0
        /// <summary>
        /// Returns a cached-and-reset (or freshly built) chain:
        /// WhitespaceTokenizer -> StandardFilter -> LowerCaseFilter.
        /// </summary>
        public override TokenStream ReusableTokenStream([NotNull] string fieldName, [NotNull] TextReader reader)
        {
            var cached = (SavedStreams)this.PreviousTokenStream;
            if (cached != null)
            {
                // Cached chain: only the tokenizer needs the new reader.
                cached.TokenStream.Reset(reader);
                return cached.FilteredTokenStream;
            }

            cached = new SavedStreams();
            this.PreviousTokenStream = cached;
            cached.TokenStream = new WhitespaceTokenizer(reader);
            cached.FilteredTokenStream = new StandardFilter(cached.TokenStream);
            cached.FilteredTokenStream = new LowerCaseFilter(cached.FilteredTokenStream);
            return cached.FilteredTokenStream;
        }
示例#25
0
        /*
         * Returns a (possibly reused) TokenStream which tokenizes all the text
         * in the provided Reader.
         *
         * @param fieldName lucene field name
         * @param reader    Input Reader
         * @return A TokenStream built from CJKTokenizer, filtered with StopFilter
         */
        public override sealed TokenStream ReusableTokenStream(String fieldName, TextReader reader)
        {
            // tokenStream() is final, so there is no back-compat issue here.
            var streams = (SavedStreams)PreviousTokenStream;
            if (streams != null)
            {
                streams.source.Reset(reader);
                return streams.result;
            }

            streams = new SavedStreams();
            streams.source = new CJKTokenizer(reader);
            streams.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                            streams.source, stopTable);
            PreviousTokenStream = streams;
            return streams.result;
        }
示例#26
0
        /// <summary>
        /// Returns a cached-and-reset (or freshly built) chain:
        /// LowerCaseTokenizer -> StopFilter.
        /// </summary>
        public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
        {
            var streams = (SavedStreams)PreviousTokenStream;
            if (streams != null)
            {
                // Cached chain: only the tokenizer needs the new reader.
                streams.source.Reset(reader);
                return streams.result;
            }

            streams = new SavedStreams(this);
            streams.source = new LowerCaseTokenizer(reader);
            streams.result = new StopFilter(enablePositionIncrements, streams.source, stopWords);
            PreviousTokenStream = streams;
            return streams.result;
        }
示例#27
0
        /// <summary>
        /// Entry point: starts a once-per-second reporter that prints the most
        /// popular stream, spawns one reader task per input stream, and waits
        /// for all readers to complete.
        /// </summary>
        static async Task Main(string[] args)
        {
            try
            {
                if (!ThreadPool.SetMaxThreads(300, 300))
                {
                    Console.WriteLine("Not able to set max thread count");
                    return;
                }

                // Fire-and-forget reporter loop. Discarding the returned task
                // explicitly (instead of silently dropping it) suppresses
                // compiler warning CS4014 and documents that this task is
                // intentionally never awaited.
                _ = Task.Run(async () =>
                {
                    while (true)
                    {
                        await Task.Delay(1000);
                        var mostPopularStream = SavedStreams.OrderByDescending(kvp => kvp.Value).FirstOrDefault();
                        if (mostPopularStream.Value == 0)
                        {
                            // Default KeyValuePair => the map is empty (or all scores are zero).
                            Console.WriteLine("There is no popular streams right now");
                        }
                        else
                        {
                            Console.WriteLine($"Stream {mostPopularStream.Key} is most popular with sum {mostPopularStream.Value}");
                        }
                    }
                });

                var readStreamTasks = new List <Task>();
                for (int i = 0; i < Streams.Count(); i++)
                {
                    // Copy the loop variable so each lambda captures its own index.
                    int temp = i;
                    readStreamTasks.Add(Task.Run(async () =>
                    {
                        await ReadAsyncStream(Streams[temp], temp);
                    }));
                }
                await Task.WhenAll(readStreamTasks);
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }
示例#28
0
        /**
         * Returns a (possibly reused) TokenStream which tokenizes all the text
         * in the provided Reader.
         *
         * @return A TokenStream built from an ArabicLetterTokenizer filtered with
         *         LowerCaseFilter, StopFilter, ArabicNormalizationFilter
         *         and ArabicStemFilter.
         */
        public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
        {
            var streams = (SavedStreams)GetPreviousTokenStream();
            if (streams != null)
            {
                // Cached chain: only the tokenizer needs the new reader.
                streams.Source.Reset(reader);
                return streams.Result;
            }

            streams = new SavedStreams();
            streams.Source = new ArabicLetterTokenizer(reader);
            streams.Result = new LowerCaseFilter(streams.Source);
            streams.Result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                            streams.Result, stoptable);
            streams.Result = new ArabicNormalizationFilter(streams.Result);
            streams.Result = new ArabicStemFilter(streams.Result);
            SetPreviousTokenStream(streams);
            return streams.Result;
        }
示例#29
0
        /// <summary>
        /// Returns a cached-and-reset (or freshly built) chain:
        /// StandardTokenizer -> StandardFilter -> LowerCaseFilter -> StopFilter.
        /// Tokenizer tuning is re-applied on every call because the backing
        /// fields can change between uses.
        /// </summary>
        public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
        {
            var streams = (SavedStreams)PreviousTokenStream;
            if (streams == null)
            {
                streams = new SavedStreams();
                PreviousTokenStream = streams;
                streams.tokenStream = new StandardTokenizer(matchVersion, reader);
                streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
                streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
                streams.filteredTokenStream = new StopFilter(enableStopPositionIncrements, streams.filteredTokenStream, stopSet);
            }
            else
            {
                streams.tokenStream.Reset(reader);
            }

            streams.tokenStream.MaxTokenLength = MaxTokenLength;
            streams.tokenStream.SetReplaceInvalidAcronym(replaceInvalidAcronym);
            return streams.filteredTokenStream;
        }
示例#30
0
        /// <summary>
        /// Returns a cached-and-reset (or freshly built) chain:
        /// StandardTokenizer -> StandardFilter -> LowerCaseFilter ->
        /// ASCIIFoldingFilter -> StopFilter. Max token length is re-applied
        /// on every call because the backing field can change between uses.
        /// </summary>
        public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
        {
            var streams = (SavedStreams)PreviousTokenStream;
            if (streams != null)
            {
                streams.TokenStream.Reset(reader);
            }
            else
            {
                streams = new SavedStreams();
                this.PreviousTokenStream = streams;
                streams.TokenStream = new StandardTokenizer(_matchVersion, reader);
                streams.FilteredTokenStream = new StandardFilter(streams.TokenStream);
                streams.FilteredTokenStream = new LowerCaseFilter(streams.FilteredTokenStream);
                streams.FilteredTokenStream = new ASCIIFoldingFilter(streams.FilteredTokenStream);
                streams.FilteredTokenStream = new StopFilter(_enableStopPositionIncrements, streams.FilteredTokenStream, _stopWords);
            }

            streams.TokenStream.MaxTokenLength = MaxTokenLength;
            return streams.FilteredTokenStream;
        }
示例#31
0
        /// <summary>
        /// Returns a per-thread reusable chain: StreamLemmasFilter -> StopFilter.
        /// </summary>
        public override TokenStream ReusableTokenStream(string fieldName, System.IO.TextReader reader)
        {
            var cached = GetPreviousTokenStream() as SavedStreams;
            if (cached != null)
            {
                // Reuse the lemmatizing tokenizer; just point it at the new input.
                cached.source.Reset(reader);
                return cached.result;
            }

            cached = new SavedStreams();
            cached.source = new StreamLemmasFilter(reader, hebMorphLemmatizer,
                lemmaFilter, alwaysSaveMarkedOriginal);

            // Temporary stop filter, kept here until HebMorph can clear stop words
            // all by itself.
            cached.result = new StopFilter(enableStopPositionIncrements, cached.source, STOP_WORDS_SET);

            SetPreviousTokenStream(cached);
            return cached.result;
        }
        /// <summary>
        /// Returns a per-thread reusable chain:
        /// StandardTokenizer -> StandardFilter -> LowerCaseFilter -> StopFilter -> BulgarianStemFilter.
        /// </summary>
        public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
        {
            var cached = this.PreviousTokenStream as SavedStreams;

            if (cached != null)
            {
                // Reuse: re-point the tokenizer at the new input.
                cached.Source.Reset(reader);
                return cached.Result;
            }

            cached = new SavedStreams();
            cached.Source = new StandardTokenizer(matchVersion, reader);
            cached.Result = new StandardFilter(cached.Source);
            cached.Result = new LowerCaseFilter(cached.Result);
            // Stop-filter position increments are hard-coded off here.
            cached.Result = new StopFilter(false, cached.Result, stoptable);
            cached.Result = new BulgarianStemFilter(cached.Result);
            this.PreviousTokenStream = cached;

            return cached.Result;
        }
示例#33
0
        /// <summary>
        /// Returns a per-thread reusable Hebrew analysis chain:
        /// HebrewTokenizer -> NiqqudFilter -> StopFilter -> LowerCaseFilter
        /// (optionally followed by AddSuffixFilter).
        /// </summary>
        public override TokenStream ReusableTokenStream(string fieldName, System.IO.TextReader reader)
        {
            if (overridesTokenStreamMethod)
            {
                // LUCENE-1678: a subclass that overrides tokenStream() but not
                // reusableTokenStream() must be routed through the non-reusable path.
                return TokenStream(fieldName, reader);
            }

            var cached = PreviousTokenStream as SavedStreams;
            if (cached != null)
            {
                // Reuse: re-point the tokenizer at the new input.
                cached.source.Reset(reader);
                return cached.result;
            }

            cached = new SavedStreams();
            cached.source = new HebrewTokenizer(reader, PrefixTree);

            // Niqqud normalization.
            cached.result = new NiqqudFilter(cached.source);

            // TODO: should we ignoreCase in StopFilter?
            cached.result = new StopFilter(enableStopPositionIncrements, cached.result, STOP_WORDS_SET);

            // TODO: apply LowerCaseFilter to non-Hebrew tokens only.
            cached.result = new LowerCaseFilter(cached.result);

            if (suffixByTokenType != null && suffixByTokenType.Count > 0)
            {
                cached.result = new AddSuffixFilter(cached.result, suffixByTokenType);
            }

            PreviousTokenStream = cached;
            return cached.result;
        }
示例#34
0
        /*
         * Returns a (possibly reused) TokenStream which tokenizes all the text
         * in the provided Reader.
         *
         * @return  A TokenStream built from a StandardTokenizer filtered with
         *          LowerCaseFilter, StandardFilter, StopFilter, and
         *          BrazilianStemFilterCustom.
         */

        public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
        {
            var cached = (SavedStreams)this.PreviousTokenStream;

            if (cached != null)
            {
                // Reuse: re-point the tokenizer at the new input.
                cached.Source.Reset(reader);
                return cached.Result;
            }

            cached = new SavedStreams();
            cached.Source = new StandardTokenizer(this.matchVersion, reader);

            // Note: lower-casing happens before StandardFilter in this analyzer.
            cached.Result = new LowerCaseFilter(cached.Source);
            cached.Result = new StandardFilter(cached.Result);
            cached.Result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(this.matchVersion), cached.Result, this.stoptable);
            cached.Result = new BrazilianStemFilterCustom(cached.Result, this.excltable);
            this.PreviousTokenStream = cached;

            return cached.Result;
        }
示例#35
0
        /*
         * Returns a (possibly reused) TokenStream which tokenizes all the
         * text in the provided Reader.
         *
         * @return A TokenStream built from a StandardTokenizer
         *         filtered with StandardFilter, StopFilter,
         *         FrenchStemFilter and LowerCaseFilter
         */
        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
        {
            var cached = (SavedStreams)PreviousTokenStream;

            if (cached != null)
            {
                // Reuse: re-point the tokenizer at the new input.
                cached.source.Reset(reader);
                return cached.result;
            }

            cached = new SavedStreams();
            cached.source = new StandardTokenizer(matchVersion, reader);
            cached.result = new StandardFilter(cached.source);
            cached.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                           cached.result, stoptable);
            cached.result = new FrenchStemFilter(cached.result, excltable);
            // Lower-casing is applied deliberately AFTER stemming.
            cached.result = new LowerCaseFilter(cached.result);
            PreviousTokenStream = cached;
            return cached.result;
        }
示例#36
0
        /// <summary>
        /// Returns a per-thread reusable ShingleFilter wrapped around the
        /// default analyzer's (possibly reused) token stream.
        /// </summary>
        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
        {
            if (overridesTokenStreamMethod)
            {
                // LUCENE-1678: a subclass that overrides tokenStream() but not
                // reusableTokenStream() must be routed through the non-reusable path.
                return TokenStream(fieldName, reader);
            }

            var cached = (SavedStreams)PreviousTokenStream;

            if (cached == null)
            {
                cached = new SavedStreams();
                cached.wrapped = defaultAnalyzer.ReusableTokenStream(fieldName, reader);
                cached.shingle = new ShingleFilter(cached.wrapped);
                PreviousTokenStream = cached;
            }
            else
            {
                TokenStream inner = defaultAnalyzer.ReusableTokenStream(fieldName, reader);
                if (inner == cached.wrapped)
                {
                    /* the wrapped analyzer reused its stream; a reset suffices */
                    cached.shingle.Reset();
                }
                else
                {
                    /* the wrapped analyzer produced a new stream; re-wrap it */
                    cached.wrapped = inner;
                    cached.shingle = new ShingleFilter(cached.wrapped);
                }
            }

            // Propagate settings that may have changed since the filter was built.
            cached.shingle.SetMaxShingleSize(maxShingleSize);
            cached.shingle.SetOutputUnigrams(outputUnigrams);
            return cached.shingle;
        }
        /// <summary>
        /// Returns a per-thread reusable chain:
        /// StandardTokenizer -> StandardFilter -> LowerCaseFilter -> StopFilter.
        /// </summary>
        public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
        {
            var cached = (SavedStreams)GetPreviousTokenStream();

            if (cached != null)
            {
                // Reuse: re-point the tokenizer at the new input.
                cached.tokenStream.Reset(reader);
            }
            else
            {
                // First use on this thread: build and cache the chain.
                cached = new SavedStreams();
                SetPreviousTokenStream(cached);
                cached.tokenStream = new StandardTokenizer(reader);
                cached.filteredTokenStream = new StandardFilter(cached.tokenStream);
                cached.filteredTokenStream = new LowerCaseFilter(cached.filteredTokenStream);
                cached.filteredTokenStream = new StopFilter(cached.filteredTokenStream, stopSet);
            }

            // May have changed on the analyzer since the chain was cached.
            cached.tokenStream.SetMaxTokenLength(maxTokenLength);
            cached.tokenStream.SetReplaceInvalidAcronym(replaceInvalidAcronym);

            return cached.filteredTokenStream;
        }
示例#38
0
        /// <summary>
        /// Returns a per-thread reusable Hebrew analysis chain:
        /// HebrewTokenizer -> NiqqudFilter -> StopFilter -> LowerCaseFilter
        /// (optionally followed by AddSuffixFilter).
        /// </summary>
        public override TokenStream ReusableTokenStream(string fieldName, System.IO.TextReader reader)
        {
            if (overridesTokenStreamMethod)
            {
                // LUCENE-1678: a subclass that overrides tokenStream() but not
                // reusableTokenStream() must be routed through the non-reusable path.
                return TokenStream(fieldName, reader);
            }

            var cached = PreviousTokenStream as SavedStreams;
            if (cached != null)
            {
                // Reuse: re-point the tokenizer at the new input.
                cached.source.Reset(reader);
                return cached.result;
            }

            cached = new SavedStreams();
            cached.source = new HebrewTokenizer(reader, PrefixTree);

            // Niqqud normalization.
            cached.result = new NiqqudFilter(cached.source);

            // TODO: should we ignoreCase in StopFilter?
            cached.result = new StopFilter(enableStopPositionIncrements, cached.result, STOP_WORDS_SET);

            // TODO: apply LowerCaseFilter to non-Hebrew tokens only.
            cached.result = new LowerCaseFilter(cached.result);

            if (suffixByTokenType != null && suffixByTokenType.Count > 0)
            {
                cached.result = new AddSuffixFilter(cached.result, suffixByTokenType);
            }

            PreviousTokenStream = cached;
            return cached.result;
        }
		/// <summary>
		/// Returns a per-thread reusable chain:
		/// StandardTokenizer -> StandardFilter -> LowerCaseFilter -> StopFilter.
		/// </summary>
		public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
		{
			if (overridesTokenStreamMethod)
			{
				// LUCENE-1678: a subclass that overrides tokenStream() but not
				// reusableTokenStream() must be routed through the non-reusable path.
				return TokenStream(fieldName, reader);
			}
			var cached = (SavedStreams) GetPreviousTokenStream();
			if (cached != null)
			{
				// Reuse: re-point the tokenizer at the new input.
				cached.tokenStream.Reset(reader);
			}
			else
			{
				cached = new SavedStreams();
				SetPreviousTokenStream(cached);
				cached.tokenStream = new StandardTokenizer(reader);
				cached.filteredTokenStream = new StandardFilter(cached.tokenStream);
				cached.filteredTokenStream = new LowerCaseFilter(cached.filteredTokenStream);
				// Two StopFilter constructors: the default-increments path vs the explicit flag.
				cached.filteredTokenStream = useDefaultStopPositionIncrements
					? new StopFilter(cached.filteredTokenStream, stopSet)
					: new StopFilter(enableStopPositionIncrements, cached.filteredTokenStream, stopSet);
			}
			// May have changed on the analyzer since the chain was cached.
			cached.tokenStream.SetMaxTokenLength(maxTokenLength);
			cached.tokenStream.SetReplaceInvalidAcronym(replaceInvalidAcronym);
			return cached.filteredTokenStream;
		}
        /// <summary>
        /// Returns a per-field reusable stream from the delegate analyzer, wrapped
        /// in a StopFilter when stop words are configured for that field.
        /// Caches one SavedStreams entry per field name.
        /// </summary>
        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
        {
            if (overridesTokenStreamMethod)
            {
                // LUCENE-1678: a subclass that overrides tokenStream() but not
                // reusableTokenStream() must be routed through the non-reusable path.
                return TokenStream(fieldName, reader);
            }

            /* lazily create the per-field map of SavedStreams */
            var streamMap = (IDictionary<String, SavedStreams>)PreviousTokenStream;
            if (streamMap == null)
            {
                streamMap = new HashMap<String, SavedStreams>();
                PreviousTokenStream = streamMap;
            }

            SavedStreams cached = streamMap[fieldName];
            if (cached == null)
            {
                /* no entry for this field yet: create and cache one */
                cached = new SavedStreams();
                streamMap.Add(fieldName, cached);
                cached.Wrapped = _delegate.ReusableTokenStream(fieldName, reader);

                /* wrap with a stop filter only when this field has stop words */
                var fieldStopWords = stopWordsPerField[fieldName];
                cached.WithStopFilter = fieldStopWords != null
                    ? new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                     cached.Wrapped, fieldStopWords)
                    : cached.Wrapped;
            }
            else
            {
                /*
                 * an entry exists: check whether the delegate reused its stream.
                 * if so, reuse ours too; otherwise wrap the fresh stream.
                 */
                TokenStream inner = _delegate.ReusableTokenStream(fieldName, reader);
                if (inner == cached.Wrapped)
                {
                    /* the wrapped analyzer reused the stream */
                    cached.WithStopFilter.Reset();
                }
                else
                {
                    cached.Wrapped = inner;
                    var fieldStopWords = stopWordsPerField[fieldName];
                    cached.WithStopFilter = fieldStopWords != null
                        ? new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                         cached.Wrapped, fieldStopWords)
                        : cached.Wrapped;
                }
            }

            return cached.WithStopFilter;
        }
        /// <summary>
        /// Returns a per-thread reusable chain:
        /// StandardTokenizer -> StandardFilter -> LowerCaseFilter -> StopFilter.
        /// </summary>
        public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
        {
            var cached = (SavedStreams) GetPreviousTokenStream();
            if (cached == null)
            {
                // First use on this thread: build and cache the chain.
                cached = new SavedStreams();
                SetPreviousTokenStream(cached);
                cached.tokenStream = new StandardTokenizer(reader);
                cached.filteredTokenStream = new StandardFilter(cached.tokenStream);
                cached.filteredTokenStream = new LowerCaseFilter(cached.filteredTokenStream);
                cached.filteredTokenStream = new StopFilter(cached.filteredTokenStream, stopSet);
            }
            else
            {
                // Reuse: re-point the tokenizer at the new input.
                cached.tokenStream.Reset(reader);
            }

            // May have changed on the analyzer since the chain was cached.
            cached.tokenStream.SetMaxTokenLength(maxTokenLength);
            cached.tokenStream.SetReplaceInvalidAcronym(replaceInvalidAcronym);

            return cached.filteredTokenStream;
        }
示例#42
0
 /// <summary>
 /// Returns a per-thread reusable chain: LowerCaseTokenizer -> StopFilter.
 /// </summary>
 public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
 {
     var cached = (SavedStreams) PreviousTokenStream;
     if (cached != null)
     {
         // Reuse: re-point the tokenizer at the new input.
         cached.source.Reset(reader);
     }
     else
     {
         cached = new SavedStreams(this) {source = new LowerCaseTokenizer(reader)};
         cached.result = new StopFilter(enablePositionIncrements, cached.source, stopWords);
         PreviousTokenStream = cached;
     }
     return cached.result;
 }
示例#43
0
 /*
  * Returns a (possibly reused) TokenStream which tokenizes all the
  * text in the provided Reader.
  *
  * @return A TokenStream built from a StandardTokenizer
  *         filtered with StandardFilter, StopFilter,
  *         FrenchStemFilter and LowerCaseFilter
  */
 public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
 {
     var cached = (SavedStreams)PreviousTokenStream;
     if (cached != null)
     {
         // Reuse: re-point the tokenizer at the new input.
         cached.source.Reset(reader);
         return cached.result;
     }

     cached = new SavedStreams();
     cached.source = new StandardTokenizer(matchVersion, reader);
     cached.result = new StandardFilter(cached.source);
     cached.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                    cached.result, stoptable);
     cached.result = new FrenchStemFilter(cached.result, excltable);
     // Lower-casing is applied deliberately AFTER stemming.
     cached.result = new LowerCaseFilter(cached.result);
     PreviousTokenStream = cached;
     return cached.result;
 }
示例#44
0
        /*
         * Returns a (possibly reused) TokenStream which tokenizes all the
         * text in the provided Reader.
         *
         * @return A TokenStream built from a StandardTokenizer
         *   filtered with StandardFilter, StopFilter,
         *   and DutchStemFilter
         */
        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
        {
            if (overridesTokenStreamMethod)
            {
                // LUCENE-1678: a subclass that overrides tokenStream() but not
                // reusableTokenStream() must be routed through the non-reusable path.
                return TokenStream(fieldName, reader);
            }

            var cached = (SavedStreams)PreviousTokenStream;
            if (cached != null)
            {
                // Reuse: re-point the tokenizer at the new input.
                cached.source.Reset(reader);
                return cached.result;
            }

            cached = new SavedStreams();
            cached.source = new StandardTokenizer(matchVersion, reader);
            cached.result = new StandardFilter(cached.source);
            cached.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                           cached.result, stoptable);
            cached.result = new DutchStemFilter(cached.result, excltable, stemdict);
            PreviousTokenStream = cached;
            return cached.result;
        }
        /// <summary>
        /// Returns a per-thread reusable ShingleFilter wrapped around the
        /// default analyzer's (possibly reused) token stream.
        /// </summary>
        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
        {
            if (overridesTokenStreamMethod)
            {
                // LUCENE-1678: a subclass that overrides tokenStream() but not
                // reusableTokenStream() must be routed through the non-reusable path.
                return TokenStream(fieldName, reader);
            }

            var cached = (SavedStreams)PreviousTokenStream;
            if (cached == null)
            {
                cached = new SavedStreams();
                cached.wrapped = defaultAnalyzer.ReusableTokenStream(fieldName, reader);
                cached.shingle = new ShingleFilter(cached.wrapped);
                PreviousTokenStream = cached;
            }
            else
            {
                TokenStream inner = defaultAnalyzer.ReusableTokenStream(fieldName, reader);
                if (inner == cached.wrapped)
                {
                    /* the wrapped analyzer reused its stream; a reset suffices */
                    cached.shingle.Reset();
                }
                else
                {
                    /* the wrapped analyzer produced a new stream; re-wrap it */
                    cached.wrapped = inner;
                    cached.shingle = new ShingleFilter(cached.wrapped);
                }
            }

            // Propagate settings that may have changed since the filter was built.
            cached.shingle.SetMaxShingleSize(maxShingleSize);
            cached.shingle.SetOutputUnigrams(outputUnigrams);
            return cached.shingle;
        }
示例#46
0
 /*
  * Returns a (possibly reused) TokenStream which tokenizes all the text
  * in the provided Reader.
  *
  * @return  A TokenStream built from a
  *   RussianLetterTokenizer filtered with
  *   LowerCaseFilter, StopFilter,
  *   and RussianStemFilter
  */
 public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
 {
     var cached = (SavedStreams)PreviousTokenStream;
     if (cached != null)
     {
         // Reuse: re-point the tokenizer at the new input.
         cached.source.Reset(reader);
         return cached.result;
     }

     cached = new SavedStreams();
     cached.source = new RussianLetterTokenizer(reader);
     cached.result = new LowerCaseFilter(cached.source);
     cached.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                    cached.result, stopSet);
     cached.result = new RussianStemFilter(cached.result);
     PreviousTokenStream = cached;
     return cached.result;
 }
示例#47
0
 /**
  * Returns a (possibly reused) TokenStream which tokenizes all the text
  * in the provided Reader.
  *
  * @return A TokenStream built from an ArabicLetterTokenizer
  *         filtered with LowerCaseFilter,
  *         ArabicNormalizationFilter,
  *         PersianNormalizationFilter and Persian stop words
  */
 public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
 {
     var cached = (SavedStreams)PreviousTokenStream;
     if (cached != null)
     {
         // Reuse: re-point the tokenizer at the new input.
         cached.source.Reset(reader);
         return cached.result;
     }

     cached = new SavedStreams();
     cached.source = new ArabicLetterTokenizer(reader);
     cached.result = new LowerCaseFilter(cached.source);
     cached.result = new ArabicNormalizationFilter(cached.result);
     /* additional Persian-specific normalization */
     cached.result = new PersianNormalizationFilter(cached.result);
     /*
      * ordering matters: the stop-word list is normalized with the
      * filters above!
      */
     cached.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                    cached.result, stoptable);
     PreviousTokenStream = cached;
     return cached.result;
 }
示例#48
0
 /**
  * Returns a (possibly reused) TokenStream which tokenizes all the text
  * in the provided Reader.
  *
  * @param fieldName lucene field name
  * @param reader    Input Reader
  * @return A TokenStream built from CJKTokenizer, filtered with
  *    StopFilter
  */
 public override sealed TokenStream ReusableTokenStream(String fieldName, TextReader reader)
 {
     /* tokenStream() is final, no back compat issue */
     var cached = (SavedStreams) PreviousTokenStream;
     if (cached != null)
     {
         // Reuse: re-point the tokenizer at the new input.
         cached.source.Reset(reader);
         return cached.result;
     }

     cached = new SavedStreams();
     cached.source = new CJKTokenizer(reader);
     cached.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                    cached.source, stopTable);
     PreviousTokenStream = cached;
     return cached.result;
 }
        /* Returns a (possibly reused) StandardTokenizer filtered by a
         * WordDelimiterFilter, a StandardFilter, a LowerCaseFilter,
         * an optional StopFilter, and an English SnowballFilter. */
        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
        {
            if (overridesTokenStreamMethod)
            {
                // LUCENE-1678: a subclass that overrides tokenStream() but not
                // reusableTokenStream() must be routed through the non-reusable path.
                return TokenStream(fieldName, reader);
            }

            var cached = (SavedStreams)this.GetPreviousTokenStream();
            if (cached != null)
            {
                // Reuse: re-point the tokenizer at the new input.
                cached.source.Reset(reader);
                return cached.result;
            }

            cached = new SavedStreams();
            cached.source = new StandardTokenizer(matchVersion, reader);
            cached.result = new Portal.LuceneInterface.WordDelimiterFilter(cached.source, 1, 1, 1, 1, 1);
            cached.result = new StandardFilter(cached.result);
            cached.result = new LowerCaseFilter(cached.result);
            // Stop words are optional on this analyzer.
            if (stopSet != null)
            {
                cached.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                    cached.result, stopSet);
            }
            cached.result = new SnowballFilter(cached.result, "English");
            this.SetPreviousTokenStream(cached);
            return cached.result;
        }
 /// <summary>
 /// Returns a per-thread reusable chain:
 /// CharJoinAbbreviationsLowerCaseTokenizer -> CharJoinAbbreviationsLowerCaseFilter.
 /// (A SynonymFilter stage exists in the codebase but is wired out here.)
 /// </summary>
 public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
 {
     var cached = (SavedStreams)PreviousTokenStream;
     if (cached != null)
     {
         // Reuse: re-point the tokenizer at the new input.
         cached.source.Reset(reader);
     }
     else
     {
         cached = new SavedStreams(this);
         cached.source = new CharJoinAbbreviationsLowerCaseTokenizer(reader);
         cached.result = new CharJoinAbbreviationsLowerCaseFilter(cached.source);
         PreviousTokenStream = cached;
     }
     return cached.result;
 }
示例#51
0
        /*
         * Returns a (possibly reused) TokenStream which tokenizes all the text
         * in the provided Reader.
         *
         * @return  A TokenStream built from a StandardTokenizer filtered with
         *          LowerCaseFilter, StandardFilter, StopFilter, and
         *          BrazilianStemFilterCustom.
         */
        public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
        {
            var cached = (SavedStreams)this.PreviousTokenStream;

            if (cached != null)
            {
                // Reuse: re-point the tokenizer at the new input.
                cached.Source.Reset(reader);
                return cached.Result;
            }

            cached = new SavedStreams();
            cached.Source = new StandardTokenizer(this.matchVersion, reader);

            // Note: lower-casing happens before StandardFilter in this analyzer.
            cached.Result = new LowerCaseFilter(cached.Source);
            cached.Result = new StandardFilter(cached.Result);
            cached.Result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(this.matchVersion), cached.Result, this.stoptable);
            cached.Result = new BrazilianStemFilterCustom(cached.Result, this.excltable);
            this.PreviousTokenStream = cached;

            return cached.Result;
        }
示例#52
0
        /// <summary>
        /// Returns a per-thread reusable chain: StreamLemmasFilter -> StopFilter.
        /// </summary>
        public override TokenStream ReusableTokenStream(string fieldName, System.IO.TextReader reader)
        {
            if (overridesTokenStreamMethod)
            {
                // LUCENE-1678: a subclass that overrides tokenStream() but not
                // reusableTokenStream() must be routed through the non-reusable path.
                return TokenStream(fieldName, reader);
            }

            var cached = PreviousTokenStream as SavedStreams;
            if (cached != null)
            {
                // Reuse the lemmatizing tokenizer; just point it at the new input.
                cached.source.Reset(reader);
                return cached.result;
            }

            cached = new SavedStreams();
            PreviousTokenStream = cached;
            cached.source = new StreamLemmasFilter(reader, hebMorphLemmatizer,
                lemmaFilter, alwaysSaveMarkedOriginal);

            // Temporary stop filter, kept here until HebMorph can clear stop words
            // all by itself.
            cached.result = new StopFilter(enableStopPositionIncrements, cached.source, STOP_WORDS_SET);
            return cached.result;
        }
        /// <summary>
        /// Returns a per-thread reusable chain:
        /// StandardTokenizer -> StandardFilter -> LowerCaseFilter -> StopFilter -> BulgarianStemFilter.
        /// </summary>
        public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
        {
            var cached = this.PreviousTokenStream as SavedStreams;
            if (cached != null)
            {
                // Reuse: re-point the tokenizer at the new input.
                cached.Source.Reset(reader);
                return cached.Result;
            }

            cached = new SavedStreams();
            cached.Source = new StandardTokenizer(matchVersion, reader);
            cached.Result = new StandardFilter(cached.Source);
            cached.Result = new LowerCaseFilter(cached.Result);
            // Stop-filter position increments are hard-coded off here.
            cached.Result = new StopFilter(false, cached.Result, stoptable);
            cached.Result = new BulgarianStemFilter(cached.Result);
            this.PreviousTokenStream = cached;

            return cached.Result;
        }
示例#54
0
 /// <summary>
 /// Records the destination path in the SavedStreams collection.
 /// NOTE(review): <paramref name="inputStream"/> is ignored and its contents are
 /// not persisted — presumably this is a test stub/spy; confirm the intent.
 /// </summary>
 /// <param name="path">Destination path to record.</param>
 /// <param name="inputStream">Stream whose contents would be saved; unused here.</param>
 public void SaveStream(string path, Stream inputStream)
 {
     SavedStreams.Add(path);
 }
示例#55
0
 /**
  * Returns a (possibly reused) TokenStream which tokenizes all the text
  * in the provided Reader.
  *
  * @return  A TokenStream built from an ArabicLetterTokenizer filtered with
  *            LowerCaseFilter, StopFilter, ArabicNormalizationFilter
  *            and ArabicStemFilter.
  */
 public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
 {
     var cached = (SavedStreams)GetPreviousTokenStream();
     if (cached != null)
     {
         // Reuse: re-point the tokenizer at the new input.
         cached.Source.Reset(reader);
         return cached.Result;
     }

     cached = new SavedStreams();
     cached.Source = new ArabicLetterTokenizer(reader);
     cached.Result = new LowerCaseFilter(cached.Source);
     cached.Result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                    cached.Result, stoptable);
     cached.Result = new ArabicNormalizationFilter(cached.Result);
     cached.Result = new ArabicStemFilter(cached.Result);
     SetPreviousTokenStream(cached);
     return cached.Result;
 }