/// <summary>
/// Returns a (possibly reused) token stream built from a StandardTokenizer
/// filtered by StandardFilter, LowerCaseFilter, StopFilter and DanishStemFilter.
/// </summary>
public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
{
    if (overridesTokenStreamMethod)
    {
        // A subclass overrides TokenStream() but not this method; fall back to it.
        return TokenStream(fieldName, reader);
    }

    var cached = (SavedStreams)PreviousTokenStream;
    if (cached == null)
    {
        cached = new SavedStreams { Source = new StandardTokenizer(MatchVersion, reader) };
        cached.Result = new StandardFilter(cached.Source);
        cached.Result = new LowerCaseFilter(cached.Result);
        // TODO: Lucene.Net.Analysis.Compound.HyphenationCompoundWordTokenFilter
        cached.Result = new StopFilter(
            StopFilter.GetEnablePositionIncrementsVersionDefault(MatchVersion),
            cached.Result,
            StopTable);
        cached.Result = new DanishStemFilter(cached.Result, ExclusionTable);
        PreviousTokenStream = cached;
    }
    else
    {
        // Reuse the cached chain; just point the tokenizer at the new reader.
        cached.Source.Reset(reader);
    }
    return cached.Result;
}
/// <summary>
/// Returns a (possibly reused) StandardTokenizer filtered by a StandardFilter,
/// a LowerCaseFilter, an optional StopFilter, and a SnowballFilter.
/// </summary>
public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
{
    if (overridesTokenStreamMethod)
    {
        // LUCENE-1678: force fallback to tokenStream() if we have been
        // subclassed and that subclass overrides tokenStream but not
        // reusableTokenStream.
        return TokenStream(fieldName, reader);
    }

    var saved = (SavedStreams)PreviousTokenStream;
    if (saved == null)
    {
        saved = new SavedStreams();
        saved.source = new StandardTokenizer(matchVersion, reader);
        saved.result = new StandardFilter(saved.source);
        saved.result = new LowerCaseFilter(saved.result);
        if (stopSet != null)
        {
            saved.result = new StopFilter(enableStopIncrements, saved.result, stopSet);
        }
        saved.result = new SnowballFilter(saved.result, name);
        PreviousTokenStream = saved;
    }
    else
    {
        // Reuse cached filters; only the tokenizer needs the new reader.
        saved.source.Reset(reader);
    }
    return saved.result;
}
/// <summary>
/// Returns a (possibly reused) StandardTokenizer filtered by StandardFilter,
/// LowerCaseFilter and StopFilter. Tokenizer settings (max token length,
/// invalid-acronym handling) are re-applied on every call.
/// </summary>
public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
{
    if (overridesTokenStreamMethod)
    {
        // LUCENE-1678: force fallback to tokenStream() if we have been
        // subclassed and that subclass overrides tokenStream but not
        // reusableTokenStream.
        return TokenStream(fieldName, reader);
    }

    var saved = (SavedStreams)PreviousTokenStream;
    if (saved == null)
    {
        saved = new SavedStreams();
        // Stored before construction, matching the original initialization order.
        PreviousTokenStream = saved;
        saved.tokenStream = new StandardTokenizer(matchVersion, reader);
        saved.filteredTokenStream = new StandardFilter(saved.tokenStream);
        saved.filteredTokenStream = new LowerCaseFilter(saved.filteredTokenStream);
        saved.filteredTokenStream = new StopFilter(enableStopPositionIncrements, saved.filteredTokenStream, stopSet);
    }
    else
    {
        saved.tokenStream.Reset(reader);
    }

    // These settings may have changed since the streams were built.
    saved.tokenStream.MaxTokenLength = maxTokenLength;
    saved.tokenStream.SetReplaceInvalidAcronym(replaceInvalidAcronym);
    return saved.filteredTokenStream;
}
/// <summary>
/// Returns a (possibly reused) StandardTokenizer filtered by StandardFilter,
/// ThaiWordFilter, and an English StopFilter.
/// </summary>
public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
{
    if (overridesTokenStreamMethod)
    {
        // LUCENE-1678: force fallback to tokenStream() if we have been
        // subclassed and that subclass overrides tokenStream but not
        // reusableTokenStream.
        return TokenStream(fieldName, reader);
    }

    var saved = (SavedStreams)PreviousTokenStream;
    if (saved == null)
    {
        saved = new SavedStreams();
        saved.source = new StandardTokenizer(matchVersion, reader);
        saved.result = new StandardFilter(saved.source);
        saved.result = new ThaiWordFilter(saved.result);
        saved.result = new StopFilter(
            StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
            saved.result,
            StopAnalyzer.ENGLISH_STOP_WORDS_SET);
        PreviousTokenStream = saved;
    }
    else
    {
        saved.source.Reset(reader);
        saved.result.Reset(); // reset the ThaiWordFilter's state
    }
    return saved.result;
}
/// <summary>
/// Returns a (possibly reused) TokenStream which tokenizes all the text in the
/// provided reader: StandardTokenizer filtered with StandardFilter, StopFilter
/// and DutchStemFilter.
/// </summary>
public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
{
    if (overridesTokenStreamMethod)
    {
        // LUCENE-1678: force fallback to tokenStream() if we have been
        // subclassed and that subclass overrides tokenStream but not
        // reusableTokenStream.
        return TokenStream(fieldName, reader);
    }

    var saved = (SavedStreams)PreviousTokenStream;
    if (saved == null)
    {
        saved = new SavedStreams();
        saved.source = new StandardTokenizer(matchVersion, reader);
        saved.result = new StandardFilter(saved.source);
        saved.result = new StopFilter(
            StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
            saved.result,
            stoptable);
        saved.result = new DutchStemFilter(saved.result, excltable, stemdict);
        PreviousTokenStream = saved;
    }
    else
    {
        saved.source.Reset(reader);
    }
    return saved.result;
}
/// <summary>
/// Returns a (possibly reused) StandardTokenizer filtered by StandardFilter,
/// LowerCaseFilter, an optional StopFilter, BestBetsWordFormsFilter and
/// RemoveDuplicatesTokenFilter.
/// </summary>
public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
{
    if (overridesTokenStreamMethod)
    {
        // LUCENE-1678: force fallback to tokenStream() if we have been
        // subclassed and that subclass overrides tokenStream but not
        // reusableTokenStream.
        return TokenStream(fieldName, reader);
    }

    var saved = (SavedStreams)PreviousTokenStream;
    if (saved == null)
    {
        saved = new SavedStreams();
        saved.source = new StandardTokenizer(matchVersion, reader);
        saved.result = new StandardFilter(saved.source);
        saved.result = new LowerCaseFilter(saved.result);
        if (stopSet != null)
        {
            saved.result = new StopFilter(
                StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                saved.result,
                stopSet);
        }
        saved.result = new BestBetsWordFormsFilter(saved.result);
        // This will remove duplicate keywords - bad for best bets/term count matching
        saved.result = new RemoveDuplicatesTokenFilter(saved.result);
        PreviousTokenStream = saved;
    }
    else
    {
        saved.source.Reset(reader);
    }
    return saved.result;
}
/// <summary>
/// Consumes one async boolean stream and maintains an exponentially decayed
/// popularity sum for it in <c>SavedStreams</c>. A tracked stream whose decayed
/// sum falls below 0.5 is evicted; an untracked stream starts being tracked
/// (with sum 1) when it emits a <c>true</c> value.
/// </summary>
/// <param name="stream">The async sequence of boolean signals to consume.</param>
/// <param name="streamIndex">Key identifying this stream in <c>SavedStreams</c>.</param>
private static async Task ReadAsyncStream(IAsyncEnumerable<bool> stream, int streamIndex)
{
    try
    {
        await foreach (var input in stream)
        {
            var value = Convert.ToInt32(input);
            if (SavedStreams.TryGetValue(streamIndex, out _))
            {
                // Fix: use the value returned by AddOrUpdate instead of a second
                // TryGetValue. The old read-after-write was a redundant lookup and
                // racy: another writer could change the entry between the update
                // and the re-read, so the eviction decision could be based on a
                // different value than the one just computed.
                double updatedSum = SavedStreams.AddOrUpdate(
                    streamIndex,
                    -1,
                    (index, oldValue) => oldValue * (1 - C) + value);
                if (updatedSum < 0.5)
                {
                    SavedStreams.TryRemove(streamIndex, out _);
                }
            }
            else if (value == 1)
            {
                // Start tracking this stream with an initial sum of 1.
                SavedStreams.AddOrUpdate(streamIndex, 1, (index, oldValue) => 1);
            }
        }
    }
    catch (Exception ex)
    {
        // Best-effort: a failed stream is reported and simply stops contributing.
        Console.WriteLine($"{streamIndex}, {ex.Message}");
    }
}
/// <summary>
/// Returns a (possibly reused) TokenStream which tokenizes all the text in the
/// provided reader: ArabicLetterTokenizer filtered with LowerCaseFilter,
/// ArabicNormalizationFilter, PersianNormalizationFilter and a Persian StopFilter.
/// </summary>
public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
{
    var saved = (SavedStreams)PreviousTokenStream;
    if (saved == null)
    {
        saved = new SavedStreams();
        saved.source = new ArabicLetterTokenizer(reader);
        saved.result = new LowerCaseFilter(saved.source);
        saved.result = new ArabicNormalizationFilter(saved.result);
        // Additional persian-specific normalization.
        saved.result = new PersianNormalizationFilter(saved.result);
        // The order here is important: the stopword list is normalized with the above!
        saved.result = new StopFilter(
            StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
            saved.result,
            stoptable);
        PreviousTokenStream = saved;
    }
    else
    {
        saved.source.Reset(reader);
    }
    return saved.result;
}
/// <summary>
/// Returns a (possibly reused) HebrewTokenizer filtered by NiqqudFilter,
/// StopFilter, LowerCaseFilter and (when configured) AddSuffixFilter.
/// </summary>
public override TokenStream ReusableTokenStream(string fieldName, System.IO.TextReader reader)
{
    var saved = GetPreviousTokenStream() as SavedStreams;
    if (saved == null)
    {
        saved = new SavedStreams();
        saved.source = new HebrewTokenizer(reader, PrefixTree);

        // Niqqud normalization
        saved.result = new NiqqudFilter(saved.source);

        // TODO: should we ignoreCase in StopFilter?
        saved.result = new StopFilter(enableStopPositionIncrements, saved.result, STOP_WORDS_SET);

        // TODO: Apply LowerCaseFilter to NonHebrew tokens only
        saved.result = new LowerCaseFilter(saved.result);

        if (suffixByTokenType != null && suffixByTokenType.Count > 0)
        {
            saved.result = new AddSuffixFilter(saved.result, suffixByTokenType);
        }

        SetPreviousTokenStream(saved);
    }
    else
    {
        saved.source.Reset(reader);
    }
    return saved.result;
}
/// <summary>
/// Returns a (possibly reused) StreamLemmasFilter followed by a StopFilter.
/// </summary>
public override TokenStream ReusableTokenStream(string fieldName, System.IO.TextReader reader)
{
    if (overridesTokenStreamMethod)
    {
        // LUCENE-1678: force fallback to tokenStream() if we have been
        // subclassed and that subclass overrides tokenStream but not
        // reusableTokenStream.
        return TokenStream(fieldName, reader);
    }

    var saved = PreviousTokenStream as SavedStreams;
    if (saved == null)
    {
        saved = new SavedStreams();
        // Stored before construction, matching the original initialization order.
        PreviousTokenStream = saved;
        saved.source = new StreamLemmasFilter(reader, hebMorphLemmatizer, lemmaFilter, alwaysSaveMarkedOriginal);

        // This stop filter is here temporarily, until HebMorph is smart enough
        // to clear stop words all by itself.
        saved.result = new StopFilter(enableStopPositionIncrements, saved.source, STOP_WORDS_SET);
    }
    else
    {
        saved.source.Reset(reader);
    }
    return saved.result;
}
/// <summary>
/// Returns a (possibly reused) StandardTokenizer filtered by StandardFilter,
/// ThaiWordFilter, and an English StopFilter.
/// </summary>
public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
{
    if (overridesTokenStreamMethod)
    {
        // LUCENE-1678: force fallback to tokenStream() if we have been
        // subclassed and that subclass overrides tokenStream but not
        // reusableTokenStream.
        return TokenStream(fieldName, reader);
    }

    var saved = (SavedStreams)PreviousTokenStream;
    if (saved == null)
    {
        saved = new SavedStreams();
        saved.source = new StandardTokenizer(matchVersion, reader);
        saved.result = new StandardFilter(saved.source);
        saved.result = new ThaiWordFilter(saved.result);
        saved.result = new StopFilter(
            StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
            saved.result,
            StopAnalyzer.ENGLISH_STOP_WORDS_SET);
        PreviousTokenStream = saved;
    }
    else
    {
        saved.source.Reset(reader);
        saved.result.Reset(); // reset the ThaiWordFilter's state
    }
    return saved.result;
}
/// <summary>
/// Returns a (possibly reused) LowerCaseTokenizer filtered by a StopFilter.
/// </summary>
public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
{
    var saved = (SavedStreams)GetPreviousTokenStream();
    if (saved == null)
    {
        saved = new SavedStreams(this);
        saved.source = new LowerCaseTokenizer(reader);
        saved.result = new StopFilter(saved.source, stopWords);
        SetPreviousTokenStream(saved);
    }
    else
    {
        saved.source.Reset(reader);
    }
    return saved.result;
}
/// <summary>
/// Returns a (possibly reused) StandardTokenizer lower-cased and stemmed by a
/// HunspellStemFilter.
/// </summary>
public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
{
    var saved = (SavedStreams)PreviousTokenStream;
    if (saved == null)
    {
        saved = new SavedStreams();
        saved.Tokenizer = new StandardTokenizer(LuceneVersion.LUCENE_29, reader);
        saved.Filter = new HunspellStemFilter(new LowerCaseFilter(saved.Tokenizer), _dictionary);
        PreviousTokenStream = saved;
    }
    else
    {
        saved.Tokenizer.Reset(reader);
        // The stem filter carries per-document state and must be reset too.
        saved.Filter.Reset();
    }
    return saved.Filter;
}
/// <summary>
/// Returns a (possibly reused) KeywordTokenizer whose single token is replaced
/// by its collation key for the configured culture.
/// </summary>
public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
{
    var saved = (SavedStreams)PreviousTokenStream;
    if (saved == null)
    {
        saved = new SavedStreams();
        saved.source = new KeywordTokenizer(reader);
        saved.result = new CollationKeyFilter(saved.source, cultureInfo);
        PreviousTokenStream = saved;
    }
    else
    {
        saved.source.Reset(reader);
    }
    return saved.result;
}
/// <summary>
/// Returns a (possibly reused) LowerCaseTokenizer filtered by a StopFilter.
/// </summary>
public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
{
    var saved = (SavedStreams)GetPreviousTokenStream();
    if (saved == null)
    {
        saved = new SavedStreams(this);
        saved.source = new LowerCaseTokenizer(reader);
        saved.result = new StopFilter(saved.source, stopWords);
        SetPreviousTokenStream(saved);
    }
    else
    {
        saved.source.Reset(reader);
    }
    return saved.result;
}
/// <summary>
/// Returns a (possibly reused) WhiteSpaceTokenizer filtered by a LowerCaseFilter.
/// </summary>
public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
{
    var saved = (SavedStreams)GetPreviousTokenStream();
    if (saved == null)
    {
        saved = new SavedStreams();
        // Stored before construction, matching the original initialization order.
        SetPreviousTokenStream(saved);
        saved.tokenStream = new WhiteSpaceTokenizer(reader);
        saved.filteredTokenStream = new LowerCaseFilter(saved.tokenStream);
    }
    else
    {
        saved.tokenStream.Reset(reader);
    }
    return saved.filteredTokenStream;
}
/// <summary>
/// Returns a (possibly reused) <see cref="TokenStream"/> which tokenizes all the text in the
/// provided <see cref="TextReader"/>.
/// </summary>
/// <returns>
/// A <see cref="TokenStream"/> built from a <see cref="ChineseTokenizer"/>
/// filtered with <see cref="ChineseFilter"/>.
/// </returns>
public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
{
    // tokenStream() is final, no back compat issue.
    var saved = (SavedStreams)PreviousTokenStream;
    if (saved == null)
    {
        saved = new SavedStreams();
        saved.source = new ChineseTokenizer(reader);
        saved.result = new ChineseFilter(saved.source);
        PreviousTokenStream = saved;
    }
    else
    {
        saved.source.Reset(reader);
    }
    return saved.result;
}
/// <summary>
/// Returns a (possibly reused) KeywordTokenizer whose single token is replaced
/// by its collation key for the configured culture.
/// </summary>
public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
{
    var saved = (SavedStreams)PreviousTokenStream;
    if (saved == null)
    {
        saved = new SavedStreams();
        saved.Source = new KeywordTokenizer(reader);
        saved.Result = new CollationKeyFilter(saved.Source, _cultureInfo);
        PreviousTokenStream = saved;
    }
    else
    {
        saved.Source.Reset(reader);
    }
    return saved.Result;
}
/// <summary>
/// Returns a (possibly reused) <see cref="TokenStream"/> which tokenizes all the text in the
/// provided <see cref="TextReader"/>.
/// </summary>
/// <returns>
/// A <see cref="TokenStream"/> built from a <see cref="ChineseTokenizer"/>
/// filtered with <see cref="ChineseFilter"/>.
/// </returns>
public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
{
    // tokenStream() is final, no back compat issue.
    var saved = (SavedStreams)PreviousTokenStream;
    if (saved == null)
    {
        saved = new SavedStreams();
        saved.source = new ChineseTokenizer(reader);
        saved.result = new ChineseFilter(saved.source);
        PreviousTokenStream = saved;
    }
    else
    {
        saved.source.Reset(reader);
    }
    return saved.result;
}
/// <summary>
/// Returns a (possibly reused) WhitespaceTokenizer filtered by StandardFilter
/// and LowerCaseFilter.
/// </summary>
public override TokenStream ReusableTokenStream([NotNull] string fieldName, [NotNull] TextReader reader)
{
    var saved = (SavedStreams)this.PreviousTokenStream;
    if (saved == null)
    {
        saved = new SavedStreams();
        // Stored before construction, matching the original initialization order.
        this.PreviousTokenStream = saved;
        saved.TokenStream = new WhitespaceTokenizer(reader);
        saved.FilteredTokenStream = new StandardFilter(saved.TokenStream);
        saved.FilteredTokenStream = new LowerCaseFilter(saved.FilteredTokenStream);
    }
    else
    {
        saved.TokenStream.Reset(reader);
    }
    return saved.FilteredTokenStream;
}
/// <summary>
/// Returns a (possibly reused) LetterOrDigitTokenizer filtered by
/// LowerCaseFilter and ASCIIFoldingFilter.
/// </summary>
public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
{
    var saved = (SavedStreams)PreviousTokenStream;
    if (saved == null)
    {
        saved = new SavedStreams();
        // Stored before construction, matching the original initialization order.
        PreviousTokenStream = saved;
        saved.tokenStream = new LetterOrDigitTokenizer(reader);
        saved.filteredTokenStream = new LowerCaseFilter(saved.tokenStream);
        saved.filteredTokenStream = new ASCIIFoldingFilter(saved.filteredTokenStream);
    }
    else
    {
        saved.tokenStream.Reset(reader);
    }
    return saved.filteredTokenStream;
}
/// <summary>
/// Returns a (possibly reused) StandardTokenizer lower-cased and stemmed by a
/// HunspellStemFilter.
/// </summary>
public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
{
    var saved = (SavedStreams)PreviousTokenStream;
    if (saved == null)
    {
        saved = new SavedStreams();
        saved.Tokenizer = new StandardTokenizer(LuceneVersion.LUCENE_29, reader);
        saved.Filter = new HunspellStemFilter(new LowerCaseFilter(saved.Tokenizer), _dictionary);
        PreviousTokenStream = saved;
    }
    else
    {
        saved.Tokenizer.Reset(reader);
        // The stem filter carries per-document state and must be reset too.
        saved.Filter.Reset();
    }
    return saved.Filter;
}
/// <summary>
/// Returns a (possibly reused) TokenStream which tokenizes all the text in the
/// provided reader: StandardTokenizer filtered with GreekLowerCaseFilter and
/// StopFilter.
/// </summary>
public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
{
    var saved = (SavedStreams)PreviousTokenStream;
    if (saved == null)
    {
        saved = new SavedStreams();
        saved.source = new StandardTokenizer(matchVersion, reader);
        saved.result = new GreekLowerCaseFilter(saved.source);
        saved.result = new StopFilter(
            StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
            saved.result,
            stopSet);
        PreviousTokenStream = saved;
    }
    else
    {
        saved.source.Reset(reader);
    }
    return saved.result;
}
/// <summary>
/// Returns a (possibly reused) WhitespaceTokenizer filtered by StandardFilter
/// and LowerCaseFilter.
/// </summary>
public override TokenStream ReusableTokenStream([NotNull] string fieldName, [NotNull] TextReader reader)
{
    var saved = (SavedStreams)this.PreviousTokenStream;
    if (saved == null)
    {
        saved = new SavedStreams();
        // Stored before construction, matching the original initialization order.
        this.PreviousTokenStream = saved;
        saved.TokenStream = new WhitespaceTokenizer(reader);
        saved.FilteredTokenStream = new StandardFilter(saved.TokenStream);
        saved.FilteredTokenStream = new LowerCaseFilter(saved.FilteredTokenStream);
    }
    else
    {
        saved.TokenStream.Reset(reader);
    }
    return saved.FilteredTokenStream;
}
/// <summary>
/// Returns a (possibly reused) TokenStream which tokenizes all the text in the
/// provided reader: CJKTokenizer filtered with StopFilter.
/// </summary>
/// <param name="fieldName">Lucene field name.</param>
/// <param name="reader">Input reader.</param>
public override sealed TokenStream ReusableTokenStream(String fieldName, TextReader reader)
{
    // tokenStream() is final, no back compat issue.
    var saved = (SavedStreams)PreviousTokenStream;
    if (saved == null)
    {
        saved = new SavedStreams();
        saved.source = new CJKTokenizer(reader);
        saved.result = new StopFilter(
            StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
            saved.source,
            stopTable);
        PreviousTokenStream = saved;
    }
    else
    {
        saved.source.Reset(reader);
    }
    return saved.result;
}
/// <summary>
/// Returns a (possibly reused) LowerCaseTokenizer filtered by a StopFilter.
/// </summary>
public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
{
    var saved = (SavedStreams)PreviousTokenStream;
    if (saved == null)
    {
        saved = new SavedStreams(this) { source = new LowerCaseTokenizer(reader) };
        saved.result = new StopFilter(enablePositionIncrements, saved.source, stopWords);
        PreviousTokenStream = saved;
    }
    else
    {
        saved.source.Reset(reader);
    }
    return saved.result;
}
/// <summary>
/// Entry point: caps the thread pool, starts a background reporter that prints
/// the most popular stream once per second, then consumes every input stream
/// concurrently via <c>ReadAsyncStream</c>.
/// </summary>
static async Task Main(string[] args)
{
    try
    {
        if (!ThreadPool.SetMaxThreads(300, 300))
        {
            Console.WriteLine("Not able to set max thread count");
            return;
        }

        // Fix: the reporter task was fire-and-forget with its Task silently
        // discarded (CS4014); assign to a discard to make that intentional and
        // explicit. Its exceptions remain unobserved by design — it is an
        // infinite diagnostics loop.
        _ = Task.Run(async () =>
        {
            while (true)
            {
                await Task.Delay(1000);
                var mostPopularStream = SavedStreams.OrderByDescending(kvp => kvp.Value).FirstOrDefault();
                if (mostPopularStream.Value == 0)
                {
                    Console.WriteLine("There is no popular streams right now");
                }
                else
                {
                    Console.WriteLine($"Stream {mostPopularStream.Key} is most popular with sum {mostPopularStream.Value}");
                }
            }
        });

        var readStreamTasks = new List<Task>();
        for (int i = 0; i < Streams.Count(); i++)
        {
            // Copy the loop variable so each lambda captures its own index.
            int temp = i;
            readStreamTasks.Add(Task.Run(async () => { await ReadAsyncStream(Streams[temp], temp); }));
        }
        await Task.WhenAll(readStreamTasks);
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
}
/// <summary>
/// Returns a (possibly reused) TokenStream which tokenizes all the text in the
/// provided reader: ArabicLetterTokenizer filtered with LowerCaseFilter,
/// StopFilter, ArabicNormalizationFilter and ArabicStemFilter.
/// </summary>
public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
{
    var saved = (SavedStreams)GetPreviousTokenStream();
    if (saved == null)
    {
        saved = new SavedStreams();
        saved.Source = new ArabicLetterTokenizer(reader);
        saved.Result = new LowerCaseFilter(saved.Source);
        saved.Result = new StopFilter(
            StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
            saved.Result,
            stoptable);
        saved.Result = new ArabicNormalizationFilter(saved.Result);
        saved.Result = new ArabicStemFilter(saved.Result);
        SetPreviousTokenStream(saved);
    }
    else
    {
        saved.Source.Reset(reader);
    }
    return saved.Result;
}
/// <summary>
/// Returns a (possibly reused) StandardTokenizer filtered by StandardFilter,
/// LowerCaseFilter and StopFilter. Tokenizer settings are re-applied on every call.
/// </summary>
public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
{
    var saved = (SavedStreams)PreviousTokenStream;
    if (saved == null)
    {
        saved = new SavedStreams();
        // Stored before construction, matching the original initialization order.
        PreviousTokenStream = saved;
        saved.tokenStream = new StandardTokenizer(matchVersion, reader);
        saved.filteredTokenStream = new StandardFilter(saved.tokenStream);
        saved.filteredTokenStream = new LowerCaseFilter(saved.filteredTokenStream);
        saved.filteredTokenStream = new StopFilter(enableStopPositionIncrements, saved.filteredTokenStream, stopSet);
    }
    else
    {
        saved.tokenStream.Reset(reader);
    }

    // These settings may have changed since the streams were built.
    saved.tokenStream.MaxTokenLength = MaxTokenLength;
    saved.tokenStream.SetReplaceInvalidAcronym(replaceInvalidAcronym);
    return saved.filteredTokenStream;
}
/// <summary>
/// Returns a (possibly reused) StandardTokenizer filtered by StandardFilter,
/// LowerCaseFilter, ASCIIFoldingFilter and StopFilter. The tokenizer's max
/// token length is re-applied on every call.
/// </summary>
public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
{
    var saved = (SavedStreams)PreviousTokenStream;
    if (saved == null)
    {
        saved = new SavedStreams();
        // Stored before construction, matching the original initialization order.
        this.PreviousTokenStream = saved;
        saved.TokenStream = new StandardTokenizer(_matchVersion, reader);
        saved.FilteredTokenStream = new StandardFilter(saved.TokenStream);
        saved.FilteredTokenStream = new LowerCaseFilter(saved.FilteredTokenStream);
        saved.FilteredTokenStream = new ASCIIFoldingFilter(saved.FilteredTokenStream);
        saved.FilteredTokenStream = new StopFilter(_enableStopPositionIncrements, saved.FilteredTokenStream, _stopWords);
    }
    else
    {
        saved.TokenStream.Reset(reader);
    }

    // May have changed since the streams were built.
    saved.TokenStream.MaxTokenLength = MaxTokenLength;
    return saved.FilteredTokenStream;
}
/// <summary>
/// Returns a (possibly reused) StreamLemmasFilter followed by a StopFilter.
/// </summary>
public override TokenStream ReusableTokenStream(string fieldName, System.IO.TextReader reader)
{
    var saved = GetPreviousTokenStream() as SavedStreams;
    if (saved == null)
    {
        saved = new SavedStreams();
        saved.source = new StreamLemmasFilter(reader, hebMorphLemmatizer, lemmaFilter, alwaysSaveMarkedOriginal);

        // This stop filter is here temporarily, until HebMorph is smart enough
        // to clear stop words all by itself.
        saved.result = new StopFilter(enableStopPositionIncrements, saved.source, STOP_WORDS_SET);

        SetPreviousTokenStream(saved);
    }
    else
    {
        saved.source.Reset(reader);
    }
    return saved.result;
}
/// <summary>
/// Returns a (possibly reused) StandardTokenizer filtered by StandardFilter,
/// LowerCaseFilter, StopFilter and BulgarianStemFilter.
/// </summary>
public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
{
    var saved = this.PreviousTokenStream as SavedStreams;
    if (saved == null)
    {
        saved = new SavedStreams();
        saved.Source = new StandardTokenizer(matchVersion, reader);
        saved.Result = new StandardFilter(saved.Source);
        saved.Result = new LowerCaseFilter(saved.Result);
        // Position increments explicitly disabled for the stop filter.
        saved.Result = new StopFilter(false, saved.Result, stoptable);
        saved.Result = new BulgarianStemFilter(saved.Result);
        this.PreviousTokenStream = saved;
    }
    else
    {
        saved.Source.Reset(reader);
    }
    return saved.Result;
}
/// <summary>
/// Returns a (possibly reused) HebrewTokenizer filtered by NiqqudFilter,
/// StopFilter, LowerCaseFilter and (when configured) AddSuffixFilter.
/// </summary>
public override TokenStream ReusableTokenStream(string fieldName, System.IO.TextReader reader)
{
    if (overridesTokenStreamMethod)
    {
        // LUCENE-1678: force fallback to tokenStream() if we have been
        // subclassed and that subclass overrides tokenStream but not
        // reusableTokenStream.
        return TokenStream(fieldName, reader);
    }

    var saved = PreviousTokenStream as SavedStreams;
    if (saved == null)
    {
        saved = new SavedStreams();
        saved.source = new HebrewTokenizer(reader, PrefixTree);

        // Niqqud normalization
        saved.result = new NiqqudFilter(saved.source);

        // TODO: should we ignoreCase in StopFilter?
        saved.result = new StopFilter(enableStopPositionIncrements, saved.result, STOP_WORDS_SET);

        // TODO: Apply LowerCaseFilter to NonHebrew tokens only
        saved.result = new LowerCaseFilter(saved.result);

        if (suffixByTokenType != null && suffixByTokenType.Count > 0)
        {
            saved.result = new AddSuffixFilter(saved.result, suffixByTokenType);
        }

        PreviousTokenStream = saved;
    }
    else
    {
        saved.source.Reset(reader);
    }
    return saved.result;
}
/// <summary>
/// Returns a (possibly reused) TokenStream which tokenizes all the text in the
/// provided reader: StandardTokenizer filtered with LowerCaseFilter,
/// StandardFilter, StopFilter, and BrazilianStemFilterCustom.
/// </summary>
public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
{
    var saved = (SavedStreams)this.PreviousTokenStream;
    if (saved == null)
    {
        saved = new SavedStreams();
        saved.Source = new StandardTokenizer(this.matchVersion, reader);
        saved.Result = new LowerCaseFilter(saved.Source);
        saved.Result = new StandardFilter(saved.Result);
        saved.Result = new StopFilter(
            StopFilter.GetEnablePositionIncrementsVersionDefault(this.matchVersion),
            saved.Result,
            this.stoptable);
        saved.Result = new BrazilianStemFilterCustom(saved.Result, this.excltable);
        this.PreviousTokenStream = saved;
    }
    else
    {
        saved.Source.Reset(reader);
    }
    return saved.Result;
}
/// <summary>
/// Returns a (possibly reused) TokenStream which tokenizes all the text in the
/// provided reader: StandardTokenizer filtered with StandardFilter, StopFilter,
/// FrenchStemFilter and LowerCaseFilter.
/// </summary>
public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
{
    var saved = (SavedStreams)PreviousTokenStream;
    if (saved == null)
    {
        saved = new SavedStreams();
        saved.source = new StandardTokenizer(matchVersion, reader);
        saved.result = new StandardFilter(saved.source);
        saved.result = new StopFilter(
            StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
            saved.result,
            stoptable);
        saved.result = new FrenchStemFilter(saved.result, excltable);
        // Convert to lowercase after stemming!
        saved.result = new LowerCaseFilter(saved.result);
        PreviousTokenStream = saved;
    }
    else
    {
        saved.source.Reset(reader);
    }
    return saved.result;
}
/// <summary>
/// Returns a (possibly reused) ShingleFilter wrapped around the default
/// analyzer's (possibly reused) token stream for this field. Whether the
/// wrapped analyzer reused its stream is detected by reference comparison.
/// </summary>
public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
{
    if (overridesTokenStreamMethod)
    {
        // LUCENE-1678: force fallback to tokenStream() if we
        // have been subclassed and that subclass overrides
        // tokenStream but not reusableTokenStream
        return(TokenStream(fieldName, reader));
    }
    SavedStreams streams = (SavedStreams)PreviousTokenStream;
    if (streams == null)
    {
        streams = new SavedStreams();
        streams.wrapped = defaultAnalyzer.ReusableTokenStream(fieldName, reader);
        streams.shingle = new ShingleFilter(streams.wrapped);
        PreviousTokenStream = streams;
    }
    else
    {
        TokenStream result = defaultAnalyzer.ReusableTokenStream(fieldName, reader);
        if (result == streams.wrapped)
        {
            /* the wrapped analyzer reused the stream */
            streams.shingle.Reset();
        }
        else
        {
            /* the wrapped analyzer did not, create a new shingle around the new one */
            streams.wrapped = result;
            streams.shingle = new ShingleFilter(streams.wrapped);
        }
    }
    // Re-apply current shingle settings on every call; they may have changed
    // since the cached streams were built.
    streams.shingle.SetMaxShingleSize(maxShingleSize);
    streams.shingle.SetOutputUnigrams(outputUnigrams);
    return(streams.shingle);
}
/// <summary>
/// Returns a (possibly reused) StandardTokenizer filtered by StandardFilter,
/// LowerCaseFilter and StopFilter. Tokenizer settings are re-applied on every call.
/// </summary>
public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
{
    var saved = (SavedStreams)GetPreviousTokenStream();
    if (saved == null)
    {
        saved = new SavedStreams();
        // Stored before construction, matching the original initialization order.
        SetPreviousTokenStream(saved);
        saved.tokenStream = new StandardTokenizer(reader);
        saved.filteredTokenStream = new StandardFilter(saved.tokenStream);
        saved.filteredTokenStream = new LowerCaseFilter(saved.filteredTokenStream);
        saved.filteredTokenStream = new StopFilter(saved.filteredTokenStream, stopSet);
    }
    else
    {
        saved.tokenStream.Reset(reader);
    }

    // These settings may have changed since the streams were built.
    saved.tokenStream.SetMaxTokenLength(maxTokenLength);
    saved.tokenStream.SetReplaceInvalidAcronym(replaceInvalidAcronym);
    return saved.filteredTokenStream;
}
/// <summary>
/// Returns a (possibly reused) HebrewTokenizer filtered by NiqqudFilter,
/// StopFilter, LowerCaseFilter and (when configured) AddSuffixFilter.
/// </summary>
public override TokenStream ReusableTokenStream(string fieldName, System.IO.TextReader reader)
{
    if (overridesTokenStreamMethod)
    {
        // LUCENE-1678: force fallback to tokenStream() if we have been
        // subclassed and that subclass overrides tokenStream but not
        // reusableTokenStream.
        return TokenStream(fieldName, reader);
    }

    var saved = PreviousTokenStream as SavedStreams;
    if (saved == null)
    {
        saved = new SavedStreams();
        saved.source = new HebrewTokenizer(reader, PrefixTree);

        // Niqqud normalization
        saved.result = new NiqqudFilter(saved.source);

        // TODO: should we ignoreCase in StopFilter?
        saved.result = new StopFilter(enableStopPositionIncrements, saved.result, STOP_WORDS_SET);

        // TODO: Apply LowerCaseFilter to NonHebrew tokens only
        saved.result = new LowerCaseFilter(saved.result);

        if (suffixByTokenType != null && suffixByTokenType.Count > 0)
        {
            saved.result = new AddSuffixFilter(saved.result, suffixByTokenType);
        }

        PreviousTokenStream = saved;
    }
    else
    {
        saved.source.Reset(reader);
    }
    return saved.result;
}
/// <summary>
/// Returns a (possibly reused) StandardTokenizer filtered by StandardFilter,
/// LowerCaseFilter and StopFilter; the stop filter's position-increment mode
/// depends on <c>useDefaultStopPositionIncrements</c>. Tokenizer settings are
/// re-applied on every call.
/// </summary>
public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
{
    if (overridesTokenStreamMethod)
    {
        // LUCENE-1678: force fallback to tokenStream() if we have been
        // subclassed and that subclass overrides tokenStream but not
        // reusableTokenStream.
        return TokenStream(fieldName, reader);
    }

    var saved = (SavedStreams)GetPreviousTokenStream();
    if (saved == null)
    {
        saved = new SavedStreams();
        // Stored before construction, matching the original initialization order.
        SetPreviousTokenStream(saved);
        saved.tokenStream = new StandardTokenizer(reader);
        saved.filteredTokenStream = new StandardFilter(saved.tokenStream);
        saved.filteredTokenStream = new LowerCaseFilter(saved.filteredTokenStream);
        saved.filteredTokenStream = useDefaultStopPositionIncrements
            ? new StopFilter(saved.filteredTokenStream, stopSet)
            : new StopFilter(enableStopPositionIncrements, saved.filteredTokenStream, stopSet);
    }
    else
    {
        saved.tokenStream.Reset(reader);
    }

    // These settings may have changed since the streams were built.
    saved.tokenStream.SetMaxTokenLength(maxTokenLength);
    saved.tokenStream.SetReplaceInvalidAcronym(replaceInvalidAcronym);
    return saved.filteredTokenStream;
}
/// <summary>
/// Returns a (possibly reused) token stream for the field, wrapping the
/// delegate analyzer's stream in a StopFilter when stop words exist for the
/// field. Streams are cached per field name; whether the delegate reused its
/// stream is detected by reference comparison.
/// </summary>
public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
{
    if (overridesTokenStreamMethod)
    {
        // LUCENE-1678: force fallback to tokenStream() if we
        // have been subclassed and that subclass overrides
        // tokenStream but not reusableTokenStream
        return TokenStream(fieldName, reader);
    }

    /* map of SavedStreams for each field */
    IDictionary<String, SavedStreams> streamMap = (IDictionary<String, SavedStreams>)PreviousTokenStream;
    if (streamMap == null)
    {
        streamMap = new HashMap<String, SavedStreams>();
        PreviousTokenStream = streamMap;
    }

    SavedStreams streams = streamMap[fieldName];
    if (streams == null)
    {
        /* an entry for this field does not exist, create one */
        streams = new SavedStreams();
        streamMap.Add(fieldName, streams);
        streams.Wrapped = _delegate.ReusableTokenStream(fieldName, reader);

        /* if there are any stopwords for the field, save the stopfilter */
        var stopWords = stopWordsPerField[fieldName];
        if (stopWords != null)
            streams.WithStopFilter = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                                    streams.Wrapped, stopWords);
        else
            streams.WithStopFilter = streams.Wrapped;
    }
    else
    {
        /*
         * an entry for this field exists, verify the wrapped stream has not
         * changed. if it has not, reuse it, otherwise wrap the new stream.
         */
        TokenStream result = _delegate.ReusableTokenStream(fieldName, reader);
        if (result == streams.Wrapped)
        {
            /* the wrapped analyzer reused the stream */
            streams.WithStopFilter.Reset();
        }
        else
        {
            /*
             * the wrapped analyzer did not. if there are any stopwords for the
             * field, create a new StopFilter around the new stream
             */
            streams.Wrapped = result;
            var stopWords = stopWordsPerField[fieldName];
            if (stopWords != null)
                streams.WithStopFilter = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                                        streams.Wrapped, stopWords);
            else
                streams.WithStopFilter = streams.Wrapped;
        }
    }

    return streams.WithStopFilter;
}
/// <summary>
/// Returns a per-thread reused token stream: StandardTokenizer ->
/// StandardFilter -> LowerCaseFilter -> StopFilter.
/// </summary>
public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
{
    var streams = (SavedStreams) GetPreviousTokenStream();
    if (streams != null)
    {
        // Cached chain exists: re-point the tokenizer at the new reader.
        streams.tokenStream.Reset(reader);
    }
    else
    {
        // First call on this thread: build and cache the chain.
        streams = new SavedStreams();
        SetPreviousTokenStream(streams);
        streams.tokenStream = new StandardTokenizer(reader);
        TokenStream filtered = new StandardFilter(streams.tokenStream);
        filtered = new LowerCaseFilter(filtered);
        streams.filteredTokenStream = new StopFilter(filtered, stopSet);
    }

    // Re-apply per-analyzer settings that may have changed between uses.
    streams.tokenStream.SetMaxTokenLength(maxTokenLength);
    streams.tokenStream.SetReplaceInvalidAcronym(replaceInvalidAcronym);
    return streams.filteredTokenStream;
}
/// <summary>
/// Returns a per-thread reused token stream: LowerCaseTokenizer -> StopFilter.
/// </summary>
public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
{
    var streams = (SavedStreams) PreviousTokenStream;
    if (streams != null)
    {
        // Reuse the cached tokenizer against the new reader.
        streams.source.Reset(reader);
        return streams.result;
    }

    // First call on this thread: build and cache the chain.
    streams = new SavedStreams(this);
    streams.source = new LowerCaseTokenizer(reader);
    streams.result = new StopFilter(enablePositionIncrements, streams.source, stopWords);
    PreviousTokenStream = streams;
    return streams.result;
}
/* Returns a (possibly reused) {@link TokenStream} which tokenizes all the
 * text in the provided {@link Reader}.
 *
 * @return A {@link TokenStream} built from a {@link StandardTokenizer}
 *         filtered with {@link StandardFilter}, {@link StopFilter},
 *         {@link FrenchStemFilter} and {@link LowerCaseFilter}
 */
public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
{
    var streams = (SavedStreams) PreviousTokenStream;
    if (streams != null)
    {
        streams.source.Reset(reader);
        return streams.result;
    }

    streams = new SavedStreams();
    streams.source = new StandardTokenizer(matchVersion, reader);
    TokenStream chain = new StandardFilter(streams.source);
    chain = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), chain, stoptable);
    chain = new FrenchStemFilter(chain, excltable);
    // Convert to lowercase after stemming!
    streams.result = new LowerCaseFilter(chain);
    PreviousTokenStream = streams;
    return streams.result;
}
/* Returns a (possibly reused) {@link TokenStream} which tokenizes all the
 * text in the provided {@link Reader}.
 *
 * @return A {@link TokenStream} built from a {@link StandardTokenizer}
 *         filtered with {@link StandardFilter}, {@link StopFilter},
 *         and {@link DutchStemFilter}
 */
public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
{
    // LUCENE-1678: a subclass that overrides TokenStream but not this
    // method must be routed through its own TokenStream implementation.
    if (overridesTokenStreamMethod)
    {
        return TokenStream(fieldName, reader);
    }

    var streams = (SavedStreams) PreviousTokenStream;
    if (streams != null)
    {
        streams.source.Reset(reader);
        return streams.result;
    }

    streams = new SavedStreams();
    streams.source = new StandardTokenizer(matchVersion, reader);
    TokenStream chain = new StandardFilter(streams.source);
    chain = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), chain, stoptable);
    streams.result = new DutchStemFilter(chain, excltable, stemdict);
    PreviousTokenStream = streams;
    return streams.result;
}
/// <summary>
/// Returns a reused ShingleFilter wrapped around the default analyzer's
/// (possibly reused) token stream. If the wrapped analyzer hands back a new
/// stream instance, a fresh ShingleFilter is created around it; otherwise
/// the cached shingle filter is simply reset.
/// </summary>
public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
{
    // LUCENE-1678: a subclass that overrides TokenStream but not this
    // method must be routed through its own TokenStream implementation.
    if (overridesTokenStreamMethod)
    {
        return TokenStream(fieldName, reader);
    }

    var streams = (SavedStreams) PreviousTokenStream;
    if (streams == null)
    {
        streams = new SavedStreams();
        streams.wrapped = defaultAnalyzer.ReusableTokenStream(fieldName, reader);
        streams.shingle = new ShingleFilter(streams.wrapped);
        PreviousTokenStream = streams;
    }
    else
    {
        TokenStream inner = defaultAnalyzer.ReusableTokenStream(fieldName, reader);
        if (inner == streams.wrapped)
        {
            /* the wrapped analyzer reused its stream: just reset our filter */
            streams.shingle.Reset();
        }
        else
        {
            /* the wrapped analyzer did not: wrap the new stream */
            streams.wrapped = inner;
            streams.shingle = new ShingleFilter(inner);
        }
    }

    // Re-apply settings that may have changed between uses.
    streams.shingle.SetMaxShingleSize(maxShingleSize);
    streams.shingle.SetOutputUnigrams(outputUnigrams);
    return streams.shingle;
}
/* Returns a (possibly reused) {@link TokenStream} which tokenizes all the text
 * in the provided {@link Reader}.
 *
 * @return A {@link TokenStream} built from a
 *         {@link RussianLetterTokenizer} filtered with
 *         {@link RussianLowerCaseFilter}, {@link StopFilter},
 *         and {@link RussianStemFilter}
 */
public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
{
    var streams = (SavedStreams) PreviousTokenStream;
    if (streams != null)
    {
        streams.source.Reset(reader);
        return streams.result;
    }

    streams = new SavedStreams();
    streams.source = new RussianLetterTokenizer(reader);
    TokenStream chain = new LowerCaseFilter(streams.source);
    chain = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), chain, stopSet);
    streams.result = new RussianStemFilter(chain);
    PreviousTokenStream = streams;
    return streams.result;
}
/**
 * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text
 * in the provided {@link Reader}.
 *
 * @return A {@link TokenStream} built from a {@link ArabicLetterTokenizer}
 *         filtered with {@link LowerCaseFilter},
 *         {@link ArabicNormalizationFilter},
 *         {@link PersianNormalizationFilter} and Persian Stop words
 */
public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
{
    var streams = (SavedStreams) PreviousTokenStream;
    if (streams != null)
    {
        streams.source.Reset(reader);
        return streams.result;
    }

    streams = new SavedStreams();
    streams.source = new ArabicLetterTokenizer(reader);
    TokenStream chain = new LowerCaseFilter(streams.source);
    chain = new ArabicNormalizationFilter(chain);
    /* additional persian-specific normalization */
    chain = new PersianNormalizationFilter(chain);
    /*
     * the order here is important: the stopword list is normalized with the
     * above!
     */
    streams.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), chain, stoptable);
    PreviousTokenStream = streams;
    return streams.result;
}
/**
 * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text
 * in the provided {@link Reader}.
 *
 * @param fieldName lucene field name
 * @param reader    Input {@link Reader}
 * @return A {@link TokenStream} built from {@link CJKTokenizer}, filtered with
 *         {@link StopFilter}
 */
public override sealed TokenStream ReusableTokenStream(String fieldName, TextReader reader)
{
    /* tokenStream() is final, no back compat issue */
    var streams = (SavedStreams) PreviousTokenStream;
    if (streams != null)
    {
        streams.source.Reset(reader);
        return streams.result;
    }

    streams = new SavedStreams();
    streams.source = new CJKTokenizer(reader);
    streams.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), streams.source, stopTable);
    PreviousTokenStream = streams;
    return streams.result;
}
/* Returns a (possibly reused) {@link StandardTokenizer} filtered by a
 * {@link WordDelimiterFilter}, a {@link StandardFilter}, a
 * {@link LowerCaseFilter}, an optional {@link StopFilter},
 * and a {@link SnowballFilter} (English).
 */
public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
{
    // LUCENE-1678: a subclass that overrides TokenStream but not this
    // method must be routed through its own TokenStream implementation.
    if (overridesTokenStreamMethod)
    {
        return TokenStream(fieldName, reader);
    }

    var streams = (SavedStreams) this.GetPreviousTokenStream();
    if (streams != null)
    {
        streams.source.Reset(reader);
        return streams.result;
    }

    streams = new SavedStreams();
    streams.source = new StandardTokenizer(matchVersion, reader);
    TokenStream chain = new Portal.LuceneInterface.WordDelimiterFilter(streams.source, 1, 1, 1, 1, 1);
    chain = new StandardFilter(chain);
    chain = new LowerCaseFilter(chain);
    // Stop-word removal is optional: only applied when a stop set is configured.
    if (stopSet != null)
        chain = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), chain, stopSet);
    streams.result = new SnowballFilter(chain, "English");
    this.SetPreviousTokenStream(streams);
    return streams.result;
}
/// <summary>
/// Returns a per-thread reused token stream:
/// CharJoinAbbreviationsLowerCaseTokenizer -> CharJoinAbbreviationsLowerCaseFilter.
/// </summary>
public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
{
    SavedStreams streams = (SavedStreams)PreviousTokenStream;
    if (streams == null)
    {
        // First call on this thread: build the chain once and cache it.
        streams = new SavedStreams(this);
        streams.source = new CharJoinAbbreviationsLowerCaseTokenizer(reader);
        //streams.result = new SynonymFilter(streams.source, engine);
        streams.result = new CharJoinAbbreviationsLowerCaseFilter(streams.source);
        //streams.result = streams.source; //if we dont have a filter above
        PreviousTokenStream = streams;
    }
    else
        // Subsequent call: re-point the cached tokenizer at the new reader.
        streams.source.Reset(reader);
    return streams.result;
}
/*
 * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text
 * in the provided {@link Reader}.
 *
 * @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
 *         {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}, and
 *         {@link BrazilianStemFilter}.
 */
public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
{
    var streams = (SavedStreams) this.PreviousTokenStream;
    if (streams != null)
    {
        streams.Source.Reset(reader);
        return streams.Result;
    }

    streams = new SavedStreams();
    streams.Source = new StandardTokenizer(this.matchVersion, reader);
    TokenStream chain = new LowerCaseFilter(streams.Source);
    chain = new StandardFilter(chain);
    chain = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(this.matchVersion), chain, this.stoptable);
    streams.Result = new BrazilianStemFilterCustom(chain, this.excltable);
    this.PreviousTokenStream = streams;
    return streams.Result;
}
/// <summary>
/// Returns a per-thread reused token stream: StreamLemmasFilter -> StopFilter.
/// </summary>
public override TokenStream ReusableTokenStream(string fieldName, System.IO.TextReader reader)
{
    // LUCENE-1678: a subclass that overrides TokenStream but not this
    // method must be routed through its own TokenStream implementation.
    if (overridesTokenStreamMethod)
    {
        return TokenStream(fieldName, reader);
    }

    var streams = PreviousTokenStream as SavedStreams;
    if (streams != null)
    {
        streams.source.Reset(reader);
        return streams.result;
    }

    streams = new SavedStreams();
    PreviousTokenStream = streams;
    streams.source = new StreamLemmasFilter(reader, hebMorphLemmatizer, lemmaFilter, alwaysSaveMarkedOriginal);
    // This stop filter is here temporarily, until HebMorph is smart enough to clear stop words
    // all by itself
    streams.result = new StopFilter(enableStopPositionIncrements, streams.source, STOP_WORDS_SET);
    return streams.result;
}
/// <summary>
/// Returns a per-thread reused token stream: StandardTokenizer ->
/// StandardFilter -> LowerCaseFilter -> StopFilter -> BulgarianStemFilter.
/// </summary>
public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
{
    SavedStreams streams = this.PreviousTokenStream as SavedStreams;
    if (streams == null)
    {
        // First call on this thread: build the chain once and cache it.
        streams = new SavedStreams();
        streams.Source = new StandardTokenizer(matchVersion, reader);
        streams.Result = new StandardFilter(streams.Source);
        streams.Result = new LowerCaseFilter(streams.Result);
        // FIX: derive position-increment behavior from matchVersion instead of
        // hard-coding false, consistent with the other analyzers in this file.
        streams.Result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), streams.Result, stoptable);
        streams.Result = new BulgarianStemFilter(streams.Result);
        this.PreviousTokenStream = streams;
    }
    else
    {
        // Subsequent call: re-point the cached tokenizer at the new reader.
        streams.Source.Reset(reader);
    }
    return streams.Result;
}
/// <summary>
/// Records the destination path of a saved stream in the SavedStreams collection.
/// </summary>
/// <param name="path">Destination path to record.</param>
/// <param name="inputStream">NOTE(review): never read here — only the path is
/// tracked. Confirm the stream contents are persisted by the caller or another
/// overload.</param>
public void SaveStream(string path, Stream inputStream)
{
    SavedStreams.Add(path);
}
/**
 * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text
 * in the provided {@link Reader}.
 *
 * @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with
 *         {@link LowerCaseFilter}, {@link StopFilter}, {@link ArabicNormalizationFilter}
 *         and {@link ArabicStemFilter}.
 */
public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
{
    var streams = (SavedStreams) GetPreviousTokenStream();
    if (streams != null)
    {
        streams.Source.Reset(reader);
        return streams.Result;
    }

    streams = new SavedStreams();
    streams.Source = new ArabicLetterTokenizer(reader);
    TokenStream chain = new LowerCaseFilter(streams.Source);
    chain = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), chain, stoptable);
    chain = new ArabicNormalizationFilter(chain);
    streams.Result = new ArabicStemFilter(chain);
    SetPreviousTokenStream(streams);
    return streams.Result;
}