/// <summary>
/// Builds the analysis chain for a field: a bare <c>AdrivaTokenizer</c>
/// with no downstream filters attached.
/// </summary>
/// <param name="fieldName">Name of the field being analyzed (unused here).</param>
/// <param name="reader">Source of the text to tokenize.</param>
/// <returns>Components wrapping the single tokenizer.</returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    var source = new AdrivaTokenizer(this.LuceneVersion, reader);
    return new TokenStreamComponents(source);
}
/// <summary>
/// Appends a per-field <c>StopFilter</c> to the wrapped analyzer's chain when
/// stop words are registered for <paramref name="fieldName"/>; otherwise the
/// components pass through unchanged.
/// </summary>
/// <param name="fieldName">Field whose stop-word set is looked up.</param>
/// <param name="components">Components produced by the wrapped analyzer.</param>
/// <returns>The original or stop-filtered components.</returns>
protected override TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
{
    // No registered set (or an explicit null entry) means nothing to filter.
    if (!stopWordsPerField.TryGetValue(fieldName, out ISet<string> stopWords) || stopWords is null)
    {
        return components;
    }

    var filtered = new StopFilter(
        matchVersion,
        components.TokenStream,
        new CharArraySet(matchVersion, stopWords, false)); // false = case-sensitive match
    return new TokenStreamComponents(components.Tokenizer, filtered);
}
/// <summary>
/// Tokenizes a Japanese sample string with the Kuromoji <c>JapaneseTokenizer</c>
/// and prints every token attribute (term, offsets, position, base form,
/// part of speech, reading, pronunciation, inflection) to the console.
/// </summary>
public void Main()
{
    const string s = "関西国際空港";
    Console.WriteLine($"対象の文字列:{s}");
    using var reader = new StringReader(s);
    Tokenizer tokenizer = new JapaneseTokenizer(reader, ReadDict(), false, JapaneseTokenizerMode.NORMAL);
    var tokenStreamComponents = new TokenStreamComponents(tokenizer, tokenizer);
    using var tokenStream = tokenStreamComponents.TokenStream;

    // Lucene consumer workflow: Reset must be called before the first
    // IncrementToken, and End after the last one.
    tokenStream.Reset();
    while (tokenStream.IncrementToken())
    {
        Console.WriteLine("---");
        Console.WriteLine(
            $"ICharTermAttribute=>{tokenStream.GetAttribute<ICharTermAttribute>().ToString()}");
        Console.WriteLine(
            $"ITermToBytesRefAttribute#BytesRef=>{tokenStream.GetAttribute<ITermToBytesRefAttribute>().BytesRef}");
        Console.WriteLine(
            $"IOffsetAttribute#StartOffset=>{tokenStream.GetAttribute<IOffsetAttribute>().StartOffset}");
        Console.WriteLine(
            $"IOffsetAttribute#EndOffset=>{tokenStream.GetAttribute<IOffsetAttribute>().EndOffset}");
        Console.WriteLine(
            $"IPositionIncrementAttribute=>{tokenStream.GetAttribute<IPositionIncrementAttribute>().PositionIncrement}");
        Console.WriteLine(
            $"IPositionLengthAttribute=>{tokenStream.GetAttribute<IPositionLengthAttribute>().PositionLength}");
        Console.WriteLine(
            $"IBaseFormAttribute#GetBaseForm=>{tokenStream.GetAttribute<IBaseFormAttribute>().GetBaseForm()}");
        Console.WriteLine(
            $"IPartOfSpeechAttribute#GetPartOfSpeech=>{tokenStream.GetAttribute<IPartOfSpeechAttribute>().GetPartOfSpeech()}");
        Console.WriteLine(
            $"IReadingAttribute#GetReading=>{tokenStream.GetAttribute<IReadingAttribute>().GetReading()}");
        Console.WriteLine(
            $"IReadingAttribute#GetPronunciation=>{tokenStream.GetAttribute<IReadingAttribute>().GetPronunciation()}");
        Console.WriteLine(
            $"IInflectionAttribute#GetInflectionForm=>{tokenStream.GetAttribute<IInflectionAttribute>().GetInflectionForm()}");
        Console.WriteLine(
            $"IInflectionAttribute#GetInflectionType=>{tokenStream.GetAttribute<IInflectionAttribute>().GetInflectionType()}");
        Console.WriteLine("---");
    }

    // Fix: the original never called End(). Lucene's TokenStream contract
    // requires End() after the final IncrementToken so end-of-stream state
    // (e.g. the final offset) is recorded before the stream is disposed.
    tokenStream.End();
}
/// <summary>
/// Appends a per-field <c>StopFilter</c> to the wrapped analyzer's chain when
/// stop words are registered for <paramref name="fieldName"/>; otherwise the
/// components pass through unchanged.
/// </summary>
/// <param name="fieldName">Field whose stop-word set is looked up.</param>
/// <param name="components">Components produced by the wrapped analyzer.</param>
/// <returns>The original or stop-filtered components.</returns>
protected override TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
{
    // Fix: single TryGetValue instead of the original ContainsKey + indexer,
    // which performed the dictionary lookup twice.
    if (!stopWordsPerField.TryGetValue(fieldName, out var stopWords) || stopWords == null)
    {
        return components;
    }

    var stopFilter = new StopFilter(
        matchVersion,
        components.TokenStream,
        new CharArraySet(matchVersion, stopWords, false)); // false = case-sensitive match
    return new TokenStreamComponents(components.Tokenizer, stopFilter);
}
/// <summary>
/// Appends a per-field <c>StopFilter</c> to the wrapped analyzer's chain when
/// stop words are registered for <paramref name="fieldName"/>; otherwise the
/// components pass through unchanged.
/// </summary>
/// <param name="fieldName">Field whose stop-word set is looked up.</param>
/// <param name="components">Components produced by the wrapped analyzer.</param>
/// <returns>The original or stop-filtered components.</returns>
protected override TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
{
    // Fix: the original used the indexer directly, which throws
    // KeyNotFoundException for any field with no registered entry.
    // TryGetValue treats an unknown field like a null entry: no filtering.
    if (!stopWordsPerField.TryGetValue(fieldName, out HashSet<string> stopWords) || stopWords == null)
    {
        return components;
    }

    StopFilter stopFilter = new StopFilter(
        matchVersion,
        components.TokenStream,
        new CharArraySet(matchVersion, stopWords, false)); // false = case-sensitive match
    return new TokenStreamComponents(components.Tokenizer, stopFilter);
}
/// <summary>
/// Appends a <c>ShingleFilter</c> configured from this analyzer's fields to
/// the wrapped analyzer's token stream.
/// </summary>
/// <param name="fieldName">Field being analyzed (unused; all fields shingle).</param>
/// <param name="components">Components produced by the wrapped analyzer.</param>
/// <returns>Components ending in the configured shingle filter.</returns>
protected override TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
{
    ShingleFilter shingles = new ShingleFilter(components.TokenStream, minShingleSize, maxShingleSize);
    shingles.SetMinShingleSize(minShingleSize);
    shingles.SetMaxShingleSize(maxShingleSize);
    shingles.SetTokenSeparator(tokenSeparator);
    shingles.SetOutputUnigrams(outputUnigrams);
    shingles.SetOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
    shingles.SetFillerToken(fillerToken);

    return new TokenStreamComponents(components.Tokenizer, shingles);
}
/// <summary>
/// Appends a <c>ShingleFilter</c> configured from this analyzer's fields to
/// the wrapped analyzer's token stream.
/// </summary>
/// <param name="fieldName">Field being analyzed (unused; all fields shingle).</param>
/// <param name="components">Components produced by the wrapped analyzer.</param>
/// <returns>Components ending in the configured shingle filter.</returns>
protected override TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
{
    // Object initializer groups all configuration in one expression.
    var shingles = new ShingleFilter(components.TokenStream, minShingleSize, maxShingleSize)
    {
        MinShingleSize = minShingleSize,
        MaxShingleSize = maxShingleSize,
        TokenSeparator = tokenSeparator,
        OutputUnigrams = outputUnigrams,
        OutputUnigramsIfNoShingles = outputUnigramsIfNoShingles,
        FillerToken = fillerToken,
    };
    return new TokenStreamComponents(components.Tokenizer, shingles);
}
/// <summary>
/// Builds the Serbian analysis chain: standard tokenization, lowercasing,
/// Cyrillic-to-Latin transliteration, stop-word removal, Serbian stemming,
/// then ASCII folding.
/// </summary>
/// <param name="fieldName">Field being analyzed (unused; one chain for all).</param>
/// <param name="reader">Source of the text to tokenize.</param>
/// <returns>Components wrapping the tokenizer and the full filter chain.</returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer source = new StandardTokenizer(matchVersion, reader);

    // Filter order matters: transliterate and lowercase before the stop list
    // and stemmer see the tokens; fold diacritics last.
    TokenStream chain = new LowerCaseFilter(matchVersion, source);
    chain = new CyrllicToLatinFilter(chain);
    chain = new StopFilter(matchVersion, chain, StopFilter.MakeStopSet(matchVersion, STOP_WORDS));
    chain = new SnowballFilter(chain, new SimpleSerbianStemmer());
    chain = new ASCIIFoldingFilter(chain);

    return new TokenStreamComponents(source, chain);
}
/// <summary>
/// Builds the English analysis chain: standard tokenization and filtering,
/// lowercasing, English stop-word removal, then two stemming passes.
/// </summary>
/// <param name="fieldName">Field being analyzed (unused; one chain for all).</param>
/// <param name="reader">Source of the text to tokenize.</param>
/// <returns>Components wrapping the tokenizer and the full filter chain.</returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    TokenStreamComponents tokenStreamComponents = null;
    Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
    TokenStream stream = new StandardFilter(matchVersion, tokenizer);
    stream = new LowerCaseFilter(matchVersion, stream);
    stream = new StopFilter(matchVersion, stream, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    // NOTE(review): the chain applies Porter stemming AND a Snowball English
    // stemmer back to back. Double stemming is usually unintentional and can
    // over-truncate terms — confirm whether one of the two should be removed.
    // Left unchanged here because altering it would change indexed terms.
    stream = new PorterStemFilter(stream);
    stream = new SnowballFilter(stream, new EnglishStemmer());
    tokenStreamComponents = new TokenStreamComponents(tokenizer, stream);
    return(tokenStreamComponents);
}
/// <summary>
/// Stores the components as the analyzer's single reusable value,
/// ignoring the field name (one shared slot for all fields).
/// </summary>
/// <param name="analyzer">Analyzer whose per-thread stored value is set.</param>
/// <param name="fieldName">Field name (ignored by this strategy).</param>
/// <param name="components">Components to reuse on the next request.</param>
public override void SetReusableComponents(Analyzer analyzer, string fieldName, TokenStreamComponents components)
    => SetStoredValue(analyzer, components);
/// <summary>
/// Adds an edge n-gram filter to the "textgrams" field (when prefix matching
/// is enabled); every other field passes through unchanged.
/// </summary>
/// <param name="fieldName">Field being analyzed.</param>
/// <param name="components">Components produced by the wrapped analyzer.</param>
/// <returns>The original or n-gram-filtered components.</returns>
protected override TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
{
    // Guard clause: only "textgrams" with a positive prefix length gets grams.
    if (outerInstance.minPrefixChars <= 0 || !fieldName.Equals("textgrams"))
    {
        return components;
    }

    var prefixGrams = new EdgeNGramTokenFilter(
        outerInstance.matchVersion,
        components.TokenStream,
        1,
        outerInstance.minPrefixChars);
    return new TokenStreamComponents(components.Tokenizer, prefixGrams);
}
/// <summary>
/// Wraps or alters the <see cref="TokenStreamComponents"/> taken from the
/// wrapped <see cref="Analyzer"/> to form new components. Subclasses override
/// this to insert additional <c>TokenFilter</c>s; the default implementation
/// returns the components unmodified.
/// </summary>
/// <param name="fieldName">Name of the field which is to be analyzed.</param>
/// <param name="components">Components taken from the wrapped analyzer.</param>
/// <returns>Wrapped / altered components.</returns>
protected virtual TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
    => components;
/// <summary>
/// Caps the wrapped analyzer's token output at <c>maxTokenCount</c> tokens
/// by appending a <c>LimitTokenCountFilter</c>.
/// </summary>
/// <param name="fieldName">Field being analyzed (unused; all fields capped).</param>
/// <param name="components">Components produced by the wrapped analyzer.</param>
/// <returns>Components ending in the token-count limiter.</returns>
protected override TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
{
    var limiter = new LimitTokenCountFilter(components.TokenStream, maxTokenCount, consumeAllTokens);
    return new TokenStreamComponents(components.Tokenizer, limiter);
}
/// <summary>
/// Caps the wrapped analyzer's token output at <c>maxTokenCount</c> tokens
/// by appending a <c>LimitTokenCountFilter</c>.
/// </summary>
/// <param name="fieldName">Field being analyzed (unused; all fields capped).</param>
/// <param name="components">Components produced by the wrapped analyzer.</param>
/// <returns>Components ending in the token-count limiter.</returns>
protected override TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
    => new TokenStreamComponents(
        components.Tokenizer,
        new LimitTokenCountFilter(components.TokenStream, maxTokenCount, consumeAllTokens));
/// <summary>
/// Appends a shingle filter producing 2- to <c>grams</c>-token shingles joined
/// by the enclosing instance's separator character.
/// </summary>
/// <param name="fieldName">Field being analyzed (unused; all fields shingle).</param>
/// <param name="components">Components produced by the wrapped analyzer.</param>
/// <returns>Components ending in the shingle filter.</returns>
protected override TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
{
    // Separator is stored as an int on the outer instance; render it as a
    // single-character string for the filter.
    string separator = char.ToString((char)outerInstance.separator);

    var shingleFilter = new ShingleFilter(components.TokenStream, 2, outerInstance.grams);
    shingleFilter.SetTokenSeparator(separator);

    return new TokenStreamComponents(components.Tokenizer, shingleFilter);
}
/// <summary>
/// Adds an edge n-gram filter to the "textgrams" field (when prefix matching
/// is enabled); every other field passes through unchanged.
/// </summary>
/// <param name="fieldName">Field being analyzed.</param>
/// <param name="components">Components produced by the wrapped analyzer.</param>
/// <returns>The original or n-gram-filtered components.</returns>
protected override TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
{
    bool wantsPrefixGrams = fieldName.Equals("textgrams") && outerInstance.minPrefixChars > 0;
    if (!wantsPrefixGrams)
    {
        return components;
    }

    var gramFilter = new EdgeNGramTokenFilter(
        outerInstance.matchVersion,
        components.TokenStream,
        1,
        outerInstance.minPrefixChars);
    return new TokenStreamComponents(components.Tokenizer, gramFilter);
}
/// <summary>
/// Appends a per-field <c>StopFilter</c> to the wrapped analyzer's chain when
/// stop words are registered for <paramref name="fieldName"/>; otherwise the
/// components pass through unchanged.
/// </summary>
/// <param name="fieldName">Field whose stop-word set is looked up.</param>
/// <param name="components">Components produced by the wrapped analyzer.</param>
/// <returns>The original or stop-filtered components.</returns>
protected override TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
{
    // Fix: the original used the indexer directly, which throws
    // KeyNotFoundException for any field with no registered entry.
    // TryGetValue treats an unknown field like a null entry: no filtering.
    if (!stopWordsPerField.TryGetValue(fieldName, out var stopWords) || stopWords == null)
    {
        return components;
    }

    var stopFilter = new StopFilter(
        matchVersion,
        components.TokenStream,
        new CharArraySet(matchVersion, stopWords, false)); // false = case-sensitive match
    return new TokenStreamComponents(components.Tokenizer, stopFilter);
}
/// <summary>
/// Appends a <c>ShingleFilter</c> configured from this analyzer's fields to
/// the wrapped analyzer's token stream.
/// </summary>
/// <param name="fieldName">Field being analyzed (unused; all fields shingle).</param>
/// <param name="components">Components produced by the wrapped analyzer.</param>
/// <returns>Components ending in the configured shingle filter.</returns>
protected override TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
{
    // Object initializer groups all configuration in one expression.
    var shingleFilter = new ShingleFilter(components.TokenStream, minShingleSize, maxShingleSize)
    {
        MinShingleSize = minShingleSize,
        MaxShingleSize = maxShingleSize,
        TokenSeparator = tokenSeparator,
        OutputUnigrams = outputUnigrams,
        OutputUnigramsIfNoShingles = outputUnigramsIfNoShingles,
        FillerToken = fillerToken,
    };
    return new TokenStreamComponents(components.Tokenizer, shingleFilter);
}
/// <summary>
/// Records the components as the reusable entry for <paramref name="fieldName"/>
/// in a per-analyzer dictionary, creating the dictionary on first use.
/// </summary>
/// <param name="analyzer">Analyzer whose per-thread stored value holds the map.</param>
/// <param name="fieldName">Field the components belong to.</param>
/// <param name="components">Components to reuse for that field.</param>
public override void SetReusableComponents(Analyzer analyzer, string fieldName, TokenStreamComponents components)
{
    // Hard cast is intentional: a stored value of the wrong type is a bug and
    // should surface as an InvalidCastException rather than be overwritten.
    var perField = (IDictionary<string, TokenStreamComponents>)GetStoredValue(analyzer);
    if (perField is null)
    {
        // First use for this analyzer/thread: create and register the map.
        perField = new Dictionary<string, TokenStreamComponents>();
        SetStoredValue(analyzer, perField);
    }

    perField[fieldName] = components;
}
/// <summary>
/// Pass-through override: this wrapper adds no filters, so the wrapped
/// analyzer's components are returned untouched.
/// </summary>
/// <param name="fieldName">Field being analyzed (unused).</param>
/// <param name="components">Components produced by the wrapped analyzer.</param>
/// <returns>The components, unchanged.</returns>
protected override TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
    => components;
/// <summary>
/// Stores the given TokenStreamComponents as the reusable components for the
/// field with the given name.
/// </summary>
/// <param name="analyzer"> Analyzer on which the reusable components are stored </param>
/// <param name="fieldName"> Name of the field whose TokenStreamComponents are being set </param>
/// <param name="components"> TokenStreamComponents which are to be reused for the field </param>
public abstract void SetReusableComponents(Analyzer analyzer, string fieldName, TokenStreamComponents components);