private AnalysisDescriptor Analysis(AnalysisDescriptor analysis) => analysis
    .Tokenizers(tokenizers => tokenizers
        .Pattern("name-tokenizer", p => p.Pattern(@"\W+"))
    )
    .TokenFilters(tokenfilters => tokenfilters
        .WordDelimiter("name-words", w => w
            .SplitOnCaseChange()
            .PreserveOriginal()
            .SplitOnNumerics()
            .GenerateNumberParts(false)
            .GenerateWordParts()
        )
    )
    .Analyzers(analyzers => analyzers
        .Custom("name-keyword", c => c
            .Tokenizer("keyword")
            .Filters("lowercase")
        )
        .Custom("html_stripper", cc => cc
            .Filters("trim", "lowercase")
            .CharFilters("html_strip")
            .Tokenizer("name-tokenizer")
        )
        .Custom("name-analyzer", c => c
            .Filters("name-words", "lowercase")
            .Tokenizer("ik_max_word")
        )
    );
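A minimal sketch of how this descriptor could be wired into index creation (assumptions: a NEST 2.x-era IElasticClient named client and the index name "names", both hypothetical; note the "ik_max_word" tokenizer additionally requires the IK analysis plugin on the cluster):

private void CreateNamesIndex(IElasticClient client)
{
    // Hypothetical call site: registers the analysis chain above
    // in the index settings at creation time.
    var response = client.CreateIndex("names", c => c
        .Settings(s => s.Analysis(Analysis)));
}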
protected override AnalysisDescriptor Contribute(AnalysisDescriptor descriptor, IEnumerable<KeyValuePair<string, TokenizerBase>> build)
{
    return descriptor.Tokenizers(a =>
    {
        // Register only the tokenizers this contributor accepts.
        foreach (var item in build.Where(x => CanContribute(x, a)))
        {
            a.Add(item.Key, item.Value);
        }

        return a;
    });
}
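CanContribute is not shown in this example; a minimal sketch of what it might check (hypothetical signature, assuming the FluentDictionary overload of Tokenizers that the Add calls above imply):

private static bool CanContribute(
    KeyValuePair<string, TokenizerBase> item,
    FluentDictionary<string, TokenizerBase> registered)
{
    // Hypothetical guard: only contribute tokenizers that are not
    // already registered under the same name.
    return !registered.ContainsKey(item.Key);
}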
public static AnalysisDescriptor AutoCompleteAnalyzers(this AnalysisDescriptor analysis)
{
    return analysis
        .Tokenizers(t => t.Whitespace("whitespace_tokenizer"))
        .TokenFilters(t => t.EdgeNGram("ngram_filter", n => n.MinGram(1).MaxGram(8)))
        .Analyzers(a => a
            .Custom("default_autocomplete", c => c
                .Tokenizer("whitespace_tokenizer").Filters("lowercase", "asciifolding")
            )
            .Custom("snowball_autocomplete", c => c
                .Tokenizer("whitespace_tokenizer").Filters("lowercase", "asciifolding", "snowball")
            )
            .Custom("shingle_autocomplete", c => c
                .Tokenizer("whitespace_tokenizer").Filters("shingle", "lowercase", "asciifolding")
            )
            .Custom("ngram_autocomplete", c => c
                .Tokenizer("whitespace_tokenizer").Filters("lowercase", "asciifolding", "ngram_filter")
            )
            .Custom("search_autocomplete", c => c
                .Tokenizer("whitespace_tokenizer").Filters("lowercase", "asciifolding")
            )
        );
}
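A typical autocomplete setup pairs the edge-n-gram analyzer at index time with the plain analyzer at search time, so queries are not themselves n-grammed. A minimal mapping sketch (assumptions: NEST 2.x-era API, plus a hypothetical Product type with a Name property and the index name "products"):

private static void CreateProductsIndex(IElasticClient client)
{
    client.CreateIndex("products", c => c
        .Settings(s => s.Analysis(a => a.AutoCompleteAnalyzers()))
        .Mappings(m => m.Map<Product>(mm => mm
            .Properties(p => p
                .String(sp => sp
                    .Name(x => x.Name)
                    // n-grams at index time, plain tokens at query time
                    .Analyzer("ngram_autocomplete")
                    .SearchAnalyzer("search_autocomplete"))))));
}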
private static AnalysisDescriptor Analysis(AnalysisDescriptor analysis) => analysis
    .Tokenizers(tokenizers => tokenizers
        .Pattern("nuget-id-tokenizer", p => p.Pattern(@"\W+"))
    )
    .TokenFilters(tokenfilters => tokenfilters
        .WordDelimiter("nuget-id-words", w => w
            .SplitOnCaseChange()
            .PreserveOriginal()
            .SplitOnNumerics()
            .GenerateNumberParts(false)
            .GenerateWordParts()
        )
    )
    .Analyzers(analyzers => analyzers
        .Custom("nuget-id-analyzer", c => c
            .Tokenizer("nuget-id-tokenizer")
            .Filters("nuget-id-words", "lowercase")
        )
        .Custom("nuget-id-keyword", c => c
            .Tokenizer("keyword")
            .Filters("lowercase")
        )
    );
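The _analyze API is a quick way to sanity-check what these analyzers emit once the index exists (assumptions: a hypothetical index name "packages" and a NEST 2.x-era client):

// Hypothetical check: "Microsoft.AspNet.Mvc" should yield tokens such as
// "microsoft", "aspnet", "asp", "net" and "mvc" under nuget-id-analyzer,
// since the pattern tokenizer splits on non-word characters and the word
// delimiter filter splits on case changes while preserving the original.
var tokens = client.Analyze(a => a
    .Index("packages")
    .Analyzer("nuget-id-analyzer")
    .Text("Microsoft.AspNet.Mvc"));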
protected override IAnalysis FluentAnalysis(AnalysisDescriptor an) => an.Tokenizers(d => AssertionSetup.Fluent(AssertionSetup.Name, d));
/// <summary>
/// Job analysis descriptor: the tokenizers, token filters and analyzers used by the jobs index.
/// </summary>
private AnalysisDescriptor GetJobsAnalysisDescriptor(LanguageCode languageCode = LanguageCode.EN)
{
    var descriptor = new AnalysisDescriptor();

    descriptor.TokenFilters(cf => cf.Add("shingle_title", new ShingleTokenFilter()));
    descriptor.TokenFilters(f => f.Add("job_stopfilter", new StopTokenFilter
    {
        Stopwords = new List<string> { "job", "jobs" }
    }));

    // Title analyzer
    var titleAnalyzer = GetTitleAnalyzer(languageCode);
    descriptor.Analyzers(a => a.Add("job_title", titleAnalyzer));

    // Path analyzer
    var pathAnalyzer = GetPathAnalyzer();
    descriptor.Analyzers(a => a.Add("path", pathAnalyzer));

    // Lowercase analyzer
    var lowercaseAnalyzer = GetLowercaseAnalyzer(languageCode);
    descriptor.Analyzers(a => a.Add("lowercase", lowercaseAnalyzer));

    // Snowball (stemmer) token filter
    var snowballPorterFilter = GetStemmerTokenFilter(languageCode);
    descriptor.TokenFilters(d => d.Add("snowballPorterFilter", snowballPorterFilter));

    // Stopword filter
    var stopwordFilter = GetStopwordFilter(languageCode);
    descriptor.TokenFilters(d => d.Add("stopwordFilter", stopwordFilter));

    // Word delimiter token filter
    var wdtFitler = GetWordDelimeterTokenFilter(languageCode);
    descriptor.TokenFilters(d => d.Add("wdtFitler", wdtFitler));

    // Job default analyzer
    var jobDefaultAnalyzer = GetJobDefaultAnanyzer(languageCode);
    descriptor.Analyzers(a => a.Add("jobDefaultAnalyzer", jobDefaultAnalyzer));

    // Job default with delimiter analyzer
    var jobDefaultWithDelimiterAnalyzer = GetJobDefaultWithDelimiterAnalyzer(languageCode);
    descriptor.Analyzers(a => a.Add("jobDefaultWithDelimiterAnalyzer", jobDefaultWithDelimiterAnalyzer));

    // Title suggestion analyzer
    var titleSuggestAnalyzer = GetTitleSuggestAnalyzer(languageCode);
    descriptor.Analyzers(a => a.Add("titleSuggestAnalyzer", titleSuggestAnalyzer));

    // Country: match the first node in the hierarchy path
    descriptor.Tokenizers(t => t.Add("country_path", new PatternTokenizer
    {
        Pattern = "^(/[0-9]+/).*",
        Group = 1
    }));
    descriptor.Analyzers(a => a.Add("country_path", new CustomAnalyzer { Tokenizer = "country_path" }));

    // Region: match the first and second nodes in the hierarchy path
    descriptor.Tokenizers(t => t.Add("region_path", new PatternTokenizer
    {
        Pattern = "^(/[0-9]+/[0-9]+/).*",
        Group = 1
    }));
    descriptor.Analyzers(a => a.Add("region_path", new CustomAnalyzer { Tokenizer = "region_path" }));

    // City: match the first three to six nodes in the hierarchy path,
    // since hierarchy depth varies by country (some lack a second-level division)
    descriptor.Tokenizers(t => t.Add("city_path", new PatternTokenizer
    {
        Pattern = "^(/[0-9]+/[0-9]+/[0-9]+/[0-9]+/[0-9]+/[0-9]+/|/[0-9]+/[0-9]+/[0-9]+/[0-9]+/[0-9]+/|/[0-9]+/[0-9]+/[0-9]+/[0-9]+/|/[0-9]+/[0-9]+/[0-9]+/).*",
        Group = 1
    }));
    descriptor.Analyzers(a => a.Add("city_path", new CustomAnalyzer { Tokenizer = "city_path" }));

    return descriptor;
}
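A minimal sketch of applying this descriptor at index-creation time (assumptions: the NEST 1.x-era fluent API that the FluentDictionary Add calls above imply, a hypothetical IElasticClient named client, and the index name "jobs"):

private void CreateJobsIndex(IElasticClient client)
{
    // Hypothetical call site: the descriptor built above becomes
    // the analysis settings of the new index.
    client.CreateIndex("jobs", c => c
        .Analysis(_ => GetJobsAnalysisDescriptor(LanguageCode.EN)));
}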