예제 #1
0
 /// <summary>
 /// Configures index analysis for name fields: a non-word-character pattern
 /// tokenizer, a word-delimiter token filter, and three custom analyzers
 /// ("name-keyword", "html_stripper", "name-analyzer").
 /// </summary>
 /// <param name="analysis">The descriptor to extend.</param>
 /// <returns>The same descriptor with tokenizers, filters, and analyzers registered.</returns>
 private AnalysisDescriptor Analysis(AnalysisDescriptor analysis) => analysis
     // Split incoming text on any run of non-word characters.
     .Tokenizers(t => t.Pattern("name-tokenizer", pat => pat.Pattern(@"\W+")))
     // Break tokens on case changes and digit boundaries, keeping the original token too.
     .TokenFilters(tf => tf.WordDelimiter("name-words", wd => wd
         .SplitOnCaseChange()
         .PreserveOriginal()
         .SplitOnNumerics()
         .GenerateNumberParts(false)
         .GenerateWordParts()))
     .Analyzers(a => a
         // Whole-value matching, lowercased.
         .Custom("name-keyword", c => c
             .Tokenizer("keyword")
             .Filters("lowercase"))
         // Strips HTML markup before tokenizing on non-word characters.
         .Custom("html_stripper", c => c
             .Filters("trim", "lowercase")
             .CharFilters("html_strip")
             .Tokenizer("name-tokenizer"))
         // Word-delimited, lowercased analysis over the ik_max_word tokenizer.
         .Custom("name-analyzer", c => c
             .Filters("name-words", "lowercase")
             .Tokenizer("ik_max_word")));
예제 #2
0
 /// <summary>
 /// Registers onto <paramref name="descriptor"/> every tokenizer from
 /// <paramref name="build"/> that <c>CanContribute</c> accepts.
 /// </summary>
 /// <param name="descriptor">The analysis descriptor being assembled.</param>
 /// <param name="build">Candidate name/tokenizer pairs to contribute.</param>
 /// <returns>The descriptor with the accepted tokenizers added.</returns>
 protected override AnalysisDescriptor Contribute(AnalysisDescriptor descriptor, IEnumerable<KeyValuePair<string, TokenizerBase>> build)
 {
     return descriptor.Tokenizers(tokenizers =>
     {
         foreach (var pair in build)
         {
             // Filter inline rather than via LINQ; CanContribute may consult the collection.
             if (CanContribute(pair, tokenizers))
             {
                 tokenizers.Add(pair.Key, pair.Value);
             }
         }

         return tokenizers;
     });
 }
예제 #3
0
 /// <summary>
 /// Adds the whitespace tokenizer, an edge-ngram token filter (1–8 chars),
 /// and the suite of autocomplete analyzers to <paramref name="analysis"/>.
 /// </summary>
 /// <param name="analysis">The descriptor to extend.</param>
 /// <returns>The descriptor with the autocomplete analysis chain registered.</returns>
 public static AnalysisDescriptor AutoCompleteAnalyzers(this AnalysisDescriptor analysis)
 {
     // Shared building blocks for every autocomplete analyzer below.
     analysis = analysis
         .Tokenizers(t => t.Whitespace("whitespace_tokenizer"))
         .TokenFilters(t => t.EdgeNGram("ngram_filter", n => n.MinGram(1).MaxGram(8)));

     // All analyzers tokenize on whitespace; they differ only in their filter chains.
     return analysis.Analyzers(a => a
         .Custom("default_autocomplete", c => c.Tokenizer("whitespace_tokenizer").Filters("lowercase", "asciifolding"))
         .Custom("snowball_autocomplete", c => c.Tokenizer("whitespace_tokenizer").Filters("lowercase", "asciifolding", "snowball"))
         .Custom("shingle_autocomplete", c => c.Tokenizer("whitespace_tokenizer").Filters("shingle", "lowercase", "asciifolding"))
         .Custom("ngram_autocomplete", c => c.Tokenizer("whitespace_tokenizer").Filters("lowercase", "asciifolding", "ngram_filter"))
         .Custom("search_autocomplete", c => c.Tokenizer("whitespace_tokenizer").Filters("lowercase", "asciifolding")));
 }
예제 #4
0
 /// <summary>
 /// Configures analysis for NuGet package-id fields: a non-word-character
 /// pattern tokenizer, a word-delimiter filter, and two custom analyzers
 /// ("nuget-id-analyzer", "nuget-id-keyword").
 /// </summary>
 /// <param name="analysis">The descriptor to extend.</param>
 /// <returns>The descriptor with the package-id analysis chain registered.</returns>
 private static AnalysisDescriptor Analysis(AnalysisDescriptor analysis)
 {
     // Split package ids on any run of non-word characters (e.g. '.', '-').
     analysis = analysis.Tokenizers(tok => tok
         .Pattern("nuget-id-tokenizer", pattern => pattern.Pattern(@"\W+")));

     // Further split on case changes and digit boundaries, keeping the original token.
     analysis = analysis.TokenFilters(filters => filters
         .WordDelimiter("nuget-id-words", word => word
             .SplitOnCaseChange()
             .PreserveOriginal()
             .SplitOnNumerics()
             .GenerateNumberParts(false)
             .GenerateWordParts()));

     return analysis.Analyzers(custom => custom
         // Tokenized, word-delimited, lowercased matching for package-id search.
         .Custom("nuget-id-analyzer", c => c
             .Tokenizer("nuget-id-tokenizer")
             .Filters("nuget-id-words", "lowercase"))
         // Whole-id exact matching, lowercased.
         .Custom("nuget-id-keyword", c => c
             .Tokenizer("keyword")
             .Filters("lowercase")));
 }
 /// <summary>
 /// Builds the tokenizer section of the analysis via the assertion setup's
 /// fluent factory, keyed by the setup's name.
 /// </summary>
 /// <param name="an">The descriptor the tokenizers are registered on.</param>
 /// <returns>The resulting analysis configuration.</returns>
 protected override IAnalysis FluentAnalysis(AnalysisDescriptor an)
 {
     return an.Tokenizers(tokenizers => AssertionSetup.Fluent(AssertionSetup.Name, tokenizers));
 }
예제 #6
0
        /// <summary>
        /// Builds the full analysis descriptor (token filters, tokenizers, and
        /// analyzers) for the jobs index, configured for the given language.
        /// </summary>
        /// <param name="languageCode">Language the analyzers are built for; defaults to English.</param>
        /// <returns>The populated <see cref="AnalysisDescriptor"/>.</returns>
        private AnalysisDescriptor GetJobsAnalysisDescriptor(LanguageCode languageCode = LanguageCode.EN)
        {
            var descriptor = new AnalysisDescriptor();

            // Shingle filter used by the title analysis chain.
            descriptor.TokenFilters(f => f.Add("shingle_title", new ShingleTokenFilter()));

            // Drop the generic words "job"/"jobs" so they don't dominate matching.
            descriptor.TokenFilters(f => f.Add("job_stopfilter", new StopTokenFilter
            {
                Stopwords = new List<string> { "job", "jobs" }
            }));

            // Language-aware analyzers produced by dedicated factory methods.
            var titleAnalyzer = GetTitleAnalyzer(languageCode);
            descriptor.Analyzers(a => a.Add("job_title", titleAnalyzer));

            var pathAnalyzer = GetPathAnalyzer();
            descriptor.Analyzers(a => a.Add("path", pathAnalyzer));

            var lowercaseAnalyzer = GetLowercaseAnalyzer(languageCode);
            descriptor.Analyzers(a => a.Add("lowercase", lowercaseAnalyzer));

            // Language-aware token filters: stemming, stopwords, word delimiting.
            var stemmerFilter = GetStemmerTokenFilter(languageCode);
            descriptor.TokenFilters(d => d.Add("snowballPorterFilter", stemmerFilter));

            var stopwordFilter = GetStopwordFilter(languageCode);
            descriptor.TokenFilters(d => d.Add("stopwordFilter", stopwordFilter));

            var wordDelimiterFilter = GetWordDelimeterTokenFilter(languageCode);
            // NOTE: key spelling "wdtFitler" is kept as-is for index compatibility.
            descriptor.TokenFilters(d => d.Add("wdtFitler", wordDelimiterFilter));

            // Default job analyzers and the title suggestion analyzer.
            var defaultAnalyzer = GetJobDefaultAnanyzer(languageCode);
            descriptor.Analyzers(a => a.Add("jobDefaultAnalyzer", defaultAnalyzer));

            var defaultWithDelimiterAnalyzer = GetJobDefaultWithDelimiterAnalyzer(languageCode);
            descriptor.Analyzers(a => a.Add("jobDefaultWithDelimiterAnalyzer", defaultWithDelimiterAnalyzer));

            var suggestAnalyzer = GetTitleSuggestAnalyzer(languageCode);
            descriptor.Analyzers(a => a.Add("titleSuggestAnalyzer", suggestAnalyzer));

            // country: capture the first node of the hierarchy path.
            descriptor.Tokenizers(t => t.Add("country_path", new PatternTokenizer
            {
                Pattern = "^(/[0-9]+/).*",
                Group = 1
            }));
            descriptor.Analyzers(a => a.Add("country_path", new CustomAnalyzer { Tokenizer = "country_path" }));

            // region: capture the first two nodes of the hierarchy path.
            descriptor.Tokenizers(t => t.Add("region_path", new PatternTokenizer
            {
                Pattern = "^(/[0-9]+/[0-9]+/).*",
                Group = 1
            }));
            descriptor.Analyzers(a => a.Add("region_path", new CustomAnalyzer { Tokenizer = "region_path" }));

            // city: capture three to six leading nodes — some countries lack a
            // second-level division, so shorter paths must also match.
            descriptor.Tokenizers(t => t.Add("city_path", new PatternTokenizer
            {
                Pattern = "^(/[0-9]+/[0-9]+/[0-9]+/[0-9]+/[0-9]+/[0-9]+/|/[0-9]+/[0-9]+/[0-9]+/[0-9]+/[0-9]+/|/[0-9]+/[0-9]+/[0-9]+/[0-9]+/|/[0-9]+/[0-9]+/[0-9]+/).*",
                Group = 1
            }));
            descriptor.Analyzers(a => a.Add("city_path", new CustomAnalyzer { Tokenizer = "city_path" }));

            return descriptor;
        }