Ejemplo n.º 1
0
 /// <summary>
 /// Registers a 2–4 token shingle filter plus a custom "shingle" analyzer, and —
 /// on clusters that support them (>= 5.2.0) — a "my_normalizer" normalizer.
 /// </summary>
 public static IAnalysis ProjectAnalysisSettings(AnalysisDescriptor analysis)
 {
     analysis
         .TokenFilters(tf => tf.Shingle("shingle", s => s.MinShingleSize(2).MaxShingleSize(4)))
         .Analyzers(a => a.Custom("shingle", c => c.Filters("standard", "shingle").Tokenizer("standard")));

     // Normalizers are a new feature since 5.2.0.
     if (TestConfiguration.Instance.InRange(">=5.2.0"))
     {
         analysis.Normalizers(n => n.Custom("my_normalizer", c => c.Filters("lowercase", "asciifolding")));
     }

     return analysis;
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Registers the concatenate / edge-n-gram / n-gram token filters and the custom
        /// index- and search-time analyzers used for autocomplete-style matching.
        /// </summary>
        private IAnalysis ConfigureConcatenateAndAutocompleteAnalysis(AnalysisDescriptor analysis)
        {
            // For the concatenate filter see my fork: https://github.com/rh78/elasticsearch-concatenate-token-filter
            var concatenateFilter = new ConcatenateTokenFilter
            {
                TokenSeparator = " ",
                IncrementGap = 1000
            };

            return analysis
                .TokenFilters(tf => tf
                    .UserDefined("concatenate_filter", concatenateFilter)
                    .EdgeNGram("edge_ngram_filter", e => e.MinGram(1).MaxGram(50))
                    .NGram("ngram_filter", n => n.MinGram(3).MaxGram(50))
                    .NGram("short_ngram_filter", n => n.MinGram(1).MaxGram(50)))
                .Analyzers(a => a
                    // Index-time analyzers: concatenated and/or n-gram expanded variants.
                    .Custom("edge_ngram_concatenate_index", c => c
                        .Tokenizer("standard")
                        .Filters(new string[] { "lowercase", "asciifolding", "concatenate_filter", "edge_ngram_filter" }))
                    .Custom("edge_ngram_partial_index", c => c
                        .Tokenizer("standard")
                        .Filters(new string[] { "lowercase", "asciifolding", "edge_ngram_filter" }))
                    .Custom("ngram_concatenate_index", c => c
                        .Tokenizer("standard")
                        .Filters(new string[] { "lowercase", "asciifolding", "concatenate_filter", "ngram_filter" }))
                    .Custom("short_ngram_concatenate_index", c => c
                        .Tokenizer("standard")
                        .Filters(new string[] { "lowercase", "asciifolding", "concatenate_filter", "short_ngram_filter" }))
                    .Custom("ngram_partial_index", c => c
                        .Tokenizer("standard")
                        .Filters(new string[] { "lowercase", "asciifolding", "ngram_filter" }))
                    .Custom("short_ngram_partial_index", c => c
                        .Tokenizer("standard")
                        .Filters(new string[] { "lowercase", "asciifolding", "short_ngram_filter" }))
                    // Search-time analyzers: no n-gram expansion of the query terms.
                    .Custom("concatenate_search", c => c
                        .Tokenizer("standard")
                        .Filters(new string[] { "lowercase", "asciifolding", "concatenate_filter" }))
                    .Custom("partial_search", c => c
                        .Tokenizer("standard")
                        .Filters(new string[] { "lowercase", "asciifolding" })));
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Builds an <see cref="AnalysisDescriptor"/> from an HTML-strip char filter name and
        /// pre-built token-filter and analyzer descriptors.
        /// </summary>
        private static AnalysisDescriptor CreateAnalysisDescriptor(string charHtmlFilter, TokenFiltersDescriptor tokenFilters, AnalyzersDescriptor analyzers)
        {
            var descriptor = new AnalysisDescriptor();
            descriptor.CharFilters(chars => chars.HtmlStrip(charHtmlFilter));
            // The pre-built descriptors are substituted wholesale; the lambda argument is unused.
            descriptor.TokenFilters(_ => tokenFilters);
            descriptor.Analyzers(_ => analyzers);
            return descriptor;
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Registers a "city_synonym" synonym token filter and a custom "cna" analyzer
 /// (html_strip char filter, standard tokenizer, lowercase/stop/synonym filters).
 /// </summary>
 public IAnalysis ConfigureAnalysis(AnalysisDescriptor analysisDescriptor)
 {
     var withSynonyms = analysisDescriptor.TokenFilters(
         tf => tf.Synonym("city_synonym", syn => syn.Synonyms("lol => laughing", "new york, nyc")));

     return withSynonyms.Analyzers(a => a.Custom("cna", custom => custom
         .CharFilters("html_strip")
         .Tokenizer("standard")
         .Filters("lowercase", "stop", "city_synonym")));
 }
 /// <summary>
 /// Adds every token filter from <paramref name="build"/> that this contributor is
 /// allowed to contribute, then hands the descriptor back to the fluent pipeline.
 /// </summary>
 protected override AnalysisDescriptor Contribute(AnalysisDescriptor descriptor, IEnumerable <KeyValuePair <string, TokenFilterBase> > build)
 {
     return descriptor.TokenFilters(filters =>
     {
         foreach (var pair in build)
         {
             if (CanContribute(pair, filters))
             {
                 filters.Add(pair.Key, pair.Value);
             }
         }
         return filters;
     });
 }
Ejemplo n.º 6
0
 /// <summary>
 /// Project analysis: a 2–4 token shingle filter plus a custom "shingle" analyzer.
 /// </summary>
 private IAnalysis ProjectAnalysisSettings(AnalysisDescriptor analysis) =>
     analysis
         .TokenFilters(tf => tf.Shingle("shingle", s => s.MinShingleSize(2).MaxShingleSize(4)))
         .Analyzers(a => a.Custom("shingle", c => c.Filters("standard", "shingle").Tokenizer("standard")));
 /// <summary>
 /// Delegates token-filter configuration to the assertion setup's fluent factory.
 /// </summary>
 protected override IAnalysis FluentAnalysis(AnalysisDescriptor an)
 {
     return an.TokenFilters(descriptor => AssertionSetup.Fluent(AssertionSetup.Name, descriptor));
 }
Ejemplo n.º 8
0
        /// <summary>
        /// Dutch analysis chain: custom stop and stemmer token filters, a digit-stripping
        /// char filter, and a custom "dutch" analyzer that combines all three.
        /// </summary>
        public static AnalysisDescriptor DutchAnalysis(AnalysisDescriptor analysis)
        {
            // Custom token filters.
            var withTokenFilters = analysis.TokenFilters(tf => tf
                .Stop("dutch_stop", stop => stop.StopWords("_dutch_"))
                .Stemmer("dutch_stemmer", stem => stem.Language("dutch")));

            // Strip digit runs before tokenization.
            var withCharFilters = withTokenFilters.CharFilters(cf => cf
                .PatternReplace("kill_numbers", p => p.Pattern("(\\d+)").Replacement("")));

            // Custom analyzer tying the pieces together.
            return withCharFilters.Analyzers(a => a.Custom("dutch", c => c
                .CharFilters("kill_numbers")
                .Tokenizer("standard")
                .Filters("lowercase", "dutch_stop", "dutch_stemmer")));
        }
 /// <summary>
 /// Delegates token-filter and analyzer creation to the dedicated factory methods.
 /// </summary>
 private IAnalysis CreateAnalysis(AnalysisDescriptor analysisDescriptor) =>
     analysisDescriptor
         .TokenFilters(CreateTokenFilters)
         .Analyzers(CreateAnalyzers);
Ejemplo n.º 10
0
        /// <summary>
        /// Job Analysis descriptions
        /// </summary>
        ///
        /// <summary>
        /// Builds the analysis descriptor for the jobs index: title/path/lowercase analyzers,
        /// language-specific token filters, and hierarchy-path tokenizers for country,
        /// region and city matching.
        /// </summary>
        /// <param name="languageCode">Language the stemmer/stopword/delimiter filters are built for.</param>
        private AnalysisDescriptor GetJobsAnalysisDescriptor(LanguageCode languageCode = LanguageCode.EN)
        {
            var descriptor = new AnalysisDescriptor();

            // Custom token filters: title shingles and job-specific stopwords.
            descriptor.TokenFilters(f => f.Add("shingle_title", new ShingleTokenFilter()));
            descriptor.TokenFilters(f => f.Add("job_stopfilter", new StopTokenFilter
            {
                Stopwords = new List <string> { "job", "jobs" }
            }));

            // Title, hierarchy-path and lowercase analyzers.
            descriptor.Analyzers(a => a.Add("job_title", GetTitleAnalyzer(languageCode)));
            descriptor.Analyzers(a => a.Add("path", GetPathAnalyzer()));
            descriptor.Analyzers(a => a.Add("lowercase", GetLowercaseAnalyzer(languageCode)));

            // Language-specific token filters.
            // NOTE(review): the "wdtFitler" key spelling is kept as-is — renaming a registered
            // filter key would break any analyzer definition that references it.
            descriptor.TokenFilters(f => f.Add("snowballPorterFilter", GetStemmerTokenFilter(languageCode)));
            descriptor.TokenFilters(f => f.Add("stopwordFilter", GetStopwordFilter(languageCode)));
            descriptor.TokenFilters(f => f.Add("wdtFitler", GetWordDelimeterTokenFilter(languageCode)));

            // Job analyzers (default, delimiter-aware, and title suggestion).
            descriptor.Analyzers(a => a.Add("jobDefaultAnalyzer", GetJobDefaultAnanyzer(languageCode)));
            descriptor.Analyzers(a => a.Add("jobDefaultWithDelimiterAnalyzer", GetJobDefaultWithDelimiterAnalyzer(languageCode)));
            descriptor.Analyzers(a => a.Add("titleSuggestAnalyzer", GetTitleSuggestAnalyzer(languageCode)));

            // Country: match the first node in the hierarchy path.
            descriptor.Tokenizers(t => t.Add("country_path", new PatternTokenizer
            {
                Pattern = "^(/[0-9]+/).*", Group = 1
            }));
            descriptor.Analyzers(a => a.Add("country_path", new CustomAnalyzer
            {
                Tokenizer = "country_path"
            }));

            // Region: match the first and second nodes in the hierarchy path.
            descriptor.Tokenizers(t => t.Add("region_path", new PatternTokenizer
            {
                Pattern = "^(/[0-9]+/[0-9]+/).*", Group = 1
            }));
            descriptor.Analyzers(a => a.Add("region_path", new CustomAnalyzer
            {
                Tokenizer = "region_path"
            }));

            // City: match three to six leading nodes, since cities in some countries
            // lack a second-level division.
            descriptor.Tokenizers(t => t.Add("city_path", new PatternTokenizer
            {
                Pattern = "^(/[0-9]+/[0-9]+/[0-9]+/[0-9]+/[0-9]+/[0-9]+/|/[0-9]+/[0-9]+/[0-9]+/[0-9]+/[0-9]+/|/[0-9]+/[0-9]+/[0-9]+/[0-9]+/|/[0-9]+/[0-9]+/[0-9]+/).*", Group = 1
            }));
            descriptor.Analyzers(a => a.Add("city_path", new CustomAnalyzer
            {
                Tokenizer = "city_path"
            }));

            return descriptor;
        }
 /// <summary>
 /// Wires the token filters and analyzers via the dedicated setup callbacks.
 /// </summary>
 private static IAnalysis SetupAnalysis(AnalysisDescriptor analysis) =>
     analysis
         .TokenFilters(SetupTokenFilter)
         .Analyzers(SetupAnalyzer);