/// <summary>
/// Configures the index analysis: char filters, token filters, tokenizers and
/// the custom analyzers that combine them, on the supplied descriptor.
/// </summary>
/// <param name="analysis">Descriptor to configure; mutated and returned.</param>
/// <returns>The same descriptor with the full analysis configuration applied.</returns>
public static AnalysisDescriptor SetAnalysis(AnalysisDescriptor analysis) =>
    analysis
        .CharFilters(charFilters => charFilters
            // "mapping" rewrites characters per the project-defined table.
            .Mapping("mapping", filter => filter.Mappings(GetCharMapping()))
            // "digits" strips everything that is not 0-9.
            .PatternReplace("digits", filter => filter.Pattern("[^0-9]").Replacement("")))
        .TokenFilters(tokenFilters => tokenFilters
            .NGram("digits_ngram", ngram => ngram.MinGram(3).MaxGram(8))
            .Length("length_limit", length => length.Min(1).Max(20))
            .EdgeNGram("custom_edge_ngram", edge => edge.MinGram(1).MaxGram(50)))
        .Tokenizers(tokenizers => tokenizers
            .NGram("ngram", ngram => ngram.MinGram(2).MaxGram(30)))
        .Analyzers(analyzers => analyzers
            .UserDefined(Replace, new CustomAnalyzer
            {
                Tokenizer = "whitespace",
                Filter = new[] { "lowercase", "scandinavian_folding", "unique" },
                CharFilter = new[] { "mapping" }
            })
            .UserDefined(ReplaceNgram, new CustomAnalyzer
            {
                Tokenizer = "whitespace",
                Filter = new[] { "lowercase", "scandinavian_folding", "custom_edge_ngram", "unique" },
                CharFilter = new[] { "mapping", "html_strip" }
            })
            .UserDefined(Key, new CustomAnalyzer
            {
                Tokenizer = "keyword",
                Filter = new[] { "lowercase", "scandinavian_folding" }
            })
            .UserDefined(Digits, new CustomAnalyzer
            {
                Tokenizer = "keyword",
                CharFilter = new[] { "digits" },
                Filter = new[] { "digits_ngram", "unique", "length_limit" }
            })
            .UserDefined(Lowercase, new CustomAnalyzer
            {
                Tokenizer = "keyword",
                Filter = new[] { "lowercase" }
            }));
/// <summary>
/// Builds an <see cref="AnalysisDescriptor"/> from pre-built parts: an
/// html_strip char filter registered under <paramref name="charHtmlFilter"/>,
/// plus externally constructed token-filter and analyzer descriptors.
/// </summary>
/// <param name="charHtmlFilter">Name to register the html_strip char filter under.</param>
/// <param name="tokenFilters">Already-configured token filters; used as-is.</param>
/// <param name="analyzers">Already-configured analyzers; used as-is.</param>
/// <returns>The assembled analysis descriptor.</returns>
private static AnalysisDescriptor CreateAnalysisDescriptor(string charHtmlFilter, TokenFiltersDescriptor tokenFilters, AnalyzersDescriptor analyzers)
{
    // The selector arguments are deliberately ignored: the caller supplies
    // fully built descriptors, so they replace the ones NEST would provide.
    var descriptor = new AnalysisDescriptor()
        .CharFilters(c => c.HtmlStrip(charHtmlFilter))
        .TokenFilters(_ => tokenFilters)
        .Analyzers(_ => analyzers);

    return descriptor;
}
/// <summary>
/// Adds each char filter from <paramref name="build"/> that passes
/// <c>CanContribute</c> to the descriptor's char-filter collection.
/// </summary>
/// <param name="descriptor">Descriptor receiving the char filters.</param>
/// <param name="build">Name/filter pairs that are candidates for registration.</param>
/// <returns>The descriptor with the accepted char filters added.</returns>
protected override AnalysisDescriptor Contribute(AnalysisDescriptor descriptor, IEnumerable<KeyValuePair<string, CharFilterBase>> build)
{
    return descriptor.CharFilters(charFilters =>
    {
        foreach (var pair in build)
        {
            // Only register filters this contributor is responsible for.
            if (CanContribute(pair, charFilters))
            {
                charFilters.Add(pair.Key, pair.Value);
            }
        }

        return charFilters;
    });
}
/// <summary>
/// Configures Swedish-oriented analysis: a w/W → v/V character mapping, a
/// Hunspell sv_SE token filter, and two custom analyzers (lowercase keyword
/// and full Swedish text analysis).
/// </summary>
/// <param name="analysis">Descriptor to configure; mutated and returned.</param>
/// <returns>The same descriptor with the analysis configuration applied.</returns>
private static AnalysisDescriptor Analysis(AnalysisDescriptor analysis)
{
    return analysis
        .CharFilters(charFilters => charFilters
            // Historic Swedish spelling: treat w as v for matching.
            .Mapping("swedish_char_mapping", mapping => mapping.Mappings("w => v", "W => V")))
        .TokenFilters(tokenFilters => tokenFilters
            .Hunspell("sv_SE", hunspell => hunspell.Dedup().Locale("sv_SE")))
        .Analyzers(analyzers => analyzers
            .Custom(LowercaseKeywordAnalyserName, custom => custom
                .Tokenizer("keyword")
                .Filters("lowercase"))
            .Custom(SwedishTextAnalyserName, custom => custom
                .Tokenizer("standard")
                .Filters("lowercase", "sv_SE")
                .CharFilters("html_strip", "swedish_char_mapping")));
}
/// <summary>
/// Builds the analysis under test by delegating char-filter construction to
/// the assertion setup's fluent factory, registered under its configured name.
/// </summary>
/// <param name="an">Descriptor the char filter is registered on.</param>
/// <returns>The configured analysis.</returns>
protected override IAnalysis FluentAnalysis(AnalysisDescriptor an)
{
    return an.CharFilters(selector => AssertionSetup.Fluent(AssertionSetup.Name, selector));
}
/// <summary>
/// Registers a mapping char filter that removes all decimal digits by mapping
/// each digit character to the empty string.
/// </summary>
/// <param name="analysisDescriptor">Descriptor to register the filter on; mutated and returned.</param>
/// <returns>The same descriptor with the remove-numbers char filter added.</returns>
public static AnalysisDescriptor CreateRemoveNumbersCharFilters(this AnalysisDescriptor analysisDescriptor)
{
    return analysisDescriptor.CharFilters(cFilter => cFilter
        .Mapping(REMOVE_NUMBERS_CHAR_FILTER_NAME, mpf => mpf
            // Fix: "0=>" was missing, so the digit zero was never removed
            // despite the filter's name promising all numbers are stripped.
            .Mappings("0=>", "1=>", "2=>", "3=>", "4=>", "5=>", "6=>", "7=>", "8=>", "9=>")));
}