protected virtual TokenFiltersDescriptor ConfigureTokenFilters(TokenFiltersDescriptor tokenFilters, string documentType)
{
    return tokenFilters
        .NGram(NGramFilterName, descriptor => ConfigureNGramFilter(descriptor, documentType))
        .EdgeNGram(EdgeNGramFilterName, descriptor => ConfigureEdgeNGramFilter(descriptor, documentType));
}
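// A minimal sketch (not from the original source) of how a derived class might
// extend this virtual hook. The "english_stemmer" filter name is an assumption
// added purely for illustration.
protected override TokenFiltersDescriptor ConfigureTokenFilters(TokenFiltersDescriptor tokenFilters, string documentType)
{
    // Reuse the base n-gram/edge-n-gram setup, then append an extra filter.
    return base.ConfigureTokenFilters(tokenFilters, documentType)
        .Stemmer("english_stemmer", s => s.Language("english"));
}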
private static IPromise<ITokenFilters> CreateTokenFilters(TokenFiltersDescriptor tokenFiltersDescriptor)
{
    return tokenFiltersDescriptor
        .NGram(TokenFilters.NgramFilter, ng => ng
            .MinGram(3)
            .MaxGram(10));
}
private static IPromise<ITokenFilters> SetupTokenFilter(TokenFiltersDescriptor tokenFilters)
{
    return tokenFilters
        .NGram(TokenFilter.NGram, s => s
            .MinGram(3)
            .MaxGram(10));
}
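// A hedged usage sketch: a method with this signature plugs straight into the
// Analysis block as a method group. The wrapper method, client instance, and
// index name below are assumptions (NEST 6.x-style API), not original code.
private static void CreateSearchIndex(IElasticClient client)
{
    var response = client.CreateIndex("search", c => c
        .Settings(s => s
            .Analysis(a => a
                .TokenFilters(SetupTokenFilter))));
}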
public static TokenFiltersDescriptor AddTokenFilters(this TokenFiltersDescriptor descriptor)
{
    foreach (var tokenFilterEntry in FiltersMap)
    {
        descriptor = descriptor.UserDefined(tokenFilterEntry.Key, tokenFilterEntry.Value);
    }

    return descriptor;
}
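// FiltersMap is not shown in the original. A plausible shape (the names and
// filter choices here are hypothetical) is a name-to-filter dictionary of NEST
// ITokenFilter instances:
private static readonly IDictionary<string, ITokenFilter> FiltersMap =
    new Dictionary<string, ITokenFilter>
    {
        { "custom_lowercase", new LowercaseTokenFilter() },
        { "custom_ascii_folding", new AsciiFoldingTokenFilter { PreserveOriginal = true } }
    };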
public static TokenFiltersDescriptor AddVocabularyFilter(this TokenFiltersDescriptor fd, IDictionary<TaxonomyResultDTO, IList<TaxonomyResultDTO>> dictionary, string vocabularyKey)
{
    if (string.IsNullOrEmpty(vocabularyKey))
    {
        return fd;
    }

    return fd.Synonym(vocabularyKey, s => s
        .Tokenizer(ElasticTokenizers.Keyword)
        .Synonyms(BuildVocabularyList(dictionary)));
}
public static TokenFiltersDescriptor AddTaxonomyFilters(this TokenFiltersDescriptor fd, IEnumerable<MetadataProperty> metadataProperties)
{
    foreach (var metadataProperty in metadataProperties)
    {
        // Skip properties that carry no taxonomy payload.
        if (metadataProperty == null || !metadataProperty.Properties.ContainsKey(Strings.Taxonomy))
        {
            continue;
        }

        IDictionary<TaxonomyResultDTO, IList<TaxonomyResultDTO>> dictionary =
            TaxonomyTransformer.BuildFlatDictionary(metadataProperty.Properties[Strings.Taxonomy]);

        // Derive per-property filter names so each taxonomy field gets its own
        // auto-phrase and vocabulary (synonym) filter.
        string autoPhraseKey = metadataProperty.Key.GetPreparedAnalyzerName(ElasticFilters.AutoPhrasePrefix);
        string vocabularyKey = metadataProperty.Key.GetPreparedAnalyzerName(ElasticFilters.VocabularyPrefix);

        fd.AddAutoPhraseFilter(dictionary, autoPhraseKey);
        fd.AddVocabularyFilter(dictionary, vocabularyKey);
    }

    return fd;
}
private TokenFiltersDescriptor CreateTokenFilterDescriptor()
{
    var tokenFilters = new TokenFiltersDescriptor();

    // Split tokens into word and number parts only, with no catenation,
    // case/numeric splitting, or preservation of the original token.
    tokenFilters.WordDelimiter("word_filter", w => w
        .GenerateWordParts(true)
        .GenerateNumberParts(true)
        .CatenateWords(false)
        .CatenateNumbers(false)
        .CatenateAll(false)
        .SplitOnCaseChange(false)
        .PreserveOriginal(false)
        .SplitOnNumerics(false)
        .StemEnglishPossessive(false));

    // Blank out any token that contains the field-separator marker.
    tokenFilters.PatternReplace("filler_filter", p => p
        .Pattern(".*" + Common.Constants.TextFieldSeparator + ".*")
        .Replacement(string.Empty));

    return tokenFilters;
}
private AnalyzersDescriptor CreateAnalyzersDescriptor(int shingleCount, string tokenizer, string charHtmlFilter, TokenFiltersDescriptor tokenFilters)
{
    var analyzers = new AnalyzersDescriptor();

    for (var i = 1; i <= shingleCount; i++)
    {
        // Copy the loop variable so the lambdas below capture a stable value.
        var actualIndex = i;
        var filterName = string.Format("{0}{1}", _filterPrefix, actualIndex);

        // Analyzer 1 emits plain unigrams; every higher-order analyzer gets a
        // dedicated shingle filter of exactly that size.
        if (i != 1)
        {
            var filterDescriptor = new ShingleTokenFilterDescriptor()
                .MinShingleSize(actualIndex)
                .MaxShingleSize(actualIndex)
                .OutputUnigrams(false)
                .OutputUnigramsIfNoShingles(false);
            tokenFilters.Shingle(filterName, desc => filterDescriptor);
        }

        var filters = new List<string> { "lowercase", "word_filter" };
        if (i != 1)
        {
            filters.Add(filterName);
        }
        filters.Add("filler_filter");

        var analyzerName = string.Format("{0}{1}", _analyzerPrefix, actualIndex);
        var analyzer = new CustomAnalyzer
        {
            Filter = filters,
            Tokenizer = tokenizer,
            CharFilter = new List<string> { charHtmlFilter }
        };

        analyzers.Custom(analyzerName, a => analyzer);
    }

    return analyzers;
}
private static AnalysisDescriptor CreateAnalysisDescriptor(string charHtmlFilter, TokenFiltersDescriptor tokenFilters, AnalyzersDescriptor analyzers)
{
    return new AnalysisDescriptor()
        .CharFilters(c => c.HtmlStrip(charHtmlFilter))
        .TokenFilters(t => tokenFilters)
        .Analyzers(a => analyzers);
}
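// A hedged usage sketch (the wrapper method, client, index name, and the
// "char_html_filter" argument are assumptions) showing how the assembled
// analysis settings attach at index-creation time with a NEST 6.x-style client:
private static void CreateDocumentIndex(IElasticClient client, TokenFiltersDescriptor tokenFilters, AnalyzersDescriptor analyzers)
{
    var response = client.CreateIndex("documents", c => c
        .Settings(s => s
            .Analysis(a => CreateAnalysisDescriptor("char_html_filter", tokenFilters, analyzers))));
}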
public static TokenFiltersDescriptor AddWordDelimiter(this TokenFiltersDescriptor fd)
{
    // Despite the name, this registers a pattern-replace filter that turns
    // underscores into spaces.
    return fd.PatternReplace(ElasticFilters.FilterWordDelimiter, wd => wd
        .Pattern("_")
        .Replacement(" "));
}
public static TokenFiltersDescriptor AddAutoCompleteShingleFilter(this TokenFiltersDescriptor fd)
{
    return fd.Shingle(ElasticFilters.AutocompleteShingle, sh => sh
        .MinShingleSize(2)
        .MaxShingleSize(4));
}
public static TokenFiltersDescriptor AddAutoCompleteNgramFilter(this TokenFiltersDescriptor fd)
{
    return fd.EdgeNGram(ElasticFilters.AutocompleteNgram, eng => eng
        .MinGram(3)
        .MaxGram(15));
}
public static TokenFiltersDescriptor AddNgramFilter(this TokenFiltersDescriptor fd)
{
    return fd.NGram(ElasticFilters.Ngram, ng => ng
        .MinGram(2)
        .MaxGram(15));
}
public static TokenFiltersDescriptor AddEnglishStopwordsFilter(this TokenFiltersDescriptor fd)
{
    return fd.Stop(ElasticFilters.EnglishStopwords, s => s
        .StopWords("_english_"));
}
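// A minimal sketch (the wrapper method, client, and index name are
// assumptions) of how the extension methods above chain inside an analysis
// block, since each one returns the same TokenFiltersDescriptor:
private static void CreateContentIndex(IElasticClient client)
{
    var response = client.CreateIndex("content", c => c
        .Settings(s => s
            .Analysis(a => a
                .TokenFilters(tf => tf
                    .AddNgramFilter()
                    .AddAutoCompleteNgramFilter()
                    .AddAutoCompleteShingleFilter()
                    .AddEnglishStopwordsFilter()))));
}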