Ejemplo n.º 1
0
 /// <summary>
 /// Registers the n-gram and edge n-gram token filters on the supplied descriptor.
 /// </summary>
 /// <param name="tokenFilters">Descriptor the filters are added to.</param>
 /// <param name="documentType">Document type forwarded to the per-filter configuration callbacks.</param>
 /// <returns>The descriptor returned by the final <c>EdgeNGram</c> registration.</returns>
 protected virtual TokenFiltersDescriptor ConfigureTokenFilters(TokenFiltersDescriptor tokenFilters, string documentType)
 {
     // First the plain n-gram filter, configured by ConfigureNGramFilter.
     var withNGram = tokenFilters.NGram(
         NGramFilterName,
         nGram => ConfigureNGramFilter(nGram, documentType));

     // Then the edge n-gram filter, configured by ConfigureEdgeNGramFilter.
     return withNGram.EdgeNGram(
         EdgeNGramFilterName,
         edgeNGram => ConfigureEdgeNGramFilter(edgeNGram, documentType));
 }
 /// <summary>
 /// Adds an n-gram token filter named <c>TokenFilters.NgramFilter</c> that emits
 /// grams of 3 to 10 characters.
 /// </summary>
 /// <param name="tokenFiltersDescriptor">Descriptor the filter is registered on.</param>
 /// <returns>The result of the <c>NGram</c> registration.</returns>
 private static IPromise <ITokenFilters> CreateTokenFilters(TokenFiltersDescriptor tokenFiltersDescriptor)
 {
     return tokenFiltersDescriptor.NGram(
         TokenFilters.NgramFilter,
         nGram => nGram.MinGram(3).MaxGram(10));
 }
 /// <summary>
 /// Registers an n-gram token filter under the name <c>TokenFilter.NGram</c>
 /// with a minimum gram length of 3 and a maximum of 10.
 /// </summary>
 /// <param name="tokenFilters">Descriptor the filter is registered on.</param>
 /// <returns>The result of the <c>NGram</c> registration.</returns>
 private static IPromise <ITokenFilters> SetupTokenFilter(TokenFiltersDescriptor tokenFilters)
 {
     return tokenFilters.NGram(
         TokenFilter.NGram,
         grams => grams.MinGram(3).MaxGram(10));
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Registers every entry of <c>FiltersMap</c> on the descriptor as a user-defined
        /// token filter, using the entry key as the filter name and the entry value as the filter.
        /// </summary>
        /// <param name="descriptor">Descriptor to extend.</param>
        /// <returns>The descriptor returned by the last registration, or the input when the map is empty.</returns>
        public static TokenFiltersDescriptor AddTokenFilters(this TokenFiltersDescriptor descriptor)
        {
            var configured = descriptor;

            foreach (var pair in FiltersMap)
            {
                // Carry forward whatever each registration returns.
                configured = configured.UserDefined(pair.Key, pair.Value);
            }

            return configured;
        }
        /// <summary>
        /// Adds a synonym token filter built from a taxonomy dictionary.
        /// </summary>
        /// <param name="fd">Descriptor to extend.</param>
        /// <param name="dictionary">Taxonomy dictionary passed to <c>BuildVocabularyList</c> to produce the synonyms.</param>
        /// <param name="vocabularyKey">Name of the filter; when null or empty nothing is registered.</param>
        /// <returns>
        /// <paramref name="fd"/> unchanged when <paramref name="vocabularyKey"/> is null or empty;
        /// otherwise the result of the <c>Synonym</c> registration.
        /// </returns>
        public static TokenFiltersDescriptor AddVocabularyFilter(this TokenFiltersDescriptor fd, IDictionary <TaxonomyResultDTO, IList <TaxonomyResultDTO> > dictionary, string vocabularyKey)
        {
            if (!string.IsNullOrEmpty(vocabularyKey))
            {
                return fd.Synonym(
                    vocabularyKey,
                    synonym => synonym
                        .Tokenizer(ElasticTokenizers.Keyword)
                        .Synonyms(BuildVocabularyList(dictionary)));
            }

            // No usable filter name: leave the descriptor untouched.
            return fd;
        }
        /// <summary>
        /// For each metadata property that carries a <c>Strings.Taxonomy</c> entry, builds the flat
        /// taxonomy dictionary and registers an auto-phrase filter and a vocabulary filter for it.
        /// </summary>
        /// <param name="fd">Descriptor to extend.</param>
        /// <param name="metadataProperties">Properties to inspect; null items and items without a taxonomy entry are skipped.</param>
        /// <returns>The same <paramref name="fd"/> instance that was passed in.</returns>
        public static TokenFiltersDescriptor AddTaxonomyFilters(this TokenFiltersDescriptor fd, IEnumerable <MetadataProperty> metadataProperties)
        {
            foreach (var property in metadataProperties)
            {
                if (property != null && property.Properties.ContainsKey(Strings.Taxonomy))
                {
                    IDictionary <TaxonomyResultDTO, IList <TaxonomyResultDTO> > taxonomy =
                        TaxonomyTransformer.BuildFlatDictionary(property.Properties[Strings.Taxonomy]);

                    // Both filter names are derived from the property key, differing only by prefix.
                    string phraseFilterName     = property.Key.GetPreparedAnalyzerName(ElasticFilters.AutoPhrasePrefix);
                    string vocabularyFilterName = property.Key.GetPreparedAnalyzerName(ElasticFilters.VocabularyPrefix);

                    // NOTE(review): return values are discarded, so this relies on the
                    // Add* extensions mutating fd in place - confirm.
                    fd.AddAutoPhraseFilter(taxonomy, phraseFilterName);
                    fd.AddVocabularyFilter(taxonomy, vocabularyFilterName);
                }
            }

            return fd;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Builds a token filter descriptor containing two filters: a word-delimiter filter
        /// named "word_filter" and a pattern-replace filter named "filler_filter".
        /// </summary>
        /// <returns>The newly created descriptor with both filters registered.</returns>
        private TokenFiltersDescriptor CreateTokenFilterDescriptor()
        {
            var descriptor = new TokenFiltersDescriptor();

            // Word delimiter: only word-part and number-part generation are switched on;
            // every other option is explicitly disabled.
            descriptor.WordDelimiter(
                "word_filter",
                delimiter => delimiter
                    .GenerateWordParts(true)
                    .GenerateNumberParts(true)
                    .CatenateWords(false)
                    .CatenateNumbers(false)
                    .CatenateAll(false)
                    .SplitOnCaseChange(false)
                    .PreserveOriginal(false)
                    .SplitOnNumerics(false)
                    .StemEnglishPossessive(false));

            // Pattern replace: any token containing the text field separator is replaced
            // with the empty string.
            descriptor.PatternReplace(
                "filler_filter",
                replace => replace
                    .Pattern(".*" + Common.Constants.TextFieldSeparator + ".*")
                    .Replacement(string.Empty));

            return descriptor;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Creates one custom analyzer per shingle size from 1 to <paramref name="shingleCount"/>.
        /// For sizes above 1 a matching shingle token filter is also registered on
        /// <paramref name="tokenFilters"/> and inserted into the analyzer's filter chain.
        /// </summary>
        /// <param name="shingleCount">Largest shingle size; analyzers are produced for 1..shingleCount inclusive.</param>
        /// <param name="tokenizer">Tokenizer name assigned to every analyzer.</param>
        /// <param name="charHtmlFilter">Char filter name assigned to every analyzer.</param>
        /// <param name="tokenFilters">Descriptor that receives the generated shingle filters.</param>
        /// <returns>The descriptor holding all generated analyzers.</returns>
        private AnalyzersDescriptor CreateAnalyzersDescriptor(int shingleCount, string tokenizer, string charHtmlFilter, TokenFiltersDescriptor tokenFilters)
        {
            var result = new AnalyzersDescriptor();

            for (var size = 1; size <= shingleCount; size++)
            {
                var shingleFilterName = string.Format("{0}{1}", _filterPrefix, size);

                // Every analyzer starts with these two filters and ends with "filler_filter";
                // the shingle filter, when present, sits between them.
                var filterChain = new List<string> { "lowercase", "word_filter" };

                if (size != 1)
                {
                    // Fixed-size shingles only (min == max), without unigram output.
                    var shingleFilter = new ShingleTokenFilterDescriptor()
                        .MinShingleSize(size)
                        .MaxShingleSize(size)
                        .OutputUnigrams(false)
                        .OutputUnigramsIfNoShingles(false);

                    tokenFilters.Shingle(shingleFilterName, unused => shingleFilter);
                    filterChain.Add(shingleFilterName);
                }

                filterChain.Add("filler_filter");

                var analyzerName   = string.Format("{0}{1}", _analyzerPrefix, size);
                var customAnalyzer = new CustomAnalyzer
                {
                    Filter     = filterChain,
                    Tokenizer  = tokenizer,
                    CharFilter = new List<string> { charHtmlFilter }
                };

                result.Custom(analyzerName, unused => customAnalyzer);
            }

            return result;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Assembles an analysis descriptor from an HTML-strip char filter plus the
        /// supplied token filters and analyzers.
        /// </summary>
        /// <param name="charHtmlFilter">Name under which the HTML-strip char filter is registered.</param>
        /// <param name="tokenFilters">Pre-built token filter descriptor to attach.</param>
        /// <param name="analyzers">Pre-built analyzers descriptor to attach.</param>
        /// <returns>The newly created analysis descriptor.</returns>
        private static AnalysisDescriptor CreateAnalysisDescriptor(string charHtmlFilter, TokenFiltersDescriptor tokenFilters, AnalyzersDescriptor analyzers)
        {
            var analysis = new AnalysisDescriptor();

            analysis.CharFilters(charFilters => charFilters.HtmlStrip(charHtmlFilter));

            // The selectors ignore their argument and hand back the pre-built descriptors.
            analysis.TokenFilters(unused => tokenFilters);
            analysis.Analyzers(unused => analyzers);

            return analysis;
        }
 /// <summary>
 /// Adds a pattern-replace filter named <c>ElasticFilters.FilterWordDelimiter</c>
 /// that replaces the pattern "_" with a single space.
 /// </summary>
 /// <param name="fd">Descriptor to extend.</param>
 /// <returns>The result of the <c>PatternReplace</c> registration.</returns>
 public static TokenFiltersDescriptor AddWordDelimeter(this TokenFiltersDescriptor fd)
 {
     return fd.PatternReplace(
         ElasticFilters.FilterWordDelimiter,
         replace => replace.Pattern("_").Replacement(" "));
 }
 /// <summary>
 /// Adds a shingle filter named <c>ElasticFilters.AutocompleteShingle</c>
 /// producing shingles of 2 to 4 tokens.
 /// </summary>
 /// <param name="fd">Descriptor to extend.</param>
 /// <returns>The result of the <c>Shingle</c> registration.</returns>
 public static TokenFiltersDescriptor AddAutoCompleteShingleFilter(this TokenFiltersDescriptor fd)
 {
     return fd.Shingle(
         ElasticFilters.AutocompleteShingle,
         shingle => shingle.MinShingleSize(2).MaxShingleSize(4));
 }
 /// <summary>
 /// Adds an edge n-gram filter named <c>ElasticFilters.AutocompleteNgram</c>
 /// with gram lengths from 3 to 15.
 /// </summary>
 /// <param name="fd">Descriptor to extend.</param>
 /// <returns>The result of the <c>EdgeNGram</c> registration.</returns>
 public static TokenFiltersDescriptor AddAutoCompleteNgramFilter(this TokenFiltersDescriptor fd)
 {
     return fd.EdgeNGram(
         ElasticFilters.AutocompleteNgram,
         edgeNGram => edgeNGram.MinGram(3).MaxGram(15));
 }
 /// <summary>
 /// Adds an n-gram filter named <c>ElasticFilters.Ngram</c> with gram lengths from 2 to 15.
 /// </summary>
 /// <param name="fd">Descriptor to extend.</param>
 /// <returns>The result of the <c>NGram</c> registration.</returns>
 public static TokenFiltersDescriptor AddNgramFilter(this TokenFiltersDescriptor fd)
 {
     return fd.NGram(
         ElasticFilters.Ngram,
         nGram => nGram.MinGram(2).MaxGram(15));
 }
 /// <summary>
 /// Adds a stop filter named <c>ElasticFilters.EnglishStopwords</c> whose stop words
 /// are set to the value "_english_".
 /// </summary>
 /// <param name="fd">Descriptor to extend.</param>
 /// <returns>The result of the <c>Stop</c> registration.</returns>
 public static TokenFiltersDescriptor AddEnglishStopwordsFilter(this TokenFiltersDescriptor fd)
 {
     return fd.Stop(
         ElasticFilters.EnglishStopwords,
         stop => stop.StopWords("_english_"));
 }