Beispiel #1
0
        /// <summary>
        /// Registers the language-specific analysis components on the client's index:
        /// token filters, the custom "default" analyzer and, for Persian, a char filter.
        /// </summary>
        /// <remarks>
        /// Every component is sent as its own index-settings update. When an update comes back
        /// invalid the failure is logged through <c>Logger</c> and the remaining updates are
        /// still attempted.
        /// </remarks>
        /// <param name="client">Client whose <c>Language</c> selects the analyzer and whose <c>IndexName</c> receives the updates.</param>
        protected virtual void InitializeAnalyzer(IVulcanClient client)
        {
            var analyzerName = VulcanHelper.GetAnalyzer(client.Language);
            IUpdateIndexSettingsResponse result;

            // The "standard" analyzer gets none of the language-specific token filters below.
            if (analyzerName != "standard")
            {
                // stop words
                result = client.UpdateIndexSettings(client.IndexName, update => update
                    .IndexSettings(settings => settings
                        .Analysis(analysis => analysis
                            .TokenFilters(filters => filters
                                .Stop("stop", stop => stop
                                    .StopWords(GetStopwordsLanguage(analyzerName)))))));

                if (!result.IsValid)
                {
                    Logger.Error($"Could not set up stop words for {client.IndexName}: {result.DebugInformation}");
                }

                // stemmer: skipped for cjk, persian and thai
                if (analyzerName != "cjk" && analyzerName != "persian" && analyzerName != "thai")
                {
                    result = client.UpdateIndexSettings(client.IndexName, update => update
                        .IndexSettings(settings => settings
                            .Analysis(analysis => analysis
                                .TokenFilters(filters => filters
                                    .Stemmer("stemmer", stemmer => stemmer
                                        .Language(GetStemmerLanguage(analyzerName)))))));

                    if (!result.IsValid)
                    {
                        Logger.Error($"Could not set up stemmers for {client.IndexName}: {result.DebugInformation}");
                    }
                }

                // stemmer overrides: dutch only
                if (analyzerName == "dutch")
                {
                    result = client.UpdateIndexSettings(client.IndexName, update => update
                        .IndexSettings(settings => settings
                            .Analysis(analysis => analysis
                                .TokenFilters(filters => filters
                                    .StemmerOverride("override", rules => rules
                                        .Rules(
                                            "fiets=>fiets",
                                            "bromfiets=>bromfiets",
                                            "ei=>eier",
                                            "kind=>kinder"))))));

                    if (!result.IsValid)
                    {
                        Logger.Error($"Could not set up stemmer overrides for {client.IndexName}: {result.DebugInformation}");
                    }
                }

                // elision: catalan, french, irish and italian
                if (analyzerName == "catalan" || analyzerName == "french" || analyzerName == "irish" || analyzerName == "italian")
                {
                    result = client.UpdateIndexSettings(client.IndexName, update => update
                        .IndexSettings(settings => settings
                            .Analysis(analysis => analysis
                                .TokenFilters(filters => filters
                                    .Elision("elision", elision => elision
                                        .Articles(GetElisionArticles(analyzerName)))))));

                    if (!result.IsValid)
                    {
                        Logger.Error($"Could not set up elisions for {client.IndexName}: {result.DebugInformation}");
                    }
                }

                // possessive stemmer: english only
                if (analyzerName == "english")
                {
                    result = client.UpdateIndexSettings(client.IndexName, update => update
                        .IndexSettings(settings => settings
                            .Analysis(analysis => analysis
                                .TokenFilters(filters => filters
                                    .Stemmer("possessive", stemmer => stemmer
                                        .Language("possessive_english"))))));

                    if (!result.IsValid)
                    {
                        Logger.Error($"Could not set up possessives for {client.IndexName}: {result.DebugInformation}");
                    }
                }

                // language-specific lowercase: greek, irish and turkish
                if (analyzerName == "greek" || analyzerName == "irish" || analyzerName == "turkish")
                {
                    result = client.UpdateIndexSettings(client.IndexName, update => update
                        .IndexSettings(settings => settings
                            .Analysis(analysis => analysis
                                .TokenFilters(filters => filters
                                    .Lowercase("custom_lowercase", lower => lower
                                        .Language(analyzerName))))));

                    if (!result.IsValid)
                    {
                        Logger.Error($"Could not set up lowercases for {client.IndexName}: {result.DebugInformation}");
                    }
                }
            }

            // Always sent, whatever the language: the synonym filter plus the custom "default"
            // analyzer built from the filter list returned by GetFilters.
            result = client.UpdateIndexSettings(client.IndexName, update => update
                .IndexSettings(settings => settings
                    .Analysis(analysis => analysis
                        .TokenFilters(filters => filters
                            .Synonym("synonyms", synonym => synonym
                                .Synonyms(GetSynonyms(client))))
                        .Analyzers(analyzers => analyzers
                            .Custom("default", custom => custom
                                .Tokenizer("standard")
                                .Filters(GetFilters(analyzerName)))))));

            if (!result.IsValid)
            {
                Logger.Error($"Could not set up custom analyzers for {client.IndexName}: {result.DebugInformation}");
            }

            // Persian additionally maps the zero-width non-joiner (U+200C) to a space via a char filter.
            if (analyzerName == "persian")
            {
                result = client.UpdateIndexSettings(client.IndexName, update => update
                    .IndexSettings(settings => settings
                        .Analysis(analysis => analysis
                            .CharFilters(charFilters => charFilters
                                .Mapping("zero_width_spaces", mapping => mapping
                                    .Mappings("\\u200C=> ")))
                            .Analyzers(analyzers => analyzers
                                .Custom("default", custom => custom
                                    .CharFilters("zero_width_spaces"))))));

                if (!result.IsValid)
                {
                    Logger.Error($"Could not set up char filters for {client.IndexName}: {result.DebugInformation}");
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Gets the Vulcan client for a culture, creating and caching it on first use.
        /// </summary>
        /// <remarks>
        /// The first request for a culture checks the Elasticsearch version, registers the index
        /// templates, creates the index when it does not exist, then closes it, applies the
        /// analyzers and customizers, and reopens it. Later requests for the same culture return
        /// the cached client.
        /// </remarks>
        /// <param name="language">Pass in null for the current UI culture, a specific culture or CultureInfo.InvariantCulture to get a client for non-language specific data</param>
        /// <returns>A Vulcan client</returns>
        /// <exception cref="Exception">
        /// Thrown when no node information or node version can be read, or when the first node
        /// reports a version starting with "1.".
        /// </exception>
        public virtual IVulcanClient GetClient(CultureInfo language = null)
        {
            var cultureInfo = language ?? CultureInfo.CurrentUICulture;

            IVulcanClient storedClient;

            // NOTE(review): this lock-free fast path assumes `clients` tolerates a read that is
            // concurrent with the Add below - confirm against the collection's declared type.
            if (clients.TryGetValue(cultureInfo, out storedClient))
            {
                return storedClient;
            }

            lock (lockObject)
            {
                // Re-check under the lock: another thread may have finished creating this client
                // while we were waiting. Without this, the index would be closed and reconfigured
                // a second time and the Add at the end would be given a key that already exists.
                if (clients.TryGetValue(cultureInfo, out storedClient))
                {
                    return storedClient;
                }

                var indexName = VulcanHelper.GetIndexName(Index, cultureInfo);
                var settings  = CommonConnectionSettings.Service.ConnectionSettings;
                settings.InferMappingFor<ContentMixin>(pd => pd.Ignore(p => p.MixinInstance));
                settings.DefaultIndex(indexName);

                var client = CreateVulcanClient(Index, settings, cultureInfo);

                // first let's check our version
                var nodesInfo = client.NodesInfo();

                if (nodesInfo?.Nodes?.Any() != true)
                {
                    throw new Exception("Could not get Nodes info to check Elasticsearch Version. Check that you are correctly connected to Elasticsearch?");
                }

                var version = nodesInfo.Nodes.First().Value.Version; // just use first node

                if (string.IsNullOrWhiteSpace(version))
                {
                    throw new Exception("Could not find a version on node to check Elasticsearch Version. Check that you are correctly connected to Elasticsearch?");
                }

                // Ordinal comparison: a version string is machine data, not linguistic text.
                if (version.StartsWith("1.", StringComparison.Ordinal))
                {
                    throw new Exception("Sorry, Vulcan only works with Elasticsearch version 2.x or higher. The Elasticsearch node you are currently connected to is version " + version);
                }

                client.RunCustomIndexTemplates(Index, Logger);

                // keep our base last with lowest possible Order
                client.PutIndexTemplate($"{Index}_analyzer_disabling", ad => ad
                    .Order(0)
                    .Template($"{Index}*") // match on all created indices for index name
                    .Mappings(mappings => mappings
                        .Map("_default_", map => map
                            .DynamicTemplates(dyn => dyn
                                .DynamicTemplate("analyzer_template", dt => dt
                                    .Match("*")                 // matches all fields
                                    .MatchMappingType("string") // that are a string
                                    .Mapping(dynmap => dynmap
                                        .String(s => s
                                            .NotAnalyzed()
                                            .IgnoreAbove(CreateIndexCustomizer.Service.IgnoreAbove) // needed for: document contains at least one immense term in field
                                            .IncludeInAll(false)
                                            .Fields(f => f
                                                .String(ana => ana
                                                    .Name(VulcanFieldConstants.AnalyzedModifier)
                                                    .IncludeInAll(false)
                                                    .Store(true))))))))));

                if (!client.IndexExists(indexName).Exists)
                {
                    var createResponse = client.CreateIndex(indexName, CreateIndexCustomizer.Service.CustomizeIndex);

                    if (!createResponse.IsValid)
                    {
                        Logger.Error($"Could not create index {indexName}: {createResponse.DebugInformation}");
                    }
                }

                // The analysis settings are applied between closing and reopening the index.
                client.Refresh(indexName);
                var closeResponse = client.CloseIndex(indexName);

                if (!closeResponse.IsValid)
                {
                    Logger.Error($"Could not close index {indexName}: {closeResponse.DebugInformation}");
                }

                InitializeAnalyzer(client);
                client.RunCustomizers(Logger); // allows for customizations

                var openResponse = client.OpenIndex(indexName);

                // Report a failed reopen the same way a failed close or create is reported.
                if (!openResponse.IsValid)
                {
                    Logger.Error($"Could not open index {indexName}: {openResponse.DebugInformation}");
                }

                // fixes empty results on first request
                client.ClusterHealth(x => x.WaitForActiveShards(CreateIndexCustomizer.Service.WaitForActiveShards));

                clients.Add(cultureInfo, client);

                return client;
            }
        }