/// <summary>
/// Builds the index state holding the analysis settings for English.
/// </summary>
/// <remarks>
/// Registers an n-gram tokenizer (3 to 5 characters), an English stop-word filter,
/// an English stemmer and the editor synonym filter, then wires them into the two
/// custom analyzers registered as <c>Names.Analyzers.Ngram</c> and
/// <c>Names.Analyzers.Language</c>. Only the language analyzer applies synonyms.
/// </remarks>
/// <returns>A new <see cref="IndexState"/> whose <c>Settings</c> contain the analysis configuration.</returns>
private static IndexState GetEnglishState()
        {
            // Each name is declared once so that the definition of a tokenizer/filter
            // and the analyzers that reference it cannot drift apart.
            const string NgramTokenizerName = "epi_ngram_tokenizer";
            const string StopwordsFilterName = "epi_english_stopwords";
            const string StemmerFilterName = "epi_english_stemmer";

            // The editor synonym filter is referenced through the shared constant both
            // where it is defined and where the analyzer uses it, instead of repeating
            // the literal name in the analyzer's filter list.
            var editorSynonymsFilterName = Names.TokenFilters.EditorSynonyms;

            var analysis = new Analysis
            {
                Tokenizers = new Tokenizers
                {
                    {
                        NgramTokenizerName, new NGramTokenizer
                        {
                            MaxGram = 5,
                            MinGram = 3
                        }
                    }
                },
                TokenFilters = new TokenFilters
                {
                    {
                        StopwordsFilterName, new StopTokenFilter
                        {
                            StopWords = "_english_"
                        }
                    },
                    {
                        StemmerFilterName, new StemmerTokenFilter
                        {
                            Language = "english"
                        }
                    },
                    {
                        editorSynonymsFilterName, new SynonymTokenFilter
                        {
                            IgnoreCase = true,
                            Synonyms   = SynonymHelper.ResolveSynonymsForLanguage("en"),
                            Tokenizer  = "keyword",
                            Expand     = true
                        }
                    }
                },
                Analyzers = new Analyzers
                {
                    {
                        Names.Analyzers.Ngram, new CustomAnalyzer
                        {
                            CharFilter = new[] { "html_strip" },
                            Tokenizer  = NgramTokenizerName,
                            Filter     = new[]
                            {
                                "lowercase", StopwordsFilterName, StemmerFilterName
                            }
                        }
                    },
                    {
                        Names.Analyzers.Language, new CustomAnalyzer
                        {
                            CharFilter = new[] { "html_strip" },
                            Tokenizer  = "standard",
                            Filter     = new[]
                            {
                                "lowercase", editorSynonymsFilterName, StopwordsFilterName, StemmerFilterName
                            }
                        }
                    }
                }
            };

            return new IndexState
            {
                Settings = new IndexSettings
                {
                    Analysis = analysis
                }
            };
        }
Beispiel #2
0
        /// <summary>
        /// Re-resolves the editor synonyms for every enabled language branch and writes
        /// them to the editor synonym token filter of that language's content index.
        /// </summary>
        /// <remarks>
        /// Languages whose content index does not exist are skipped. For the others the
        /// index is closed, its settings are updated and it is opened again. The index is
        /// reopened even when the update throws, so a failed update does not leave it closed.
        /// </remarks>
        public void UpdateEditorSynonyms()
        {
            var enabledBranches = ServiceLocator.Current.GetInstance<ILanguageBranchRepository>().ListEnabled();

            foreach (var languageBranch in enabledBranches)
            {
                var cultureName = languageBranch.Culture.Name;
                var indexName = ElasticEpiClient.Current.ContentIndexName(cultureName);
                var client = ElasticEpiClient.Current.Get();

                if (!client.IndexExists(indexName).Exists)
                {
                    // Skip if we don't have any indexes with this name
                    continue;
                }

                // Elasticsearch only accepts changes to analysis settings on a closed index.
                client.CloseIndex(indexName);

                try
                {
                    client.UpdateIndexSettings(null, i => i
                        .Index(indexName)
                        .IndexSettings(idx => idx
                            .Analysis(a => a
                                .TokenFilters(tf => tf
                                    .Synonym(IndexSettingsFactory.Names.TokenFilters.EditorSynonyms, s => s
                                        .Synonyms(SynonymHelper.ResolveSynonymsForLanguage(cultureName)))))));
                }
                finally
                {
                    // Always reopen: a closed index cannot serve searches or accept documents.
                    client.OpenIndex(indexName);
                }
            }
        }
        /// <summary>
        /// Builds the index state holding the analysis settings for a Norwegian variant.
        /// </summary>
        /// <remarks>
        /// Registers an n-gram tokenizer (3 to 5 characters), a stop-word filter read from
        /// <c>norwegian_stop.txt</c>, a variant-specific stemmer, a synonym filter read from
        /// <c>nynorsk.txt</c> and the editor synonym filter, then wires them into the two
        /// custom analyzers registered as <c>Names.Analyzers.Ngram</c> and
        /// <c>Names.Analyzers.Language</c>. Only the language analyzer applies synonyms.
        /// </remarks>
        /// <param name="variant">
        /// The Norwegian variant. <c>Language.Bokmal</c> selects the <c>light_norwegian</c>
        /// stemmer and the Bokmal culture's editor synonyms; any other value selects
        /// <c>light_nynorsk</c> and the Nynorsk culture's editor synonyms.
        /// </param>
        /// <returns>A new <see cref="IndexState"/> whose <c>Settings</c> contain the analysis configuration.</returns>
        private static IndexState GetNorwegianState(Language variant)
        {
            // Each name is declared once so that the definition of a tokenizer/filter
            // and the analyzers that reference it cannot drift apart.
            const string NgramTokenizerName = "epi_ngram_tokenizer";
            const string StopwordsFilterName = "epi_norwegian_stopwords";
            const string StemmerFilterName = "epi_norwegian_stemmer";
            const string NorwegianSynonymsFilterName = "epi_norwegian_synonyms";

            // Use the shared constant rather than a repeated literal so this filter keeps
            // the same name that GetEnglishState registers and UpdateEditorSynonyms updates.
            var editorSynonymsFilterName = Names.TokenFilters.EditorSynonyms;

            var norwegianCultures = ElasticEpiLanguageHelper.GetNorwegianCultures();

            var isBokmal = variant == Language.Bokmal;
            var stemmerLanguage = isBokmal ? "light_norwegian" : "light_nynorsk";
            var synonymCultureName = isBokmal ? norwegianCultures.Bokmal.Name : norwegianCultures.Nynorsk.Name;

            var analysis = new Analysis
            {
                Tokenizers = new Tokenizers
                {
                    {
                        NgramTokenizerName, new NGramTokenizer
                        {
                            MaxGram = 5,
                            MinGram = 3
                        }
                    }
                },
                TokenFilters = new TokenFilters
                {
                    {
                        StopwordsFilterName, new StopTokenFilter
                        {
                            StopWordsPath = "norwegian_stop.txt"
                        }
                    },
                    {
                        StemmerFilterName, new StemmerTokenFilter
                        {
                            Language = stemmerLanguage
                        }
                    },
                    {
                        NorwegianSynonymsFilterName, new SynonymTokenFilter
                        {
                            SynonymsPath = "nynorsk.txt"
                        }
                    },
                    {
                        editorSynonymsFilterName, new SynonymTokenFilter
                        {
                            IgnoreCase = true,
                            Synonyms   = SynonymHelper.ResolveSynonymsForLanguage(synonymCultureName),
                            Tokenizer  = "keyword",
                            Expand     = true
                        }
                    }
                },
                Analyzers = new Analyzers
                {
                    {
                        Names.Analyzers.Ngram, new CustomAnalyzer
                        {
                            CharFilter = new[] { "html_strip" },
                            Tokenizer  = NgramTokenizerName,
                            Filter     = new[]
                            {
                                "lowercase", StopwordsFilterName, StemmerFilterName
                            }
                        }
                    },
                    {
                        Names.Analyzers.Language, new CustomAnalyzer
                        {
                            CharFilter = new[] { "html_strip" },
                            Tokenizer  = "standard",
                            Filter     = new[]
                            {
                                "lowercase", NorwegianSynonymsFilterName, editorSynonymsFilterName, StopwordsFilterName, StemmerFilterName
                            }
                        }
                    }
                }
            };

            return new IndexState
            {
                Settings = new IndexSettings
                {
                    Analysis = analysis
                }
            };
        }