public IndexStatus CreateIndex <T>() where T : BaseType, new()
{
    var indexName = ElasticClient.ConnectionSettings.DefaultIndex;

    // Nothing to do when the index is already present.
    if (ElasticClient.IndexExists(indexName).Exists)
    {
        return IndexStatus.AlreadyExists;
    }

    // Default analyzer: standard tokenization with lowercasing, ASCII folding
    // and word splitting; HTML tags are stripped before tokenization.
    var defaultAnalyzer = new CustomAnalyzer
    {
        Tokenizer = "standard",
        Filter = new List <string> { "lowercase", "asciifolding", "word_delimiter" },
        CharFilter = new List <string> { "html_strip" }
    };

    var response = ElasticClient.CreateIndex(indexName, config => config
        .Settings(s => s
            .NumberOfShards(1)
            .NumberOfReplicas(0)
            .Analysis(a => a.Analyzers(b => b.UserDefined("default", defaultAnalyzer))))
        .Mappings(m => m.Map <T>(d => d.AutoMap())));

    return response.Acknowledged ? IndexStatus.Created : IndexStatus.Failed;
}
bool createIndex(string indexName, ElasticClient client)
{
    // Start from a clean slate: drop any existing index with this name first.
    if (client.IndexExists(i => i.Index(indexName)).Exists)
    {
        var deleteResponse = client.DeleteIndex(i => i.Index(indexName));
        Logger.Current.Verbose("Deleted index." + deleteResponse.ConnectionStatus.ToString());
    }

    // Word-delimiter filter tuned for completion: keep the original token and a
    // fully catenated form, but do not split on case changes or numerics.
    var wordDelimiter = new WordDelimiterTokenFilter()
    {
        CatenateAll = true,
        GenerateWordParts = false,
        GenerateNumberParts = false,
        SplitOnCaseChange = false,
        SplitOnNumerics = false,
        PreserveOriginal = true
    };

    var customAnalyzer = new CustomAnalyzer()
    {
        Tokenizer = "whitespace",
        Filter = new List <String>() { "lowercase", "word_delimiter_filter" }
    };

    var createResult = client.CreateIndex(indexName, index => index
        .Analysis(a => a
            .TokenFilters(t => t.Add("word_delimiter_filter", wordDelimiter))
            .Analyzers(an => an.Add("custom", customAnalyzer)))
        .AddMapping <Tag>(tmd => MapTagCompletionFields(tmd)));

    return createResult.ConnectionStatus.Success;
}
bool createIndex(string indexName, ElasticClient client)
{
    // Recreate from scratch: remove the index when it already exists.
    if (client.IndexExists(indexName).Exists)
    {
        client.DeleteIndex(new DeleteIndexRequest(indexName));
    }

    // General-purpose analyzer that keeps URLs and e-mail addresses intact.
    var customAnalyzer = new CustomAnalyzer
    {
        Tokenizer = "uax_url_email",
        Filter = new List <string> { "standard", "lowercase", "stop" }
    };

    // Keyword tokenizer => the whole value becomes one lowercased token,
    // which is what the duplicate check needs.
    var duplicateCheckAnalyzer = new CustomAnalyzer
    {
        Tokenizer = "keyword",
        Filter = new List <string> { "standard", "lowercase" }
    };

    var createResult = client.CreateIndex(indexName, index => index
        .Analysis(a => a.Analyzers(an => an
            .Add("custom", customAnalyzer)
            .Add("duplicateCheckAnalyzer", duplicateCheckAnalyzer)))
        .NumberOfShards(5)
        .NumberOfReplicas(1)
        .AddMapping <SuppressedEmail>(pmd => MapSuppressedEmailCompletionFields <SuppressedEmail>(pmd)));

    Logger.Current.Verbose(createResult.ConnectionStatus.ToString());
    return createResult.ConnectionStatus.Success;
}
private CreateIndexDescriptor AddCategoryAnalyzers(CreateIndexDescriptor descriptor)
{
    // Edge-n-gram autocomplete (1..20 chars); one variant with ASCII folding
    // and one without for native-script content.
    var autoComplete = new CustomAnalyzer
    {
        Tokenizer = "standard",
        Filter = new List <string> { "lowercase", "asciifolding", "autocomplete_filter" }
    };
    var autoCompleteNative = new CustomAnalyzer
    {
        Tokenizer = "standard",
        Filter = new List <string> { "lowercase", "autocomplete_filter" }
    };

    descriptor.Analysis(x => x
        .TokenFilters(f => f
            .Add("autocomplete_filter", new EdgeNGramTokenFilter { MinGram = 1, MaxGram = 20 }))
        .Analyzers(a => a
            .Add("autocomplete", autoComplete)
            .Add("autocompletenative", autoCompleteNative)));

    return descriptor;
}
private void CreateElasticIndex(Database db, ElasticClient client)
{
    // Skip silently when the default index already exists.
    if (client.IndexExists(client.ConnectionSettings.DefaultIndex).Exists)
    {
        return;
    }

    // Czech-aware default analyzer: stop words + stemming + ASCII folding.
    var settings = new IndexSettings
    {
        NumberOfReplicas = 2,
        NumberOfShards = 25
    };
    settings.Analysis = new Analysis()
    {
        Analyzers = new Analyzers(),
        TokenFilters = new TokenFilters(),
    };
    settings.Analysis.Analyzers.Add("default", new CustomAnalyzer
    {
        Tokenizer = "standard",
        Filter = new[] { "lowercase", "czech_stop", "czech_stemmer", "asciifolding" }
    });
    settings.Analysis.TokenFilters.Add("czech_stop", new StopTokenFilter() { StopWords = new string[] { "_czech_" } });
    settings.Analysis.TokenFilters.Add("czech_stemmer", new StemmerTokenFilter() { Language = "czech" });

    var state = new IndexState { Settings = settings };

    var res = client
        .CreateIndex(client.ConnectionSettings.DefaultIndex, i => i
            .InitializeUsing(state)
            .Mappings(m =>
            {
                // Map the document type that matches the requested database.
                switch (db)
                {
                case Database.Dokument:
                    return m.Map <Dokument>(map => map.AutoMap().DateDetection(false));

                case Database.Osoba:
                    return m.Map <Osoba>(map => map.AutoMap().DateDetection(false));

                case Database.Rizeni:
                    return m.Map <Rizeni>(map => map.AutoMap().DateDetection(false));

                default:
                    throw new ArgumentOutOfRangeException($"Unknown DB type {db.ToString()}");
                }
            }));
}
/// <summary>
/// Registers a custom analyzer under the given id.
/// </summary>
/// <param name="id">Name the analyzer is registered under.</param>
/// <param name="tokenizer">Tokenizer name (e.g. "standard", "keyword").</param>
/// <param name="filter">Comma-separated token-filter names. Entries are trimmed,
/// so "lowercase, stop" no longer yields the invalid filter name " stop".</param>
public void AddAnalyser(string id, string tokenizer, string filter)
{
    CustomAnalyzer custom = new CustomAnalyzer();

    // Split, trim, and drop blanks so stray whitespace around commas cannot
    // produce filter names Elasticsearch will reject.
    var parts = filter.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
    var filters = new List <string>(parts.Length);
    foreach (var part in parts)
    {
        var name = part.Trim();
        if (name.Length > 0)
        {
            filters.Add(name);
        }
    }

    custom.Filter = filters;
    custom.Tokenizer = tokenizer;
    Analyzers.Add(id, custom);
}
private static IAnalyzer LowerCaseOnlyAnalyzer()
{
    // Whitespace tokenization with lowercasing only — no stemming or folding.
    return new CustomAnalyzer
    {
        Tokenizer = "whitespace",
        Filter = new List <string> { "lowercase", }
    };
}
//public static void CreateIndex()
//{
//    CreateIndex(defaultIndexName);
//}

// Creates the default index with a Czech-aware "default" analyzer and a
// minimal "data" mapping (DbCreated date + DbCreatedBy keyword).
public static void CreateIndex(ElasticClient client)
{
    var settings = new IndexSettings
    {
        NumberOfReplicas = 2,
        NumberOfShards = 25
    };

    // Default analyzer: standard tokenizer plus the StandardAnalyzer-style
    // filters, extended with Czech stop words/stemming and ASCII folding.
    var analyzer = new CustomAnalyzer
    {
        Tokenizer = "standard",
        Filter = new List <string> { "lowercase", "czech_stop", /*"czech_keywords",*/ "czech_stemmer", "asciifolding" }
    };

    settings.Analysis = new Nest.Analysis()
    {
        Analyzers = new Analyzers(),
        TokenFilters = new TokenFilters(),
    };
    settings.Analysis.Analyzers.Add("default", analyzer);
    settings.Analysis.TokenFilters.Add("czech_stop", new StopTokenFilter() { StopWords = new string[] { "_czech_" } });
    settings.Analysis.TokenFilters.Add("czech_stemmer", new StemmerTokenFilter() { Language = "czech" });

    var indexState = new IndexState { Settings = settings };

    Nest.ICreateIndexResponse res = client
        .CreateIndex(client.ConnectionSettings.DefaultIndex, i => i
            .InitializeUsing(indexState)
            .Mappings(m => m.Map("data", mm => mm
                .Properties(ps => ps
                    .Date(psn => psn.Name("DbCreated"))
                    .Keyword(psn => psn.Name("DbCreatedBy"))))));
}
public override CreateIndexDescriptor Configure(CreateIndexDescriptor idx)
{
    // keyword tokenizer + lowercase => the whole field value becomes a single
    // case-insensitive term.
    var keywordLowercaseAnalyzer = new CustomAnalyzer
    {
        Tokenizer = "keyword",
        Filter = new List <string> { "lowercase" }
    };

    return idx
        .NumberOfShards(Settings.Current.ElasticSearchNumberOfShards)
        .NumberOfReplicas(Settings.Current.ElasticSearchNumberOfReplicas)
        .Analysis(descriptor => descriptor.Analyzers(bases => bases.Add(KEYWORD_LOWERCASE_ANALYZER, keywordLowercaseAnalyzer)))
        .AddMapping <User>(BuildMapping);
}
private static IAnalyzer DefaultAnalyzer()
{
    // Standard tokenizer with Czech stop/stemmer filters and ASCII folding.
    return new CustomAnalyzer
    {
        Tokenizer = "standard",
        Filter = new List <string> { "lowercase", "czech_stop", "czech_stemmer", "asciifolding" }
    };
}
public CreateIndexDescriptor CreateIndex(CreateIndexDescriptor idx)
{
    // Whole-value, case-insensitive analyzer shared by the keyword-style fields.
    var keywordLowercaseAnalyzer = new CustomAnalyzer
    {
        Tokenizer = "keyword",
        Filter = new List <string> { "lowercase" }
    };

    return idx
        .Analysis(descriptor => descriptor.Analyzers(bases => bases.Add(KEYWORD_LOWERCASE, keywordLowercaseAnalyzer)))
        .AddMapping <Application>(GetApplicationMap)
        .AddMapping <Organization>(GetOrganizationMap)
        .AddMapping <Project>(GetProjectMap)
        .AddMapping <Models.Token>(GetTokenMap)
        .AddMapping <User>(GetUserMap)
        .AddMapping <WebHook>(GetWebHookMap);
}
/// <summary>
/// Creates a news index with a default analyzer (standard tokenizer; lowercase,
/// asciifolding and word_delimiter filters; HTML stripped via html_strip) and
/// auto-maps the <see cref="News"/> type.
/// </summary>
/// <param name="client">Connected Elasticsearch client.</param>
/// <param name="indexName">Target index name. Defaults to the previously
/// hard-coded "news-deneme", so existing callers are unaffected.</param>
public static void CreateIndex(ElasticClient client, string indexName = "news-deneme")
{
    var analyzer = new CustomAnalyzer();
    analyzer.Tokenizer = "standard";
    analyzer.Filter = new List <string> { "lowercase", "asciifolding", "word_delimiter" };
    analyzer.CharFilter = new List <string> { "html_strip" };

    client.CreateIndex(indexName, c => c
        .Settings(s => s
            .NumberOfShards(1)
            .NumberOfReplicas(1)
            .Analysis(a => a.Analyzers(b => b.UserDefined("default", analyzer))))
        .Mappings(m => m.Map <News>(d => d.AutoMap())));
}
public void LowerCaseFilterTests(Filters.LowerCaseFilterFactory sut, Interface.IResourceLoader resourceLoader)
{
    List<string> tokens = null;

    "Given a LowerCase filter".Given(() => { });

    "when a sample text 'Bob's I.O.U.' is analyzed".When(() =>
    {
        // Lowercasing needs no options.
        ((Interface.IFlexFilterFactory)sut).Initialize(new Dictionary<string, string>(), resourceLoader);
        var analyzer = new CustomAnalyzer(
            new Tokenizers.StandardTokenizerFactory(),
            new List<Interface.IFlexFilterFactory> { sut }.ToArray());
        tokens = SearchDsl.ParseTextUsingAnalyzer(analyzer, "test", "Bob's I.O.U.");
    });

    "it should produce 2 tokens".Observation(() => tokens.Count.Should().Be(2));
    "it should be 'bob's','i.o.u'".Observation(() => tokens.Should().Equal(new List<string> { "bob's", "i.o.u" }));
}
public void LengthFilterTests(Filters.LengthFilterFactory sut, Interface.IResourceLoader resourceLoader)
{
    List<string> tokens = null;

    "Given a Length Filter".Given(() => { });

    "when a sample text 'turn right at Albuquerque' is analyzed with min:3 and max:7".When(() =>
    {
        // Keep only tokens between 3 and 7 characters long.
        var options = new Dictionary<string, string> { { "min", "3" }, { "max", "7" } };
        ((Interface.IFlexFilterFactory)sut).Initialize(options, resourceLoader);
        var analyzer = new CustomAnalyzer(
            new Tokenizers.StandardTokenizerFactory(),
            new List<Interface.IFlexFilterFactory> { sut }.ToArray());
        tokens = SearchDsl.ParseTextUsingAnalyzer(analyzer, "test", "turn right at Albuquerque");
    });

    "it should produce 2 tokens".Observation(() => tokens.Count.Should().Be(2));
    "it should be 'turn','right'".Observation(() => tokens.Should().Equal(new List<string> { "turn", "right" }));
}
public void CanUseAllAnalysisComponentNames()
{
    Run(() =>
    {
        TokenizerName[] tokenizerNames = GetAllExtensibleEnumValues <TokenizerName>();
        TokenFilterName[] tokenFilterNames = GetAllExtensibleEnumValues <TokenFilterName>();
        CharFilterName[] charFilterNames = GetAllExtensibleEnumValues <CharFilterName>();

        // One analyzer exercising every token filter and char filter at once...
        var kitchenSinkAnalyzer = new CustomAnalyzer(
            SearchTestUtilities.GenerateName(),
            TokenizerName.Lowercase,
            tokenFilterNames,
            charFilterNames);

        // ...plus one analyzer per tokenizer so each tokenizer is exercised too.
        IEnumerable <Analyzer> perTokenizerAnalyzers =
            tokenizerNames.Select(tn => new CustomAnalyzer(SearchTestUtilities.GenerateName(), tn));

        Index index = CreateTestIndex();
        index.Analyzers = new[] { kitchenSinkAnalyzer }.Concat(perTokenizerAnalyzers).ToArray();

        TestAnalysisComponents(index);
    });
}
public override CreateIndexDescriptor ConfigureDescriptor(CreateIndexDescriptor idx)
{
    idx = base.ConfigureDescriptor(idx);
    idx.NumberOfShards(3);

    // Whole field value as a single lowercased term.
    var keywordLowercase = new CustomAnalyzer
    {
        Tokenizer = "keyword",
        Filter = new List <string> { "lowercase" }
    };
    idx.Analysis(descriptor => descriptor.Analyzers(bases => bases.Add("keyword_lowercase", keywordLowercase)));

    return idx;
}
// Refreshes the CustomerModel mapping on the customer index. Always returns true.
public bool CreateIndex()
{
    // To Create the index. It doesn't create duplicate.
    // TODO: if any attribute changes or gets added.
    var indexSettings = new IndexSettings(); // NOTE(review): constructed but never used — confirm whether it can be removed
    // Analyzer keeping e-mail addresses/URLs as single tokens (uax_url_email),
    // normalized by lowercase/uppercase/asciifolding/stop filters.
    var emailAnalyzer = new CustomAnalyzer { Filter = new List <string> { "lowercase", "uppercase", "asciifolding", "stop" }, Tokenizer = "uax_url_email" };
    var analyzers = new Analyzers();
    analyzers.Add("custom_email_analyzer", emailAnalyzer);
    // NOTE(review): this IndexState is never passed to a create call below —
    // presumably the analyzer wiring is unfinished.
    var indexstate = new IndexState();
    indexstate.Settings = new IndexSettings { Analysis = new Analysis { Analyzers = analyzers } };
    var availa = EsClient.Indices.Exists("customer");
    // Existence is checked but the delete is deliberately left commented out.
    if (availa.Exists)
    {
        //EsClient.Indices.Delete("customer");
    }
    //EsCoreOperation.CreateIndex(EsClient,"CustomerModel");
    // Re-applies the CustomerModel mapping to the index.
    EsCoreOperation.RefereshIndex <CustomerModel>(() => new CustomerModel().GetMapper());
    //.Properties(ps => ps.Text(t => t.Name(n => n.Email).Fields(ff => ff.Text(tt => tt.Name("emailanalyzer").Analyzer("custom_email_analyzer"))))))
    //UploadData();
    //ps.Completion(com => com.Name(p => p.Suggest))
    return(true);
}
public void KeepWordFilterShouldOnlyKeepKeepwords(
    Filters.KeepWordsFilterFactory sut, Interface.IResourceLoader resourceLoader)
{
    List<string> tokens = null;

    "Given a keepword filter".Given(() => { });

    "when a wordlist of keepwords is passed and a sample text 'hello world test' is analyzed".When(() =>
    {
        // The keep-word list is loaded from a resource file.
        var options = new Dictionary<string, string> { { "filename", "wordlist.txt" } };
        ((Interface.IFlexFilterFactory)sut).Initialize(options, resourceLoader);
        var analyzer = new CustomAnalyzer(
            new Tokenizers.StandardTokenizerFactory(),
            new List<Interface.IFlexFilterFactory> { sut }.ToArray());
        tokens = SearchDsl.ParseTextUsingAnalyzer(analyzer, "test", "hello world test");
    });

    "it should produce 2 tokens".Observation(() => tokens.Count.Should().Be(2));
    "it should remove all non keep words from the input".Then(() => tokens.Should().Equal(new List<string> { "hello", "world" }));
}
// Creates the default index for the given index type (ES7 / client.Indices API)
// with a Czech-aware "default" analyzer and an AutoMapped document mapping.
public static void CreateIndex(ElasticClient client, IndexType idxTyp)
{
    IndexSettings set = new IndexSettings();
    set.NumberOfReplicas = 2;
    // Data-source indices get fewer shards than the main document indices.
    if (idxTyp == IndexType.DataSource)
    {
        set.NumberOfShards = 4;
    }
    else
    {
        set.NumberOfShards = 8;
    }
    // Create a Custom Analyzer ...
    var an = new CustomAnalyzer();
    an.Tokenizer = "standard";
    // ... with Filters from the StandardAnalyzer
    var filter = new List <string>();
    filter.Add("lowercase");
    filter.Add("czech_stop");
    //an.Filter.Add("czech_keywords");
    filter.Add("czech_stemmer"); // use Hunspell (translated from Czech: "pouzit Hunspell")
    filter.Add("asciifolding");
    an.Filter = filter;
    // Add the Analyzer with a name
    set.Analysis = new Nest.Analysis()
    {
        Analyzers = new Analyzers(),
        TokenFilters = new TokenFilters(),
    };
    set.Analysis.Analyzers.Add("default", an);
    set.Analysis.TokenFilters.Add("czech_stop", new StopTokenFilter() { StopWords = new string[] { "_czech_" } });
    set.Analysis.TokenFilters.Add("czech_stemmer", new StemmerTokenFilter() { Language = "czech" }); // Hunspell
    IndexState idxSt = new IndexState();
    idxSt.Settings = set;
    CreateIndexResponse res = null;
    // Map the document type that belongs to the requested index kind. Dates are
    // detected explicitly via attributes, hence DateDetection(false).
    switch (idxTyp)
    {
    case IndexType.VerejneZakazky:
        res = client.Indices
              .Create(client.ConnectionSettings.DefaultIndex, i => i
                      .InitializeUsing(idxSt)
                      .Map <Lib.Data.VZ.VerejnaZakazka>(map => map.AutoMap().DateDetection(false))
                      );
        break;

    case IndexType.ProfilZadavatele:
        res = client.Indices
              .Create(client.ConnectionSettings.DefaultIndex, i => i
                      .InitializeUsing(idxSt)
                      .Map <Lib.Data.VZ.ProfilZadavatele>(map => map.AutoMap().DateDetection(false))
                      );
        break;

    case IndexType.Insolvence:
        res = client.Indices
              .Create(client.ConnectionSettings.DefaultIndex, i => i //todo: es7 check
                      .InitializeUsing(idxSt)
                      .Map <Lib.Data.Insolvence.Rizeni>(map => map.AutoMap().DateDetection(false))
                      );
        break;

    case IndexType.Dotace:
        res = client.Indices
              .Create(client.ConnectionSettings.DefaultIndex, i => i //todo: es7 check
                      .InitializeUsing(idxSt)
                      .Map <Data.Dotace.Dotace>(map => map.AutoMap().DateDetection(false))
                      );
        break;

    case IndexType.Smlouvy:
        res = client.Indices
              .Create(client.ConnectionSettings.DefaultIndex, i => i //todo: es7 check
                      .InitializeUsing(idxSt)
                      .Map <Lib.Data.Smlouva>(map => map.AutoMap().DateDetection(false))
                      );
        break;

    case IndexType.Firmy:
        // maxRecursion: 1 limits AutoMap on the self-referencing company model.
        res = client.Indices
              .Create(client.ConnectionSettings.DefaultIndex, i => i //todo: es7 check
                      .InitializeUsing(idxSt)
                      .Map <Data.Firma.Search.FirmaInElastic>(map => map.AutoMap(maxRecursion: 1))
                      );
        break;

    case IndexType.Logs:
        res = client.Indices
              .Create(client.ConnectionSettings.DefaultIndex, i => i //todo: es7 check
                      .InitializeUsing(idxSt)
                      .Map <Lib.Data.Logs.ProfilZadavateleDownload>(map => map.AutoMap(maxRecursion: 1))
                      );
        break;

    case IndexType.VerejneZakazkyNaProfiluRaw:
        // Explicit (non-AutoMap) mapping: two keyword ids and a date.
        res = client.Indices
              .Create(client.ConnectionSettings.DefaultIndex, i => i //todo: es7 check
                      .InitializeUsing(idxSt)
                      .Map <Lib.Data.External.ProfilZadavatelu.ZakazkaRaw>(map => map
                           .Properties(p => p
                                       .Keyword(k => k.Name(n => n.ZakazkaId))
                                       .Keyword(k => k.Name(n => n.Profil))
                                       .Date(k => k.Name(n => n.LastUpdate))
                                       )
                           )
                      );
        break;
    }
}
// Ensures the model index exists exactly once per process (double-checked
// locking), creating it with a case-insensitive keyword analyzer when missing.
protected void EnsureModelIndices(ElasticClient client)
{
    base.ExecuteMethod("EnsureModelIndices", delegate()
    {
        // Debug hook: consume the debug_reset flag at most once under the lock.
        if (debug_reset)
        {
            bool executeDebug = false;
            lock (debug_lock)
            {
                if (debug_reset)
                {
                    executeDebug = true;
                }
                debug_reset = false;
            }
            if (executeDebug)
            {
                //client.Map<Objective>(m => m
                //    .MapFromAttributes()
                //    .Type(DocumentTypes.OBJECTIVES)
                //    .Properties(props => props
                //        .String(s => s
                //            .Name(p => p.campaign_id)
                //            .Index(FieldIndexOption.NotAnalyzed))
                //        .String(s => s
                //            .Name(p => p.objective_id)
                //            .Index(FieldIndexOption.NotAnalyzed)
                //        ))
                //    );
            }
        }
        // Double-checked locking: only the first caller performs the create.
        if (!this.HasEnsuredModelIndices)
        {
            lock (ensure_lock)
            {
                if (!this.HasEnsuredModelIndices)
                {
                    if (!client.IndexExists(this.IndexName).Exists)
                    {
                        // keyword tokenizer + lowercase = case-insensitive exact matching.
                        CustomAnalyzer ignoreCaseAnalyzer = new CustomAnalyzer { Tokenizer = "keyword", Filter = new[] { "lowercase" } };
                        Analysis analysis = new Analysis();
                        analysis.Analyzers = new Analyzers();
                        analysis.Analyzers.Add("case_insensitive", ignoreCaseAnalyzer);
                        ICreateIndexResponse createResult = client.CreateIndex(this.IndexName, delegate(Nest.CreateIndexDescriptor descriptor)
                        {
                            descriptor.Settings(ss => ss
                                                .Analysis(a => analysis)
                                                .NumberOfReplicas(this.ReplicaCount)
                                                .NumberOfShards(this.ShardCount)
                                                .Setting("merge.policy.merge_factor", "10")
                                                .Setting("search.slowlog.threshold.fetch.warn", "1s")
                                                // max_result_window raised to int.MaxValue so deep paging never fails.
                                                .Setting("max_result_window", "2147483647")
                                                );
                            // Apply the model-specific mappings on top of the settings.
                            this.MapIndexModels(descriptor);
                            return(descriptor);
                        });
                        if (!createResult.Acknowledged)
                        {
                            throw new Exception("Error creating index, mapping is no longer valid");
                        }
                    }
                    HasEnsuredModelIndices = true;
                }
            }
        }
    });
}
// Creates the default index for the given index type (pre-ES7 Mappings API)
// with a Czech-aware "default" analyzer and AutoMapped document mappings.
public static void CreateIndex(ElasticClient client, IndexType idxTyp)
{
    IndexSettings set = new IndexSettings();
    set.NumberOfReplicas = 2;
    // Data-source indices get fewer shards than the main document indices.
    if (idxTyp == IndexType.DataSource)
    {
        set.NumberOfShards = 5;
    }
    else
    {
        set.NumberOfShards = 10;
    }
    // Create a Custom Analyzer ...
    var an = new CustomAnalyzer();
    an.Tokenizer = "standard";
    // ... with Filters from the StandardAnalyzer
    var filter = new List <string>();
    filter.Add("lowercase");
    filter.Add("czech_stop");
    //an.Filter.Add("czech_keywords");
    filter.Add("czech_stemmer");
    filter.Add("asciifolding");
    an.Filter = filter;
    // Add the Analyzer with a name
    set.Analysis = new Nest.Analysis()
    {
        Analyzers = new Analyzers(),
        TokenFilters = new TokenFilters(),
    };
    set.Analysis.Analyzers.Add("default", an);
    set.Analysis.TokenFilters.Add("czech_stop", new StopTokenFilter() { StopWords = new string[] { "_czech_" } });
    set.Analysis.TokenFilters.Add("czech_stemmer", new StemmerTokenFilter() { Language = "czech" });
    IndexState idxSt = new IndexState();
    idxSt.Settings = set;
    Nest.ICreateIndexResponse res = null;
    // Map the document type(s) that belong to the requested index kind.
    // DateDetection(false): dates are mapped explicitly via attributes.
    switch (idxTyp)
    {
    //case IndexType.VerejneZakazkyRaw2006:
    //    res = client
    //        .CreateIndex(client.ConnectionSettings.DefaultIndex, i => i
    //            .InitializeUsing(idxSt)
    //            .Mappings(m => m
    //                .Map<Lib.Data.VZ.VerejnaZakazka.ImportXMLpre2016.VerejnaZakazka2006>(map => map.AutoMap().DateDetection(false))
    //                .Map<Lib.Data.VZ.VerejnaZakazka.ImportXMLpre2016.CastiVerejneZakazky2006>(map => map.AutoMap().DateDetection(false))
    //            )
    //        );
    //    break;
    case IndexType.VerejneZakazky:
        res = client
              .CreateIndex(client.ConnectionSettings.DefaultIndex, i => i
                           .InitializeUsing(idxSt)
                           .Mappings(m => m
                                     .Map <Lib.Data.VZ.VerejnaZakazka>(map => map.AutoMap().DateDetection(false))
                                     .Map <Lib.Data.VZ.ProfilZadavatele>(map => map.AutoMap().DateDetection(false))
                                     )
                           );
        break;

    case IndexType.Smlouvy:
        res = client
              .CreateIndex(client.ConnectionSettings.DefaultIndex, i => i
                           .InitializeUsing(idxSt)
                           .Mappings(m => m
                                     //.Map("_default_", mm => mm.TtlField(ttl => ttl.Enable(false)))
                                     .Map <Lib.Data.Smlouva>(map => map
                                                             //.TtlField(ttl => ttl.Enable(false))
                                                             .AutoMap()
                                                             .DateDetection(false)
                                                             )
                                     //.Map<Person>(map => map.AutoMap(maxRecursion: 1))
                                     //.Map<VerejnaZakazka>(map => map.AutoMap(maxRecursion: 1).DateDetection(false))
                                     )
                           );
        break;

    case IndexType.Firmy:
        // maxRecursion: 1 limits AutoMap on the self-referencing company model.
        res = client
              .CreateIndex(client.ConnectionSettings.DefaultIndex, i => i
                           .InitializeUsing(idxSt)
                           .Mappings(m => m
                                     .Map <Data.Firma.Search.FirmaInElastic>(map => map.AutoMap(maxRecursion: 1))
                                     )
                           );
        break;

    case IndexType.Ucty:
        res = client
              .CreateIndex(client.ConnectionSettings.DefaultIndex, i => i
                           .InitializeUsing(idxSt)
                           .Mappings(m => m
                                     .Map <Lib.Data.TransparentniUcty.BankovniUcet>(map => map.AutoMap(maxRecursion: 1))
                                     .Map <Lib.Data.TransparentniUcty.BankovniPolozka>(map => map.AutoMap(maxRecursion: 1))
                                     )
                           );
        break;

    case IndexType.Logs:
        res = client
              .CreateIndex(client.ConnectionSettings.DefaultIndex, i => i
                           .InitializeUsing(idxSt)
                           .Mappings(m => m
                                     .Map <Lib.Data.Logs.ProfilZadavateleDownload>(map => map.AutoMap(maxRecursion: 1))
                                     )
                           );
        break;

    case IndexType.VerejneZakazkyNaProfiluRaw:
        // Explicit (non-AutoMap) mapping: two keyword ids and a date.
        res = client
              .CreateIndex(client.ConnectionSettings.DefaultIndex, i => i
                           .InitializeUsing(idxSt)
                           .Mappings(m => m
                                     .Map <Lib.Data.External.ProfilZadavatelu.ZakazkaRaw>(map => map
                                                                                          .Properties(p => p
                                                                                                      .Keyword(k => k.Name(n => n.ZakazkaId))
                                                                                                      .Keyword(k => k.Name(n => n.Profil))
                                                                                                      .Date(k => k.Name(n => n.LastUpdate))
                                                                                                      )
                                                                                          )
                                     )
                           );
        break;
    }
    //Console.WriteLine(res.IsValid);
}
public void ReverseStringTests(Filters.ReverseStringFilterFactory sut, Interface.IResourceLoader resourceLoader)
{
    List<string> tokens = null;

    "Given a ReverseString Filter".Given(() => { });

    "when a sample text 'hello how are you' is analyzed".When(() =>
    {
        // String reversal needs no options.
        ((Interface.IFlexFilterFactory)sut).Initialize(new Dictionary<string, string>(), resourceLoader);
        var analyzer = new CustomAnalyzer(
            new Tokenizers.StandardTokenizerFactory(),
            new List<Interface.IFlexFilterFactory> { sut }.ToArray());
        tokens = SearchDsl.ParseTextUsingAnalyzer(analyzer, "test", "hello how are you");
    });

    "it should produce 4 tokens".Observation(() => tokens.Count.Should().Be(4));
    "it should be 'olleh', 'woh', 'era', 'uoy' ".Observation(() => tokens.Should().Equal(new List<string> { "olleh", "woh", "era", "uoy" }));
}
/// <summary>
/// Create the default index if it doesnt already exist
/// </summary>
/// <returns>The existing or new index</returns>
private async Task CreateIndexIfNotExistsAsync(ISearchServiceClient serviceClient, string indexName)
{
    if (Disabled)
    {
        throw new Exception($"{nameof(AzureEmployerSearchRepository)} is disabled");
    }
    // Nothing to do when the index already exists.
    if (await serviceClient.Indexes.ExistsAsync(indexName))
    {
        return;
    }
    // Fields are generated from the model's attributes.
    var index = new Index { Name = indexName, Fields = FieldBuilder.BuildForType <EmployerSearchModel>() };
    // Suggester over current name, previous name and abbreviations.
    index.Suggesters = new List <Suggester>
    {
        new Suggester(
            suggestorName,
            nameof(EmployerSearchModel.Name),
            nameof(EmployerSearchModel.PreviousName),
            nameof(EmployerSearchModel.Abbreviations))
    };
    // Char filters: strip ampersands and dots, remove company-suffix noise, strip whitespace.
    var charFilterRemoveAmpersand = new MappingCharFilter("gpg_remove_Ampersand", new List <string> { "&=>" });
    var charFilterRemoveDot = new MappingCharFilter("gpg_remove_Dot", new List <string> { ".=>" });
    var charFilterRemoveLtdInfoCaseInsensitive = new PatternReplaceCharFilter(
        "gpg_patternReplaceCharFilter_Ltd",
        "(?i)(limited|ltd|llp| uk|\\(uk\\)|-uk)[\\.]*",
        string.Empty); // case insensitive 'limited' 'ltd', 'llp', ' uk', '(uk)', '-uk' followed by zero or more dots (to cater for ltd. and some mis-punctuated limited..)
    var charFilterRemoveWhitespace = new PatternReplaceCharFilter(
        "gpg_patternReplaceCharFilter_removeWhitespace",
        "\\s",
        string.Empty);
    index.CharFilters = new List <CharFilter>
    {
        charFilterRemoveAmpersand,
        charFilterRemoveDot,
        charFilterRemoveLtdInfoCaseInsensitive,
        charFilterRemoveWhitespace
    };
    // Edge n-grams (3..300 chars) from the front (prefix search) and back (suffix search).
    var edgeNGramTokenFilterFront = new EdgeNGramTokenFilterV2("gpg_edgeNGram_front", 3, 300, EdgeNGramTokenFilterSide.Front);
    var edgeNGramTokenFilterBack = new EdgeNGramTokenFilterV2("gpg_edgeNGram_back", 3, 300, EdgeNGramTokenFilterSide.Back);
    index.TokenFilters = new List <TokenFilter> { edgeNGramTokenFilterFront, edgeNGramTokenFilterBack };
    var standardTokenizer = new StandardTokenizerV2("gpg_standard_v2_tokenizer");
    var keywordTokenizer = new KeywordTokenizerV2("gpg_keyword_v2_tokenizer");
    index.Tokenizers = new List <Tokenizer> { standardTokenizer, keywordTokenizer };
    // Suffix-search analyzer: standard tokens, lowercased, back edge-n-grams.
    var suffixAnalyzer = new CustomAnalyzer(
        "gpg_suffix",
        standardTokenizer.Name,
        new List <TokenFilterName> { TokenFilterName.Lowercase, edgeNGramTokenFilterBack.Name },
        new List <CharFilterName> { charFilterRemoveAmpersand.Name, charFilterRemoveLtdInfoCaseInsensitive.Name });
    // Whole-value prefix analyzer: keyword token, lowercased, front edge-n-grams,
    // with dots/ampersands/suffixes/whitespace stripped first.
    var completeTokenAnalyzer = new CustomAnalyzer(
        "gpg_prefix_completeToken",
        keywordTokenizer.Name,
        new List <TokenFilterName> { TokenFilterName.Lowercase, edgeNGramTokenFilterFront.Name },
        new List <CharFilterName>
        {
            charFilterRemoveDot.Name,
            charFilterRemoveAmpersand.Name,
            charFilterRemoveLtdInfoCaseInsensitive.Name,
            charFilterRemoveWhitespace.Name
        });
    index.Analyzers = new List <Analyzer> { suffixAnalyzer, completeTokenAnalyzer };
    // Wire the custom analyzers and the synonym map onto the relevant fields.
    index.Fields.First(f => f.Name == nameof(EmployerSearchModel.PartialNameForSuffixSearches)).Analyzer = suffixAnalyzer.Name;
    index.Fields.First(f => f.Name == nameof(EmployerSearchModel.PartialNameForSuffixSearches)).SynonymMaps = new[] { synonymMapName };
    index.Fields.First(f => f.Name == nameof(EmployerSearchModel.PartialNameForCompleteTokenSearches))
        .Analyzer = completeTokenAnalyzer.Name;
    index.Fields.First(f => f.Name == nameof(EmployerSearchModel.PartialNameForCompleteTokenSearches))
        .SynonymMaps = new[] { synonymMapName };
    index.Fields.First(f => f.Name == nameof(EmployerSearchModel.Name)).SynonymMaps = new[] { synonymMapName };
    index.Fields.First(f => f.Name == nameof(EmployerSearchModel.PreviousName)).SynonymMaps = new[] { synonymMapName };
    //Add the synonyms if they dont already exist
    if (!await serviceClient.SynonymMaps.ExistsAsync(synonymMapName))
    {
        serviceClient.SynonymMaps.CreateOrUpdate(
            new SynonymMap
        {
            Name = synonymMapName,
            //Format = "solr", cannot set after upgrade from v5.03 to version 9.0.0
            Synonyms = "coop, co-operative"
        });
    }
    await serviceClient.Indexes.CreateAsync(index);
}
public void SynonymFilterShouldGenerateSynonym(
    Filters.SynonymFilter sut, Interface.IResourceLoader resourceLoader)
{
    List<string> tokens = null;

    "Given a Synonym filter".Given(() => { });

    "when a wordlist of Synonym is passed and a sample text 'easy' is analyzed".When(() =>
    {
        // The synonym list is loaded from a resource file.
        var options = new Dictionary<string, string> { { "filename", "synonymlist.txt" } };
        ((Interface.IFlexFilterFactory)sut).Initialize(options, resourceLoader);
        var analyzer = new CustomAnalyzer(
            new Tokenizers.StandardTokenizerFactory(),
            new List<Interface.IFlexFilterFactory> { sut }.ToArray());
        tokens = SearchDsl.ParseTextUsingAnalyzer(analyzer, "test", "easy");
    });

    "it should produce 3 tokens".Observation(() => tokens.Count.Should().Be(3));
    "it should generate new tokens for the synonmyns".Then(() => tokens.Should().Equal(new List<string> { "easy", "simple", "clear" }));
}
// Reads location records from a CSV file and bulk-indexes them into
// Elasticsearch, either appending to an existing index or recreating it.
public void Run()
{
    _logger.Info("Opening file {0}", _filename);
    using (TextReader reader = File.OpenText(_filename))
    {
        var csv = new CsvReader(reader);
        csv.Configuration.RegisterClassMap <LocationMapper>();
        _logger.Debug("Reading...");
        // Materialize all rows up front so the total can be logged and batching
        // can page by index below.
        var allCsvRows = csv.GetRecords <LocationData>().ToList();
        _logger.Info("Read {0} records from file", allCsvRows.Count);
        _logger.Debug("Connecting to {0}", _endpoint);
        var settings = new ConnectionSettings(_endpoint);
        settings.SetDefaultIndex(_indexName);
        var client = new ElasticClient(settings);
        _logger.Debug("Checking if index already exists");
        // Append requires an existing index; a fresh run replaces any existing one.
        if (client.IndexExists(_indexName).Exists)
        {
            if (!_append)
            {
                _logger.Debug("Deleting existing index");
                client.DeleteIndex(_indexName);
            }
        }
        else
        {
            if (_append)
            {
                throw new Exception("Cannot append to existing data because there is no existing index");
            }
        }
        if (_append)
        {
            _logger.Debug("Appending to existing index");
        }
        else
        {
            _logger.Debug("Creating new index");
            var indexSettings = new IndexSettings();
            // keyword tokenizer + lowercase = whole value as one case-insensitive term.
            var keywordLowercaseCustomAnalyzer = new CustomAnalyzer { Tokenizer = "keyword", Filter = new[] { "lowercase" } };
            // NOTE(review): assumes indexSettings.Analysis/.Analyzers are
            // pre-initialized by this NEST version — confirm, else NRE here.
            indexSettings.Analysis.Analyzers.Add("keywordlowercase", keywordLowercaseCustomAnalyzer);
            client.CreateIndex(i => i.Index(_indexName).InitializeUsing(indexSettings));
            client.Map <LocationData>(p => p.Index(_indexName).MapFromAttributes());
        }
        _logger.Debug("Indexing \"{0}\" in batches of {1}...", _indexName, _batchSize);
        var loop = 0;
        // Page through the rows in _batchSize chunks until exhausted.
        while (true)
        {
            var batch = allCsvRows.Skip(loop * _batchSize).Take(_batchSize).ToList();
            if (!batch.Any())
            {
                break;
            }
            var result = client.IndexMany(batch, _indexName);
            _logger.Debug("Indexed {0} records in {1}ms", result.Items.Count(), result.Took);
            loop++;
        }
        _logger.Info("Indexed {0} records into \"{1}\" at {2}", allCsvRows.Count, _indexName, _endpoint);
    }
}
// Verifies the pattern-replace filter rewrites every "cat" occurrence to "dog",
// including occurrences inside larger tokens.
public void PatternReplaceTests(Filters.PatternReplaceFilterFactory sut, Interface.IResourceLoader resourceLoader)
{
    List<string> result = null;
    "Given a PatternReplace Filter".Given(() => { });
    "when a sample text 'cat concatenate catycat' is analyzed with pattern:cat and replacementtext:dog".When(
        () =>
    {
        ((Interface.IFlexFilterFactory)sut).Initialize(
            new Dictionary<string, string> { { "pattern", "cat" }, { "replacementtext", "dog" } },
            resourceLoader);
        var filters = new List<Interface.IFlexFilterFactory> { sut };
        var analyzer = new CustomAnalyzer(new Tokenizers.StandardTokenizerFactory(), filters.ToArray());
        result = SearchDsl.ParseTextUsingAnalyzer(analyzer, "test", "cat concatenate catycat");
    });
    "it should produce 3 tokens".Observation(() => result.Count.Should().Be(3));
    // Fixed copy-pasted description: the assertion expects the dog-substituted
    // tokens, not 'turn','right' (that label belongs to the length-filter test).
    "it should be 'dog','condogenate','dogydog'".Observation(
        () => result.Should().Equal(new List<string> { "dog", "condogenate", "dogydog" }));
}
public void GenericTokenizerTests(
    string tokenizerName,
    Interface.IFlexTokenizerFactory tokenizerFactory,
    string parseString,
    List<string> expected)
{
    List<string> result = null;

    string.Format("Given a {0}", tokenizerName).Given(() => { });

    string.Format("when a sample text {0} is analyzed", parseString).When(() =>
    {
        // Creating a dummy filter which won't do anything so that we can test
        // the effect of the tokenizer in a stand-alone manner.
        Interface.IFlexFilterFactory noopFilter = new Filters.PatternReplaceFilterFactory();
        noopFilter.Initialize(
            new Dictionary<string, string> { { "pattern", "1" }, { "replacementtext", "" } },
            new Factories.ResourceLoader());
        var analyzer = new CustomAnalyzer(
            tokenizerFactory,
            new List<Interface.IFlexFilterFactory> { noopFilter }.ToArray());
        result = SearchDsl.ParseTextUsingAnalyzer(analyzer, "test", parseString);
    });

    string.Format("it should produce {0} tokens", expected.Count)
        .Observation(() => result.Count.Should().Be(expected.Count));
    string.Format("it should be '{0}'", string.Join("',", expected))
        .Observation(() => result.Should().Equal(expected));
}