示例#1
0
        /// <summary>
        /// Creates the client's default index with a user-defined "default" analyzer and an
        /// auto-mapped mapping for <typeparamref name="T"/>, unless the index already exists.
        /// </summary>
        /// <typeparam name="T">Document type whose mapping is generated with <c>AutoMap</c>.</typeparam>
        /// <returns>
        /// <c>AlreadyExists</c> when the index is present; otherwise <c>Created</c> or <c>Failed</c>
        /// depending on whether the create call was acknowledged.
        /// </returns>
        public IndexStatus CreateIndex <T>() where T : BaseType, new()
        {
            var indexName = ElasticClient.ConnectionSettings.DefaultIndex;

            if (ElasticClient.IndexExists(indexName).Exists)
            {
                return IndexStatus.AlreadyExists;
            }

            var analyzer = new CustomAnalyzer
            {
                Tokenizer = "standard",
                CharFilter = new List<string> { "html_strip" },
                Filter = new List<string> { "lowercase", "asciifolding", "word_delimiter" }
            };

            var response = ElasticClient.CreateIndex(
                indexName,
                config => config
                    .Settings(settings => settings
                        .NumberOfShards(1)
                        .NumberOfReplicas(0)
                        .Analysis(analysis => analysis
                            .Analyzers(analyzers => analyzers.UserDefined("default", analyzer))))
                    .Mappings(mappings => mappings
                        .Map<T>(mapping => mapping.AutoMap())));

            if (response.Acknowledged)
            {
                return IndexStatus.Created;
            }

            return IndexStatus.Failed;
        }
示例#2
0
        /// <summary>
        /// Recreates the index <paramref name="indexName"/>: deletes it when it already exists, then
        /// creates it with a "custom" analyzer (whitespace tokenizer, lowercase plus a word-delimiter
        /// token filter) and the <c>Tag</c> mapping produced by <c>MapTagCompletionFields</c>.
        /// </summary>
        /// <param name="indexName">Name of the index to drop and recreate.</param>
        /// <param name="client">Client used for the exists, delete and create calls.</param>
        /// <returns>The <c>ConnectionStatus.Success</c> flag of the create call.</returns>
        bool createIndex(string indexName, ElasticClient client)
        {
            var existing = client.IndexExists(i => i.Index(indexName));
            if (existing.Exists)
            {
                var deleted = client.DeleteIndex(i => i.Index(indexName));
                var deleteStatus = deleted.ConnectionStatus.ToString();
                Logger.Current.Verbose("Deleted index." + deleteStatus);
            }

            // All splitting/part-generation options are off; tokens are catenated and the original is preserved.
            var wordDelimiter = new WordDelimiterTokenFilter
            {
                PreserveOriginal = true,
                CatenateAll = true,
                GenerateWordParts = false,
                GenerateNumberParts = false,
                SplitOnCaseChange = false,
                SplitOnNumerics = false
            };

            var analyzer = new CustomAnalyzer
            {
                Tokenizer = "whitespace",
                Filter = new List<string> { "lowercase", "word_delimiter_filter" }
            };

            var created = client.CreateIndex(indexName, index => index
                .Analysis(analysis => analysis
                    .TokenFilters(filters => filters.Add("word_delimiter_filter", wordDelimiter))
                    .Analyzers(analyzers => analyzers.Add("custom", analyzer)))
                .AddMapping<Tag>(tmd => MapTagCompletionFields(tmd)));

            return created.ConnectionStatus.Success;
        }
        /// <summary>
        /// Recreates the index <paramref name="indexName"/> for <c>SuppressedEmail</c> documents:
        /// deletes an existing index, then creates a new one (5 shards, 1 replica) with the
        /// "custom" and "duplicateCheckAnalyzer" analyzers and logs the connection status.
        /// </summary>
        /// <param name="indexName">Name of the index to drop and recreate.</param>
        /// <param name="client">Client used for the exists, delete and create calls.</param>
        /// <returns>The <c>ConnectionStatus.Success</c> flag of the create call.</returns>
        bool createIndex(string indexName, ElasticClient client)
        {
            var alreadyExists = client.IndexExists(indexName).Exists;
            if (alreadyExists)
            {
                client.DeleteIndex(new DeleteIndexRequest(indexName));
            }

            // Tokenizes with uax_url_email, then lowercases and removes stop words.
            var emailAnalyzer = new CustomAnalyzer
            {
                Tokenizer = "uax_url_email",
                Filter = new List<string> { "standard", "lowercase", "stop" }
            };

            // Uses the keyword tokenizer, so the whole value is handled as a single lowercased token.
            var wholeValueAnalyzer = new CustomAnalyzer
            {
                Tokenizer = "keyword",
                Filter = new List<string> { "standard", "lowercase" }
            };

            var response = client.CreateIndex(indexName, index => index
                .Analysis(analysis => analysis
                    .Analyzers(analyzers => analyzers
                        .Add("custom", emailAnalyzer)
                        .Add("duplicateCheckAnalyzer", wholeValueAnalyzer)))
                .NumberOfShards(5)
                .NumberOfReplicas(1)
                .AddMapping<SuppressedEmail>(pmd => MapSuppressedEmailCompletionFields<SuppressedEmail>(pmd)));

            var status = response.ConnectionStatus;
            Logger.Current.Verbose(status.ToString());

            return status.Success;
        }
        /// <summary>
        /// Registers the "autocomplete_filter" edge n-gram token filter (1..20 characters) and the
        /// "autocomplete" / "autocompletenative" analyzers on <paramref name="descriptor"/>.
        /// </summary>
        /// <param name="descriptor">Index descriptor to extend.</param>
        /// <returns>The same <paramref name="descriptor"/> reference that was passed in.</returns>
        private CreateIndexDescriptor AddCategoryAnalyzers(CreateIndexDescriptor descriptor)
        {
            var edgeNGram = new EdgeNGramTokenFilter { MinGram = 1, MaxGram = 20 };

            // Variant that additionally applies asciifolding.
            var foldingAnalyzer = new CustomAnalyzer
            {
                Tokenizer = "standard",
                Filter = new List<string> { "lowercase", "asciifolding", "autocomplete_filter" }
            };

            // Variant without asciifolding.
            var nativeAnalyzer = new CustomAnalyzer
            {
                Tokenizer = "standard",
                Filter = new List<string> { "lowercase", "autocomplete_filter" }
            };

            descriptor.Analysis(analysis => analysis
                .TokenFilters(filters => filters.Add("autocomplete_filter", edgeNGram))
                .Analyzers(analyzers => analyzers
                    .Add("autocomplete", foldingAnalyzer)
                    .Add("autocompletenative", nativeAnalyzer)));

            return descriptor;
        }
示例#5
0
        /// <summary>
        /// Creates the client's default index when it does not exist yet, using a Czech "default"
        /// analyzer (lowercase, Czech stop words, Czech stemmer, asciifolding), 25 shards and
        /// 2 replicas, and an auto-mapped mapping for the document type selected by
        /// <paramref name="db"/>. Does nothing when the index already exists.
        /// </summary>
        /// <param name="db">Selects which document type (Dokument, Osoba or Rizeni) is mapped.</param>
        /// <param name="client">Client used for the existence check and the create call.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown while the mapping is being built when <paramref name="db"/> is not a handled value.
        /// </exception>
        private void CreateElasticIndex(Database db, ElasticClient client)
        {
            var ret = client.IndexExists(client.ConnectionSettings.DefaultIndex);

            if (!ret.Exists)
            {
                var set = new IndexSettings
                {
                    NumberOfReplicas = 2,
                    NumberOfShards   = 25
                };
                var an = new CustomAnalyzer
                {
                    Tokenizer = "standard",
                    Filter    = new[] { "lowercase", "czech_stop", "czech_stemmer", "asciifolding" }
                };
                set.Analysis = new Analysis()
                {
                    Analyzers    = new Analyzers(),
                    TokenFilters = new TokenFilters(),
                };
                set.Analysis.Analyzers.Add("default", an);
                set.Analysis.TokenFilters.Add("czech_stop", new StopTokenFilter()
                {
                    StopWords = new string[] { "_czech_" }
                });
                set.Analysis.TokenFilters.Add("czech_stemmer", new StemmerTokenFilter()
                {
                    Language = "czech"
                });
                var idxSt = new IndexState {
                    Settings = set
                };

                // The create response is not inspected here; failures reported in it go unnoticed.
                client
                    .CreateIndex(client.ConnectionSettings.DefaultIndex, i => i
                                 .InitializeUsing(idxSt)
                                 .Mappings(m =>
                {
                    switch (db)
                    {
                    case Database.Dokument:
                        return(m.Map <Dokument>(map => map.AutoMap().DateDetection(false)));

                    case Database.Osoba:
                        return(m.Map <Osoba>(map => map.AutoMap().DateDetection(false)));

                    case Database.Rizeni:
                        return(m.Map <Rizeni>(map => map.AutoMap().DateDetection(false)));

                    default:
                        // The single-string constructor interprets its argument as the parameter
                        // name, so the three-argument overload is used to carry a real message.
                        throw new ArgumentOutOfRangeException(nameof(db), db, $"Unknown DB type {db.ToString()}");
                    }
                })
                                 );
            }
        }
        /// <summary>
        /// Builds a custom analyzer from a tokenizer name and a comma-separated filter list and
        /// adds it to <c>Analyzers</c> under <paramref name="id"/>.
        /// </summary>
        /// <param name="id">Key under which the analyzer is added.</param>
        /// <param name="tokenizer">Tokenizer name assigned to the analyzer.</param>
        /// <param name="filter">Comma-separated filter names; empty entries are dropped.</param>
        public void AddAnalyser(string id, string tokenizer, string filter)
        {
            var separators = new[] { ',' };

            var analyzer = new CustomAnalyzer
            {
                Tokenizer = tokenizer,
                Filter = filter.Split(separators, StringSplitOptions.RemoveEmptyEntries)
            };

            Analyzers.Add(id, analyzer);
        }
示例#7
0
        /// <summary>
        /// Builds a custom analyzer that uses the "whitespace" tokenizer and only the "lowercase" filter.
        /// </summary>
        /// <returns>The configured analyzer.</returns>
        private static IAnalyzer LowerCaseOnlyAnalyzer()
        {
            return new CustomAnalyzer
            {
                Tokenizer = "whitespace",
                Filter = new List<string> { "lowercase" }
            };
        }
示例#8
0
        //public static void CreateIndex()
        //{
        //    CreateIndex(defaultIndexName);
        //}

        /// <summary>
        /// Creates the client's default index with 25 shards, 2 replicas, a Czech "default"
        /// analyzer and a "data" mapping containing a date field "DbCreated" and a keyword field
        /// "DbCreatedBy". The create response is not inspected.
        /// </summary>
        /// <param name="client">Client on which the index is created.</param>
        public static void CreateIndex(ElasticClient client)
        {
            // Custom analyzer: standard tokenizer followed by the filters below.
            var czechAnalyzer = new CustomAnalyzer
            {
                Tokenizer = "standard",
                Filter = new List<string>
                {
                    "lowercase",
                    "czech_stop",
                    //"czech_keywords",
                    "czech_stemmer",
                    "asciifolding"
                }
            };

            var analysis = new Nest.Analysis
            {
                Analyzers    = new Analyzers(),
                TokenFilters = new TokenFilters(),
            };

            // Register the analyzer as "default" together with the two named filters it references.
            analysis.Analyzers.Add("default", czechAnalyzer);
            analysis.TokenFilters.Add("czech_stop", new StopTokenFilter
            {
                StopWords = new string[] { "_czech_" }
            });
            analysis.TokenFilters.Add("czech_stemmer", new StemmerTokenFilter
            {
                Language = "czech"
            });

            var indexState = new IndexState
            {
                Settings = new IndexSettings
                {
                    NumberOfReplicas = 2,
                    NumberOfShards = 25,
                    Analysis = analysis
                }
            };

            client.CreateIndex(client.ConnectionSettings.DefaultIndex, index => index
                .InitializeUsing(indexState)
                .Mappings(mappings => mappings
                    .Map("data", mapping => mapping
                        .Properties(props => props
                            .Date(field => field.Name("DbCreated"))
                            .Keyword(field => field.Name("DbCreatedBy"))))));
        }
示例#9
0
        /// <summary>
        /// Applies shard and replica counts from <c>Settings.Current</c>, registers the
        /// keyword-tokenized lowercase analyzer under <c>KEYWORD_LOWERCASE_ANALYZER</c> and adds the
        /// <c>User</c> mapping built by <c>BuildMapping</c>.
        /// </summary>
        /// <param name="idx">Index descriptor to configure.</param>
        /// <returns>The descriptor returned by the final chained call.</returns>
        public override CreateIndexDescriptor Configure(CreateIndexDescriptor idx)
        {
            var lowercaseKeyword = new CustomAnalyzer
            {
                Tokenizer = "keyword",
                Filter = new List<string> { "lowercase" }
            };

            var sized = idx
                .NumberOfShards(Settings.Current.ElasticSearchNumberOfShards)
                .NumberOfReplicas(Settings.Current.ElasticSearchNumberOfReplicas);

            var analyzed = sized.Analysis(analysis => analysis
                .Analyzers(analyzers => analyzers.Add(KEYWORD_LOWERCASE_ANALYZER, lowercaseKeyword)));

            return analyzed.AddMapping<User>(BuildMapping);
        }
示例#10
0
        /// <summary>
        /// Builds a custom analyzer that uses the "standard" tokenizer followed by the
        /// lowercase, czech_stop, czech_stemmer and asciifolding filters, in that order.
        /// </summary>
        /// <returns>The configured analyzer.</returns>
        private static IAnalyzer DefaultAnalyzer()
        {
            var filterChain = new List<string> { "lowercase", "czech_stop", "czech_stemmer", "asciifolding" };

            return new CustomAnalyzer
            {
                Tokenizer = "standard",
                Filter = filterChain
            };
        }
        /// <summary>
        /// Registers the keyword-tokenized lowercase analyzer under <c>KEYWORD_LOWERCASE</c> and adds
        /// the mappings for Application, Organization, Project, Token, User and WebHook.
        /// </summary>
        /// <param name="idx">Index descriptor to configure.</param>
        /// <returns>The descriptor returned by the final chained call.</returns>
        public CreateIndexDescriptor CreateIndex(CreateIndexDescriptor idx)
        {
            var lowercaseKeyword = new CustomAnalyzer
            {
                Tokenizer = "keyword",
                Filter = new List<string> { "lowercase" }
            };

            var analyzed = idx.Analysis(analysis => analysis
                .Analyzers(analyzers => analyzers.Add(KEYWORD_LOWERCASE, lowercaseKeyword)));

            return analyzed
                .AddMapping<Application>(GetApplicationMap)
                .AddMapping<Organization>(GetOrganizationMap)
                .AddMapping<Project>(GetProjectMap)
                .AddMapping<Models.Token>(GetTokenMap)
                .AddMapping<User>(GetUserMap)
                .AddMapping<WebHook>(GetWebHookMap);
        }
        /// <summary>
        /// Creates the "news-deneme" index (1 shard, 1 replica) with a user-defined "default"
        /// analyzer and an auto-mapped <c>News</c> mapping. The create response is not inspected.
        /// </summary>
        /// <param name="client">Client on which the index is created.</param>
        public static void CreateIndex(ElasticClient client)
        {
            var defaultAnalyzer = new CustomAnalyzer
            {
                Tokenizer = "standard",
                Filter = new List<string> { "lowercase", "asciifolding", "word_delimiter" },
                CharFilter = new List<string> { "html_strip" }
            };

            client.CreateIndex("news-deneme", index => index
                .Settings(settings => settings
                    .NumberOfShards(1)
                    .NumberOfReplicas(1)
                    .Analysis(analysis => analysis
                        .Analyzers(analyzers => analyzers.UserDefined("default", defaultAnalyzer))))
                .Mappings(mappings => mappings
                    .Map<News>(mapping => mapping.AutoMap())));
        }
示例#13
0
        /// <summary>
        /// Specification: analyzing "Bob's I.O.U." with the standard tokenizer and the lowercase
        /// filter under test is expected to yield the two tokens "bob's" and "i.o.u".
        /// </summary>
        /// <param name="sut">Lowercase filter factory under test.</param>
        /// <param name="resourceLoader">Resource loader handed to the factory's <c>Initialize</c>.</param>
        public void LowerCaseFilterTests(Filters.LowerCaseFilterFactory sut, Interface.IResourceLoader resourceLoader)
        {
            List<string> tokens = null;

            "Given a LowerCase filter".Given(() => { });
            "when a sample text 'Bob's I.O.U.' is analyzed".When(
                () =>
                {
                    // The filter takes no settings, so it is initialized with an empty dictionary.
                    var factory = (Interface.IFlexFilterFactory)sut;
                    factory.Initialize(new Dictionary<string, string>(), resourceLoader);

                    var filterChain = new List<Interface.IFlexFilterFactory> { factory }.ToArray();
                    var tokenizer = new Tokenizers.StandardTokenizerFactory();
                    var analyzer = new CustomAnalyzer(tokenizer, filterChain);

                    tokens = SearchDsl.ParseTextUsingAnalyzer(analyzer, "test", "Bob's I.O.U.");
                });

            "it should produce 2 tokens".Observation(() => tokens.Count.Should().Be(2));
            "it should be 'bob's','i.o.u'".Observation(
                () => tokens.Should().Equal(new List<string> { "bob's", "i.o.u" }));
        }
示例#14
0
        /// <summary>
        /// Specification: analyzing "turn right at Albuquerque" with the length filter configured
        /// for min 3 / max 7 is expected to yield the two tokens "turn" and "right".
        /// </summary>
        /// <param name="sut">Length filter factory under test.</param>
        /// <param name="resourceLoader">Resource loader handed to the factory's <c>Initialize</c>.</param>
        public void LengthFilterTests(Filters.LengthFilterFactory sut, Interface.IResourceLoader resourceLoader)
        {
            List<string> tokens = null;

            "Given a Length Filter".Given(() => { });
            "when a sample text 'turn right at Albuquerque' is analyzed with min:3 and max:7".When(
                () =>
                {
                    var lengthLimits = new Dictionary<string, string>
                    {
                        { "min", "3" },
                        { "max", "7" }
                    };

                    var factory = (Interface.IFlexFilterFactory)sut;
                    factory.Initialize(lengthLimits, resourceLoader);

                    var filterChain = new List<Interface.IFlexFilterFactory> { factory }.ToArray();
                    var tokenizer = new Tokenizers.StandardTokenizerFactory();
                    var analyzer = new CustomAnalyzer(tokenizer, filterChain);

                    tokens = SearchDsl.ParseTextUsingAnalyzer(analyzer, "test", "turn right at Albuquerque");
                });

            "it should produce 2 tokens".Observation(() => tokens.Count.Should().Be(2));
            "it should be 'turn','right'".Observation(
                () => tokens.Should().Equal(new List<string> { "turn", "right" }));
        }
        /// <summary>
        /// Builds a test index whose analyzers cover every tokenizer, token filter and char filter
        /// name returned by <c>GetAllExtensibleEnumValues</c>, then passes it to
        /// <c>TestAnalysisComponents</c>.
        /// </summary>
        public void CanUseAllAnalysisComponentNames()
        {
            Run(() =>
            {
                TokenizerName[] tokenizerNames = GetAllExtensibleEnumValues<TokenizerName>();
                TokenFilterName[] tokenFilterNames = GetAllExtensibleEnumValues<TokenFilterName>();
                CharFilterName[] charFilterNames = GetAllExtensibleEnumValues<CharFilterName>();

                // One analyzer that carries every token filter and every char filter.
                var everyFilterAnalyzer = new CustomAnalyzer(
                    SearchTestUtilities.GenerateName(),
                    TokenizerName.Lowercase,
                    tokenFilterNames,
                    charFilterNames);

                // One analyzer per tokenizer. This stays a lazy sequence: it is only enumerated by
                // ToArray() below, after the test index has been created.
                IEnumerable<Analyzer> perTokenizerAnalyzers = tokenizerNames.Select(
                    tn => new CustomAnalyzer(SearchTestUtilities.GenerateName(), tn));

                Index testIndex = CreateTestIndex();
                testIndex.Analyzers = new[] { everyFilterAnalyzer }
                    .Concat(perTokenizerAnalyzers)
                    .ToArray();

                TestAnalysisComponents(testIndex);
            });
        }
        /// <summary>
        /// Extends the base descriptor configuration with 3 shards and a "keyword_lowercase"
        /// analyzer (keyword tokenizer plus lowercase filter).
        /// </summary>
        /// <param name="idx">Index descriptor to configure.</param>
        /// <returns>The descriptor returned by the base configuration, after the additional calls.</returns>
        public override CreateIndexDescriptor ConfigureDescriptor(CreateIndexDescriptor idx)
        {
            idx = base.ConfigureDescriptor(idx);
            idx.NumberOfShards(3);

            var lowercaseKeyword = new CustomAnalyzer
            {
                Tokenizer = "keyword",
                Filter = new List<string> { "lowercase" }
            };

            idx.Analysis(analysis => analysis
                .Analyzers(analyzers => analyzers.Add("keyword_lowercase", lowercaseKeyword)));

            return idx;
        }
示例#17
0
        /// <summary>
        /// Refreshes the index for <c>CustomerModel</c> through <c>EsCoreOperation.RefereshIndex</c>,
        /// using the mapper returned by <c>CustomerModel.GetMapper()</c>.
        /// </summary>
        /// <remarks>
        /// An index state containing a "custom_email_analyzer" is assembled here but is not passed
        /// to any call in this method. NOTE(review): presumably it is meant to be applied when the
        /// index is created — confirm and wire it up, or remove it.
        /// </remarks>
        /// <returns>Always <c>true</c>.</returns>
        public bool CreateIndex()
        {
            // TODO: if any attribute changes or gets added.
            // NOTE(review): "lowercase" followed by "uppercase" looks contradictory — confirm the intended filters.
            var emailAnalyzer = new CustomAnalyzer
            {
                Filter = new List <string> {
                    "lowercase", "uppercase", "asciifolding", "stop"
                },
                Tokenizer = "uax_url_email"
            };

            var analyzers = new Analyzers();

            analyzers.Add("custom_email_analyzer", emailAnalyzer);
            var indexstate = new IndexState();

            indexstate.Settings = new IndexSettings
            {
                Analysis = new Analysis
                {
                    Analyzers = analyzers
                }
            };

            var availa = EsClient.Indices.Exists("customer");

            // The delete below is commented out, so an existing index is left untouched.
            if (availa.Exists)
            {
                //EsClient.Indices.Delete("customer");
            }
            //EsCoreOperation.CreateIndex(EsClient,"CustomerModel");
            EsCoreOperation.RefereshIndex <CustomerModel>(() => new CustomerModel().GetMapper());
            //.Properties(ps => ps.Text(t => t.Name(n => n.Email).Fields(ff => ff.Text(tt => tt.Name("emailanalyzer").Analyzer("custom_email_analyzer"))))))
            //UploadData();
            //ps.Completion(com => com.Name(p => p.Suggest))

            return(true);
        }
示例#18
0
        /// <summary>
        /// Specification: analyzing "hello world test" with the keep-words filter initialized from
        /// "wordlist.txt" is expected to yield only the tokens "hello" and "world".
        /// </summary>
        /// <param name="sut">Keep-words filter factory under test.</param>
        /// <param name="resourceLoader">Resource loader handed to the factory's <c>Initialize</c>.</param>
        public void KeepWordFilterShouldOnlyKeepKeepwords(
            Filters.KeepWordsFilterFactory sut,
            Interface.IResourceLoader resourceLoader)
        {
            List<string> tokens = null;

            "Given a keepword filter".Given(() => { });
            "when a wordlist of keepwords is passed and a sample text 'hello world test' is analyzed".When(
                () =>
                {
                    var filterSettings = new Dictionary<string, string>
                    {
                        { "filename", "wordlist.txt" }
                    };

                    var factory = (Interface.IFlexFilterFactory)sut;
                    factory.Initialize(filterSettings, resourceLoader);

                    var filterChain = new List<Interface.IFlexFilterFactory> { factory }.ToArray();
                    var tokenizer = new Tokenizers.StandardTokenizerFactory();
                    var analyzer = new CustomAnalyzer(tokenizer, filterChain);

                    tokens = SearchDsl.ParseTextUsingAnalyzer(analyzer, "test", "hello world test");
                });

            "it should produce 2 tokens".Observation(() => tokens.Count.Should().Be(2));
            "it should remove all non keep words from the input".Then(
                () => tokens.Should().Equal(new List<string> { "hello", "world" }));
        }
示例#19
0
        /// <summary>
        /// Creates the client's default index for the given index type, using 2 replicas, 4 shards
        /// for <c>IndexType.DataSource</c> (8 otherwise) and a Czech "default" analyzer. The mapping
        /// depends on <paramref name="idxTyp"/>; index types without a case below create nothing.
        /// The create response is not inspected.
        /// </summary>
        /// <param name="client">Client on which the index is created.</param>
        /// <param name="idxTyp">Selects the shard count and the document mapping.</param>
        public static void CreateIndex(ElasticClient client, IndexType idxTyp)
        {
            // Custom analyzer: standard tokenizer followed by the filters below.
            var czechAnalyzer = new CustomAnalyzer
            {
                Tokenizer = "standard",
                Filter = new List<string>
                {
                    "lowercase",
                    "czech_stop",
                    //"czech_keywords",
                    "czech_stemmer", // use Hunspell
                    "asciifolding"
                }
            };

            var analysis = new Nest.Analysis
            {
                Analyzers    = new Analyzers(),
                TokenFilters = new TokenFilters(),
            };

            // Register the analyzer as "default" together with the two named filters it references.
            analysis.Analyzers.Add("default", czechAnalyzer);
            analysis.TokenFilters.Add("czech_stop", new StopTokenFilter
            {
                StopWords = new string[] { "_czech_" }
            });
            analysis.TokenFilters.Add("czech_stemmer", new StemmerTokenFilter
            {
                Language = "czech"
            }); // Hunspell

            var indexState = new IndexState
            {
                Settings = new IndexSettings
                {
                    NumberOfReplicas = 2,
                    NumberOfShards = idxTyp == IndexType.DataSource ? 4 : 8,
                    Analysis = analysis
                }
            };

            // Pick the descriptor configuration for the requested index type.
            // NOTE(review): IndexType.DataSource affects the shard count above but has no case here — confirm.
            // todo: es7 check (applies to every case except VerejneZakazky and ProfilZadavatele)
            System.Func<Nest.CreateIndexDescriptor, Nest.ICreateIndexRequest> configure;

            switch (idxTyp)
            {
            case IndexType.VerejneZakazky:
                configure = i => i
                    .InitializeUsing(indexState)
                    .Map<Lib.Data.VZ.VerejnaZakazka>(map => map.AutoMap().DateDetection(false));
                break;

            case IndexType.ProfilZadavatele:
                configure = i => i
                    .InitializeUsing(indexState)
                    .Map<Lib.Data.VZ.ProfilZadavatele>(map => map.AutoMap().DateDetection(false));
                break;

            case IndexType.Insolvence:
                configure = i => i
                    .InitializeUsing(indexState)
                    .Map<Lib.Data.Insolvence.Rizeni>(map => map.AutoMap().DateDetection(false));
                break;

            case IndexType.Dotace:
                configure = i => i
                    .InitializeUsing(indexState)
                    .Map<Data.Dotace.Dotace>(map => map.AutoMap().DateDetection(false));
                break;

            case IndexType.Smlouvy:
                configure = i => i
                    .InitializeUsing(indexState)
                    .Map<Lib.Data.Smlouva>(map => map.AutoMap().DateDetection(false));
                break;

            case IndexType.Firmy:
                configure = i => i
                    .InitializeUsing(indexState)
                    .Map<Data.Firma.Search.FirmaInElastic>(map => map.AutoMap(maxRecursion: 1));
                break;

            case IndexType.Logs:
                configure = i => i
                    .InitializeUsing(indexState)
                    .Map<Lib.Data.Logs.ProfilZadavateleDownload>(map => map.AutoMap(maxRecursion: 1));
                break;

            case IndexType.VerejneZakazkyNaProfiluRaw:
                configure = i => i
                    .InitializeUsing(indexState)
                    .Map<Lib.Data.External.ProfilZadavatelu.ZakazkaRaw>(map => map
                        .Properties(p => p
                            .Keyword(k => k.Name(n => n.ZakazkaId))
                            .Keyword(k => k.Name(n => n.Profil))
                            .Date(k => k.Name(n => n.LastUpdate))));
                break;

            default:
                // No mapping is defined for this index type, so no index is created.
                return;
            }

            client.Indices.Create(client.ConnectionSettings.DefaultIndex, configure);
        }
示例#20
0
 /// <summary>
 /// Makes sure the index named by <c>IndexName</c> exists, creating it with a
 /// "case_insensitive" analyzer, replica/shard counts and the model mappings when it is
 /// missing. The work runs inside <c>base.ExecuteMethod</c> and is skipped once
 /// <c>HasEnsuredModelIndices</c> has been set.
 /// </summary>
 /// <param name="client">Client used for the existence check and the index creation.</param>
 /// <remarks>
 /// The delegate throws <see cref="Exception"/> when the create response is not acknowledged.
 /// NOTE(review): whether that exception reaches the caller depends on <c>ExecuteMethod</c>,
 /// which is not visible here — confirm.
 /// </remarks>
 protected void EnsureModelIndices(ElasticClient client)
 {
     base.ExecuteMethod("EnsureModelIndices", delegate()
     {
         // One-shot debug hook: the flag is re-checked and cleared under debug_lock so that at
         // most one caller sees executeDebug == true. The hook body is currently commented out.
         if (debug_reset)
         {
             bool executeDebug = false;
             lock (debug_lock)
             {
                 if (debug_reset)
                 {
                     executeDebug = true;
                 }
                 debug_reset = false;
             }
             if (executeDebug)
             {
                 //client.Map<Objective>(m => m
                 //            .MapFromAttributes()
                 //            .Type(DocumentTypes.OBJECTIVES)
                 //            .Properties(props => props
                 //                .String(s => s
                 //                    .Name(p => p.campaign_id)
                 //                    .Index(FieldIndexOption.NotAnalyzed))
                 //                .String(s => s
                 //                    .Name(p => p.objective_id)
                 //                    .Index(FieldIndexOption.NotAnalyzed)
                 //            ))
                 //         );
             }
         }
         // The flag is tested once without the lock and again after taking ensure_lock, so the
         // existence check and creation below are not repeated once the flag is set.
         if (!this.HasEnsuredModelIndices)
         {
             lock (ensure_lock)
             {
                 if (!this.HasEnsuredModelIndices)
                 {
                     if (!client.IndexExists(this.IndexName).Exists)
                     {
                         // Keyword tokenizer + lowercase filter: the whole value becomes one lowercased token.
                         CustomAnalyzer ignoreCaseAnalyzer = new CustomAnalyzer
                         {
                             Tokenizer = "keyword",
                             Filter    = new[] { "lowercase" }
                         };
                         Analysis analysis  = new Analysis();
                         analysis.Analyzers = new Analyzers();
                         analysis.Analyzers.Add("case_insensitive", ignoreCaseAnalyzer);
                         ICreateIndexResponse createResult = client.CreateIndex(this.IndexName, delegate(Nest.CreateIndexDescriptor descriptor)
                         {
                             // Index-level settings: the analysis block built above, replica and
                             // shard counts from this instance, and three raw settings.
                             descriptor.Settings(ss => ss
                                                 .Analysis(a => analysis)
                                                 .NumberOfReplicas(this.ReplicaCount)
                                                 .NumberOfShards(this.ShardCount)
                                                 .Setting("merge.policy.merge_factor", "10")
                                                 .Setting("search.slowlog.threshold.fetch.warn", "1s")
                                                 .Setting("max_result_window", "2147483647")
                                                 );
                             // Mappings are added by MapIndexModels (defined elsewhere).
                             this.MapIndexModels(descriptor);
                             return(descriptor);
                         });
                         if (!createResult.Acknowledged)
                         {
                             throw new Exception("Error creating index, mapping is no longer valid");
                         }
                     }
                     // Set both when the index already existed and after a successful creation.
                     HasEnsuredModelIndices = true;
                 }
             }
         }
     });
 }
示例#21
0
        public static void CreateIndex(ElasticClient client, IndexType idxTyp)
        {
            IndexSettings set = new IndexSettings();

            set.NumberOfReplicas = 2;
            if (idxTyp == IndexType.DataSource)
            {
                set.NumberOfShards = 5;
            }
            else
            {
                set.NumberOfShards = 10;
            }
            // Create a Custom Analyzer ...
            var an = new CustomAnalyzer();

            an.Tokenizer = "standard";
            // ... with Filters from the StandardAnalyzer
            var filter = new List <string>();

            filter.Add("lowercase");
            filter.Add("czech_stop");
            //an.Filter.Add("czech_keywords");
            filter.Add("czech_stemmer");
            filter.Add("asciifolding");
            an.Filter = filter;
            // Add the Analyzer with a name
            set.Analysis = new Nest.Analysis()
            {
                Analyzers    = new Analyzers(),
                TokenFilters = new TokenFilters(),
            };

            set.Analysis.Analyzers.Add("default", an);
            set.Analysis.TokenFilters.Add("czech_stop", new StopTokenFilter()
            {
                StopWords = new string[] { "_czech_" }
            });
            set.Analysis.TokenFilters.Add("czech_stemmer", new StemmerTokenFilter()
            {
                Language = "czech"
            });
            IndexState idxSt = new IndexState();

            idxSt.Settings = set;

            Nest.ICreateIndexResponse res = null;
            switch (idxTyp)
            {
            //case IndexType.VerejneZakazkyRaw2006:
            //    res = client
            //       .CreateIndex(client.ConnectionSettings.DefaultIndex, i => i
            //           .InitializeUsing(idxSt)
            //           .Mappings(m => m
            //               .Map<Lib.Data.VZ.VerejnaZakazka.ImportXMLpre2016.VerejnaZakazka2006>(map => map.AutoMap().DateDetection(false))
            //               .Map<Lib.Data.VZ.VerejnaZakazka.ImportXMLpre2016.CastiVerejneZakazky2006>(map => map.AutoMap().DateDetection(false))
            //               )
            //       );
            //    break;

            case IndexType.VerejneZakazky:
                res = client
                      .CreateIndex(client.ConnectionSettings.DefaultIndex, i => i
                                   .InitializeUsing(idxSt)
                                   .Mappings(m => m
                                             .Map <Lib.Data.VZ.VerejnaZakazka>(map => map.AutoMap().DateDetection(false))
                                             .Map <Lib.Data.VZ.ProfilZadavatele>(map => map.AutoMap().DateDetection(false))
                                             )
                                   );
                break;

            case IndexType.Smlouvy:
                res = client
                      .CreateIndex(client.ConnectionSettings.DefaultIndex, i => i
                                   .InitializeUsing(idxSt)
                                   .Mappings(m => m
                                             //.Map("_default_", mm => mm.TtlField(ttl => ttl.Enable(false)))
                                             .Map <Lib.Data.Smlouva>(map => map
                                             //.TtlField(ttl => ttl.Enable(false))
                                                                     .AutoMap()
                                                                     .DateDetection(false)
                                                                     )
                                             //.Map<Person>(map => map.AutoMap(maxRecursion: 1))
                                             //.Map<VerejnaZakazka>(map => map.AutoMap(maxRecursion: 1).DateDetection(false))
                                             )
                                   );
                break;

            case IndexType.Firmy:
                res = client
                      .CreateIndex(client.ConnectionSettings.DefaultIndex, i => i
                                   .InitializeUsing(idxSt)
                                   .Mappings(m => m
                                             .Map <Data.Firma.Search.FirmaInElastic>(map => map.AutoMap(maxRecursion: 1))
                                             )
                                   );
                break;

            case IndexType.Ucty:
                res = client
                      .CreateIndex(client.ConnectionSettings.DefaultIndex, i => i
                                   .InitializeUsing(idxSt)
                                   .Mappings(m => m
                                             .Map <Lib.Data.TransparentniUcty.BankovniUcet>(map => map.AutoMap(maxRecursion: 1))
                                             .Map <Lib.Data.TransparentniUcty.BankovniPolozka>(map => map.AutoMap(maxRecursion: 1))
                                             )
                                   );
                break;

            case IndexType.Logs:
                res = client
                      .CreateIndex(client.ConnectionSettings.DefaultIndex, i => i
                                   .InitializeUsing(idxSt)
                                   .Mappings(m => m
                                             .Map <Lib.Data.Logs.ProfilZadavateleDownload>(map => map.AutoMap(maxRecursion: 1))
                                             )
                                   );
                break;

            case IndexType.VerejneZakazkyNaProfiluRaw:
                res = client
                      .CreateIndex(client.ConnectionSettings.DefaultIndex, i => i
                                   .InitializeUsing(idxSt)
                                   .Mappings(m => m
                                             .Map <Lib.Data.External.ProfilZadavatelu.ZakazkaRaw>(map => map
                                                                                                  .Properties(p => p
                                                                                                              .Keyword(k => k.Name(n => n.ZakazkaId))
                                                                                                              .Keyword(k => k.Name(n => n.Profil))
                                                                                                              .Date(k => k.Name(n => n.LastUpdate))
                                                                                                              )
                                                                                                  )
                                             )
                                   );
                break;
            }


            //Console.WriteLine(res.IsValid);
        }
示例#22
0
        /// <summary>
        ///     Specification for the reverse-string filter: analyzing 'hello how are you' with a
        ///     standard tokenizer followed by the filter must yield each word with its characters reversed.
        /// </summary>
        /// <param name="sut">Reverse-string filter factory under test.</param>
        /// <param name="resourceLoader">Resource loader handed to the filter when it is initialized.</param>
        public void ReverseStringTests(Filters.ReverseStringFilterFactory sut, Interface.IResourceLoader resourceLoader)
        {
            var expectedTokens = new List<string> { "olleh", "woh", "era", "uoy" };
            List<string> tokens = null;

            "Given a ReverseString Filter".Given(() => { });
            "when a sample text 'hello how are you' is analyzed".When(
                () =>
                {
                    // Go through the interface to call Initialize; the filter is given no settings.
                    Interface.IFlexFilterFactory filterFactory = sut;
                    filterFactory.Initialize(new Dictionary<string, string>(), resourceLoader);

                    var tokenizerFactory = new Tokenizers.StandardTokenizerFactory();
                    var analyzer = new CustomAnalyzer(tokenizerFactory, new[] { filterFactory });

                    tokens = SearchDsl.ParseTextUsingAnalyzer(analyzer, "test", "hello how are you");
                });

            "it should produce 4 tokens".Observation(() => tokens.Count.Should().Be(4));
            "it should be 'olleh', 'woh', 'era', 'uoy' ".Observation(() => tokens.Should().Equal(expectedTokens));
        }
        /// <summary>
        ///     Creates the search index <paramref name="indexName" /> for <see cref="EmployerSearchModel" />
        ///     when it does not already exist, together with its suggester, custom char filters, token
        ///     filters, tokenizers and analyzers. The shared synonym map is created first if it is missing.
        /// </summary>
        /// <param name="serviceClient">Search service client used for the index and synonym-map operations.</param>
        /// <param name="indexName">Name of the index to look up and, if missing, create.</param>
        /// <returns>A task that completes once the index exists; it carries no result value.</returns>
        /// <exception cref="Exception">Thrown when the repository is disabled.</exception>
        private async Task CreateIndexIfNotExistsAsync(ISearchServiceClient serviceClient, string indexName)
        {
            if (Disabled)
            {
                throw new Exception($"{nameof(AzureEmployerSearchRepository)} is disabled");
            }

            if (await serviceClient.Indexes.ExistsAsync(indexName))
            {
                return;
            }

            var index = new Index {
                Name = indexName, Fields = FieldBuilder.BuildForType <EmployerSearchModel>()
            };

            index.Suggesters = new List <Suggester>
            {
                new Suggester(
                    suggestorName,
                    nameof(EmployerSearchModel.Name),
                    nameof(EmployerSearchModel.PreviousName),
                    nameof(EmployerSearchModel.Abbreviations))
            };

            // Char filters: each mapping/pattern replaces its match with nothing.
            var charFilterRemoveAmpersand = new MappingCharFilter("gpg_remove_Ampersand", new List <string> {
                "&=>"
            });
            var charFilterRemoveDot = new MappingCharFilter("gpg_remove_Dot", new List <string> {
                ".=>"
            });
            var charFilterRemoveLtdInfoCaseInsensitive = new PatternReplaceCharFilter(
                "gpg_patternReplaceCharFilter_Ltd",
                "(?i)(limited|ltd|llp| uk|\\(uk\\)|-uk)[\\.]*",
                string.Empty); // case insensitive 'limited' 'ltd', 'llp', ' uk', '(uk)', '-uk' followed by zero or more dots (to cater for ltd. and some mis-punctuated limited..)
            var charFilterRemoveWhitespace = new PatternReplaceCharFilter(
                "gpg_patternReplaceCharFilter_removeWhitespace",
                "\\s",
                string.Empty);

            index.CharFilters = new List <CharFilter>
            {
                charFilterRemoveAmpersand, charFilterRemoveDot, charFilterRemoveLtdInfoCaseInsensitive,
                charFilterRemoveWhitespace
            };

            // Edge n-grams of length 3..300 taken from the front and from the back of each token.
            var edgeNGramTokenFilterFront =
                new EdgeNGramTokenFilterV2("gpg_edgeNGram_front", 3, 300, EdgeNGramTokenFilterSide.Front);
            var edgeNGramTokenFilterBack =
                new EdgeNGramTokenFilterV2("gpg_edgeNGram_back", 3, 300, EdgeNGramTokenFilterSide.Back);

            index.TokenFilters = new List <TokenFilter> {
                edgeNGramTokenFilterFront, edgeNGramTokenFilterBack
            };

            var standardTokenizer = new StandardTokenizerV2("gpg_standard_v2_tokenizer");
            var keywordTokenizer  = new KeywordTokenizerV2("gpg_keyword_v2_tokenizer");

            index.Tokenizers = new List <Tokenizer> {
                standardTokenizer, keywordTokenizer
            };

            var suffixAnalyzer = new CustomAnalyzer(
                "gpg_suffix",
                standardTokenizer.Name,
                new List <TokenFilterName> {
                TokenFilterName.Lowercase, edgeNGramTokenFilterBack.Name
            },
                new List <CharFilterName> {
                charFilterRemoveAmpersand.Name, charFilterRemoveLtdInfoCaseInsensitive.Name
            });

            var completeTokenAnalyzer = new CustomAnalyzer(
                "gpg_prefix_completeToken",
                keywordTokenizer.Name,
                new List <TokenFilterName> {
                TokenFilterName.Lowercase, edgeNGramTokenFilterFront.Name
            },
                new List <CharFilterName>
            {
                charFilterRemoveDot.Name,
                charFilterRemoveAmpersand.Name,
                charFilterRemoveLtdInfoCaseInsensitive.Name,
                charFilterRemoveWhitespace.Name
            });

            index.Analyzers = new List <Analyzer> {
                suffixAnalyzer, completeTokenAnalyzer
            };

            // Look each field up once and configure it, instead of searching the field list per property.
            var suffixSearchField = index.Fields.First(
                f => f.Name == nameof(EmployerSearchModel.PartialNameForSuffixSearches));
            suffixSearchField.Analyzer    = suffixAnalyzer.Name;
            suffixSearchField.SynonymMaps = new[] { synonymMapName };

            var completeTokenSearchField = index.Fields.First(
                f => f.Name == nameof(EmployerSearchModel.PartialNameForCompleteTokenSearches));
            completeTokenSearchField.Analyzer    = completeTokenAnalyzer.Name;
            completeTokenSearchField.SynonymMaps = new[] { synonymMapName };

            index.Fields.First(f => f.Name == nameof(EmployerSearchModel.Name)).SynonymMaps         = new[] { synonymMapName };
            index.Fields.First(f => f.Name == nameof(EmployerSearchModel.PreviousName)).SynonymMaps = new[] { synonymMapName };

            //Add the synonyms if they dont already exist
            if (!await serviceClient.SynonymMaps.ExistsAsync(synonymMapName))
            {
                // Awaited so the request does not block a thread inside this async method.
                await serviceClient.SynonymMaps.CreateOrUpdateAsync(
                    new SynonymMap
                {
                    Name = synonymMapName,
                    //Format = "solr", cannot set after upgrade from v5.03 to version 9.0.0
                    Synonyms = "coop, co-operative"
                });
            }

            await serviceClient.Indexes.CreateAsync(index);
        }
示例#24
0
        /// <summary>
        ///     Specification for the synonym filter: with the word list 'synonymlist.txt' loaded,
        ///     analyzing 'easy' must yield the original token followed by its synonyms.
        /// </summary>
        /// <param name="sut">Synonym filter under test.</param>
        /// <param name="resourceLoader">Resource loader handed to the filter when it is initialized.</param>
        public void SynonymFilterShouldGenerateSynonym(
            Filters.SynonymFilter sut,
            Interface.IResourceLoader resourceLoader)
        {
            var expectedTokens = new List<string> { "easy", "simple", "clear" };
            List<string> tokens = null;

            "Given a Synonym filter".Given(() => { });
            "when a wordlist of Synonym is passed and a sample text 'easy' is analyzed".When(
                () =>
                {
                    // The filter is pointed at its synonym word list through the 'filename' setting.
                    var filterSettings = new Dictionary<string, string>();
                    filterSettings.Add("filename", "synonymlist.txt");

                    Interface.IFlexFilterFactory filterFactory = sut;
                    filterFactory.Initialize(filterSettings, resourceLoader);

                    var tokenizerFactory = new Tokenizers.StandardTokenizerFactory();
                    var analyzer = new CustomAnalyzer(tokenizerFactory, new[] { filterFactory });

                    tokens = SearchDsl.ParseTextUsingAnalyzer(analyzer, "test", "easy");
                });

            "it should produce 3 tokens".Observation(() => tokens.Count.Should().Be(3));
            "it should generate new tokens for the synonmyns".Then(() => tokens.Should().Equal(expectedTokens));
        }
示例#25
0
        /// <summary>
        ///     Loads every <c>LocationData</c> record from the configured CSV file and indexes the
        ///     records into the configured Elasticsearch index in batches.
        ///     In append mode the index must already exist; otherwise any existing index is deleted
        ///     and a new one is created with a "keywordlowercase" analyzer and attribute-based mapping.
        /// </summary>
        /// <exception cref="Exception">Thrown when append mode is requested but the index does not exist.</exception>
        public void Run()
        {
            _logger.Info("Opening file {0}", _filename);

            using (TextReader input = File.OpenText(_filename))
            {
                var csvReader = new CsvReader(input);
                csvReader.Configuration.RegisterClassMap <LocationMapper>();

                _logger.Debug("Reading...");
                var records = csvReader.GetRecords <LocationData>().ToList();

                _logger.Info("Read {0} records from file", records.Count);

                _logger.Debug("Connecting to {0}", _endpoint);
                var connection = new ConnectionSettings(_endpoint);
                connection.SetDefaultIndex(_indexName);

                var client = new ElasticClient(connection);

                _logger.Debug("Checking if index already exists");
                var indexExists = client.IndexExists(_indexName).Exists;

                if (_append)
                {
                    // Appending only makes sense when there is something to append to.
                    if (!indexExists)
                    {
                        throw new Exception("Cannot append to existing data because there is no existing index");
                    }

                    _logger.Debug("Appending to existing index");
                }
                else
                {
                    // A full reload starts from a clean index.
                    if (indexExists)
                    {
                        _logger.Debug("Deleting existing index");
                        client.DeleteIndex(_indexName);
                    }

                    _logger.Debug("Creating new index");

                    var newIndexSettings = new IndexSettings();
                    newIndexSettings.Analysis.Analyzers.Add(
                        "keywordlowercase",
                        new CustomAnalyzer {
                            Tokenizer = "keyword", Filter = new[] { "lowercase" }
                        });

                    client.CreateIndex(i => i.Index(_indexName).InitializeUsing(newIndexSettings));

                    client.Map <LocationData>(p => p.Index(_indexName).MapFromAttributes());
                }

                _logger.Debug("Indexing \"{0}\" in batches of {1}...", _indexName, _batchSize);
                for (var batchNumber = 0; ; batchNumber++)
                {
                    var batch = records.Skip(batchNumber * _batchSize).Take(_batchSize).ToList();
                    if (batch.Count == 0)
                    {
                        // Ran past the last record: everything has been sent.
                        break;
                    }

                    var response = client.IndexMany(batch, _indexName);
                    _logger.Debug("Indexed {0} records in {1}ms", response.Items.Count(), response.Took);
                }

                _logger.Info("Indexed {0} records into \"{1}\" at {2}", records.Count, _indexName, _endpoint);
            }
        }
示例#26
0
        /// <summary>
        ///     Specification for the pattern-replace filter: with pattern 'cat' and replacement text 'dog',
        ///     analyzing 'cat concatenate catycat' must yield 'dog', 'condogenate', 'dogydog'.
        /// </summary>
        /// <param name="sut">Pattern-replace filter factory under test.</param>
        /// <param name="resourceLoader">Resource loader handed to the filter when it is initialized.</param>
        public void PatternReplaceTests(Filters.PatternReplaceFilterFactory sut, Interface.IResourceLoader resourceLoader)
        {
            List<string> result = null;

            "Given a PatternReplace Filter".Given(() => { });
            "when a sample text 'cat concatenate catycat' is analyzed with pattern:cat and replacementtext:dog".When(
                () =>
                {
                    ((Interface.IFlexFilterFactory)sut).Initialize(new Dictionary<string, string> { { "pattern", "cat" }, { "replacementtext", "dog" } }, resourceLoader);
                    var filters = new List<Interface.IFlexFilterFactory> { sut };
                    var analyzer = new CustomAnalyzer(new Tokenizers.StandardTokenizerFactory(), filters.ToArray());
                    result = SearchDsl.ParseTextUsingAnalyzer(analyzer, "test", "cat concatenate catycat");
                });

            "it should produce 3 tokens".Observation(() => result.Count.Should().Be(3));
            // The description now names the tokens that are actually asserted below.
            "it should be 'dog', 'condogenate', 'dogydog'".Observation(
                () => result.Should().Equal(new List<string> { "dog", "condogenate", "dogydog" }));
        }
        /// <summary>
        ///     Shared specification for tokenizers: analyzes <paramref name="parseString" /> with the given
        ///     tokenizer and checks that the produced tokens equal <paramref name="expected" />.
        /// </summary>
        /// <param name="tokenizerName">Display name of the tokenizer, used only in the specification text.</param>
        /// <param name="tokenizerFactory">Tokenizer factory under test.</param>
        /// <param name="parseString">Text to analyze.</param>
        /// <param name="expected">Tokens the analysis is expected to produce, in order.</param>
        public void GenericTokenizerTests(
            string tokenizerName,
            Interface.IFlexTokenizerFactory tokenizerFactory,
            string parseString,
            List<string> expected)
        {
            List<string> result = null;
            (string.Format("Given a {0}", tokenizerName)).Given(() => { });
            (string.Format("when a sample text {0} is analyzed", parseString)).When(
                () =>
                {
                    // Creating a dummy filter which won't do anything so that we can test the effect of tokenizer
                    // in a stand alone manner
                    // NOTE(review): the filter replaces "1" with "", so it is presumably only a no-op
                    // for inputs that do not contain the character '1' - confirm against the test data.
                    Interface.IFlexFilterFactory filter = new Filters.PatternReplaceFilterFactory();
                    filter.Initialize(
                        new Dictionary<string, string> { { "pattern", "1" }, { "replacementtext", "" } },
                        new Factories.ResourceLoader());
                    var filters = new List<Interface.IFlexFilterFactory> { filter };
                    var analyzer = new CustomAnalyzer(tokenizerFactory, filters.ToArray());
                    result = SearchDsl.ParseTextUsingAnalyzer(analyzer, "test", parseString);
                });

            string.Format("it should produce {0} tokens", expected.Count)
                .Observation(() => result.Count.Should().Be(expected.Count));

            // Join with "', '" so every token is wrapped in its own pair of quotes: 'a', 'b', 'c'.
            string.Format("it should be '{0}'", string.Join("', '", expected))
                .Observation(() => result.Should().Equal(expected));
        }