private static Index IndexDefinition()
{
    return new Index()
    {
        Name = "types",
        Fields = new[]
        {
            // Key field; retrieval only.
            new Field("id", DataType.String)
            {
                IsKey = true, IsRetrievable = true, IsFilterable = false, IsSortable = false, IsFacetable = false, IsSearchable = false
            },

            // Namespaces are searchable with the custom name analyzers, and can also be filtered and faceted.
            new Field("namespaces", DataType.Collection(DataType.String))
            {
                IsRetrievable = false, IsFilterable = true, IsSortable = false, IsFacetable = true, IsSearchable = true,
                IndexAnalyzer = AnalyzerName.Create("name_index"),
                SearchAnalyzer = AnalyzerName.Create("name_search")
            },

            // Type names, analyzed for prefix matching on name fragments.
            new Field("types", DataType.Collection(DataType.String))
            {
                IsRetrievable = false, IsFilterable = false, IsSortable = false, IsFacetable = false, IsSearchable = true,
                IndexAnalyzer = AnalyzerName.Create("name_index"),
                SearchAnalyzer = AnalyzerName.Create("name_search")
            },

            // Type names again, analyzed for camel-hump matching (e.g. "cd" matching "ConcurrentDictionary").
            new Field("typesCamelHump", DataType.Collection(DataType.String))
            {
                IsRetrievable = false, IsFilterable = false, IsSortable = false, IsFacetable = false, IsSearchable = true,
                IndexAnalyzer = AnalyzerName.Create("camel_hump_index"),
                SearchAnalyzer = AnalyzerName.Create("camel_hump_search")
            },

            // Retrieval-only package metadata.
            new Field("packageId", DataType.String)
            {
                IsRetrievable = true, IsFilterable = false, IsSortable = false, IsFacetable = false, IsSearchable = false
            },
            new Field("packageVersion", DataType.String)
            {
                IsRetrievable = true, IsFilterable = false, IsSortable = false, IsFacetable = false, IsSearchable = false
            },

            // Filter, facet, and sort fields.
            new Field("prerelease", DataType.Boolean)
            {
                IsRetrievable = false, IsFilterable = true, IsSortable = false, IsFacetable = true, IsSearchable = false
            },
            new Field("netstandardVersion", DataType.Int32)
            {
                IsRetrievable = true, IsFilterable = true, IsSortable = false, IsFacetable = true, IsSearchable = false
            },
            new Field("published", DataType.DateTimeOffset)
            {
                IsRetrievable = true, IsFilterable = true, IsSortable = true, IsFacetable = false, IsSearchable = false
            },
            new Field("totalDownloadCount", DataType.Int32)
            {
                IsRetrievable = true, IsFilterable = true, IsSortable = true, IsFacetable = false, IsSearchable = false
            },
        },
        // Allow browser clients from any origin to query the index directly.
        CorsOptions = new CorsOptions()
        {
            AllowedOrigins = new[] { "*" },
            MaxAgeInSeconds = (long)TimeSpan.FromHours(2).TotalSeconds,
        },
        CharFilters = new CharFilter[]
        {
            // Replaces generic type parameter lists with a period, e.g. "Dictionary<TKey, TValue>" -> "Dictionary.".
            new PatternReplaceCharFilter
            {
                Name = "remove_generics",
                Pattern = "<[^>]*>",
                Replacement = ".",
            },
            // Collapses every run of non-uppercase characters to a period, leaving only the "humps" of a camel-case name.
            new PatternReplaceCharFilter
            {
                Name = "remove_non_uppercase",
                Pattern = "[^A-Z]+",
                Replacement = ".",
            },
            // Maps each period to a space or to nothing, respectively.
            new MappingCharFilter
            {
                Name = "period_to_space",
                Mappings = new[] { @".=>\u0020" },
            },
            new MappingCharFilter
            {
                Name = "period_to_empty_string",
                Mappings = new[] { @".=>" },
            },
        },
        TokenFilters = new TokenFilter[]
        {
            // Emits front edge n-grams of 2-16 characters so that partial prefixes match at query time.
            new EdgeNGramTokenFilter
            {
                Name = "my_ngram",
                MinGram = 2,
                MaxGram = 16,
                Side = EdgeNGramTokenFilterSide.Front,
            },
        },
        Analyzers = new Analyzer[]
        {
            new CustomAnalyzer
            {
                Name = "name_search",
                CharFilters = new[]
                {
                    CharFilterName.Create("remove_generics"),
                    CharFilterName.Create("period_to_space"),
                },
                Tokenizer = TokenizerName.Whitespace,
                TokenFilters = new[]
                {
                    TokenFilterName.Lowercase,
                },
            },
            // Same chain as name_search, plus edge n-grams at indexing time only.
            new CustomAnalyzer
            {
                Name = "name_index",
                CharFilters = new[]
                {
                    CharFilterName.Create("remove_generics"),
                    CharFilterName.Create("period_to_space"),
                },
                Tokenizer = TokenizerName.Whitespace,
                TokenFilters = new[]
                {
                    TokenFilterName.Lowercase,
                },
            },
            new CustomAnalyzer
            {
                Name = "camel_hump_search",
                Tokenizer = TokenizerName.Whitespace,
                TokenFilters = new[]
                {
                    TokenFilterName.Lowercase,
                },
            },
            // Reduces a type name to its uppercase "humps" and indexes their prefixes,
            // e.g. "ConcurrentDictionary<TKey, TValue>" -> "CD" -> lowercase n-grams of "cd".
            new CustomAnalyzer
            {
                Name = "camel_hump_index",
                CharFilters = new[]
                {
                    CharFilterName.Create("remove_generics"),
                    CharFilterName.Create("remove_non_uppercase"),
                    CharFilterName.Create("period_to_empty_string"),
                },
                Tokenizer = TokenizerName.Keyword,
                TokenFilters = new[]
                {
                    TokenFilterName.Lowercase,
                    TokenFilterName.Create("my_ngram"),
                },
            },
        },
    };
}
public void CanCreateAllAnalysisComponents()
{
    Run(() =>
    {
        // Declare some custom component names to use with CustomAnalyzer. All other names will be randomly generated.
        var customTokenizerName = TokenizerName.Create("my_tokenizer");
        var customTokenFilterName = TokenFilterName.Create("my_tokenfilter");
        var customCharFilterName = CharFilterName.Create("my_charfilter");

        Index index = CreateTestIndex();
        index.Analyzers = new Analyzer[]
        {
            new CustomAnalyzer(
                SearchTestUtilities.GenerateName(),
                customTokenizerName,
                new[] { customTokenFilterName },
                new[] { customCharFilterName }),
            new CustomAnalyzer(
                SearchTestUtilities.GenerateName(),
                TokenizerName.EdgeNGram),
            new PatternAnalyzer(
                SearchTestUtilities.GenerateName(),
                lowerCaseTerms: false,
                pattern: "abc",
                flags: RegexFlags.DotAll,
                stopwords: new[] { "the" }),
            new StandardAnalyzer(SearchTestUtilities.GenerateName(), maxTokenLength: 100, stopwords: new[] { "the" }),
            new StopAnalyzer(SearchTestUtilities.GenerateName(), stopwords: new[] { "the" }),
            new StopAnalyzer(SearchTestUtilities.GenerateName())
        };

        index.Tokenizers = new Tokenizer[]
        {
            new EdgeNGramTokenizer(customTokenizerName, minGram: 1, maxGram: 1),    // One custom tokenizer for CustomAnalyzer above.
            new EdgeNGramTokenizer(
                SearchTestUtilities.GenerateName(),
                minGram: 2,
                maxGram: 4,
                tokenChars: new[] { TokenCharacterKind.Letter }),
            new NGramTokenizer(SearchTestUtilities.GenerateName(), minGram: 2, maxGram: 4, tokenChars: new[] { TokenCharacterKind.Letter }),
            new ClassicTokenizer(SearchTestUtilities.GenerateName(), maxTokenLength: 100),
            new KeywordTokenizer(SearchTestUtilities.GenerateName(), bufferSize: 100),
            new MicrosoftLanguageStemmingTokenizer(
                SearchTestUtilities.GenerateName(),
                maxTokenLength: 100,
                isSearchTokenizer: true,
                language: MicrosoftStemmingTokenizerLanguage.Croatian),
            new MicrosoftLanguageTokenizer(
                SearchTestUtilities.GenerateName(),
                maxTokenLength: 100,
                isSearchTokenizer: true,
                language: MicrosoftTokenizerLanguage.Thai),
            new PathHierarchyTokenizer(
                SearchTestUtilities.GenerateName(),
                delimiter: ':',
                replacement: '_',
                bufferSize: 100,
                reverseTokenOrder: true,
                numberOfTokensToSkip: 2),
            new PatternTokenizer(
                SearchTestUtilities.GenerateName(),
                pattern: ".*",
                flags: RegexFlags.Multiline | RegexFlags.Literal,
                group: 0),
            new StandardTokenizer(SearchTestUtilities.GenerateName(), maxTokenLength: 100),
            new UaxUrlEmailTokenizer(SearchTestUtilities.GenerateName(), maxTokenLength: 100)
        };

        index.TokenFilters = new TokenFilter[]
        {
            new CjkBigramTokenFilter(customTokenFilterName),    // One custom token filter for CustomAnalyzer above.
            new CjkBigramTokenFilter(
                SearchTestUtilities.GenerateName(),
                ignoreScripts: new[] { CjkBigramTokenFilterScripts.Han },
                outputUnigrams: true),
            new CjkBigramTokenFilter(SearchTestUtilities.GenerateName()),
            new AsciiFoldingTokenFilter(SearchTestUtilities.GenerateName(), preserveOriginal: true),
            new AsciiFoldingTokenFilter(SearchTestUtilities.GenerateName()),
            new CommonGramTokenFilter(
                SearchTestUtilities.GenerateName(),
                commonWords: new[] { "hello", "goodbye" },
                ignoreCase: true,
                useQueryMode: true),
            new CommonGramTokenFilter(SearchTestUtilities.GenerateName(), commonWords: new[] { "at" }),
            new DictionaryDecompounderTokenFilter(
                SearchTestUtilities.GenerateName(),
                wordList: new[] { "Schadenfreude" },
                minWordSize: 10,
                minSubwordSize: 5,
                maxSubwordSize: 13,
                onlyLongestMatch: true),
            new EdgeNGramTokenFilter(SearchTestUtilities.GenerateName(), minGram: 2, maxGram: 10, side: EdgeNGramTokenFilterSide.Back),
            new ElisionTokenFilter(SearchTestUtilities.GenerateName(), articles: new[] { "a" }),
            new ElisionTokenFilter(SearchTestUtilities.GenerateName()),
            new KeepTokenFilter(SearchTestUtilities.GenerateName(), keepWords: new[] { "aloha" }, lowerCaseKeepWords: true),
            new KeepTokenFilter(SearchTestUtilities.GenerateName(), keepWords: new[] { "e", "komo", "mai" }),
            new KeywordMarkerTokenFilter(SearchTestUtilities.GenerateName(), keywords: new[] { "key", "words" }, ignoreCase: true),
            new KeywordMarkerTokenFilter(SearchTestUtilities.GenerateName(), keywords: new[] { "essential" }),
            new LengthTokenFilter(SearchTestUtilities.GenerateName(), min: 5, max: 10),
            new LimitTokenFilter(SearchTestUtilities.GenerateName(), maxTokenCount: 10, consumeAllTokens: true),
            new NGramTokenFilter(SearchTestUtilities.GenerateName(), minGram: 2, maxGram: 3),
            new PatternCaptureTokenFilter(SearchTestUtilities.GenerateName(), patterns: new[] { ".*" }, preserveOriginal: false),
            new PatternReplaceTokenFilter(SearchTestUtilities.GenerateName(), pattern: "abc", replacement: "123"),
            new PhoneticTokenFilter(SearchTestUtilities.GenerateName(), encoder: PhoneticEncoder.Soundex, replaceOriginalTokens: false),
            new ShingleTokenFilter(
                SearchTestUtilities.GenerateName(),
                maxShingleSize: 10,
                minShingleSize: 5,
                outputUnigrams: false,
                outputUnigramsIfNoShingles: true,
                tokenSeparator: " ",
                filterToken: "|"),
            new SnowballTokenFilter(SearchTestUtilities.GenerateName(), SnowballTokenFilterLanguage.English),
            new StemmerOverrideTokenFilter(SearchTestUtilities.GenerateName(), rules: new[] { "ran => run" }),
            new StemmerTokenFilter(SearchTestUtilities.GenerateName(), StemmerTokenFilterLanguage.French),
            new StopwordsTokenFilter(
                SearchTestUtilities.GenerateName(),
                stopwords: new[] { "a", "the" },
                ignoreCase: true,
                removeTrailingStopWords: false),
            new StopwordsTokenFilter(
                SearchTestUtilities.GenerateName(),
                stopwordsList: StopwordsList.Italian,
                ignoreCase: true,
                removeTrailingStopWords: false),
            new SynonymTokenFilter(SearchTestUtilities.GenerateName(), synonyms: new[] { "great, good" }, ignoreCase: true, expand: false),
            new TruncateTokenFilter(SearchTestUtilities.GenerateName(), length: 10),
            new UniqueTokenFilter(SearchTestUtilities.GenerateName(), onlyOnSamePosition: true),
            new UniqueTokenFilter(SearchTestUtilities.GenerateName()),
            new WordDelimiterTokenFilter(
                SearchTestUtilities.GenerateName(),
                generateWordParts: false,
                generateNumberParts: false,
                catenateWords: true,
                catenateNumbers: true,
                catenateAll: true,
                splitOnCaseChange: false,
                preserveOriginal: true,
                splitOnNumerics: false,
                stemEnglishPossessive: false,
                protectedWords: new[] { "protected" })
        };

        index.CharFilters = new CharFilter[]
        {
            new MappingCharFilter(customCharFilterName, mappings: new[] { "a => b" }),  // One custom char filter for CustomAnalyzer above.
            new MappingCharFilter(SearchTestUtilities.GenerateName(), mappings: new[] { "s => $", "S => $" }),
            new PatternReplaceCharFilter(SearchTestUtilities.GenerateName(), pattern: "abc", replacement: "123")
        };

        // We have to split up analysis components into two indexes, one where any components with optional properties have defaults that
        // are zero or null (default(T)), and another where we need to specify the default values we expect to get back from the REST API.
        Func<int, string> generateSimpleName = n => string.Format(CultureInfo.InvariantCulture, "a{0}", n);

        int i = 0;
        Index indexWithSpecialDefaults = CreateTestIndex();
        indexWithSpecialDefaults.Analyzers = new Analyzer[]
        {
            new PatternAnalyzer(generateSimpleName(i++)),
            new StandardAnalyzer(generateSimpleName(i++))
        };

        indexWithSpecialDefaults.Tokenizers = new Tokenizer[]
        {
            new EdgeNGramTokenizer(generateSimpleName(i++)),
            new NGramTokenizer(generateSimpleName(i++)),
            new ClassicTokenizer(generateSimpleName(i++)),
            new KeywordTokenizer(generateSimpleName(i++)),
            new MicrosoftLanguageStemmingTokenizer(generateSimpleName(i++)),
            new MicrosoftLanguageTokenizer(generateSimpleName(i++)),
            new PathHierarchyTokenizer(generateSimpleName(i++)),
            new PatternTokenizer(generateSimpleName(i++)),
            new StandardTokenizer(generateSimpleName(i++)),
            new UaxUrlEmailTokenizer(generateSimpleName(i++))
        };

        indexWithSpecialDefaults.TokenFilters = new TokenFilter[]
        {
            new DictionaryDecompounderTokenFilter(
                generateSimpleName(i++),
                wordList: new[] { "Bahnhof" }),
            new EdgeNGramTokenFilter(generateSimpleName(i++)),
            new LengthTokenFilter(generateSimpleName(i++)),
            new LimitTokenFilter(generateSimpleName(i++)),
            new NGramTokenFilter(generateSimpleName(i++)),
            new PatternCaptureTokenFilter(generateSimpleName(i++), patterns: new[] { "[a-z]*" }),
            new PhoneticTokenFilter(generateSimpleName(i++)),
            new ShingleTokenFilter(generateSimpleName(i++)),
            new StopwordsTokenFilter(generateSimpleName(i++)),
            new SynonymTokenFilter(generateSimpleName(i++), synonyms: new[] { "mutt, canine => dog" }),
            new TruncateTokenFilter(generateSimpleName(i++)),
            new WordDelimiterTokenFilter(generateSimpleName(i++))
        };

        i = 0;
        Index expectedIndexWithSpecialDefaults = CreateTestIndex();
        expectedIndexWithSpecialDefaults.Name = indexWithSpecialDefaults.Name;
        expectedIndexWithSpecialDefaults.Analyzers = new Analyzer[]
        {
            new PatternAnalyzer(generateSimpleName(i++), lowerCaseTerms: true, pattern: @"\W+"),
            new StandardAnalyzer(generateSimpleName(i++), maxTokenLength: 255)
        };

        expectedIndexWithSpecialDefaults.Tokenizers = new Tokenizer[]
        {
            new EdgeNGramTokenizer(generateSimpleName(i++), minGram: 1, maxGram: 2),
            new NGramTokenizer(generateSimpleName(i++), minGram: 1, maxGram: 2),
            new ClassicTokenizer(generateSimpleName(i++), maxTokenLength: 255),
            new KeywordTokenizer(generateSimpleName(i++), bufferSize: 256),
            new MicrosoftLanguageStemmingTokenizer(generateSimpleName(i++), maxTokenLength: 255),
            new MicrosoftLanguageTokenizer(generateSimpleName(i++), maxTokenLength: 255),
            new PathHierarchyTokenizer(generateSimpleName(i++), delimiter: '/', replacement: '/', bufferSize: 1024),
            new PatternTokenizer(generateSimpleName(i++), pattern: @"\W+", group: -1),
            new StandardTokenizer(generateSimpleName(i++), maxTokenLength: 255),
            new UaxUrlEmailTokenizer(generateSimpleName(i++), maxTokenLength: 255)
        };

        expectedIndexWithSpecialDefaults.TokenFilters = new TokenFilter[]
        {
            new DictionaryDecompounderTokenFilter(
                generateSimpleName(i++),
                wordList: new[] { "Bahnhof" },
                minWordSize: 5,
                minSubwordSize: 2,
                maxSubwordSize: 15),
            new EdgeNGramTokenFilter(generateSimpleName(i++), minGram: 1, maxGram: 2, side: EdgeNGramTokenFilterSide.Front),
            new LengthTokenFilter(generateSimpleName(i++), max: int.MaxValue),
            new LimitTokenFilter(generateSimpleName(i++), maxTokenCount: 1),
            new NGramTokenFilter(generateSimpleName(i++), minGram: 1, maxGram: 2),
            new PatternCaptureTokenFilter(generateSimpleName(i++), patterns: new[] { "[a-z]*" }, preserveOriginal: true),
            new PhoneticTokenFilter(generateSimpleName(i++), encoder: PhoneticEncoder.Metaphone, replaceOriginalTokens: true),
            new ShingleTokenFilter(
                generateSimpleName(i++),
                maxShingleSize: 2,
                minShingleSize: 2,
                outputUnigrams: true,
                tokenSeparator: " ",
                filterToken: "_"),
            new StopwordsTokenFilter(generateSimpleName(i++), stopwordsList: StopwordsList.English, removeTrailingStopWords: true),
            new SynonymTokenFilter(generateSimpleName(i++), synonyms: new[] { "mutt, canine => dog" }, expand: true),
            new TruncateTokenFilter(generateSimpleName(i++), length: 300),
            new WordDelimiterTokenFilter(
                generateSimpleName(i++),
                generateWordParts: true,
                generateNumberParts: true,
                splitOnCaseChange: true,
                splitOnNumerics: true,
                stemEnglishPossessive: true)
        };

        // This is to make sure we didn't forget any components in this test.
        AssertIndexContainsAllAnalysisComponents(index, indexWithSpecialDefaults);

        TestAnalysisComponents(index);
        TestAnalysisComponents(indexWithSpecialDefaults, expectedIndexWithSpecialDefaults);
    });
}
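// TestAnalysisComponents is a test helper not shown here; conceptually it round-trips an
// index through the service and compares what comes back against the expected index. A
// minimal sketch of that idea, assuming a SearchServiceClient and xunit's Assert (the
// method name and parameter names below are illustrative, not the SDK test's actual code):
private static void RoundTripSketch(SearchServiceClient client, Index index, Index expectedIndex)
{
    client.Indexes.Create(index);
    Index actualIndex = client.Indexes.Get(index.Name);

    // For components created with all-default properties, the service fills in its own
    // defaults; e.g. a bare PatternAnalyzer comes back with pattern @"\W+" and
    // lowerCaseTerms = true, which is why the test builds expectedIndexWithSpecialDefaults.
    Assert.Equal(expectedIndex.Analyzers.Count, actualIndex.Analyzers.Count);
}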