/// <summary>
/// Uploads the metadata documents, then streams the entity documents read from
/// <paramref name="dataRootFolder"/> into the index in fixed-size batches.
/// </summary>
/// <param name="serviceClient">Search service client passed through to the upload helper.</param>
/// <param name="indexName">Name of the index that receives the documents.</param>
/// <param name="dataRootFolder">Folder handed to the entity reader.</param>
public static async Task UploadDocumentsAsync(ISearchServiceClient serviceClient, string indexName, string dataRootFolder)
{
    // Number of entity documents accumulated before each upload call.
    const int MaxBatchSize = 20000;

    //
    // Metadata goes first, in a single upload call.
    //
    await UploadDocumentsCoreAsync(serviceClient, indexName, Document.ReadMetadataAsDocuments());

    //
    // Entities are read as a stream and flushed batch by batch.
    //
    var pending = new List<Document>();

    // Sends whatever is currently pending and empties the batch for reuse.
    async Task FlushAsync()
    {
        await UploadDocumentsCoreAsync(serviceClient, indexName, pending);
        pending.Clear();
    }

    await foreach (var document in ReadEntitiesAsDocumentsAsync(dataRootFolder))
    {
        pending.Add(document);

        if (pending.Count == MaxBatchSize)
        {
            await FlushAsync();
        }
    }

    // Upload the final, partially filled batch (if any).
    if (pending.Count != 0)
    {
        await FlushAsync();
    }
}
/// <summary>
/// Creates the index named <paramref name="indexName"/>, deleting any existing index
/// with the same name first. Progress is reported on the console.
/// </summary>
/// <param name="serviceClient">Search service client used for the exists/delete/create calls.</param>
/// <param name="indexName">Name of the index to (re)create.</param>
public static async Task ForceCreateIndexAsync(ISearchServiceClient serviceClient, string indexName)
{
    bool alreadyExists = await serviceClient.Indexes.ExistsAsync(indexName);
    if (alreadyExists)
    {
        Console.WriteLine($"Index '{indexName}' exists. Deleting the existing index...");
        await serviceClient.Indexes.DeleteAsync(indexName);
        Console.WriteLine($"Index '{indexName}' was deleted.");
    }

    Console.WriteLine($"Creating index '{indexName}'...");

    // Fields are derived from the Document type; one scoring profile is attached.
    var definition = new Index();
    definition.Name = indexName;
    definition.Fields = FieldBuilder.BuildForType<Document>();
    definition.ScoringProfiles = new List<ScoringProfile>
    {
        Document.CreatePrimaryFieldFavoredScoringProfile(),
    };

    await serviceClient.Indexes.CreateAsync(definition);

    Console.WriteLine($"Index '{indexName}' was created.");
}
/// <summary>
/// Streams the rows of a CSV file as <see cref="Document"/> instances for one entity type.
/// Rows whose id column does not parse as a GUID are skipped.
/// </summary>
/// <param name="csvFile">Path of the CSV file to read.</param>
/// <param name="entityName">Entity name stamped on every document and used to select the properties to index.</param>
/// <param name="entityIdAttributeName">Name of the CSV column that holds the entity id.</param>
/// <returns>One document per row that has a valid GUID id.</returns>
private static async IAsyncEnumerable<Document> ReadEntitiesAsDocumentsAsync(string csvFile, string entityName, string entityIdAttributeName)
{
    var indexedProperties = Document.GetPropertiesToIndex(entityName);

    using var fileReader = new StreamReader(csvFile);
    using var csv = new CsvReader(fileReader, CultureInfo.InvariantCulture);

    await foreach (IDictionary<string, object> row in csv.GetRecordsAsync<dynamic>())
    {
        var document = new Document();

        //
        // Common fields: the id is normalized through Guid formatting.
        //
        document.EntityId = row[entityIdAttributeName]?.ToString();
        if (!Guid.TryParse(document.EntityId, out var parsedId))
        {
            continue;
        }

        document.EntityId = parsedId.ToString();
        document.EntityName = entityName;

        //
        // Indexed fields: empty values are left unset.
        //
        foreach (var property in indexedProperties)
        {
            string attributeName = property.CdsAttributeName;
            string text = row[attributeName]?.ToString();
            if (string.IsNullOrEmpty(text))
            {
                continue;
            }

            //
            // Look up synonyms for the attributes that have a synonym map.
            // The lookup key is the trimmed value; the stored value keeps its original spacing.
            //
            string synonyms = null;
            bool hasSynonyms = false;
            switch (attributeName)
            {
                case "address1_city":
                    hasSynonyms = SynonymMap.CitySynonymMap.TryGetSynonyms(text.Trim(), out synonyms);
                    break;

                case "address1_stateorprovince":
                    hasSynonyms = SynonymMap.StateOrProvinceSynonymMap.TryGetSynonyms(text.Trim(), out synonyms);
                    break;

                case "address1_country":
                    hasSynonyms = SynonymMap.CountrySynonymMap.TryGetSynonyms(text.Trim(), out synonyms);
                    break;

                case "name" when entityName == "account":
                    hasSynonyms = SynonymMap.OrganizationSynonymMap.TryGetSynonyms(text.Trim(), out synonyms);
                    break;
            }

            if (hasSynonyms)
            {
                text = $"{text}{Document.SynonymDelimiter}{synonyms}";
            }

            property.PropertyInfo.SetValue(document, text);
        }

        yield return document;
    }
}
/// <summary>
/// Turns the hit highlights of every non-metadata search result into matched terms
/// and appends them to <c>context.MatchedTerms</c>.
/// </summary>
/// <remarks>
/// For a field whose value carries synonyms (delimited by <c>Document.SynonymDelimiter</c>),
/// each synonym is looked up in the search text, and the first delimited part of the first
/// highlight fragment is matched as well. For any other field, every highlight fragment is
/// matched against the search text. Highlights whose key cannot be resolved to a CDS
/// attribute name are ignored.
/// </remarks>
/// <param name="context">Search context supplying the results, search text and tokens, and receiving the matched terms.</param>
public async Task ProcessAsync(SearchResultHandlerContext context)
{
    await Task.Yield();

    // Builds the MatchedTerm + single InstanceValue binding shape shared by every branch below.
    MatchedTerm CreateTerm(string text, int startIndex, string table, string column, string value, bool isExactlyMatch, bool isSynonymMatch)
    {
        var term = new MatchedTerm
        {
            Text = text,
            StartIndex = startIndex,
            Length = text.Length,
            TermBindings = new HashSet<TermBinding>(),
        };

        term.TermBindings.Add(new TermBinding()
        {
            BindingType = BindingType.InstanceValue,
            SearchScope = new SearchScope()
            {
                Table = table,
                Column = column,
            },
            Value = value,
            IsExactlyMatch = isExactlyMatch,
            IsSynonymMatch = isSynonymMatch,
        });

        return term;
    }

    foreach (var searchResult in context.SearchResults)
    {
        string cdsEntityName = searchResult.Document[Document.EntityNameFieldName].ToString();
        if (cdsEntityName == Document.MetadataEntityName)
        {
            continue;
        }

        // NOTE(review): Highlights is presumably null when the service returned no
        // highlights for this hit — confirm against the SDK; skipping avoids a NullReferenceException.
        if (searchResult.Highlights == null)
        {
            continue;
        }

        foreach (var highlight in searchResult.Highlights)
        {
            if (!Document.TryResolveCdsAttributeName(highlight.Key, cdsEntityName, out string cdsAttributeName))
            {
                continue;
            }

            string fieldValue = searchResult.Document[highlight.Key].ToString();

            if (!ContainsSynonyms(cdsEntityName, cdsAttributeName, fieldValue))
            {
                // Plain field: every highlight fragment is matched against the search text.
                foreach (string fragment in highlight.Value)
                {
                    foreach ((string matchedText, int startOffset) in FindMatchedTexts(context, fragment))
                    {
                        context.MatchedTerms.Add(CreateTerm(
                            matchedText,
                            startOffset,
                            cdsEntityName,
                            cdsAttributeName,
                            fieldValue,
                            StringComparer.OrdinalIgnoreCase.Equals(matchedText, fieldValue),
                            isSynonymMatch: false));
                    }
                }

                continue;
            }

            // Entries that are empty after trimming are dropped: an empty synonym would
            // "match" at offset 0 and produce a zero-length matched term.
            string[] synonyms = fieldValue
                .Split(Document.SynonymDelimiter, StringSplitOptions.RemoveEmptyEntries)
                .Select(s => s.Trim())
                .Where(s => s.Length > 0)
                .ToArray();

            if (synonyms.Length == 0)
            {
                // Nothing but delimiters/whitespace: there is no value to bind to.
                continue;
            }

            // The actual value is the first synonym.
            string actualValue = synonyms[0];

            foreach (string synonym in synonyms)
            {
                int startOffset = -1;

                // TODO: Synonym can be matched in multiple places in the search text. Need to deal with the case.
                if (synonym.Split(' ', StringSplitOptions.RemoveEmptyEntries).Length == 1)
                {
                    // The synonym is a single token. Match it with search tokens.
                    var matchedToken = context.SearchTokens.FirstOrDefault(t => StringComparer.OrdinalIgnoreCase.Equals(synonym, t.Token));
                    if (matchedToken != null)
                    {
                        startOffset = (int)matchedToken.StartOffset;
                    }
                }
                else
                {
                    // TODO: have a better algorithm to match the synonym that contains multiple tokens.
                    startOffset = context.SearchText.IndexOf(synonym, StringComparison.OrdinalIgnoreCase);
                }

                if (startOffset < 0)
                {
                    continue;
                }

                // Take the text as the user typed it (original casing) rather than the stored synonym.
                string matchedText = context.SearchText.Substring(startOffset, synonym.Length);

                context.MatchedTerms.Add(CreateTerm(
                    matchedText,
                    startOffset,
                    cdsEntityName,
                    cdsAttributeName,
                    actualValue,
                    isExactlyMatch: true,
                    isSynonymMatch: !StringComparer.OrdinalIgnoreCase.Equals(actualValue, synonym)));
            }

            //
            // TODO: Design a better data structure to support synonym in the same field.
            //
            // FirstOrDefault (rather than [0]) tolerates an empty highlight list and a
            // fragment that splits into no parts; both previously threw on indexing.
            string firstSynonymFragment = highlight.Value
                .FirstOrDefault()
                ?.Split(Document.SynonymDelimiter, StringSplitOptions.RemoveEmptyEntries)
                .FirstOrDefault();

            if (string.IsNullOrEmpty(firstSynonymFragment))
            {
                continue;
            }

            foreach ((string matchedText, int startOffset) in FindMatchedTexts(context, firstSynonymFragment))
            {
                context.MatchedTerms.Add(CreateTerm(
                    matchedText,
                    startOffset,
                    cdsEntityName,
                    cdsAttributeName,
                    actualValue,
                    StringComparer.OrdinalIgnoreCase.Equals(matchedText, actualValue),
                    isSynonymMatch: false));
            }
        }
    }
}