Exemple #1
0
        /// <summary>
        /// Uploads the metadata documents, then every entity document read from
        /// <paramref name="dataRootFolder"/>, to the index named <paramref name="indexName"/>.
        /// </summary>
        /// <param name="serviceClient">Search service client forwarded to the upload helper.</param>
        /// <param name="indexName">Name of the index that receives the documents.</param>
        /// <param name="dataRootFolder">Folder handed to the entity reader.</param>
        /// <returns>A task that completes once the metadata and all entity batches have been uploaded.</returns>
        public static async Task UploadDocumentsAsync(ISearchServiceClient serviceClient, string indexName, string dataRootFolder)
        {
            // Entity documents are accumulated and uploaded in batches of this size.
            const int BatchSize = 20000;

            // Metadata is uploaded first, in a single call.
            await UploadDocumentsCoreAsync(serviceClient, indexName, Document.ReadMetadataAsDocuments());

            var pending = new List<Document>();

            // Uploads whatever is currently pending and then empties the batch.
            async Task FlushAsync()
            {
                await UploadDocumentsCoreAsync(serviceClient, indexName, pending);
                pending.Clear();
            }

            await foreach (var entityDocument in ReadEntitiesAsDocumentsAsync(dataRootFolder))
            {
                pending.Add(entityDocument);

                if (pending.Count != BatchSize)
                {
                    continue;
                }

                await FlushAsync();
            }

            // Upload the trailing, partially filled batch (if there is one).
            if (pending.Count != 0)
            {
                await FlushAsync();
            }
        }
Exemple #2
0
        /// <summary>
        /// Creates the index named <paramref name="indexName"/>. If an index with that name
        /// already exists it is deleted first. Progress is reported on the console.
        /// </summary>
        /// <param name="serviceClient">Search service client used to query, delete and create the index.</param>
        /// <param name="indexName">Name of the index to (re)create.</param>
        /// <returns>A task that completes once the index has been created.</returns>
        public static async Task ForceCreateIndexAsync(ISearchServiceClient serviceClient, string indexName)
        {
            bool alreadyExists = await serviceClient.Indexes.ExistsAsync(indexName);

            if (alreadyExists)
            {
                Console.WriteLine($"Index '{indexName}' exists. Deleting the existing index...");
                await serviceClient.Indexes.DeleteAsync(indexName);
                Console.WriteLine($"Index '{indexName}' was deleted.");
            }

            Console.WriteLine($"Creating index '{indexName}'...");

            // Fields are derived from the Document type; a single scoring profile is attached.
            var index = new Index();
            index.Name = indexName;
            index.Fields = FieldBuilder.BuildForType<Document>();
            index.ScoringProfiles = new List<ScoringProfile>
            {
                Document.CreatePrimaryFieldFavoredScoringProfile(),
            };

            await serviceClient.Indexes.CreateAsync(index);

            Console.WriteLine($"Index '{indexName}' was created.");
        }
Exemple #3
0
        /// <summary>
        /// Streams the rows of a CSV file as <see cref="Document"/> instances for one entity.
        /// </summary>
        /// <remarks>
        /// Rows whose id column does not parse as a <see cref="Guid"/> are skipped. For each
        /// property to index, a null or empty CSV value is left unset. City, state/province
        /// and country values, and the name of an "account" entity, get their synonyms
        /// appended after <c>Document.SynonymDelimiter</c> when the matching synonym map has an entry.
        /// </remarks>
        /// <param name="csvFile">Path of the CSV file to read.</param>
        /// <param name="entityName">Entity name stamped on every document and used to select the properties to index.</param>
        /// <param name="entityIdAttributeName">Name of the CSV column holding the entity id.</param>
        /// <returns>An asynchronous sequence with one document per accepted CSV row.</returns>
        private static async IAsyncEnumerable<Document> ReadEntitiesAsDocumentsAsync(string csvFile, string entityName, string entityIdAttributeName)
        {
            var propertiesToIndex = Document.GetPropertiesToIndex(entityName);

            using var streamReader = new StreamReader(csvFile);
            using var csvReader = new CsvReader(streamReader, CultureInfo.InvariantCulture);

            await foreach (IDictionary<string, object> row in csvReader.GetRecordsAsync<dynamic>())
            {
                // Common fields: the id must be a GUID and is stored in its normalized form.
                string rawEntityId = row[entityIdAttributeName]?.ToString();

                if (!Guid.TryParse(rawEntityId, out Guid parsedEntityId))
                {
                    continue;
                }

                var document = new Document
                {
                    EntityId = parsedEntityId.ToString(),
                    EntityName = entityName,
                };

                // Fields to be indexed.
                foreach (var propertyToIndex in propertiesToIndex)
                {
                    string attributeName = propertyToIndex.CdsAttributeName;
                    string value = row[attributeName]?.ToString();

                    if (string.IsNullOrEmpty(value))
                    {
                        continue;
                    }

                    // Pick the synonym map (if any) that applies to this attribute.
                    bool hasSynonyms = false;
                    string synonyms = null;

                    switch (attributeName)
                    {
                        case "address1_city":
                            hasSynonyms = SynonymMap.CitySynonymMap.TryGetSynonyms(value.Trim(), out synonyms);
                            break;

                        case "address1_stateorprovince":
                            hasSynonyms = SynonymMap.StateOrProvinceSynonymMap.TryGetSynonyms(value.Trim(), out synonyms);
                            break;

                        case "address1_country":
                            hasSynonyms = SynonymMap.CountrySynonymMap.TryGetSynonyms(value.Trim(), out synonyms);
                            break;

                        case "name" when entityName == "account":
                            hasSynonyms = SynonymMap.OrganizationSynonymMap.TryGetSynonyms(value.Trim(), out synonyms);
                            break;

                        default:
                            break;
                    }

                    if (hasSynonyms)
                    {
                        // The original value stays first; the synonyms follow the delimiter.
                        value = $"{value}{Document.SynonymDelimiter}{synonyms}";
                    }

                    propertyToIndex.PropertyInfo.SetValue(document, value);
                }

                yield return document;
            }
        }
        /// <summary>
        /// Turns the hit highlights of every search result into <see cref="MatchedTerm"/> entries
        /// on <paramref name="context"/>. Each entry binds a piece of the search text to the
        /// entity (table) and attribute (column) whose value it matched.
        /// </summary>
        /// <remarks>
        /// Results belonging to the metadata entity are ignored, as are highlighted fields that
        /// cannot be resolved to a CDS attribute. When the field value carries synonyms, each
        /// synonym is looked up in the search tokens / search text and the first highlight
        /// fragment is matched as well; otherwise every highlight fragment is matched.
        /// </remarks>
        /// <param name="context">Supplies the search results, search text and search tokens, and receives the matched terms.</param>
        /// <returns>A task that completes when all results have been processed.</returns>
        public async Task ProcessAsync(SearchResultHandlerContext context)
        {
            await Task.Yield();

            foreach (var searchResult in context.SearchResults)
            {
                string entityName = searchResult.Document[Document.EntityNameFieldName].ToString();

                if (entityName == Document.MetadataEntityName)
                {
                    continue;
                }

                // Guard against results that carry no highlight collection at all.
                if (searchResult.Highlights == null)
                {
                    continue;
                }

                string cdsEntityName = entityName;

                foreach (var highlight in searchResult.Highlights)
                {
                    if (!Document.TryResolveCdsAttributeName(highlight.Key, cdsEntityName, out string cdsAttributeName))
                    {
                        continue;
                    }

                    string fieldValue = searchResult.Document[highlight.Key].ToString();

                    if (ContainsSynonyms(cdsEntityName, cdsAttributeName, fieldValue))
                    {
                        string[] synonyms = fieldValue.Split(Document.SynonymDelimiter, StringSplitOptions.RemoveEmptyEntries).Select(s => s.Trim()).ToArray();

                        // With RemoveEmptyEntries the split can come back empty; there is then
                        // no actual value to bind to, so skip this field instead of indexing [0].
                        if (synonyms.Length == 0)
                        {
                            continue;
                        }

                        // The actual value is the first synonym.
                        string actualValue = synonyms[0];

                        foreach (string synonym in synonyms)
                        {
                            int synonymOffset = -1;

                            // TODO: Synonym can be matched in multiple places in the search text. Need to deal with the case.
                            if (synonym.Split(' ', StringSplitOptions.RemoveEmptyEntries).Length == 1)
                            {
                                // The synonym is a single token. Match it with search tokens.
                                var matchedToken = context.SearchTokens.FirstOrDefault(t => StringComparer.OrdinalIgnoreCase.Equals(synonym, t.Token));
                                if (matchedToken != null)
                                {
                                    synonymOffset = (int)matchedToken.StartOffset;
                                }
                            }
                            else
                            {
                                // TODO: have a better algorithm to match the synonym that contains multiple tokens.
                                synonymOffset = context.SearchText.IndexOf(synonym, StringComparison.OrdinalIgnoreCase);
                            }

                            if (synonymOffset < 0)
                            {
                                continue;
                            }

                            string synonymText = context.SearchText.Substring(synonymOffset, synonym.Length);

                            context.MatchedTerms.Add(CreateInstanceValueMatchedTerm(
                                synonymText,
                                synonymOffset,
                                cdsEntityName,
                                cdsAttributeName,
                                actualValue,
                                isExactlyMatch: true,
                                isSynonymMatch: !StringComparer.OrdinalIgnoreCase.Equals(actualValue, synonym)));
                        }

                        //
                        // TODO: Design a better data structure to support synonym in the same field.
                        //

                        // Only the part of the first highlight fragment that precedes the synonym
                        // delimiter is matched. FirstOrDefault keeps an empty fragment list or an
                        // all-delimiter fragment from throwing.
                        string firstSynonymFragment = highlight.Value
                            .FirstOrDefault()
                            ?.Split(Document.SynonymDelimiter, StringSplitOptions.RemoveEmptyEntries)
                            .FirstOrDefault();

                        if (!string.IsNullOrEmpty(firstSynonymFragment))
                        {
                            foreach ((string matchedText, int startOffset) in FindMatchedTexts(context, firstSynonymFragment))
                            {
                                context.MatchedTerms.Add(CreateInstanceValueMatchedTerm(
                                    matchedText,
                                    startOffset,
                                    cdsEntityName,
                                    cdsAttributeName,
                                    actualValue,
                                    isExactlyMatch: StringComparer.OrdinalIgnoreCase.Equals(matchedText, actualValue),
                                    isSynonymMatch: false));
                            }
                        }
                    }
                    else
                    {
                        foreach (string fragment in highlight.Value)
                        {
                            foreach ((string matchedText, int startOffset) in FindMatchedTexts(context, fragment))
                            {
                                context.MatchedTerms.Add(CreateInstanceValueMatchedTerm(
                                    matchedText,
                                    startOffset,
                                    cdsEntityName,
                                    cdsAttributeName,
                                    fieldValue,
                                    isExactlyMatch: StringComparer.OrdinalIgnoreCase.Equals(matchedText, fieldValue),
                                    isSynonymMatch: false));
                            }
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Builds a <see cref="MatchedTerm"/> for a span of the search text, carrying a single
        /// instance-value <see cref="TermBinding"/> scoped to the given table and column.
        /// </summary>
        /// <param name="matchedText">Text of the matched span; its length becomes the term length.</param>
        /// <param name="startIndex">Start index of the span.</param>
        /// <param name="table">Entity name stored as the binding's table.</param>
        /// <param name="column">Attribute name stored as the binding's column.</param>
        /// <param name="value">Value the span is bound to.</param>
        /// <param name="isExactlyMatch">Stored on the binding's <c>IsExactlyMatch</c>.</param>
        /// <param name="isSynonymMatch">Stored on the binding's <c>IsSynonymMatch</c>.</param>
        /// <returns>The populated matched term.</returns>
        private static MatchedTerm CreateInstanceValueMatchedTerm(
            string matchedText,
            int startIndex,
            string table,
            string column,
            string value,
            bool isExactlyMatch,
            bool isSynonymMatch)
        {
            var matchedTerm = new MatchedTerm
            {
                Text         = matchedText,
                StartIndex   = startIndex,
                Length       = matchedText.Length,
                TermBindings = new HashSet<TermBinding>(),
            };

            matchedTerm.TermBindings.Add(new TermBinding()
            {
                BindingType = BindingType.InstanceValue,
                SearchScope = new SearchScope()
                {
                    Table  = table,
                    Column = column,
                },
                Value          = value,
                IsExactlyMatch = isExactlyMatch,
                IsSynonymMatch = isSynonymMatch,
            });

            return matchedTerm;
        }