public async Task ProcessAsync(SearchResultHandlerContext context)
        {
            await Task.Yield();

            foreach (var searchResult in context.SearchResults)
            {
                string entityName = searchResult.Document[Document.EntityNameFieldName].ToString();

                if (entityName != Document.MetadataEntityName)
                {
                    continue;
                }

                foreach (var highlight in searchResult.Highlights)
                {
                    foreach (string fragment in highlight.Value)
                    {
                        foreach (var fragmentToken in FragmentHelper.GetTokensFromFragment(fragment, context.SearchParameters.HighlightPreTag, context.SearchParameters.HighlightPostTag))
                        {
                            //
                            // Question: what if the same word shows in multiple positions?
                            //
                            var searchToken = context.SearchTokens.FirstOrDefault(p => StringComparer.OrdinalIgnoreCase.Equals(p.Token, fragmentToken.Token));

                            if (searchToken == null)
                            {
                                _logger.LogWarning($"Token value '{fragmentToken.Token}' isn't matched.");
                                continue;
                            }

                            //
                            // Question: why offset is nullable?
                            //
                            string matchedText = context.SearchText[(int)searchToken.StartOffset..(int)searchToken.EndOffset];
        private IEnumerable <(string, int)> FindMatchedTexts(SearchResultHandlerContext context, string fragment)
        {
            var fragmentTokens = FragmentHelper.GetTokensFromFragment(fragment, context.SearchParameters.HighlightPreTag, context.SearchParameters.HighlightPostTag);

            var fragmentTokenBindings = CreateTokenBindings(fragmentTokens.ToList(), context.SearchTokens);

            for (int length = fragmentTokenBindings.Count; length > 0; length--)
            {
                for (int index = 0; index + length - 1 < fragmentTokenBindings.Count; index++)
                {
                    var startFragmentTokenBindings = fragmentTokenBindings[index];
                    var endFragmentTokenBindings   = fragmentTokenBindings[index + length - 1];

                    if (startFragmentTokenBindings == endFragmentTokenBindings)
                    {
                        foreach (var startSearchToken in startFragmentTokenBindings.SearchTokens)
                        {
                            yield return(context.SearchText[((int)startSearchToken.StartOffset)..((int)startSearchToken.EndOffset)], (int)startSearchToken.StartOffset);
Exemplo n.º 3
0
        /// <summary>
        /// Tokenizes and searches <paramref name="searchText"/>, lets every registered search result
        /// handler contribute matched terms, and returns those terms merged by text span.
        /// </summary>
        /// <param name="searchText">The text to analyze and search for.</param>
        /// <returns>
        /// An empty collection when <paramref name="searchText"/> is null or empty. Otherwise one
        /// <see cref="MatchedTerm"/> per distinct combination of text (compared ignoring case),
        /// start index and length, carrying the union of all term bindings collected for that span.
        /// </returns>
        public async Task <IReadOnlyCollection <MatchedTerm> > SearchAsync(string searchText)
        {
            if (string.IsNullOrEmpty(searchText))
            {
                return Array.Empty<MatchedTerm>();
            }

            //
            // Step 1: tokenize the search text with the default analyzer of the configured index.
            //

            var tokenizeRequest = new AnalyzeRequest()
            {
                Text     = searchText,
                Analyzer = Document.DefaultAnalyzerName,
            };

            IList<TokenInfo> queryTokens;

            using (new BenchmarkScope(_logger, "analyzing text"))
            {
                var serviceClient = _searchClientProvider.CreateSearchServiceClient();
                var analysis      = await serviceClient.Indexes.AnalyzeAsync(_configuration["SearchIndexName"], tokenizeRequest);

                queryTokens = analysis.Tokens;
            }

            //
            // Step 2: run the search with highlighting on all searchable fields and collect every page.
            //

            var query = new SearchParameters()
            {
                SearchMode       = SearchMode.Any,
                SearchFields     = Document.SearchableFields,
                ScoringProfile   = Document.PrimaryFieldFavoredScoringProfile,
                HighlightFields  = Document.SearchableFields,
                HighlightPreTag  = "<em>",
                HighlightPostTag = "</em>",
                Top = 500,
            };

            var hits = new List<SearchResult<AzureSearchDocument>>();

            using (new BenchmarkScope(_logger, "searching text"))
            {
                var indexClient = _searchClientProvider.CreateSearchIndexClient();
                var page        = await indexClient.Documents.SearchAsync(searchText, query);

                while (true)
                {
                    hits.AddRange(page.Results);

                    // A null continuation token means the service has no further page to return.
                    if (page.ContinuationToken == null)
                    {
                        break;
                    }

                    page = await indexClient.Documents.ContinueSearchAsync(page.ContinuationToken);
                }
            }

            //
            // Step 3: let each handler, one after another, add its matched terms to the shared set.
            //

            var collectedTerms = new HashSet<MatchedTerm>();
            var handlerContext = new SearchResultHandlerContext(searchText, queryTokens.ToList(), query, hits, collectedTerms);

            foreach (var handler in _searchResultHandlers)
            {
                await handler.ProcessAsync(handlerContext);
            }

            //
            // Step 4: merge terms that cover the same span so that each span owns all of its bindings.
            //

            var merged = new List<MatchedTerm>();

            foreach (var term in collectedTerms)
            {
                // Linear scan for an already-merged term describing the same span.
                int slot = 0;

                while (slot < merged.Count && !CoversSameSpan(merged[slot], term))
                {
                    slot++;
                }

                if (slot == merged.Count)
                {
                    // Not seen before: append a fresh entry; 'slot' already points at its index.
                    merged.Add(new MatchedTerm()
                    {
                        Text         = term.Text,
                        StartIndex   = term.StartIndex,
                        Length       = term.Length,
                        TermBindings = new HashSet<TermBinding>(),
                    });
                }

                merged[slot].TermBindings.UnionWith(term.TermBindings);
            }

            return merged;

            // Two terms describe the same span when text (ignoring case), start index and length agree.
            bool CoversSameSpan(MatchedTerm left, MatchedTerm right)
            {
                return StringComparer.OrdinalIgnoreCase.Equals(left.Text, right.Text)
                    && left.StartIndex == right.StartIndex
                    && left.Length == right.Length;
            }
        }
        /// <summary>
        /// Turns the highlights of every non-metadata search result into instance-value
        /// <see cref="MatchedTerm"/> entries and adds them to <c>context.MatchedTerms</c>.
        /// </summary>
        /// <remarks>
        /// Only highlights whose field key is accepted by <c>Document.TryResolveCdsAttributeName</c>
        /// are considered. When <c>ContainsSynonyms</c> reports that the field holds synonyms, the field
        /// value is split on <c>Document.SynonymDelimiter</c>; each synonym is located in the search text
        /// and bound to the first synonym as the actual value, and the first synonym's highlight fragment
        /// is additionally matched through <c>FindMatchedTexts</c>. Otherwise every highlight fragment is
        /// matched through <c>FindMatchedTexts</c> and bound to the whole field value.
        /// </remarks>
        /// <param name="context">
        /// Supplies the search text, its analyzed tokens and the search results, and receives the matched terms.
        /// </param>
        /// <returns>A task that completes once all search results have been processed.</returns>
        public async Task ProcessAsync(SearchResultHandlerContext context)
        {
            await Task.Yield();

            foreach (var searchResult in context.SearchResults)
            {
                string entityName = searchResult.Document[Document.EntityNameFieldName].ToString();

                // Metadata documents are not handled here; a result without highlights has nothing to bind.
                if (entityName == Document.MetadataEntityName || searchResult.Highlights == null)
                {
                    continue;
                }

                string cdsEntityName = entityName;

                foreach (var highlight in searchResult.Highlights)
                {
                    if (!Document.TryResolveCdsAttributeName(highlight.Key, cdsEntityName, out string cdsAttributeName))
                    {
                        continue;
                    }

                    string fieldValue = searchResult.Document[highlight.Key].ToString();

                    if (!ContainsSynonyms(cdsEntityName, cdsAttributeName, fieldValue))
                    {
                        // Plain field: every highlighted fragment is matched against the search text.
                        foreach (string fragment in highlight.Value)
                        {
                            foreach ((string matchedText, int startOffset) in FindMatchedTexts(context, fragment))
                            {
                                context.MatchedTerms.Add(CreateInstanceValueTerm(
                                    matchedText,
                                    startOffset,
                                    cdsEntityName,
                                    cdsAttributeName,
                                    fieldValue,
                                    StringComparer.OrdinalIgnoreCase.Equals(matchedText, fieldValue),
                                    false));
                            }
                        }

                        continue;
                    }

                    // Whitespace-only entries trim down to "" and would otherwise "match" at offset 0
                    // with zero length, so they are dropped together with the empty entries.
                    string[] synonyms = fieldValue
                        .Split(Document.SynonymDelimiter, StringSplitOptions.RemoveEmptyEntries)
                        .Select(s => s.Trim())
                        .Where(s => s.Length > 0)
                        .ToArray();

                    if (synonyms.Length == 0)
                    {
                        // Nothing usable in the field value; there is no actual value to bind to.
                        continue;
                    }

                    // The actual value is the first synonym.
                    string actualValue = synonyms[0];

                    foreach (string synonym in synonyms)
                    {
                        int startOffset   = -1;
                        int matchedLength = synonym.Length;

                        // TODO: Synonym can be matched in multiple places in the search text. Need to deal with the case.
                        if (synonym.Split(' ', StringSplitOptions.RemoveEmptyEntries).Length == 1)
                        {
                            // The synonym is a single token. Match it with search tokens.
                            var matchedToken = context.SearchTokens.FirstOrDefault(t => StringComparer.OrdinalIgnoreCase.Equals(synonym, t.Token));
                            if (matchedToken != null)
                            {
                                // The analyzed token can differ in length from the text it was produced from,
                                // so the span is taken from the token's own offsets rather than the synonym length.
                                startOffset   = (int)matchedToken.StartOffset;
                                matchedLength = ((int)matchedToken.EndOffset) - startOffset;
                            }
                        }
                        else
                        {
                            // TODO: have a better algorithm to match the synonym that contains multiple tokens.
                            startOffset = context.SearchText.IndexOf(synonym, StringComparison.OrdinalIgnoreCase);
                        }

                        if (startOffset < 0)
                        {
                            continue;
                        }

                        string matchedText = context.SearchText.Substring(startOffset, matchedLength);

                        context.MatchedTerms.Add(CreateInstanceValueTerm(
                            matchedText,
                            startOffset,
                            cdsEntityName,
                            cdsAttributeName,
                            actualValue,
                            true,
                            !StringComparer.OrdinalIgnoreCase.Equals(actualValue, synonym)));
                    }

                    //
                    // TODO: Design a better data structure to support synonym in the same field.
                    //

                    // FirstOrDefault tolerates an empty highlight list and a fragment that contains
                    // nothing but delimiters; indexing with [0] would throw in both cases.
                    string firstSynonymFragment = highlight.Value?
                        .FirstOrDefault()?
                        .Split(Document.SynonymDelimiter, StringSplitOptions.RemoveEmptyEntries)
                        .FirstOrDefault();

                    if (string.IsNullOrEmpty(firstSynonymFragment))
                    {
                        continue;
                    }

                    foreach ((string matchedText, int startOffset) in FindMatchedTexts(context, firstSynonymFragment))
                    {
                        context.MatchedTerms.Add(CreateInstanceValueTerm(
                            matchedText,
                            startOffset,
                            cdsEntityName,
                            cdsAttributeName,
                            actualValue,
                            StringComparer.OrdinalIgnoreCase.Equals(matchedText, actualValue),
                            false));
                    }
                }
            }

            // Builds a matched term for the given span carrying a single instance-value binding.
            MatchedTerm CreateInstanceValueTerm(string text, int start, string table, string column, string value, bool isExactlyMatch, bool isSynonymMatch)
            {
                var term = new MatchedTerm
                {
                    Text         = text,
                    StartIndex   = start,
                    Length       = text.Length,
                    TermBindings = new HashSet<TermBinding>(),
                };

                term.TermBindings.Add(new TermBinding()
                {
                    BindingType = BindingType.InstanceValue,
                    SearchScope = new SearchScope()
                    {
                        Table  = table,
                        Column = column,
                    },
                    Value          = value,
                    IsExactlyMatch = isExactlyMatch,
                    IsSynonymMatch = isSynonymMatch,
                });

                return term;
            }
        }