public async Task ProcessAsync(SearchResultHandlerContext context) { await Task.Yield(); foreach (var searchResult in context.SearchResults) { string entityName = searchResult.Document[Document.EntityNameFieldName].ToString(); if (entityName != Document.MetadataEntityName) { continue; } foreach (var highlight in searchResult.Highlights) { foreach (string fragment in highlight.Value) { foreach (var fragmentToken in FragmentHelper.GetTokensFromFragment(fragment, context.SearchParameters.HighlightPreTag, context.SearchParameters.HighlightPostTag)) { // // Question: what if the same word shows in multiple positions? // var searchToken = context.SearchTokens.FirstOrDefault(p => StringComparer.OrdinalIgnoreCase.Equals(p.Token, fragmentToken.Token)); if (searchToken == null) { _logger.LogWarning($"Token value '{fragmentToken.Token}' isn't matched."); continue; } // // Question: why offset is nullable? // string matchedText = context.SearchText[(int)searchToken.StartOffset..(int)searchToken.EndOffset];
private IEnumerable <(string, int)> FindMatchedTexts(SearchResultHandlerContext context, string fragment) { var fragmentTokens = FragmentHelper.GetTokensFromFragment(fragment, context.SearchParameters.HighlightPreTag, context.SearchParameters.HighlightPostTag); var fragmentTokenBindings = CreateTokenBindings(fragmentTokens.ToList(), context.SearchTokens); for (int length = fragmentTokenBindings.Count; length > 0; length--) { for (int index = 0; index + length - 1 < fragmentTokenBindings.Count; index++) { var startFragmentTokenBindings = fragmentTokenBindings[index]; var endFragmentTokenBindings = fragmentTokenBindings[index + length - 1]; if (startFragmentTokenBindings == endFragmentTokenBindings) { foreach (var startSearchToken in startFragmentTokenBindings.SearchTokens) { yield return(context.SearchText[((int)startSearchToken.StartOffset)..((int)startSearchToken.EndOffset)], (int)startSearchToken.StartOffset);
/// <summary>
/// Analyzes <paramref name="searchText"/> into tokens, runs the index search, lets every
/// registered search result handler contribute matched terms, and returns those terms with
/// duplicates merged.
/// </summary>
/// <param name="searchText">The raw text to search for.</param>
/// <returns>
/// An empty collection when <paramref name="searchText"/> is null or empty; otherwise one
/// <see cref="MatchedTerm"/> per distinct (text ignoring case, start index, length) triple,
/// carrying the union of the term bindings reported for that triple.
/// </returns>
public async Task<IReadOnlyCollection<MatchedTerm>> SearchAsync(string searchText)
{
    if (string.IsNullOrEmpty(searchText))
    {
        return Array.Empty<MatchedTerm>();
    }

    // Step 1: tokenize the search text with the index's default analyzer.
    var analyzeRequest = new AnalyzeRequest()
    {
        Text = searchText,
        Analyzer = Document.DefaultAnalyzerName,
    };

    IList<TokenInfo> analyzedTokens = null;
    using (new BenchmarkScope(_logger, "analyzing text"))
    {
        var serviceClient = _searchClientProvider.CreateSearchServiceClient();
        var analysis = await serviceClient.Indexes.AnalyzeAsync(_configuration["SearchIndexName"], analyzeRequest);
        analyzedTokens = analysis.Tokens;
    }

    // Step 2: run the search and drain every continuation page.
    var searchParameters = new SearchParameters()
    {
        SearchMode = SearchMode.Any,
        SearchFields = Document.SearchableFields,
        ScoringProfile = Document.PrimaryFieldFavoredScoringProfile,
        HighlightFields = Document.SearchableFields,
        HighlightPreTag = "<em>",
        HighlightPostTag = "</em>",
        Top = 500,
    };

    var allResults = new List<SearchResult<AzureSearchDocument>>();
    using (new BenchmarkScope(_logger, "searching text"))
    {
        var indexClient = _searchClientProvider.CreateSearchIndexClient();
        var page = await indexClient.Documents.SearchAsync(searchText, searchParameters);
        while (true)
        {
            allResults.AddRange(page.Results);
            if (page.ContinuationToken == null)
            {
                break;
            }

            page = await indexClient.Documents.ContinueSearchAsync(page.ContinuationToken);
        }
    }

    // Step 3: let each handler record the terms it recognizes.
    var collectedTerms = new HashSet<MatchedTerm>();
    var handlerContext = new SearchResultHandlerContext(searchText, analyzedTokens.ToList(), searchParameters, allResults, collectedTerms);
    foreach (var handler in _searchResultHandlers)
    {
        await handler.ProcessAsync(handlerContext);
    }

    //
    // Step 4: merge term bindings of terms that cover the same span of the search text.
    //
    var mergedTerms = new List<MatchedTerm>();
    foreach (var term in collectedTerms)
    {
        int position = mergedTerms.FindIndex(existing =>
            StringComparer.OrdinalIgnoreCase.Equals(existing.Text, term.Text)
            && existing.StartIndex == term.StartIndex
            && existing.Length == term.Length);

        if (position < 0)
        {
            // First time this span is seen: start a fresh entry with an empty binding set.
            position = mergedTerms.Count;
            mergedTerms.Add(new MatchedTerm()
            {
                Text = term.Text,
                StartIndex = term.StartIndex,
                Length = term.Length,
                TermBindings = new HashSet<TermBinding>(),
            });
        }

        mergedTerms[position].TermBindings.UnionWith(term.TermBindings);
    }

    return mergedTerms;
}
/// <summary>
/// Converts the highlighted hits of non-metadata search results into matched terms and adds
/// them to <c>context.MatchedTerms</c>.
/// </summary>
/// <remarks>
/// Results whose entity name equals <c>Document.MetadataEntityName</c> are skipped, as are
/// highlighted fields that <c>Document.TryResolveCdsAttributeName</c> cannot resolve.
/// For a field that <c>ContainsSynonyms</c> reports as a synonym list, each synonym is looked
/// up in the search text and, in addition, the first synonym of the first highlight fragment
/// is run through <c>FindMatchedTexts</c>. For any other field every highlight fragment is
/// run through <c>FindMatchedTexts</c>.
/// </remarks>
/// <param name="context">
/// Supplies the search text, its analyzed tokens and the search results, and receives the
/// matched terms.
/// </param>
/// <returns>A task that completes when all search results have been examined.</returns>
public async Task ProcessAsync(SearchResultHandlerContext context)
{
    await Task.Yield();

    foreach (var searchResult in context.SearchResults)
    {
        string entityName = searchResult.Document[Document.EntityNameFieldName].ToString();
        if (entityName == Document.MetadataEntityName)
        {
            continue;
        }

        string cdsEntityName = entityName;
        foreach (var highlight in searchResult.Highlights)
        {
            if (!Document.TryResolveCdsAttributeName(highlight.Key, cdsEntityName, out string cdsAttributeName))
            {
                continue;
            }

            string fieldValue = searchResult.Document[highlight.Key].ToString();
            if (!ContainsSynonyms(cdsEntityName, cdsAttributeName, fieldValue))
            {
                foreach (string fragment in highlight.Value)
                {
                    foreach ((string matchedText, int startOffset) in FindMatchedTexts(context, fragment))
                    {
                        context.MatchedTerms.Add(CreateInstanceValueTerm(
                            matchedText,
                            startOffset,
                            cdsEntityName,
                            cdsAttributeName,
                            fieldValue,
                            StringComparer.OrdinalIgnoreCase.Equals(matchedText, fieldValue),
                            false));
                    }
                }

                continue;
            }

            // Entries that are empty after trimming are dropped: an empty synonym would
            // "match" at offset 0 via IndexOf and produce a zero-length matched term.
            string[] synonyms = fieldValue
                .Split(Document.SynonymDelimiter, StringSplitOptions.RemoveEmptyEntries)
                .Select(s => s.Trim())
                .Where(s => s.Length > 0)
                .ToArray();
            if (synonyms.Length == 0)
            {
                // Nothing usable in the field; synonyms[0] below would be out of range.
                continue;
            }

            // The actual value is the first synonym.
            string actualValue = synonyms[0];

            foreach (string synonym in synonyms)
            {
                // TODO: Synonym can be matched in multiple places in the search text. Need to deal with the case.
                int startOffset;
                if (synonym.Split(' ', StringSplitOptions.RemoveEmptyEntries).Length == 1)
                {
                    // The synonym is a single token. Match it with search tokens.
                    // The token offset is nullable; a missing token or offset counts as "no match".
                    var matchedToken = context.SearchTokens.FirstOrDefault(t => StringComparer.OrdinalIgnoreCase.Equals(synonym, t.Token));
                    startOffset = matchedToken?.StartOffset ?? -1;
                }
                else
                {
                    // TODO: have a better algorithm to match the synonym that contains multiple tokens.
                    startOffset = context.SearchText.IndexOf(synonym, StringComparison.OrdinalIgnoreCase);
                }

                if (startOffset < 0 || startOffset >= context.SearchText.Length)
                {
                    continue;
                }

                // A token's text need not be as long as the span it came from, so clamp the
                // length to stay inside the search text instead of letting Substring throw.
                int matchedLength = Math.Min(synonym.Length, context.SearchText.Length - startOffset);
                string matchedText = context.SearchText.Substring(startOffset, matchedLength);

                context.MatchedTerms.Add(CreateInstanceValueTerm(
                    matchedText,
                    startOffset,
                    cdsEntityName,
                    cdsAttributeName,
                    actualValue,
                    true,
                    !StringComparer.OrdinalIgnoreCase.Equals(actualValue, synonym)));
            }

            //
            // TODO: Design a better data structure to support synonym in the same field.
            //
            // FirstOrDefault is used at both levels because the fragment list may be empty and
            // Split with RemoveEmptyEntries returns an empty array for a delimiter-only fragment.
            string firstSynonymFragment = highlight.Value
                .FirstOrDefault()
                ?.Split(Document.SynonymDelimiter, StringSplitOptions.RemoveEmptyEntries)
                .FirstOrDefault();
            if (string.IsNullOrEmpty(firstSynonymFragment))
            {
                continue;
            }

            foreach ((string matchedText, int startOffset) in FindMatchedTexts(context, firstSynonymFragment))
            {
                context.MatchedTerms.Add(CreateInstanceValueTerm(
                    matchedText,
                    startOffset,
                    cdsEntityName,
                    cdsAttributeName,
                    actualValue,
                    StringComparer.OrdinalIgnoreCase.Equals(matchedText, actualValue),
                    false));
            }
        }
    }
}

/// <summary>
/// Builds a <see cref="MatchedTerm"/> that carries a single instance-value term binding.
/// </summary>
/// <param name="matchedText">The slice of the search text that matched.</param>
/// <param name="startOffset">Start index of <paramref name="matchedText"/> in the search text.</param>
/// <param name="table">Entity name stored in the binding's search scope.</param>
/// <param name="column">Attribute name stored in the binding's search scope.</param>
/// <param name="value">The value the binding refers to.</param>
/// <param name="isExactlyMatch">Value for the binding's <c>IsExactlyMatch</c> flag.</param>
/// <param name="isSynonymMatch">Value for the binding's <c>IsSynonymMatch</c> flag.</param>
/// <returns>The newly created matched term.</returns>
private static MatchedTerm CreateInstanceValueTerm(
    string matchedText,
    int startOffset,
    string table,
    string column,
    string value,
    bool isExactlyMatch,
    bool isSynonymMatch)
{
    var matchedTerm = new MatchedTerm
    {
        Text = matchedText,
        StartIndex = startOffset,
        Length = matchedText.Length,
        TermBindings = new HashSet<TermBinding>(),
    };

    matchedTerm.TermBindings.Add(new TermBinding()
    {
        BindingType = BindingType.InstanceValue,
        SearchScope = new SearchScope()
        {
            Table = table,
            Column = column,
        },
        Value = value,
        IsExactlyMatch = isExactlyMatch,
        IsSynonymMatch = isSynonymMatch,
    });

    return matchedTerm;
}