/// <summary>
/// Builds the Azure Search text for an autocomplete request.
/// </summary>
/// <param name="request">
/// The autocomplete request. Its <c>Query</c>, <c>Type</c> and <c>IncludeTestData</c> members are read.
/// </param>
/// <returns>
/// The result of <c>GetMatchAllDocuments</c> when the query is null or whitespace; otherwise a
/// <see cref="SearchText"/> wrapping the generated text with <c>isDefaultSearch</c> set to false.
/// </returns>
public SearchText Autocomplete(AutocompleteRequest request)
{
    var query = request.Query;
    if (string.IsNullOrWhiteSpace(query))
    {
        return GetMatchAllDocuments(request.IncludeTestData);
    }

    var text = new AzureSearchTextBuilder();

    if (request.Type != AutocompleteRequestType.PackageIds)
    {
        // Not autocompleting package IDs: query the package ID field with the query exactly as given
        // (untrimmed) and without prefix matching.
        text.AppendTerm(
            fieldName: IndexFields.PackageId,
            term: query,
            prefixSearch: false);
    }
    else
    {
        // Autocompleting package IDs: allow prefix matches on the whole (trimmed) query...
        var packageId = query.Trim();
        text.AppendTerm(
            fieldName: IndexFields.PackageId,
            term: packageId,
            prefixSearch: true);

        // ...and require every non-blank piece between package ID separators to prefix match the
        // tokenized package ID.
        var idPieces = packageId
            .Split(PackageIdSeparators)
            .Where(piece => !string.IsNullOrWhiteSpace(piece));
        foreach (var idPiece in idPieces)
        {
            text.AppendTerm(
                fieldName: IndexFields.TokenizedPackageId,
                term: idPiece,
                op: Operator.Required,
                prefixSearch: true);
        }

        // Boost the exact match when the trimmed query passes the IsId check.
        if (IsId(packageId))
        {
            text.AppendExactMatchPackageIdBoost(packageId, _options.Value.ExactMatchBoost);
        }
    }

    if (!request.IncludeTestData)
    {
        ExcludeTestData(text);
    }

    return new SearchText(text.ToString(), isDefaultSearch: false);
}
/// <summary>
/// Builds the Azure Search text for a parsed search query.
/// </summary>
/// <param name="parsed">
/// The parsed query. Its <c>Grouping</c> (terms keyed by query field) and <c>IncludeTestData</c>
/// members are read.
/// </param>
/// <returns>
/// The result of <c>GetMatchAllDocuments</c> when there is nothing usable to search for (no groupings,
/// only terms scoped to invalid fields, or an empty generated text); otherwise a
/// <see cref="SearchText"/> wrapping the generated text with <c>isDefaultSearch</c> set to false.
/// </returns>
public SearchText Build(ParsedQuery parsed)
{
    if (!parsed.Grouping.Any())
    {
        return GetMatchAllDocuments(parsed.IncludeTestData);
    }

    // Split the groupings into field-scoped terms and terms that may match any field. Terms scoped
    // to an invalid field are dropped.
    var fieldGroups = parsed.Grouping
        .Where(g => !(g.Key == QueryField.Any || g.Key == QueryField.Invalid))
        .ToList();
    var anyFieldValues = parsed.Grouping
        .Where(g => g.Key == QueryField.Any)
        .Select(g => g.Value)
        .SingleOrDefault();
    var freeTerms = anyFieldValues?.ToList();

    // Don't bother generating Azure Search text if all terms are scoped to invalid fields.
    var hasFreeTerms = freeTerms?.Count > 0;
    if (fieldGroups.Count == 0 && !hasFreeTerms)
    {
        return GetMatchAllDocuments(parsed.IncludeTestData);
    }

    var text = new AzureSearchTextBuilder();

    // Field-scoped terms become required as soon as there is more than one clause in the query.
    var scopedTermsRequired = hasFreeTerms || fieldGroups.Count > 1;
    var singleValueOperator = scopedTermsRequired ? Operator.Required : Operator.None;
    foreach (var entry in fieldGroups)
    {
        var fieldName = FieldNames[entry.Key];
        var values = ProcessFieldValues(entry.Key, entry.Value).ToList();

        if (values.Count == 0)
        {
            // This happens if tags have only delimiters.
            continue;
        }

        if (values.Count == 1)
        {
            text.AppendTerm(
                fieldName,
                term: values[0],
                op: singleValueOperator);
        }
        else
        {
            text.AppendScopedTerms(fieldName, values, required: scopedTermsRequired);
        }
    }

    if (hasFreeTerms)
    {
        // Every unscoped token is required to match some part of the tokenized package metadata, so
        // that each term the user types actually affects the results. If tokens were optional, a score
        // boost on, say, download count could push highly popular but largely irrelevant packages to
        // the top. Only the last token is allowed to prefix match, to support instant search.
        var allTokens = freeTerms.SelectMany(TokenizeWithSeparators).ToList();

        // De-duplicate the tokens, keeping the LAST occurrence of each one (so the final token is the
        // one that gets the prefix search) while otherwise preserving the original order. Walk the
        // list backwards, keep a token the first time it is seen, then flip the result.
        var notYetKept = allTokens.ToHashSet();
        var tokens = new List<string>();
        for (var i = allTokens.Count - 1; i >= 0; i--)
        {
            var candidate = allTokens[i];
            if (notYetKept.Remove(candidate))
            {
                tokens.Add(candidate);
            }
        }

        tokens.Reverse();

        var finalToken = tokens.Count > 0 ? tokens[tokens.Count - 1] : null;
        foreach (var token in tokens)
        {
            var allowPrefix = token == finalToken;
            var camelPieces = TokenizeWithCamelSplit(token).ToHashSet(StringComparer.OrdinalIgnoreCase);
            var lowered = token.ToLowerInvariant();

            if (camelPieces.Count <= 1)
            {
                // Camel-case splitting produced nothing new: require the token itself.
                text.AppendTerm(
                    fieldName: null,
                    term: lowered,
                    prefixSearch: allowPrefix,
                    op: Operator.Required);
            }
            else
            {
                // Require either the whole token or its camel-case pieces.
                text.AppendRequiredAlternatives(
                    prefixSearchSingleOptions: allowPrefix,
                    alternatives: new ICollection<string>[]
                    {
                        new[] { lowered },
                        camelPieces,
                    });
            }

            // Favor tokens that match without camel-case split.
            if (lowered.Length > 3)
            {
                text.AppendTerm(
                    fieldName: null,
                    term: lowered,
                    boost: _options.Value.SeparatorSplitBoost);
            }
        }

        // If the in-memory tokenization yielded no tokens, fall back to the original unscoped terms.
        // This should only happen for search queries made up of uncommon characters.
        if (tokens.Count == 0)
        {
            foreach (var rawTerm in freeTerms)
            {
                text.AppendTerm(fieldName: null, term: rawTerm);
            }
        }

        // When there is a single unscoped term that could be a namespace, favor package IDs that
        // start with the term.
        if (freeTerms.Count == 1)
        {
            var onlyTerm = freeTerms[0];
            if (onlyTerm.IndexOfAny(PackageIdSeparators) >= 0
                && IsId(onlyTerm.TrimEnd(PackageIdSeparators)))
            {
                text.AppendTerm(
                    fieldName: IndexFields.PackageId,
                    term: onlyTerm,
                    prefixSearch: true,
                    boost: _options.Value.NamespaceBoost);
            }
        }
    }

    // Exact match case: if the query is a single unscoped term that is also a valid package ID, mega
    // boost the document with that package ID. The query is only treated as a package ID if it has
    // symbols (a.k.a. separators) in it. Note that the unscoped list is only dereferenced when there
    // are no scoped terms, in which case it is guaranteed to be non-empty by the check above.
    if (fieldGroups.Count == 0 && freeTerms.Count == 1 && IsIdWithSeparator(freeTerms[0]))
    {
        text.AppendExactMatchPackageIdBoost(freeTerms[0], _options.Value.ExactMatchBoost);
    }

    if (!parsed.IncludeTestData)
    {
        ExcludeTestData(text);
    }

    var queryText = text.ToString();
    if (string.IsNullOrWhiteSpace(queryText))
    {
        return GetMatchAllDocuments(parsed.IncludeTestData);
    }

    return new SearchText(queryText, isDefaultSearch: false);
}
/// <summary>
/// Builds the Azure Search text for a parsed search query, boosting documents that match all
/// unscoped terms and that prefix match the last unscoped term.
/// </summary>
/// <param name="parsed">
/// The parsed query. Its <c>Grouping</c> (terms keyed by query field) and <c>IncludeTestData</c>
/// members are read.
/// </param>
/// <returns>
/// The result of <c>GetMatchAllDocuments</c> when there is nothing usable to search for (no groupings,
/// only terms scoped to invalid fields, or an empty generated text); otherwise a
/// <see cref="SearchText"/> wrapping the generated text with <c>isDefaultSearch</c> set to false.
/// </returns>
public SearchText Build(ParsedQuery parsed)
{
    if (!parsed.Grouping.Any())
    {
        return GetMatchAllDocuments(parsed.IncludeTestData);
    }

    // Split the groupings into field-scoped terms and terms that may match any field. Terms scoped
    // to an invalid field are dropped.
    var fieldGroups = parsed.Grouping
        .Where(g => !(g.Key == QueryField.Any || g.Key == QueryField.Invalid))
        .ToList();
    var anyFieldValues = parsed.Grouping
        .Where(g => g.Key == QueryField.Any)
        .Select(g => g.Value)
        .SingleOrDefault();
    var freeTerms = anyFieldValues?.ToList();

    // Don't bother generating Azure Search text if all terms are scoped to invalid fields.
    var hasFreeTerms = freeTerms?.Count > 0;
    if (fieldGroups.Count == 0 && !hasFreeTerms)
    {
        return GetMatchAllDocuments(parsed.IncludeTestData);
    }

    var text = new AzureSearchTextBuilder();

    // Field-scoped terms become required as soon as there is more than one clause in the query.
    var scopedTermsRequired = hasFreeTerms || fieldGroups.Count > 1;
    var singleValuePrefix = scopedTermsRequired ? TermPrefix.And : TermPrefix.None;
    foreach (var entry in fieldGroups)
    {
        var fieldName = FieldNames[entry.Key];
        var values = ProcessFieldValues(entry.Key, entry.Value).ToList();

        if (values.Count == 0)
        {
            // This happens if tags have only delimiters.
            continue;
        }

        if (values.Count == 1)
        {
            text.AppendScopedTerm(fieldName, values[0], prefix: singleValuePrefix);
        }
        else
        {
            text.AppendScopedTerms(fieldName, values, required: scopedTermsRequired);
        }
    }

    if (hasFreeTerms)
    {
        // Terms that can match any field.
        text.AppendTerms(freeTerms);

        // Favor results that match all unscoped terms. Scoped terms don't need to be included here
        // because they are already required.
        if (freeTerms.Count > 1)
        {
            text.AppendBoostIfMatchAllTerms(freeTerms, _options.Value.MatchAllTermsBoost);
        }

        // Also favor results that match all unscoped terms after tokenization, unless that clause
        // would be equal to, or a subset of, the "match all unscoped terms" clause.
        var tokenSet = new HashSet<string>(freeTerms.SelectMany(Tokenize));
        var tokensAddSomething =
            tokenSet.Count > freeTerms.Count
            || tokenSet.Any(token => !freeTerms.Contains(token));
        if (tokensAddSomething)
        {
            text.AppendBoostIfMatchAllTerms(tokenSet.ToList(), _options.Value.MatchAllTermsBoost);
        }

        // Favor results that prefix match the last unscoped term for an "instant search" experience.
        if (fieldGroups.Count == 0)
        {
            var lastTerm = freeTerms[freeTerms.Count - 1];
            var looksLikeId = IsIdWithSeparator(lastTerm);

            // A term passing IsIdWithSeparator is matched against the package ID field with the
            // prefix boost. Anything else is matched against the tokenized package ID, and only
            // terms shorter than 4 characters get the prefix boost (longer ones get a boost of 1).
            var prefixField = looksLikeId ? IndexFields.PackageId : IndexFields.TokenizedPackageId;
            var prefixBoost = (looksLikeId || lastTerm.Length < 4) ? _options.Value.PrefixMatchBoost : 1;

            text.AppendScopedTerm(
                fieldName: prefixField,
                term: lastTerm,
                prefix: TermPrefix.None,
                prefixSearch: true,
                boost: prefixBoost);
        }
    }

    // Exact match case: if the query is a single unscoped term that is also a valid package ID, mega
    // boost the document with that package ID. The query is only treated as a package ID if it has
    // symbols (a.k.a. separators) in it. Note that the unscoped list is only dereferenced when there
    // are no scoped terms, in which case it is guaranteed to be non-empty by the check above.
    if (fieldGroups.Count == 0 && freeTerms.Count == 1 && IsIdWithSeparator(freeTerms[0]))
    {
        text.AppendExactMatchPackageIdBoost(freeTerms[0], _options.Value.ExactMatchBoost);
    }

    if (!parsed.IncludeTestData)
    {
        ExcludeTestData(text);
    }

    var queryText = text.ToString();
    if (string.IsNullOrWhiteSpace(queryText))
    {
        return GetMatchAllDocuments(parsed.IncludeTestData);
    }

    return new SearchText(queryText, isDefaultSearch: false);
}