/// <summary>
        /// Builds the search text for an autocomplete request.
        /// </summary>
        /// <param name="request">The autocomplete request; its query, type and test-data flag are read.</param>
        /// <returns>
        /// The match-all search text when the query is null, empty or whitespace; otherwise a non-default
        /// search text built from the query.
        /// </returns>
        public SearchText Autocomplete(AutocompleteRequest request)
        {
            if (string.IsNullOrWhiteSpace(request.Query))
            {
                return GetMatchAllDocuments(request.IncludeTestData);
            }

            var builder = new AzureSearchTextBuilder();

            if (request.Type != AutocompleteRequestType.PackageIds)
            {
                // Not autocompleting package IDs: match the query as given against the package ID field,
                // without prefix matching.
                builder.AppendTerm(
                    fieldName: IndexFields.PackageId,
                    term: request.Query,
                    prefixSearch: false);
            }
            else
            {
                // Autocompleting package IDs: allow prefix matches on the whole (trimmed) query...
                var packageIdQuery = request.Query.Trim();

                builder.AppendTerm(
                    fieldName: IndexFields.PackageId,
                    term: packageIdQuery,
                    prefixSearch: true);

                // ...and require a prefix match on every non-blank piece between package ID separators.
                foreach (var segment in packageIdQuery.Split(PackageIdSeparators))
                {
                    if (!string.IsNullOrWhiteSpace(segment))
                    {
                        builder.AppendTerm(
                            fieldName: IndexFields.TokenizedPackageId,
                            term: segment,
                            op: Operator.Required,
                            prefixSearch: true);
                    }
                }

                // Boost the exact package ID match when the query passes the IsId check.
                if (IsId(packageIdQuery))
                {
                    builder.AppendExactMatchPackageIdBoost(packageIdQuery, _options.Value.ExactMatchBoost);
                }
            }

            if (!request.IncludeTestData)
            {
                ExcludeTestData(builder);
            }

            return new SearchText(builder.ToString(), isDefaultSearch: false);
        }
        /// <summary>
        /// Produces the search text used when there is no usable query, i.e. one that matches every document.
        /// </summary>
        /// <param name="includeTestData">
        /// When true, <c>MatchAllDocumentsIncludingTestData</c> is returned as-is.
        /// </param>
        /// <returns>
        /// <c>MatchAllDocumentsIncludingTestData</c> when test data is included or no test owners are
        /// configured; otherwise a default search text that matches all documents and then has
        /// <c>ExcludeTestData</c> applied.
        /// </returns>
        private SearchText GetMatchAllDocuments(bool includeTestData)
        {
            if (!includeTestData)
            {
                var testOwners = _options.Value.TestOwners;
                if (testOwners != null && testOwners.Count != 0)
                {
                    var builder = new AzureSearchTextBuilder();

                    // '*' cannot be used to match all documents here because it does not work in conjunction
                    // with any other terms. Instead, match every document that has a package ID (which is all
                    // documents).
                    builder.AppendMatchAll(IndexFields.PackageId);

                    ExcludeTestData(builder);

                    return new SearchText(builder.ToString(), isDefaultSearch: true);
                }
            }

            // Either test data is wanted or there are no configured test owners to filter on.
            return MatchAllDocumentsIncludingTestData;
        }
        /// <summary>
        /// Converts a parsed query into Azure Search text.
        /// </summary>
        /// <param name="parsed">The parsed query; its term grouping and test-data flag are read.</param>
        /// <returns>
        /// The match-all search text when there is nothing usable to search for (no groupings, only terms
        /// scoped to invalid fields, or a blank generated query); otherwise a non-default search text.
        /// </returns>
        public SearchText Build(ParsedQuery parsed)
        {
            if (!parsed.Grouping.Any())
            {
                return GetMatchAllDocuments(parsed.IncludeTestData);
            }

            var scopedTerms = parsed.Grouping
                .Where(g => g.Key != QueryField.Any && g.Key != QueryField.Invalid)
                .ToList();

            // Null when the query has no QueryField.Any group.
            var unscopedTerms = parsed.Grouping
                .Where(g => g.Key == QueryField.Any)
                .Select(g => g.Value)
                .SingleOrDefault()?
                .ToList();
            var hasUnscopedTerms = unscopedTerms?.Count > 0;

            // Don't bother generating Azure Search text if all terms are scoped to invalid fields.
            if (scopedTerms.Count == 0 && !hasUnscopedTerms)
            {
                return GetMatchAllDocuments(parsed.IncludeTestData);
            }

            var builder = new AzureSearchTextBuilder();

            // Scoped terms become required as soon as they are combined with anything else.
            var requireScopedTerms = hasUnscopedTerms || scopedTerms.Count > 1;

            // Add the terms that are scoped to specific fields.
            foreach (var scopedTerm in scopedTerms)
            {
                var fieldName = FieldNames[scopedTerm.Key];
                var values = ProcessFieldValues(scopedTerm.Key, scopedTerm.Value).ToList();

                switch (values.Count)
                {
                    case 0:
                        // This happens if tags have only delimiters: nothing to append.
                        break;

                    case 1:
                        builder.AppendTerm(
                            fieldName,
                            term: values[0],
                            op: requireScopedTerms ? Operator.Required : Operator.None);
                        break;

                    default:
                        builder.AppendScopedTerms(fieldName, values, required: requireScopedTerms);
                        break;
                }
            }

            // Add the terms that can match any field.
            if (hasUnscopedTerms)
            {
                // All but the last unscoped token must match some part of the tokenized package metadata, so
                // that every term the user adds is effective. If tokens were optional, any score boost on, say,
                // download count could push popular but largely irrelevant packages to the top. The last token
                // is allowed to prefix match to support instant search scenarios.
                var allTokens = unscopedTerms.SelectMany(TokenizeWithSeparators).ToList();

                // De-duplicate while keeping the LAST occurrence of each token (that is the one that should get
                // the prefix search) and otherwise preserving the original order for readability.
                var notYetKept = allTokens.ToHashSet();
                var separatorTokens = new List<string>(notYetKept.Count);
                for (var i = allTokens.Count - 1; i >= 0; i--)
                {
                    if (notYetKept.Remove(allTokens[i]))
                    {
                        separatorTokens.Add(allTokens[i]);
                    }
                }

                separatorTokens.Reverse();

                for (var i = 0; i < separatorTokens.Count; i++)
                {
                    var token = separatorTokens[i];

                    // Tokens are unique at this point, so "equals the last token" is the same as "is at the
                    // last index".
                    var isLastToken = i == separatorTokens.Count - 1;
                    var uniqueCamelSplitTokens = TokenizeWithCamelSplit(token).ToHashSet(StringComparer.OrdinalIgnoreCase);
                    var lowerToken = token.ToLowerInvariant();

                    if (uniqueCamelSplitTokens.Count <= 1)
                    {
                        builder.AppendTerm(
                            fieldName: null,
                            term: lowerToken,
                            prefixSearch: isLastToken,
                            op: Operator.Required);
                    }
                    else
                    {
                        // Either the whole token or all of its camel-case pieces must match.
                        builder.AppendRequiredAlternatives(
                            prefixSearchSingleOptions: isLastToken,
                            alternatives: new ICollection<string>[]
                            {
                                new[] { lowerToken },
                                uniqueCamelSplitTokens,
                            });
                    }

                    // Favor tokens that match without camel-case split.
                    if (lowerToken.Length > 3)
                    {
                        builder.AppendTerm(
                            fieldName: null,
                            term: lowerToken,
                            boost: _options.Value.SeparatorSplitBoost);
                    }
                }

                // If the in-memory tokenization yielded no tokens, fall back to the original unscoped terms.
                // This should only happen for search queries made of uncommon characters only.
                if (separatorTokens.Count == 0)
                {
                    foreach (var term in unscopedTerms)
                    {
                        builder.AppendTerm(fieldName: null, term: term);
                    }
                }

                // When there is a single unscoped term that could be a namespace, favor package IDs that start
                // with the term.
                if (unscopedTerms.Count == 1)
                {
                    var onlyTerm = unscopedTerms[0];
                    if (onlyTerm.IndexOfAny(PackageIdSeparators) >= 0 &&
                        IsId(onlyTerm.TrimEnd(PackageIdSeparators)))
                    {
                        builder.AppendTerm(
                            fieldName: IndexFields.PackageId,
                            term: onlyTerm,
                            prefixSearch: true,
                            boost: _options.Value.NamespaceBoost);
                    }
                }
            }

            // Exact match case: if the query is a single unscoped term that is a valid package ID containing a
            // separator, heavily boost the document with that package ID. unscopedTerms cannot be null once
            // scopedTerms is empty, because the early return above already handled "no scoped and no unscoped
            // terms".
            if (scopedTerms.Count == 0 &&
                unscopedTerms.Count == 1 &&
                IsIdWithSeparator(unscopedTerms[0]))
            {
                builder.AppendExactMatchPackageIdBoost(unscopedTerms[0], _options.Value.ExactMatchBoost);
            }

            if (!parsed.IncludeTestData)
            {
                ExcludeTestData(builder);
            }

            var searchText = builder.ToString();

            // A blank generated query falls back to matching all documents.
            return string.IsNullOrWhiteSpace(searchText)
                ? GetMatchAllDocuments(parsed.IncludeTestData)
                : new SearchText(searchText, isDefaultSearch: false);
        }
        // Example #4
        // 0
        /// <summary>
        /// Converts a parsed query into Azure Search text.
        /// </summary>
        /// <param name="parsed">The parsed query; its term grouping and test-data flag are read.</param>
        /// <returns>
        /// The match-all search text when there is nothing usable to search for (no groupings, only terms
        /// scoped to invalid fields, or a blank generated query); otherwise a non-default search text.
        /// </returns>
        public SearchText Build(ParsedQuery parsed)
        {
            if (!parsed.Grouping.Any())
            {
                return GetMatchAllDocuments(parsed.IncludeTestData);
            }

            var scopedTerms = parsed.Grouping
                .Where(g => g.Key != QueryField.Any && g.Key != QueryField.Invalid)
                .ToList();

            // Null when the query has no QueryField.Any group.
            var unscopedTerms = parsed.Grouping
                .Where(g => g.Key == QueryField.Any)
                .Select(g => g.Value)
                .SingleOrDefault()?
                .ToList();
            var hasUnscopedTerms = unscopedTerms?.Count > 0;

            // Don't bother generating Azure Search text if all terms are scoped to invalid fields.
            if (scopedTerms.Count == 0 && !hasUnscopedTerms)
            {
                return GetMatchAllDocuments(parsed.IncludeTestData);
            }

            var builder = new AzureSearchTextBuilder();

            // Scoped terms become required as soon as they are combined with anything else.
            var requireScopedTerms = hasUnscopedTerms || scopedTerms.Count > 1;

            // Add the terms that are scoped to specific fields.
            foreach (var scopedTerm in scopedTerms)
            {
                var fieldName = FieldNames[scopedTerm.Key];
                var values = ProcessFieldValues(scopedTerm.Key, scopedTerm.Value).ToList();

                switch (values.Count)
                {
                    case 0:
                        // This happens if tags have only delimiters: nothing to append.
                        break;

                    case 1:
                        builder.AppendScopedTerm(
                            fieldName,
                            values[0],
                            prefix: requireScopedTerms ? TermPrefix.And : TermPrefix.None);
                        break;

                    default:
                        builder.AppendScopedTerms(fieldName, values, required: requireScopedTerms);
                        break;
                }
            }

            // Add the terms that can match any field.
            if (hasUnscopedTerms)
            {
                builder.AppendTerms(unscopedTerms);

                // Favor results that match all unscoped terms. Scoped terms need not be included because they
                // are already required.
                if (unscopedTerms.Count > 1)
                {
                    builder.AppendBoostIfMatchAllTerms(unscopedTerms, _options.Value.MatchAllTermsBoost);
                }

                // Try to favor results that match all unscoped terms after tokenization. Skip this clause when
                // it would be equal to, or a subset of, the "match all unscoped terms" clause.
                var tokenizedUnscopedTerms = new HashSet<string>(unscopedTerms.SelectMany(Tokenize));
                var tokenizationAddsTerms =
                    tokenizedUnscopedTerms.Count > unscopedTerms.Count ||
                    tokenizedUnscopedTerms.Any(t => !unscopedTerms.Contains(t));
                if (tokenizationAddsTerms)
                {
                    builder.AppendBoostIfMatchAllTerms(tokenizedUnscopedTerms.ToList(), _options.Value.MatchAllTermsBoost);
                }

                // Favor results that prefix match the last unscoped term for an "instant search" experience.
                if (scopedTerms.Count == 0)
                {
                    var lastUnscopedTerm = unscopedTerms[unscopedTerms.Count - 1];
                    var looksLikeId = IsIdWithSeparator(lastUnscopedTerm);

                    // ID-like terms are matched against the package ID field and always get the prefix boost.
                    // Other terms are matched against the tokenized package ID field and only get the prefix
                    // boost when shorter than four characters; otherwise the boost is 1.
                    var prefixField = looksLikeId ? IndexFields.PackageId : IndexFields.TokenizedPackageId;
                    var prefixBoost = looksLikeId || lastUnscopedTerm.Length < 4
                        ? _options.Value.PrefixMatchBoost
                        : 1;

                    builder.AppendScopedTerm(
                        fieldName: prefixField,
                        term: lastUnscopedTerm,
                        prefix: TermPrefix.None,
                        prefixSearch: true,
                        boost: prefixBoost);
                }
            }

            // Exact match case: if the query is a single unscoped term that is a valid package ID containing a
            // separator, heavily boost the document with that package ID. unscopedTerms cannot be null once
            // scopedTerms is empty, because the early return above already handled "no scoped and no unscoped
            // terms".
            if (scopedTerms.Count == 0 &&
                unscopedTerms.Count == 1 &&
                IsIdWithSeparator(unscopedTerms[0]))
            {
                builder.AppendExactMatchPackageIdBoost(unscopedTerms[0], _options.Value.ExactMatchBoost);
            }

            if (!parsed.IncludeTestData)
            {
                ExcludeTestData(builder);
            }

            var searchText = builder.ToString();

            // A blank generated query falls back to matching all documents.
            return string.IsNullOrWhiteSpace(searchText)
                ? GetMatchAllDocuments(parsed.IncludeTestData)
                : new SearchText(searchText, isDefaultSearch: false);
        }