/// <summary>
        /// Attempts to read a single, parseable version from the parsed query.
        /// Succeeds only when exactly one term is grouped under <see cref="QueryField.Version"/>
        /// and <c>NuGetVersion.TryParse</c> accepts that term.
        /// </summary>
        /// <param name="parsed">The parsed query whose <c>Grouping</c> is inspected.</param>
        /// <param name="normalizedVersion">
        /// On success, the parsed version's <c>ToNormalizedString()</c>; otherwise <c>null</c>.
        /// </param>
        /// <returns><c>true</c> when a single valid version term was found; otherwise <c>false</c>.</returns>
        private bool TryGetSingleVersion(
            ParsedQuery parsed,
            out string normalizedVersion)
        {
            // Assume failure until every check below has passed.
            normalizedVersion = null;

            var hasVersionTerms = parsed.Grouping.TryGetValue(QueryField.Version, out var versionTerms);
            if (!hasVersionTerms || versionTerms.Count != 1)
            {
                return false;
            }

            if (!NuGetVersion.TryParse(versionTerms.First(), out var version))
            {
                return false;
            }

            normalizedVersion = version.ToNormalizedString();
            return true;
        }
        /// <summary>
        /// Attempts to read a single, valid package ID from the parsed query.
        /// Succeeds only when exactly one term is grouped under <see cref="QueryField.PackageId"/>,
        /// the term is no longer than <c>PackageIdValidator.MaxPackageIdLength</c>, and
        /// <c>PackageIdValidator.IsValidPackageIdWithTimeout</c> accepts it.
        /// </summary>
        /// <param name="parsed">The parsed query whose <c>Grouping</c> is inspected.</param>
        /// <param name="packageId">On success, the single package ID term; otherwise <c>null</c>.</param>
        /// <returns><c>true</c> when a single valid package ID term was found; otherwise <c>false</c>.</returns>
        private bool TryGetSinglePackageId(
            ParsedQuery parsed,
            out string packageId)
        {
            // Assume failure until every check below has passed.
            packageId = null;

            var hasIdTerms = parsed.Grouping.TryGetValue(QueryField.PackageId, out var idTerms);
            if (!hasIdTerms || idTerms.Count != 1)
            {
                return false;
            }

            var candidate = idTerms.First();

            // The length check runs first so the validator is never invoked on an over-long ID.
            if (candidate.Length > PackageIdValidator.MaxPackageIdLength ||
                !PackageIdValidator.IsValidPackageIdWithTimeout(candidate))
            {
                return false;
            }

            packageId = candidate;
            return true;
        }
            /// <summary>
            /// Sets up the shared fixture: mocked text and parameters builders, default requests,
            /// default return values, and the <see cref="IndexOperationBuilder"/> under test.
            /// </summary>
            public Facts()
            {
                // Mocked collaborators handed to the target at the end of this constructor.
                TextBuilder = new Mock<ISearchTextBuilder>();
                ParametersBuilder = new Mock<ISearchParametersBuilder>();

                // Default requests: each one asks for the first 20 items.
                AutocompleteRequest = new AutocompleteRequest
                {
                    Skip = 0,
                    Take = 20,
                };
                V2SearchRequest = new V2SearchRequest
                {
                    Skip = 0,
                    Take = 20,
                };
                V3SearchRequest = new V3SearchRequest
                {
                    Skip = 0,
                    Take = 20,
                };

                // Default values returned by the mocks.
                Text = "";
                Parameters = new SearchParameters();
                ParsedQuery = new ParsedQuery(new Dictionary<QueryField, HashSet<string>>());

                // Every setup returns through a lambda, so the property is read when the mock is
                // invoked rather than now; values assigned after construction are still picked up.
                TextBuilder.Setup(x => x.Autocomplete(It.IsAny<AutocompleteRequest>())).Returns(() => Text);
                TextBuilder.Setup(x => x.ParseV2Search(It.IsAny<V2SearchRequest>())).Returns(() => ParsedQuery);
                TextBuilder.Setup(x => x.ParseV3Search(It.IsAny<V3SearchRequest>())).Returns(() => ParsedQuery);
                TextBuilder.Setup(x => x.Build(It.IsAny<ParsedQuery>())).Returns(() => Text);

                ParametersBuilder
                    .Setup(x => x.Autocomplete(It.IsAny<AutocompleteRequest>(), It.IsAny<bool>()))
                    .Returns(() => Parameters);
                ParametersBuilder
                    .Setup(x => x.V2Search(It.IsAny<V2SearchRequest>(), It.IsAny<bool>()))
                    .Returns(() => Parameters);
                ParametersBuilder
                    .Setup(x => x.V3Search(It.IsAny<V3SearchRequest>(), It.IsAny<bool>()))
                    .Returns(() => Parameters);

                Target = new IndexOperationBuilder(TextBuilder.Object, ParametersBuilder.Object);
            }
        /// <summary>
        /// Attempts to turn the query into a direct "get by key" operation on a hijack document.
        /// This applies only when <c>PagedToFirstItem(request)</c> holds and the parsed query has
        /// exactly two field groups that yield a single valid package ID and a single valid version.
        /// </summary>
        /// <param name="request">The incoming search request.</param>
        /// <param name="parsed">The parsed form of the request's query.</param>
        /// <param name="indexOperation">
        /// On success, an <c>IndexOperation.Get</c> for the hijack document key; otherwise <c>null</c>.
        /// </param>
        /// <returns><c>true</c> when the query maps to a single document key; otherwise <c>false</c>.</returns>
        private bool TryGetHijackDocumentByKey(
            SearchRequest request,
            ParsedQuery parsed,
            out IndexOperation indexOperation)
        {
            // The checks short-circuit in this order; the package ID and version are only
            // extracted once the cheaper paging and group-count checks have passed.
            if (!PagedToFirstItem(request) ||
                parsed.Grouping.Count != 2 ||
                !TryGetSinglePackageId(parsed, out var id) ||
                !TryGetSingleVersion(parsed, out var version))
            {
                indexOperation = null;
                return false;
            }

            indexOperation = IndexOperation.Get(DocumentUtilities.GetHijackDocumentKey(id, version));
            return true;
        }
        /// <summary>
        /// Attempts to turn the query into a direct "get by key" operation on a search document.
        /// This applies only when <c>PagedToFirstItem(request)</c> holds and the parsed query's only
        /// field group yields a single valid package ID.
        /// </summary>
        /// <param name="request">The incoming search request; also used to determine the search filters.</param>
        /// <param name="parsed">The parsed form of the request's query.</param>
        /// <param name="indexOperation">
        /// On success, an <c>IndexOperation.Get</c> for the search document key; otherwise <c>null</c>.
        /// </param>
        /// <returns><c>true</c> when the query maps to a single document key; otherwise <c>false</c>.</returns>
        private bool TryGetSearchDocumentByKey(
            SearchRequest request,
            ParsedQuery parsed,
            out IndexOperation indexOperation)
        {
            // A group count of one plus a successful package ID extraction means the package ID
            // is the only field present in the query.
            if (!PagedToFirstItem(request) ||
                parsed.Grouping.Count != 1 ||
                !TryGetSinglePackageId(parsed, out var id))
            {
                indexOperation = null;
                return false;
            }

            // The document key is derived from both the package ID and the request's search filters.
            var filters = _parametersBuilder.GetSearchFilters(request);
            indexOperation = IndexOperation.Get(DocumentUtilities.GetSearchDocumentKey(id, filters));
            return true;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Builds the search text for a parsed query by appending scoped-term clauses, unscoped-token
        /// clauses and boost clauses to an <see cref="AzureSearchTextBuilder"/>.
        /// </summary>
        /// <param name="parsed">
        /// The parsed query. Its <c>Grouping</c> (terms keyed by <see cref="QueryField"/>) and its
        /// <c>IncludeTestData</c> flag are read.
        /// </param>
        /// <returns>
        /// The result of <c>GetMatchAllDocuments</c> when the grouping is empty, when there are neither
        /// scoped nor unscoped terms, or when the builder produced only whitespace; otherwise a
        /// <see cref="SearchText"/> wrapping the generated text with <c>isDefaultSearch: false</c>.
        /// </returns>
        public SearchText Build(ParsedQuery parsed)
        {
            // Nothing was parsed at all: match every document.
            if (!parsed.Grouping.Any())
            {
                return(GetMatchAllDocuments(parsed.IncludeTestData));
            }

            // Scoped terms are every group except the "any field" group and the "invalid field" group.
            // Unscoped terms are the values of the "any field" group, or null when that group is absent.
            var scopedTerms   = parsed.Grouping.Where(g => g.Key != QueryField.Any && g.Key != QueryField.Invalid).ToList();
            var unscopedTerms = parsed.Grouping.Where(g => g.Key == QueryField.Any)
                                .Select(g => g.Value)
                                .SingleOrDefault()?
                                .ToList();

            // Don't bother generating Azure Search text if all terms are scoped to invalid fields.
            var hasUnscopedTerms = unscopedTerms != null && unscopedTerms.Count > 0;

            if (scopedTerms.Count == 0 && !hasUnscopedTerms)
            {
                return(GetMatchAllDocuments(parsed.IncludeTestData));
            }

            // Add the terms that are scoped to specific fields. Scoped terms are marked as required as soon as
            // the query has anything else in it: unscoped terms, or more than one scoped field.
            var builder            = new AzureSearchTextBuilder();
            var requireScopedTerms = hasUnscopedTerms || scopedTerms.Count > 1;

            foreach (var scopedTerm in scopedTerms)
            {
                var fieldName = FieldNames[scopedTerm.Key];
                var values    = ProcessFieldValues(scopedTerm.Key, scopedTerm.Value).ToList();

                if (values.Count == 0)
                {
                    // This happens if tags have only delimiters.
                    continue;
                }
                else if (values.Count > 1)
                {
                    builder.AppendScopedTerms(fieldName, values, required: requireScopedTerms);
                }
                else
                {
                    // Exactly one value: append it as a single term on the field.
                    builder.AppendTerm(
                        fieldName,
                        term: values.First(),
                        op: requireScopedTerms ? Operator.Required : Operator.None);
                }
            }

            // Add the terms that can match any fields.
            if (hasUnscopedTerms)
            {
                // All but the last unscoped tokens must match some part of tokenized package metadata. This ensures
                // that a term that the user adds to their search text is effective. In general, if tokens are optional,
                // any score boost on, say, download count can cause highly popular but largely irrelevant packages to
                // appear at the top. For the last token, allow a prefix match to support instant search scenarios.
                var separatorTokens = unscopedTerms.SelectMany(TokenizeWithSeparators).ToList();

                // The last instance of a token should use the prefix search. Also, attempt to keep the tokens in their
                // original order for readability.
                // HashSet.Remove returns true only the first time a given token is seen, so walking the list in
                // reverse keeps the last occurrence of each token; the second Reverse restores the original
                // relative order.
                var uniqueSeparatorTokens = separatorTokens.ToHashSet();
                separatorTokens = separatorTokens
                                  .AsEnumerable()
                                  .Reverse()
                                  .Where(t => uniqueSeparatorTokens.Remove(t))
                                  .Reverse()
                                  .ToList();

                foreach (var token in separatorTokens)
                {
                    // Tokens are unique after the de-duplication above, so equality with the last element
                    // identifies the final token.
                    var isLastToken            = token == separatorTokens.Last();
                    var uniqueCamelSplitTokens = TokenizeWithCamelSplit(token).ToHashSet(StringComparer.OrdinalIgnoreCase);
                    var lowerToken             = token.ToLowerInvariant();
                    if (uniqueCamelSplitTokens.Count > 1)
                    {
                        // The camel-case split produced several pieces: require either the whole lower-cased
                        // token or the set of split pieces.
                        builder.AppendRequiredAlternatives(
                            prefixSearchSingleOptions: isLastToken,
                            alternatives: new ICollection <string>[]
                        {
                            new[] { lowerToken },
                            uniqueCamelSplitTokens,
                        });
                    }
                    else
                    {
                        // No meaningful split: require the lower-cased token, as a prefix match if it is the last one.
                        builder.AppendTerm(
                            fieldName: null,
                            term: lowerToken,
                            prefixSearch: isLastToken,
                            op: Operator.Required);
                    }

                    // Favor tokens that match without camel-case split.
                    // Only applied to tokens longer than three characters.
                    if (lowerToken.Length > 3)
                    {
                        builder.AppendTerm(
                            fieldName: null,
                            term: lowerToken,
                            boost: _options.Value.SeparatorSplitBoost);
                    }
                }

                // If our in-memory tokenization yielded no tokens, just add the original unscoped terms. This should
                // only happen for search queries with only uncommon characters.
                if (!separatorTokens.Any())
                {
                    foreach (var term in unscopedTerms)
                    {
                        builder.AppendTerm(fieldName: null, term: term);
                    }
                }

                // When there is a single unscoped term that could be a namespace, favor package IDs that start with
                // the term.
                // "Could be a namespace" here means: the term contains a package ID separator and, with trailing
                // separators trimmed, passes IsId.
                if (unscopedTerms.Count == 1 &&
                    unscopedTerms[0].IndexOfAny(PackageIdSeparators) > -1 &&
                    IsId(unscopedTerms[0].TrimEnd(PackageIdSeparators)))
                {
                    builder.AppendTerm(
                        fieldName: IndexFields.PackageId,
                        term: unscopedTerms[0],
                        prefixSearch: true,
                        boost: _options.Value.NamespaceBoost);
                }
            }

            // Handle the exact match case. If the search query is a single unscoped term that is also a valid
            // package ID, mega boost the document that has this package ID. Only consider the query to be a package
            // ID if it has symbols (a.k.a. separators) in it.
            // Note: unscopedTerms cannot be null when it is dereferenced here. With no scoped terms, reaching this
            // point means hasUnscopedTerms was true; otherwise the method would have returned earlier.
            if (scopedTerms.Count == 0 &&
                unscopedTerms.Count == 1 &&
                IsIdWithSeparator(unscopedTerms[0]))
            {
                builder.AppendExactMatchPackageIdBoost(unscopedTerms[0], _options.Value.ExactMatchBoost);
            }

            if (!parsed.IncludeTestData)
            {
                ExcludeTestData(builder);
            }

            var result = builder.ToString();

            // If the builder ended up with no usable text, fall back to matching all documents.
            if (string.IsNullOrWhiteSpace(result))
            {
                return(GetMatchAllDocuments(parsed.IncludeTestData));
            }

            return(new SearchText(result, isDefaultSearch: false));
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Builds the search text for a parsed query: scoped terms first, then unscoped terms with
        /// "match all terms" boosts, a prefix-match clause on the last unscoped term, and an
        /// exact-match package ID boost.
        /// </summary>
        /// <param name="parsed">
        /// The parsed query. Its <c>Grouping</c> (terms keyed by <see cref="QueryField"/>) and its
        /// <c>IncludeTestData</c> flag are read.
        /// </param>
        /// <returns>
        /// The result of <c>GetMatchAllDocuments</c> when the grouping is empty, when there are neither
        /// scoped nor unscoped terms, or when the builder produced only whitespace; otherwise a
        /// <see cref="SearchText"/> wrapping the generated text with <c>isDefaultSearch: false</c>.
        /// </returns>
        public SearchText Build(ParsedQuery parsed)
        {
            if (!parsed.Grouping.Any())
            {
                return GetMatchAllDocuments(parsed.IncludeTestData);
            }

            // Field-scoped groups are everything except the "any field" and "invalid field" groups.
            var fieldGroups = parsed.Grouping
                .Where(group => !(group.Key == QueryField.Any || group.Key == QueryField.Invalid))
                .ToList();

            // Terms that may match any field; null when the query has no such group.
            var anyFieldTerms = parsed.Grouping
                .Where(group => group.Key == QueryField.Any)
                .Select(group => group.Value)
                .SingleOrDefault()?
                .ToList();
            var anyTermsPresent = anyFieldTerms != null && anyFieldTerms.Count != 0;

            // Every term was scoped to an invalid field, so there is nothing to generate.
            if (fieldGroups.Count == 0 && !anyTermsPresent)
            {
                return GetMatchAllDocuments(parsed.IncludeTestData);
            }

            var builder = new AzureSearchTextBuilder();

            // Scoped terms become required as soon as the query contains anything else.
            var mustMatchScoped = anyTermsPresent || fieldGroups.Count > 1;
            var singleTermPrefix = mustMatchScoped ? TermPrefix.And : TermPrefix.None;

            foreach (var fieldGroup in fieldGroups)
            {
                var fieldName = FieldNames[fieldGroup.Key];
                var values = ProcessFieldValues(fieldGroup.Key, fieldGroup.Value).ToList();

                switch (values.Count)
                {
                    case 0:
                        // This happens if tags have only delimiters; nothing to append.
                        break;

                    case 1:
                        builder.AppendScopedTerm(fieldName, values.First(), prefix: singleTermPrefix);
                        break;

                    default:
                        builder.AppendScopedTerms(fieldName, values, required: mustMatchScoped);
                        break;
                }
            }

            if (anyTermsPresent)
            {
                builder.AppendTerms(anyFieldTerms);

                // Boost documents matching every unscoped term. Scoped terms are left out of this
                // clause because they are already required whenever unscoped terms are present.
                if (anyFieldTerms.Count > 1)
                {
                    builder.AppendBoostIfMatchAllTerms(anyFieldTerms, _options.Value.MatchAllTermsBoost);
                }

                // Boost documents matching every token produced by tokenizing the unscoped terms,
                // unless that token set is equal to or a subset of the unscoped terms themselves.
                var tokens = new HashSet<string>(anyFieldTerms.SelectMany(Tokenize));
                var tokensDifferFromTerms =
                    tokens.Count > anyFieldTerms.Count ||
                    tokens.Any(token => !anyFieldTerms.Contains(token));
                if (tokensDifferFromTerms)
                {
                    builder.AppendBoostIfMatchAllTerms(tokens.ToList(), _options.Value.MatchAllTermsBoost);
                }

                // Prefix match on the last unscoped term for an "instant search" experience. This is
                // only done when the query has no scoped terms.
                if (fieldGroups.Count == 0)
                {
                    var lastTerm = anyFieldTerms.Last();
                    var looksLikePackageId = IsIdWithSeparator(lastTerm);

                    // ID-like terms prefix match on the package ID field with the prefix boost. Other
                    // terms prefix match on the tokenized package ID field, and only terms shorter
                    // than four characters get the boost.
                    var prefixField = looksLikePackageId
                        ? IndexFields.PackageId
                        : IndexFields.TokenizedPackageId;
                    var prefixBoost = (looksLikePackageId || lastTerm.Length < 4)
                        ? _options.Value.PrefixMatchBoost
                        : 1;

                    builder.AppendScopedTerm(
                        fieldName: prefixField,
                        term: lastTerm,
                        prefix: TermPrefix.None,
                        prefixSearch: true,
                        boost: prefixBoost);
                }
            }

            // Exact match case: a lone unscoped term that is a package ID containing separators gets a
            // large boost on the document with that package ID. anyFieldTerms is non-null here because
            // having no scoped terms at this point implies unscoped terms are present.
            if (fieldGroups.Count == 0 &&
                anyFieldTerms.Count == 1 &&
                IsIdWithSeparator(anyFieldTerms[0]))
            {
                builder.AppendExactMatchPackageIdBoost(anyFieldTerms[0], _options.Value.ExactMatchBoost);
            }

            if (!parsed.IncludeTestData)
            {
                ExcludeTestData(builder);
            }

            // If the builder ended up with no usable text, fall back to matching all documents.
            var queryText = builder.ToString();
            return string.IsNullOrWhiteSpace(queryText)
                ? GetMatchAllDocuments(parsed.IncludeTestData)
                : new SearchText(queryText, isDefaultSearch: false);
        }