Ejemplo n.º 1
0
        /// <summary>
        /// Remembers what the pages located before <c>_query.Start</c> already contained, so that the
        /// page currently being built can skip duplicates.
        /// </summary>
        /// <remarks>
        /// Returns immediately when the query starts at position 0 or has duplicate checking disabled.
        /// For map indexes without projections it records document ids; for distinct queries it
        /// additionally records the hashes of the projections located before the start position.
        /// </remarks>
        /// <param name="search">
        /// Search results whose <c>ScoreDocs</c> are read from position 0, i.e. they are expected to
        /// include the hits of the previous pages. Fewer hits than <c>_query.Start</c> are tolerated.
        /// </param>
        public void RecordAlreadyPagedItemsInPreviousPage(TopDocs search)
        {
            if (_query.Start == 0)
            {
                return;
            }

            if (_query.SkipDuplicateChecking)
            {
                return;
            }

            // we are paging, we need to check that we don't have duplicates in the previous pages
            // see here for details: http://groups.google.com/group/ravendb/browse_frm/thread/d71c44aa9e2a7c6e

            if (_indexType.IsMap() && _fieldsToFetch.IsProjection == false && search.ScoreDocs.Length >= _query.Start)
            {
                if (_isSortingQuery)
                {
                    // we need to scan all records from the beginning to requested 'start' position
                    for (var i = 0; i < _query.Start && i < search.ScoreDocs.Length; i++)
                    {
                        var scoreDoc        = search.ScoreDocs[i];
                        var document        = _searcher.Doc(scoreDoc.Doc);
                        var alreadyPagedKey = document.Get(Constants.Indexing.Fields.DocumentIdFieldName);

                        _alreadySeenDocumentKeysInPreviousPage.Add(alreadyPagedKey);
                        HasMultipleIndexOutputs = true;
                    }
                }
                else
                {
                    // that's not a sorted query so we need just to ensure that we won't return the last item of the previous page
                    var scoreDoc        = search.ScoreDocs[_query.Start - 1];
                    var document        = _searcher.Doc(scoreDoc.Doc);
                    var alreadyPagedKey = document.Get(Constants.Indexing.Fields.DocumentIdFieldName);

                    _alreadySeenDocumentKeysInPreviousPage.Add(alreadyPagedKey);
                    HasMultipleIndexOutputs = true;
                }
            }

            if (_fieldsToFetch.IsDistinct == false)
            {
                return;
            }

            // The scan position is kept in a field, so a later call continues where this one stopped.
            // The loop is bounded by the number of hits actually returned as well as by the start
            // offset: when the caller pages past the end of the results there are fewer hits than
            // '_query.Start', and indexing by the start offset alone would throw.
            for (; _alreadyScannedForDuplicates < _query.Start && _alreadyScannedForDuplicates < search.ScoreDocs.Length; _alreadyScannedForDuplicates++)
            {
                var scoreDoc = search.ScoreDocs[_alreadyScannedForDuplicates];
                var document = _retriever.Get(_searcher.Doc(scoreDoc.Doc), scoreDoc.Score);

                if (document.Data.Count > 0) // we don't consider empty projections to be relevant for distinct operations
                {
                    _alreadySeenProjections.Add(document.DataHash);
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Lazily executes <paramref name="query"/> against the Lucene searcher and yields, per accepted hit,
        /// the retrieved document together with its optional highlightings and explanation.
        /// </summary>
        /// <remarks>
        /// This is an iterator: nothing runs until the result is enumerated, and the querying scope is
        /// disposed when enumeration finishes or is abandoned.
        /// When the page size is 0 and the query is distinct, the method only counts: nothing is yielded
        /// and <paramref name="totalResults"/> is set to the number of accepted results once the loop ends.
        /// </remarks>
        /// <param name="query">The query to run; supplies start position, page size and metadata.</param>
        /// <param name="queryTimings">Optional timings collector; when null no timing scopes are created.</param>
        /// <param name="fieldsToFetch">Passed to the querying scope that performs duplicate/distinct filtering.</param>
        /// <param name="totalResults">Receives the total hit count of each search (or the distinct count, see remarks).</param>
        /// <param name="skippedResults">Incremented for every hit the querying scope rejects.</param>
        /// <param name="retriever">Turns Lucene documents into result documents and extracts their keys.</param>
        /// <param name="documentsContext">Context used to build the Lucene query, sort, highlighter and explanations.</param>
        /// <param name="getSpatialField">Resolver passed to the sort builder.</param>
        /// <param name="token">Checked before every search and before every processed hit.</param>
        public IEnumerable <(Document Result, Dictionary <string, Dictionary <string, string[]> > Highlightings, ExplanationResult Explanation)> Query(IndexQueryServerSide query, QueryTimingsScope queryTimings, FieldsToFetch fieldsToFetch, Reference <int> totalResults, Reference <int> skippedResults, IQueryResultRetriever retriever, DocumentsOperationContext documentsContext, Func <string, SpatialField> getSpatialField, CancellationToken token)
        {
            // Resolved lazily, on the first result that needs an explanation.
            ExplanationOptions explanationOptions = null;

            var pageSize        = query.PageSize;
            var isDistinctCount = pageSize == 0 && query.Metadata.IsDistinct;

            if (isDistinctCount)
            {
                // Count-only mode: every hit has to be inspected, so lift the page limit.
                pageSize = int.MaxValue;
            }

            pageSize = GetPageSize(_searcher, pageSize);

            var docsToGet = pageSize;
            var position  = query.Start;

            // Dedicated path for sort-only queries on indexes without dynamic fields.
            if (query.Metadata.IsOptimizedSortOnly && _index.Definition.HasDynamicFields == false)
            {
                foreach (var result in QuerySortOnly(query, retriever, position, pageSize, totalResults, token))
                {
                    yield return(result);
                }

                yield break;
            }

            QueryTimingsScope luceneScope       = null;
            QueryTimingsScope highlightingScope = null;
            QueryTimingsScope explanationsScope = null;

            if (queryTimings != null)
            {
                // Scopes are created stopped (start: false) and started around the individual operations below.
                luceneScope       = queryTimings.For(nameof(QueryTimingsScope.Names.Lucene), start: false);
                highlightingScope = query.Metadata.HasHighlightings
                    ? queryTimings.For(nameof(QueryTimingsScope.Names.Highlightings), start: false)
                    : null;
                explanationsScope = query.Metadata.HasExplanations
                    ? queryTimings.For(nameof(QueryTimingsScope.Names.Explanations), start: false)
                    : null;
            }

            var returnedResults = 0;

            var luceneQuery = GetLuceneQuery(documentsContext, query.Metadata, query.QueryParameters, _analyzer, _queryBuilderFactories);
            var sort        = GetSort(query, _index, getSpatialField, documentsContext);

            using (var scope = new IndexQueryingScope(_indexType, query, fieldsToFetch, _searcher, retriever, _state))
            {
                if (query.Metadata.HasHighlightings)
                {
                    using (highlightingScope?.For(nameof(QueryTimingsScope.Names.Setup)))
                        SetupHighlighter(query, luceneQuery, documentsContext);
                }

                // Each iteration re-runs the search with a larger 'docsToGet' until the page is filled
                // or the search has returned every hit. 'position' is kept across iterations, so hits
                // processed in an earlier iteration are not processed again.
                while (true)
                {
                    token.ThrowIfCancellationRequested();

                    TopDocs search;
                    using (luceneScope?.Start())
                        search = ExecuteQuery(luceneQuery, query.Start, docsToGet, sort);

                    totalResults.Value = search.TotalHits;

                    scope.RecordAlreadyPagedItemsInPreviousPage(search);

                    for (; position < search.ScoreDocs.Length && pageSize > 0; position++)
                    {
                        token.ThrowIfCancellationRequested();

                        var scoreDoc = search.ScoreDocs[position];

                        global::Lucene.Net.Documents.Document document;
                        using (luceneScope?.Start())
                            document = _searcher.Doc(scoreDoc.Doc, _state);

                        // Cheap key-based rejection before the full document is retrieved.
                        if (retriever.TryGetKey(document, _state, out string key) && scope.WillProbablyIncludeInResults(key) == false)
                        {
                            skippedResults.Value++;
                            continue;
                        }

                        var result = retriever.Get(document, scoreDoc.Score, _state);
                        if (scope.TryIncludeInResults(result) == false)
                        {
                            skippedResults.Value++;
                            continue;
                        }

                        returnedResults++;

                        // In count-only mode accepted results are counted but never yielded.
                        if (isDistinctCount == false)
                        {
                            Dictionary <string, Dictionary <string, string[]> > highlightings = null;
                            if (query.Metadata.HasHighlightings)
                            {
                                using (highlightingScope?.Start())
                                    highlightings = GetHighlighterResults(query, _searcher, scoreDoc, result, document, documentsContext);
                            }

                            ExplanationResult explanation = null;
                            if (query.Metadata.HasExplanations)
                            {
                                using (explanationsScope?.Start())
                                {
                                    if (explanationOptions == null)
                                    {
                                        explanationOptions = query.Metadata.Explanation.GetOptions(documentsContext, query.QueryParameters);
                                    }

                                    explanation = GetQueryExplanations(explanationOptions, luceneQuery, _searcher, scoreDoc, result, document);
                                }
                            }

                            yield return(result, highlightings, explanation);
                        }

                        // NOTE(review): this exit bypasses the 'isDistinctCount' assignment after the loop;
                        // confirm that is intended when the count reaches the (clamped) page size.
                        if (returnedResults == pageSize)
                        {
                            yield break;
                        }
                    }

                    // The search already returned every hit; fetching more cannot produce new results.
                    if (search.TotalHits == search.ScoreDocs.Length)
                    {
                        break;
                    }

                    if (returnedResults >= pageSize)
                    {
                        break;
                    }

                    Debug.Assert(_maxNumberOfOutputsPerDocument > 0);

                    // Ask for enough additional hits to cover the missing results, scaled by the maximum
                    // number of outputs per document; the product is computed as long before GetPageSize.
                    docsToGet += GetPageSize(_searcher, (long)(pageSize - returnedResults) * _maxNumberOfOutputsPerDocument);
                }

                if (isDistinctCount)
                {
                    totalResults.Value = returnedResults;
                }
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Lazily runs <paramref name="query"/> against the Lucene searcher and yields at most one page
        /// of documents, skipping hits that the querying scope rejects.
        /// </summary>
        /// <remarks>
        /// The search is repeated with a growing fetch size until the page is full or the search has
        /// returned every hit. Being an iterator, nothing executes before enumeration starts.
        /// </remarks>
        /// <param name="query">The query to run; supplies start position, page size and metadata.</param>
        /// <param name="fieldsToFetch">Passed to the querying scope that filters results.</param>
        /// <param name="totalResults">Receives the total hit count of the most recent search.</param>
        /// <param name="skippedResults">Incremented once for every rejected hit.</param>
        /// <param name="retriever">Extracts keys and builds result documents from Lucene documents.</param>
        /// <param name="documentsContext">Context used when building the Lucene query.</param>
        /// <param name="getSpatialField">Resolver used when building the Lucene query and the sort.</param>
        /// <param name="token">Checked before every search and before every processed hit.</param>
        public IEnumerable <Document> Query(IndexQueryServerSide query, FieldsToFetch fieldsToFetch, Reference <int> totalResults, Reference <int> skippedResults, IQueryResultRetriever retriever, JsonOperationContext documentsContext, Func <string, SpatialField> getSpatialField, CancellationToken token)
        {
            var limit      = GetPageSize(_searcher, query.PageSize);
            var fetchCount = limit;
            var cursor     = query.Start;

            var luceneQuery = GetLuceneQuery(documentsContext, query.Metadata, query.QueryParameters, _analyzer, getSpatialField);
            var sort        = GetSort(query, getSpatialField);
            var yielded     = 0;

            using (var scope = new IndexQueryingScope(_indexType, query, fieldsToFetch, _searcher, retriever, _state))
            {
                bool fetchMore;

                do
                {
                    token.ThrowIfCancellationRequested();

                    var hits = ExecuteQuery(luceneQuery, query.Start, fetchCount, sort);

                    totalResults.Value = hits.TotalHits;

                    scope.RecordAlreadyPagedItemsInPreviousPage(hits);

                    // 'cursor' survives across searches, so hits handled earlier are never revisited.
                    while (cursor < hits.ScoreDocs.Length && limit > 0)
                    {
                        token.ThrowIfCancellationRequested();

                        var hit       = hits.ScoreDocs[cursor++];
                        var luceneDoc = _searcher.Doc(hit.Doc, _state);

                        // Key-based rejection first; the full document is only retrieved when needed.
                        var hasKey = retriever.TryGetKey(luceneDoc, _state, out string key);
                        if (hasKey && scope.WillProbablyIncludeInResults(key) == false)
                        {
                            skippedResults.Value++;
                            continue;
                        }

                        var candidate = retriever.Get(luceneDoc, hit.Score, _state);
                        if (scope.TryIncludeInResults(candidate) == false)
                        {
                            skippedResults.Value++;
                            continue;
                        }

                        yielded++;
                        yield return(candidate);

                        if (yielded == limit)
                        {
                            yield break;
                        }
                    }

                    // Grow the fetch size by the number of missing results times the maximum outputs per document.
                    fetchCount += GetPageSize(_searcher, (long)(limit - yielded) * _maxNumberOfOutputsPerDocument);

                    // Stop once the search has returned every hit or the page has been filled.
                    fetchMore = hits.TotalHits != hits.ScoreDocs.Length && yielded < limit;
                } while (fetchMore);
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Lazily executes an intersect query: the WHERE clause must be a single <c>intersect()</c> call with
        /// at least two arguments, and only documents matched by every argument are yielded.
        /// </summary>
        /// <remarks>
        /// The first sub-query is executed through <c>ExecuteQuery</c> with the query's sort; the remaining
        /// sub-queries are run through an <c>IntersectionCollector</c> seeded with the first sub-query's hits.
        /// If too few documents survive the intersection, the first sub-query is re-run with a doubled fetch size.
        /// Being an iterator, validation errors are raised when enumeration starts, not when the method is called.
        /// </remarks>
        /// <param name="query">The query to run; supplies the WHERE clause, start position and page size.</param>
        /// <param name="fieldsToFetch">Passed to the querying scope that filters results.</param>
        /// <param name="totalResults">Receives the total hit count of the first sub-query (before intersection).</param>
        /// <param name="skippedResults">Receives the number of skipped results (see the assignments below).</param>
        /// <param name="retriever">Extracts keys and builds result documents from Lucene documents.</param>
        /// <param name="documentsContext">Context used when building the Lucene sub-queries.</param>
        /// <param name="getSpatialField">Resolver used when building the sub-queries and the sort.</param>
        /// <param name="token">Checked at the start of every fetch attempt.</param>
        /// <exception cref="InvalidQueryException">
        /// The WHERE clause is not a method call, the method is not <c>intersect</c>, it has fewer than two
        /// arguments, or one of its arguments is not a query expression.
        /// </exception>
        public IEnumerable <Document> IntersectQuery(IndexQueryServerSide query, FieldsToFetch fieldsToFetch, Reference <int> totalResults, Reference <int> skippedResults, IQueryResultRetriever retriever, JsonOperationContext documentsContext, Func <string, SpatialField> getSpatialField, CancellationToken token)
        {
            var method = query.Metadata.Query.Where as MethodExpression;

            if (method == null)
            {
                // NOTE(review): if 'Where' itself can be null here, building this message dereferences it - confirm against callers.
                throw new InvalidQueryException($"Invalid intersect query. WHERE clause must contains just an intersect() method call while it got {query.Metadata.Query.Where.Type} expression", query.Metadata.QueryText, query.QueryParameters);
            }

            var methodName = method.Name;

            if (string.Equals("intersect", methodName) == false)
            {
                throw new InvalidQueryException($"Invalid intersect query. WHERE clause must contains just a single intersect() method call while it got '{methodName}' method", query.Metadata.QueryText, query.QueryParameters);
            }

            if (method.Arguments.Count <= 1)
            {
                throw new InvalidQueryException("The valid intersect query must have multiple intersect clauses.", query.Metadata.QueryText, query.QueryParameters);
            }

            // One Lucene query per intersect() argument.
            var subQueries = new Query[method.Arguments.Count];

            for (var i = 0; i < subQueries.Length; i++)
            {
                var whereExpression = method.Arguments[i] as QueryExpression;

                if (whereExpression == null)
                {
                    throw new InvalidQueryException($"Invalid intersect query. The intersect clause at position {i} isn't a valid expression", query.Metadata.QueryText, query.QueryParameters);
                }

                subQueries[i] = GetLuceneQuery(documentsContext, query.Metadata, whereExpression, query.QueryParameters, _analyzer, getSpatialField);
            }

            //Not sure how to select the page size here??? The problem is that only docs in this search can be part
            //of the final result because we're doing an intersection query (but we might exclude some of them)
            var pageSize                    = GetPageSize(_searcher, query.PageSize);
            // Initial guess: twice (start + page size), computed as long before GetPageSize.
            int pageSizeBestGuess           = GetPageSize(_searcher, ((long)query.Start + query.PageSize) * 2);
            int skippedResultsInCurrentLoop = 0;
            int previousBaseQueryMatches    = 0;

            var firstSubDocumentQuery = subQueries[0];
            var sort = GetSort(query, getSpatialField);

            using (var scope = new IndexQueryingScope(_indexType, query, fieldsToFetch, _searcher, retriever, _state))
            {
                //Do the first sub-query in the normal way, so that sorting, filtering etc is accounted for
                var search = ExecuteQuery(firstSubDocumentQuery, 0, pageSizeBestGuess, sort);
                var currentBaseQueryMatches = search.ScoreDocs.Length;
                var intersectionCollector   = new IntersectionCollector(_searcher, search.ScoreDocs, _state);

                int intersectMatches;
                do
                {
                    token.ThrowIfCancellationRequested();
                    if (skippedResultsInCurrentLoop > 0)
                    {
                        // We get here because our first attempt didn't get enough docs (after INTERSECTION was calculated)
                        pageSizeBestGuess = pageSizeBestGuess * 2;

                        // Re-run the base query with the larger fetch size and start over with a fresh collector.
                        search = ExecuteQuery(firstSubDocumentQuery, 0, pageSizeBestGuess, sort);
                        previousBaseQueryMatches = currentBaseQueryMatches;
                        currentBaseQueryMatches  = search.ScoreDocs.Length;
                        intersectionCollector    = new IntersectionCollector(_searcher, search.ScoreDocs, _state);
                    }

                    // Feed every remaining sub-query (index 1 and up) into the collector.
                    for (var i = 1; i < subQueries.Length; i++)
                    {
                        _searcher.Search(subQueries[i], null, intersectionCollector, _state);
                    }

                    var currentIntersectResults = intersectionCollector.DocumentsIdsForCount(subQueries.Length).ToList();
                    intersectMatches            = currentIntersectResults.Count;
                    // Difference between the fetch size and the number of documents that survived the intersection.
                    skippedResultsInCurrentLoop = pageSizeBestGuess - intersectMatches;
                } while (intersectMatches < pageSize &&                       //stop if we've got enough results to satisfy the pageSize
                         currentBaseQueryMatches < search.TotalHits &&        //stop if increasing the page size wouldn't make any difference
                         previousBaseQueryMatches < currentBaseQueryMatches); //stop if increasing the page size didn't result in any more "base query" results

                var intersectResults = intersectionCollector.DocumentsIdsForCount(subQueries.Length).ToList();
                //It's hard to know what to do here, the TotalHits from the base search isn't really the TotalSize,
                //because it's before the INTERSECTION has been applied, so only some of those results make it out.
                //Trying to give an accurate answer is going to be too costly, so we aren't going to try.
                totalResults.Value   = search.TotalHits;
                skippedResults.Value = skippedResultsInCurrentLoop;

                //Using the final set of results in the intersectionCollector
                int returnedResults = 0;
                // Paging is applied to the intersected list: iteration starts at query.Start.
                for (int i = query.Start; i < intersectResults.Count && (i - query.Start) < pageSizeBestGuess; i++)
                {
                    var indexResult = intersectResults[i];
                    var document    = _searcher.Doc(indexResult.LuceneId, _state);

                    // Key-based rejection first; the full document is only retrieved when needed.
                    if (retriever.TryGetKey(document, _state, out string key) && scope.WillProbablyIncludeInResults(key) == false)
                    {
                        skippedResults.Value++;
                        skippedResultsInCurrentLoop++;
                        continue;
                    }

                    var result = retriever.Get(document, indexResult.Score, _state);
                    if (scope.TryIncludeInResults(result) == false)
                    {
                        skippedResults.Value++;
                        skippedResultsInCurrentLoop++;
                        continue;
                    }

                    returnedResults++;
                    yield return(result);

                    if (returnedResults == pageSize)
                    {
                        yield break;
                    }
                }
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Lazily yields documents similar to a base document, which is either located through a filtering
        /// query or supplied inline as JSON by the "more like this" query.
        /// </summary>
        /// <remarks>
        /// The base document itself, hits without an id / reduce key hash, and hits whose id was already
        /// returned (compared case-insensitively) are skipped. Being an iterator, nothing executes and
        /// no exception is raised before enumeration starts.
        /// </remarks>
        /// <param name="query">The query; supplies metadata, parameters and page size.</param>
        /// <param name="retriever">Builds result documents from Lucene documents.</param>
        /// <param name="context">Documents context, used to load the stop words document and to parse the inline base document.</param>
        /// <param name="token">Not consulted by this method.</param>
        /// <exception cref="InvalidOperationException">
        /// The configured stop words document does not exist, or the base document query matched nothing.
        /// </exception>
        public IEnumerable <Document> MoreLikeThis(
            IndexQueryServerSide query,
            IQueryResultRetriever retriever,
            DocumentsOperationContext context,
            CancellationToken token)
        {
            MoreLikeThisQuery           builtQuery;
            TransactionOperationContext serverContext   = null;
            IDisposable                 contextLease    = null;
            IDisposable                 readTransaction = null;

            try
            {
                // A server-wide context and read transaction are only opened for compare-exchange queries.
                if (query.Metadata.HasCmpXchg)
                {
                    contextLease    = context.DocumentDatabase.ServerStore.ContextPool.AllocateOperationContext(out serverContext);
                    readTransaction = serverContext.OpenReadTransaction();
                }

                using (readTransaction)
                {
                    builtQuery = QueryBuilder.BuildMoreLikeThisQuery(serverContext, context, query.Metadata, query.Metadata.Query.Where, query.QueryParameters, _analyzer, _queryBuilderFactories);
                }
            }
            finally
            {
                contextLease?.Dispose();
            }

            var options = builtQuery.Options == null
                ? MoreLikeThisOptions.Default
                : JsonDeserializationServer.MoreLikeThisOptions(builtQuery.Options);

            // Optional stop words, loaded from the document named in the options.
            HashSet <string> stopWords = null;

            if (string.IsNullOrWhiteSpace(options.StopWordsDocumentId) == false)
            {
                var stopWordsDocument = context.DocumentDatabase.DocumentsStorage.Get(context, options.StopWordsDocumentId);
                if (stopWordsDocument == null)
                {
                    throw new InvalidOperationException($"Stop words document {options.StopWordsDocumentId} could not be found");
                }

                var hasWords = stopWordsDocument.Data.TryGet(nameof(MoreLikeThisStopWords.StopWords), out BlittableJsonReaderArray words);
                if (hasWords && words != null)
                {
                    stopWords = new HashSet <string>(StringComparer.OrdinalIgnoreCase);

                    var index = 0;
                    while (index < words.Length)
                    {
                        stopWords.Add(words.GetStringByIndex(index));
                        index++;
                    }
                }
            }

            var reader     = _searcher.IndexReader;
            var similarity = new RavenMoreLikeThis(reader, options, _state);

            int? baseLuceneId = null;

            if (builtQuery.BaseDocument == null)
            {
                // No inline base document: resolve the Lucene doc id through the base document query.
                var baseHits = _searcher.Search(builtQuery.BaseDocumentQuery, 1, _state);

                if (baseHits.ScoreDocs.Length == 0)
                {
                    throw new InvalidOperationException("Given filtering expression did not yield any documents that could be used as a base of comparison");
                }

                baseLuceneId = baseHits.ScoreDocs[0].Doc;
            }

            if (stopWords != null)
            {
                similarity.SetStopWords(stopWords);
            }

            // Explicitly configured fields win; otherwise every indexed field except the id / reduce key hash fields.
            string[] fieldNames = options.Fields != null && options.Fields.Length > 0
                ? options.Fields
                : reader.GetFieldNames(IndexReader.FieldOption.INDEXED)
                        .Where(x => x != Constants.Documents.Indexing.Fields.DocumentIdFieldName && x != Constants.Documents.Indexing.Fields.ReduceKeyHashFieldName)
                        .ToArray();

            similarity.SetFieldNames(fieldNames);
            similarity.Analyzer = _analyzer;

            var pageSize = GetPageSize(_searcher, query.PageSize);

            Query similarityQuery;

            if (baseLuceneId.HasValue == false)
            {
                using (var baseJson = ParseJsonStringIntoBlittable(builtQuery.BaseDocument, context))
                {
                    similarityQuery = similarity.Like(baseJson);
                }
            }
            else
            {
                similarityQuery = similarity.Like(baseLuceneId.Value);
            }

            var collector = TopScoreDocCollector.Create(pageSize, true);

            // Narrow the similarity query by the filter, unless the filter matches everything anyway.
            var filter = builtQuery.FilterQuery;
            if (filter != null && filter is MatchAllDocsQuery == false)
            {
                var combined = new BooleanQuery();
                combined.Add(similarityQuery, Occur.MUST);
                combined.Add(builtQuery.FilterQuery, Occur.MUST);

                similarityQuery = combined;
            }

            _searcher.Search(similarityQuery, collector, _state);
            var topHits = collector.TopDocs().ScoreDocs;

            var returnedIds = new HashSet <string>(StringComparer.OrdinalIgnoreCase);

            foreach (var hit in topHits)
            {
                // Never return the document the comparison is based on.
                if (hit.Doc == baseLuceneId)
                {
                    continue;
                }

                var luceneDoc = _searcher.Doc(hit.Doc, _state);

                var id = luceneDoc.Get(Constants.Documents.Indexing.Fields.DocumentIdFieldName, _state);
                if (id == null)
                {
                    id = luceneDoc.Get(Constants.Documents.Indexing.Fields.ReduceKeyHashFieldName, _state);
                }

                // Skip hits without any identifier and hits whose identifier was already returned.
                if (id == null || ids_AddFailed(returnedIds, id))
                {
                    continue;
                }

                yield return(retriever.Get(luceneDoc, hit.Score, _state));
            }

            // True when the id was already present in the set (i.e. nothing was added).
            bool ids_AddFailed(HashSet <string> set, string value) => set.Add(value) == false;
        }
Ejemplo n.º 6
0
        public IEnumerable <QueryResult> Query(IndexQueryServerSide query, QueryTimingsScope queryTimings, FieldsToFetch fieldsToFetch, Reference <int> totalResults, Reference <int> skippedResults, Reference <int> scannedDocuments, IQueryResultRetriever retriever, DocumentsOperationContext documentsContext, Func <string, SpatialField> getSpatialField, CancellationToken token)
        {
            ExplanationOptions explanationOptions = null;

            var pageSize        = query.PageSize;
            var isDistinctCount = pageSize == 0 && query.Metadata.IsDistinct;

            if (isDistinctCount)
            {
                pageSize = int.MaxValue;
            }

            pageSize = GetPageSize(_searcher, pageSize);

            var docsToGet = pageSize;
            var position  = query.Start;

            QueryTimingsScope luceneScope       = null;
            QueryTimingsScope highlightingScope = null;
            QueryTimingsScope explanationsScope = null;

            if (queryTimings != null)
            {
                luceneScope       = queryTimings.For(nameof(QueryTimingsScope.Names.Lucene), start: false);
                highlightingScope = query.Metadata.HasHighlightings
                    ? queryTimings.For(nameof(QueryTimingsScope.Names.Highlightings), start: false)
                    : null;
                explanationsScope = query.Metadata.HasExplanations
                    ? queryTimings.For(nameof(QueryTimingsScope.Names.Explanations), start: false)
                    : null;
            }

            var returnedResults = 0;

            var luceneQuery = GetLuceneQuery(documentsContext, query.Metadata, query.QueryParameters, _analyzer, _queryBuilderFactories);

            using (var queryFilter = GetQueryFilter(_index, query, documentsContext, skippedResults, scannedDocuments, retriever, queryTimings))
                using (GetSort(query, _index, getSpatialField, documentsContext, out var sort))
                    using (var scope = new IndexQueryingScope(_indexType, query, fieldsToFetch, _searcher, retriever, _state))
                    {
                        if (query.Metadata.HasHighlightings)
                        {
                            using (highlightingScope?.For(nameof(QueryTimingsScope.Names.Setup)))
                                SetupHighlighter(query, luceneQuery, documentsContext);
                        }

                        while (true)
                        {
                            token.ThrowIfCancellationRequested();

                            TopDocs search;
                            using (luceneScope?.Start())
                                search = ExecuteQuery(luceneQuery, query.Start, docsToGet, sort);

                            totalResults.Value = search.TotalHits;

                            scope.RecordAlreadyPagedItemsInPreviousPage(search, token);

                            for (; position < search.ScoreDocs.Length && pageSize > 0; position++)
                            {
                                token.ThrowIfCancellationRequested();

                                var scoreDoc = search.ScoreDocs[position];

                                global::Lucene.Net.Documents.Document document;
                                using (luceneScope?.Start())
                                    document = _searcher.Doc(scoreDoc.Doc, _state);


                                if (retriever.TryGetKey(document, _state, out string key) && scope.WillProbablyIncludeInResults(key) == false)
                                {
                                    skippedResults.Value++;
                                    continue;
                                }

                                var filterResult = queryFilter?.Apply(document, key, _state);
                                if (filterResult is not null and not FilterResult.Accepted)
                                {
                                    if (filterResult is FilterResult.Skipped)
                                    {
                                        continue;
                                    }
                                    if (filterResult is FilterResult.LimitReached)
                                    {
                                        break;
                                    }
                                }

                                bool markedAsSkipped = false;
                                var  r = retriever.Get(document, scoreDoc, _state, token);
                                if (r.Document != null)
                                {
                                    var qr = CreateQueryResult(r.Document);
                                    if (qr.Result == null)
                                    {
                                        continue;
                                    }
                                    yield return(qr);
                                }
                                else if (r.List != null)
                                {
                                    int numberOfProjectedResults = 0;
                                    foreach (Document item in r.List)
                                    {
                                        var qr = CreateQueryResult(item);
                                        if (qr.Result == null)
                                        {
                                            continue;
                                        }
                                        yield return(qr);

                                        numberOfProjectedResults++;
                                    }

                                    if (numberOfProjectedResults > 1)
                                    {
                                        totalResults.Value += numberOfProjectedResults - 1;
                                    }
                                }
                                else
                                {
                                    skippedResults.Value++;
                                }

                                QueryResult CreateQueryResult(Document d)
                                {
                                    if (scope.TryIncludeInResults(d) == false)
                                    {
                                        d?.Dispose();

                                        if (markedAsSkipped == false)
                                        {
                                            skippedResults.Value++;
                                            markedAsSkipped = true;
                                        }

                                        return(default);
Ejemplo n.º 7
0
        /// <summary>
        /// Lazily runs <paramref name="query"/> against the Lucene searcher and yields at most
        /// <c>query.PageSize</c> documents, skipping hits that the querying scope rejects.
        /// </summary>
        /// <remarks>
        /// The search is repeated with a growing fetch size until the page is full or the search has
        /// returned every hit. Being an iterator, nothing executes before enumeration starts.
        /// </remarks>
        /// <param name="query">The query to run; supplies query text, sort fields, start position and page size.</param>
        /// <param name="fieldsToFetch">Passed to the querying scope that filters results.</param>
        /// <param name="totalResults">Receives the total hit count of the most recent search.</param>
        /// <param name="skippedResults">Incremented once for every rejected hit.</param>
        /// <param name="retriever">Extracts keys and builds result documents from Lucene documents.</param>
        /// <param name="token">Checked before every search and before every processed hit.</param>
        public IEnumerable <Document> Query(IndexQueryServerSide query, FieldsToFetch fieldsToFetch, Reference <int> totalResults, Reference <int> skippedResults, IQueryResultRetriever retriever, CancellationToken token)
        {
            var docsToGet = query.PageSize;
            var position  = query.Start;

            var luceneQuery     = GetLuceneQuery(query.Query, query.DefaultOperator, query.DefaultField, _analyzer);
            var sort            = GetSort(query.SortedFields);
            var returnedResults = 0;

            using (var scope = new IndexQueryingScope(_indexType, query, fieldsToFetch, _searcher, retriever, _maxIndexOutputsPerDocument, _actualMaxIndexOutputsPerDocument))
            {
                while (true)
                {
                    token.ThrowIfCancellationRequested();

                    var search = ExecuteQuery(luceneQuery, query.Start, docsToGet, sort);

                    totalResults.Value = search.TotalHits;

                    scope.RecordAlreadyPagedItemsInPreviousPage(search);

                    // 'position' survives across searches, so hits handled earlier are never revisited.
                    for (; position < search.ScoreDocs.Length && query.PageSize > 0; position++)
                    {
                        token.ThrowIfCancellationRequested();

                        var scoreDoc = search.ScoreDocs[position];
                        var document = _searcher.Doc(scoreDoc.Doc);

                        // Key-based rejection first; the full document is only retrieved when needed.
                        string key;
                        if (retriever.TryGetKey(document, out key) && scope.WillProbablyIncludeInResults(key) == false)
                        {
                            skippedResults.Value++;
                            continue;
                        }

                        var result = retriever.Get(document, scoreDoc.Score);
                        if (scope.TryIncludeInResults(result) == false)
                        {
                            skippedResults.Value++;
                            continue;
                        }

                        returnedResults++;
                        yield return(result);

                        if (returnedResults == query.PageSize)
                        {
                            yield break;
                        }
                    }

                    // Grow the fetch size by the number of missing results (times the maximum number of
                    // index outputs when documents can produce several). The sum is computed as long and
                    // saturated at int.MaxValue: with a large page size the plain int arithmetic wraps
                    // around and would request a negative number of documents on the next search.
                    long missingResults = query.PageSize - returnedResults;
                    long wantedDocs     = scope.HasMultipleIndexOutputs
                        ? docsToGet + missingResults * scope.MaxNumberOfIndexOutputs
                        : docsToGet + missingResults;

                    docsToGet = wantedDocs > int.MaxValue ? int.MaxValue : (int)wantedDocs;

                    // The search already returned every hit; fetching more cannot produce new results.
                    if (search.TotalHits == search.ScoreDocs.Length)
                    {
                        break;
                    }

                    if (returnedResults >= query.PageSize)
                    {
                        break;
                    }
                }
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Evaluates an INTERSECT query. The query text is split on <c>IntersectSeparators</c>;
        /// the first clause is executed as a regular sorted search and the remaining clauses are
        /// run through an <c>IntersectionCollector</c> built from that search's hits. The base
        /// search is repeated with a doubled fetch size until the stop conditions of the loop
        /// are met, and the resulting documents are then yielded lazily starting at
        /// <c>query.Start</c>, up to <c>query.PageSize</c> documents.
        /// </summary>
        /// <param name="query">Query text (containing the intersect clauses), default operator/field, sort fields, start position and page size.</param>
        /// <param name="fieldsToFetch">Passed to the querying scope.</param>
        /// <param name="totalResults">Set to the <c>TotalHits</c> of the base (first clause) search, i.e. before the intersection is applied.</param>
        /// <param name="skippedResults">Set to the fetch size minus the intersection count of the last pass, then incremented for every rejected document.</param>
        /// <param name="retriever">Extracts the key of a Lucene document and materializes the result document.</param>
        /// <param name="token">Checked at the start of every pass of the widening loop.</param>
        /// <returns>A lazily evaluated sequence of at most <c>query.PageSize</c> documents.</returns>
        /// <exception cref="InvalidOperationException">The query text contains fewer than two intersect clauses. Because this is an iterator, the exception surfaces when enumeration starts.</exception>
        public IEnumerable <Document> IntersectQuery(IndexQueryServerSide query, FieldsToFetch fieldsToFetch, Reference <int> totalResults, Reference <int> skippedResults, IQueryResultRetriever retriever, CancellationToken token)
        {
            var clauses = query.Query.Split(IntersectSeparators, StringSplitOptions.RemoveEmptyEntries);
            if (clauses.Length < 2)
            {
                throw new InvalidOperationException("Invalid INTERSECT query, must have multiple intersect clauses.");
            }

            // There is no exact way to size the base search: only documents it returns can be part
            // of the final result, but some of them will be dropped by the intersection.
            // Start with twice the requested window and grow from there.
            var baseFetchSize = (query.Start + query.PageSize) * 2;
            var missedInLastPass = 0;
            var baseHitsBefore = 0;

            var baseQuery = GetLuceneQuery(clauses[0], query.DefaultOperator, query.DefaultField, _analyzer);
            var sortOrder = GetSort(query.SortedFields);

            using (var scope = new IndexQueryingScope(_indexType, query, fieldsToFetch, _searcher, retriever, _maxIndexOutputsPerDocument, _actualMaxIndexOutputsPerDocument))
            {
                // The first clause goes through the regular search path so sorting is applied to it.
                var baseSearch = ExecuteQuery(baseQuery, 0, baseFetchSize, sortOrder);
                var baseHitsNow = baseSearch.ScoreDocs.Length;
                var collector = new IntersectionCollector(_searcher, baseSearch.ScoreDocs);

                bool widenAndRetry;
                do
                {
                    token.ThrowIfCancellationRequested();

                    if (missedInLastPass > 0)
                    {
                        // The previous pass lost documents to the intersection: double the
                        // base fetch size and start over with a fresh collector.
                        baseFetchSize *= 2;

                        baseSearch = ExecuteQuery(baseQuery, 0, baseFetchSize, sortOrder);
                        baseHitsBefore = baseHitsNow;
                        baseHitsNow = baseSearch.ScoreDocs.Length;
                        collector = new IntersectionCollector(_searcher, baseSearch.ScoreDocs);
                    }

                    // Every clause after the first is run through the collector.
                    foreach (var clause in clauses.Skip(1))
                    {
                        _searcher.Search(GetLuceneQuery(clause, query.DefaultOperator, query.DefaultField, _analyzer), null, collector);
                    }

                    var matchCount = collector.DocumentsIdsForCount(clauses.Length).ToList().Count;
                    missedInLastPass = baseFetchSize - matchCount;

                    // Keep widening only while all three hold:
                    //  - the intersection is still smaller than the page size,
                    //  - the base search has not yet returned all of its hits,
                    //  - the last widening actually produced more base hits.
                    // NOTE(review): the first condition compares against PageSize only, not
                    // Start + PageSize, although results are later read from index Start — confirm intended.
                    widenAndRetry = matchCount < query.PageSize &&
                                    baseHitsNow < baseSearch.TotalHits &&
                                    baseHitsBefore < baseHitsNow;
                } while (widenAndRetry);

                // NOTE(review): this repeats the computation done at the end of the last pass.
                var matches = collector.DocumentsIdsForCount(clauses.Length).ToList();

                // TotalHits of the base search is taken before the intersection is applied, so it
                // is not the true total; computing the exact number is considered too costly.
                totalResults.Value = baseSearch.TotalHits;
                skippedResults.Value = missedInLastPass;

                // Page through the final intersection held by the collector.
                var yielded = 0;
                for (var index = query.Start; index < matches.Count && (index - query.Start) < baseFetchSize; index++)
                {
                    var match = matches[index];
                    var luceneDocument = _searcher.Doc(match.LuceneId);

                    // Key-based check first: a rejection here avoids materializing the document.
                    string documentKey;
                    var rejectedByKey = retriever.TryGetKey(luceneDocument, out documentKey) && scope.WillProbablyIncludeInResults(documentKey) == false;
                    if (rejectedByKey)
                    {
                        skippedResults.Value++;
                        missedInLastPass++;
                        continue;
                    }

                    var candidate = retriever.Get(luceneDocument, match.Score);
                    if (scope.TryIncludeInResults(candidate) == false)
                    {
                        skippedResults.Value++;
                        missedInLastPass++;
                    }
                    else
                    {
                        yielded++;
                        yield return candidate;

                        if (yielded == query.PageSize)
                        {
                            yield break;
                        }
                    }
                }
            }
        }