/// <summary>
/// Lazily executes <paramref name="query"/> against the index searcher and yields one tuple per accepted
/// document: the projected result, its highlightings (or null) and its explanation (or null).
/// </summary>
/// <remarks>
/// When the query is flagged as an optimized sort-only query and the index definition has no dynamic fields,
/// the work is delegated entirely to <c>QuerySortOnly</c>.
/// When the page size is 0 and the query is distinct, nothing is yielded; the accepted results are only counted
/// and that count is written to <paramref name="totalResults"/> once the search is exhausted.
/// </remarks>
/// <param name="query">The query to execute; supplies paging, metadata and parameters.</param>
/// <param name="queryTimings">Optional timings root; when null no timing scopes are created.</param>
/// <param name="fieldsToFetch">Passed to the querying scope.</param>
/// <param name="totalResults">Receives the total hits of the most recent search (or the distinct count, see remarks).</param>
/// <param name="skippedResults">Incremented for every hit that the querying scope rejects.</param>
/// <param name="retriever">Turns Lucene documents into results.</param>
/// <param name="documentsContext">Context used to build the Lucene query, sort, highlightings and explanation options.</param>
/// <param name="getSpatialField">Spatial field resolver used when building the sort.</param>
/// <param name="token">Checked before every search and before every hit.</param>
public IEnumerable<(Document Result, Dictionary<string, Dictionary<string, string[]>> Highlightings, ExplanationResult Explanation)> Query(IndexQueryServerSide query, QueryTimingsScope queryTimings, FieldsToFetch fieldsToFetch, Reference<int> totalResults, Reference<int> skippedResults, IQueryResultRetriever retriever, DocumentsOperationContext documentsContext, Func<string, SpatialField> getSpatialField, CancellationToken token)
{
    var pageSize = query.PageSize;

    // Distinct query with a zero page size: the caller only wants the count, so scan everything.
    var countDistinctOnly = pageSize == 0 && query.Metadata.IsDistinct;
    if (countDistinctOnly)
    {
        pageSize = int.MaxValue;
    }

    pageSize = GetPageSize(_searcher, pageSize);

    var fetchCount = pageSize;
    var cursor = query.Start;

    if (query.Metadata.IsOptimizedSortOnly && _index.Definition.HasDynamicFields == false)
    {
        foreach (var sortedResult in QuerySortOnly(query, retriever, cursor, pageSize, totalResults, token))
        {
            yield return sortedResult;
        }

        yield break;
    }

    // Timing scopes stay null when timings were not requested; every use below is null-conditional.
    QueryTimingsScope luceneTimer = null;
    QueryTimingsScope highlightTimer = null;
    QueryTimingsScope explainTimer = null;

    if (queryTimings != null)
    {
        luceneTimer = queryTimings.For(nameof(QueryTimingsScope.Names.Lucene), start: false);

        if (query.Metadata.HasHighlightings)
        {
            highlightTimer = queryTimings.For(nameof(QueryTimingsScope.Names.Highlightings), start: false);
        }

        if (query.Metadata.HasExplanations)
        {
            explainTimer = queryTimings.For(nameof(QueryTimingsScope.Names.Explanations), start: false);
        }
    }

    var emitted = 0;

    // Resolved on first use and then reused for every following hit.
    ExplanationOptions explanationOptions = null;

    var luceneQuery = GetLuceneQuery(documentsContext, query.Metadata, query.QueryParameters, _analyzer, _queryBuilderFactories);
    var sort = GetSort(query, _index, getSpatialField, documentsContext);

    using (var queryingScope = new IndexQueryingScope(_indexType, query, fieldsToFetch, _searcher, retriever, _state))
    {
        if (query.Metadata.HasHighlightings)
        {
            using (highlightTimer?.For(nameof(QueryTimingsScope.Names.Setup)))
            {
                SetupHighlighter(query, luceneQuery, documentsContext);
            }
        }

        for (;;)
        {
            token.ThrowIfCancellationRequested();

            TopDocs hits;
            using (luceneTimer?.Start())
            {
                hits = ExecuteQuery(luceneQuery, query.Start, fetchCount, sort);
            }

            totalResults.Value = hits.TotalHits;

            queryingScope.RecordAlreadyPagedItemsInPreviousPage(hits);

            // `cursor` is deliberately not reset between searches: a wider search resumes where the last one stopped.
            for (; cursor < hits.ScoreDocs.Length && pageSize > 0; cursor++)
            {
                token.ThrowIfCancellationRequested();

                var hit = hits.ScoreDocs[cursor];

                global::Lucene.Net.Documents.Document luceneDocument;
                using (luceneTimer?.Start())
                {
                    luceneDocument = _searcher.Doc(hit.Doc, _state);
                }

                // Cheap key-based rejection before the document is materialized.
                if (retriever.TryGetKey(luceneDocument, _state, out string key) && queryingScope.WillProbablyIncludeInResults(key) == false)
                {
                    skippedResults.Value++;
                    continue;
                }

                var candidate = retriever.Get(luceneDocument, hit.Score, _state);
                if (queryingScope.TryIncludeInResults(candidate) == false)
                {
                    skippedResults.Value++;
                    continue;
                }

                emitted++;

                if (countDistinctOnly == false)
                {
                    Dictionary<string, Dictionary<string, string[]>> highlightings = null;
                    if (query.Metadata.HasHighlightings)
                    {
                        using (highlightTimer?.Start())
                        {
                            highlightings = GetHighlighterResults(query, _searcher, hit, candidate, luceneDocument, documentsContext);
                        }
                    }

                    ExplanationResult explanation = null;
                    if (query.Metadata.HasExplanations)
                    {
                        using (explainTimer?.Start())
                        {
                            explanationOptions = explanationOptions ?? query.Metadata.Explanation.GetOptions(documentsContext, query.QueryParameters);
                            explanation = GetQueryExplanations(explanationOptions, luceneQuery, _searcher, hit, candidate, luceneDocument);
                        }
                    }

                    yield return (candidate, highlightings, explanation);
                }

                if (emitted == pageSize)
                {
                    yield break;
                }
            }

            // Stop when the search already covered every hit, or when the page is full.
            if (hits.TotalHits == hits.ScoreDocs.Length || emitted >= pageSize)
            {
                break;
            }

            Debug.Assert(_maxNumberOfOutputsPerDocument > 0);

            // Widen the next search by the number of still-missing results times the outputs-per-document factor.
            fetchCount += GetPageSize(_searcher, (long)(pageSize - emitted) * _maxNumberOfOutputsPerDocument);
        }

        if (countDistinctOnly)
        {
            totalResults.Value = emitted;
        }
    }
}
/// <summary>
/// Lazily executes <paramref name="query"/> and yields every document accepted by the querying scope,
/// stopping once a full page has been returned or the search has covered all hits.
/// </summary>
/// <param name="query">The query to execute; supplies paging, metadata and parameters.</param>
/// <param name="fieldsToFetch">Passed to the querying scope.</param>
/// <param name="totalResults">Receives the total hits of the most recent search.</param>
/// <param name="skippedResults">Incremented for every hit that the querying scope rejects.</param>
/// <param name="retriever">Turns Lucene documents into results.</param>
/// <param name="documentsContext">Context used to build the Lucene query.</param>
/// <param name="getSpatialField">Spatial field resolver used for the Lucene query and the sort.</param>
/// <param name="token">Checked before every search and before every hit.</param>
public IEnumerable<Document> Query(IndexQueryServerSide query, FieldsToFetch fieldsToFetch, Reference<int> totalResults, Reference<int> skippedResults, IQueryResultRetriever retriever, JsonOperationContext documentsContext, Func<string, SpatialField> getSpatialField, CancellationToken token)
{
    var pageSize = GetPageSize(_searcher, query.PageSize);
    var fetchCount = pageSize;
    var cursor = query.Start;

    var luceneQuery = GetLuceneQuery(documentsContext, query.Metadata, query.QueryParameters, _analyzer, getSpatialField);
    var sort = GetSort(query, getSpatialField);

    var emitted = 0;

    using (var queryingScope = new IndexQueryingScope(_indexType, query, fieldsToFetch, _searcher, retriever, _state))
    {
        for (;;)
        {
            token.ThrowIfCancellationRequested();

            var hits = ExecuteQuery(luceneQuery, query.Start, fetchCount, sort);
            totalResults.Value = hits.TotalHits;

            queryingScope.RecordAlreadyPagedItemsInPreviousPage(hits);

            // `cursor` carries over between searches so a wider search resumes after the hits already examined.
            for (; cursor < hits.ScoreDocs.Length && pageSize > 0; cursor++)
            {
                token.ThrowIfCancellationRequested();

                var hit = hits.ScoreDocs[cursor];
                var luceneDocument = _searcher.Doc(hit.Doc, _state);

                // Cheap key-based rejection before the document is materialized.
                if (retriever.TryGetKey(luceneDocument, _state, out string key) && queryingScope.WillProbablyIncludeInResults(key) == false)
                {
                    skippedResults.Value++;
                    continue;
                }

                var candidate = retriever.Get(luceneDocument, hit.Score, _state);
                if (queryingScope.TryIncludeInResults(candidate) == false)
                {
                    skippedResults.Value++;
                    continue;
                }

                emitted++;
                yield return candidate;

                if (emitted == pageSize)
                {
                    yield break;
                }
            }

            // The window is widened before the exit checks, exactly as the loop has always done.
            fetchCount += GetPageSize(_searcher, (long)(pageSize - emitted) * _maxNumberOfOutputsPerDocument);

            // Stop when the search already covered every hit, or when the page is full.
            if (hits.TotalHits == hits.ScoreDocs.Length || emitted >= pageSize)
            {
                break;
            }
        }
    }
}
/// <summary>
/// Executes an <c>intersect()</c> query: the first clause is run as a regular sorted search, every further
/// clause is run through an <c>IntersectionCollector</c>, and only documents reported by the collector for
/// all clauses are yielded (subject to the querying scope and paging).
/// </summary>
/// <param name="query">The query; its WHERE clause must be a single <c>intersect()</c> call with at least two clauses.</param>
/// <param name="fieldsToFetch">Passed to the querying scope.</param>
/// <param name="totalResults">Receives the total hits of the base (first clause) search, not of the intersection.</param>
/// <param name="skippedResults">Receives the number of base-search slots not matched by the intersection, plus one per rejected result.</param>
/// <param name="retriever">Turns Lucene documents into results.</param>
/// <param name="documentsContext">Context used to build the Lucene sub-queries.</param>
/// <param name="getSpatialField">Spatial field resolver used for the sub-queries and the sort.</param>
/// <param name="token">Checked at the start of every widening attempt.</param>
/// <exception cref="InvalidQueryException">
/// The WHERE clause is not a method call, is not <c>intersect</c>, has fewer than two arguments,
/// or one of its arguments is not a query expression.
/// </exception>
public IEnumerable<Document> IntersectQuery(IndexQueryServerSide query, FieldsToFetch fieldsToFetch, Reference<int> totalResults, Reference<int> skippedResults, IQueryResultRetriever retriever, JsonOperationContext documentsContext, Func<string, SpatialField> getSpatialField, CancellationToken token)
{
    var method = query.Metadata.Query.Where as MethodExpression;

    if (method == null)
    {
        // `Where?.Type`: a missing WHERE clause must surface as an InvalidQueryException, not as a NullReferenceException
        // raised while the error message itself is being built.
        throw new InvalidQueryException($"Invalid intersect query. WHERE clause must contains just an intersect() method call while it got {query.Metadata.Query.Where?.Type} expression", query.Metadata.QueryText, query.QueryParameters);
    }

    var methodName = method.Name;

    if (string.Equals("intersect", methodName) == false)
    {
        throw new InvalidQueryException($"Invalid intersect query. WHERE clause must contains just a single intersect() method call while it got '{methodName}' method", query.Metadata.QueryText, query.QueryParameters);
    }

    if (method.Arguments.Count <= 1)
    {
        throw new InvalidQueryException("The valid intersect query must have multiple intersect clauses.", query.Metadata.QueryText, query.QueryParameters);
    }

    // Build every sub-query once; they are reused on each widening attempt below.
    var subQueries = new Query[method.Arguments.Count];

    for (var i = 0; i < subQueries.Length; i++)
    {
        var whereExpression = method.Arguments[i] as QueryExpression;

        if (whereExpression == null)
        {
            throw new InvalidQueryException($"Invalid intersect query. The intersect clause at position {i} isn't a valid expression", query.Metadata.QueryText, query.QueryParameters);
        }

        subQueries[i] = GetLuceneQuery(documentsContext, query.Metadata, whereExpression, query.QueryParameters, _analyzer, getSpatialField);
    }

    // Not sure how to select the page size here??? The problem is that only docs in this search can be part
    // of the final result because we're doing an intersection query (but we might exclude some of them)
    var pageSize = GetPageSize(_searcher, query.PageSize);
    int pageSizeBestGuess = GetPageSize(_searcher, ((long)query.Start + query.PageSize) * 2);
    int skippedResultsInCurrentLoop = 0;
    int previousBaseQueryMatches = 0;

    var firstSubDocumentQuery = subQueries[0];
    var sort = GetSort(query, getSpatialField);

    using (var scope = new IndexQueryingScope(_indexType, query, fieldsToFetch, _searcher, retriever, _state))
    {
        // Do the first sub-query in the normal way, so that sorting, filtering etc is accounted for
        var search = ExecuteQuery(firstSubDocumentQuery, 0, pageSizeBestGuess, sort);
        var currentBaseQueryMatches = search.ScoreDocs.Length;
        var intersectionCollector = new IntersectionCollector(_searcher, search.ScoreDocs, _state);

        int intersectMatches;
        do
        {
            token.ThrowIfCancellationRequested();

            if (skippedResultsInCurrentLoop > 0)
            {
                // We get here because our first attempt didn't get enough docs (after INTERSECTION was calculated).
                // The doubling is done in 64-bit and passed through GetPageSize, the same way the initial guess is
                // computed, so a large guess cannot wrap around to a negative int.
                pageSizeBestGuess = GetPageSize(_searcher, (long)pageSizeBestGuess * 2);

                search = ExecuteQuery(firstSubDocumentQuery, 0, pageSizeBestGuess, sort);
                previousBaseQueryMatches = currentBaseQueryMatches;
                currentBaseQueryMatches = search.ScoreDocs.Length;
                intersectionCollector = new IntersectionCollector(_searcher, search.ScoreDocs, _state);
            }

            // The base query (index 0) seeded the collector; feed it every remaining clause.
            for (var i = 1; i < subQueries.Length; i++)
            {
                _searcher.Search(subQueries[i], null, intersectionCollector, _state);
            }

            var currentIntersectResults = intersectionCollector.DocumentsIdsForCount(subQueries.Length).ToList();
            intersectMatches = currentIntersectResults.Count;
            skippedResultsInCurrentLoop = pageSizeBestGuess - intersectMatches;
        } while (intersectMatches < pageSize                      // stop if we've got enough results to satisfy the pageSize
                 && currentBaseQueryMatches < search.TotalHits    // stop if increasing the page size wouldn't make any difference
                 && previousBaseQueryMatches < currentBaseQueryMatches); // stop if increasing the page size didn't result in any more "base query" results

        var intersectResults = intersectionCollector.DocumentsIdsForCount(subQueries.Length).ToList();

        // It's hard to know what to do here, the TotalHits from the base search isn't really the TotalSize,
        // because it's before the INTERSECTION has been applied, so only some of those results make it out.
        // Trying to give an accurate answer is going to be too costly, so we aren't going to try.
        totalResults.Value = search.TotalHits;
        skippedResults.Value = skippedResultsInCurrentLoop;

        // Using the final set of results in the intersectionCollector
        int returnedResults = 0;
        for (int i = query.Start; i < intersectResults.Count && (i - query.Start) < pageSizeBestGuess; i++)
        {
            var indexResult = intersectResults[i];
            var document = _searcher.Doc(indexResult.LuceneId, _state);

            // Cheap key-based rejection before the document is materialized.
            if (retriever.TryGetKey(document, _state, out string key) && scope.WillProbablyIncludeInResults(key) == false)
            {
                skippedResults.Value++;
                skippedResultsInCurrentLoop++;
                continue;
            }

            var result = retriever.Get(document, indexResult.Score, _state);

            if (scope.TryIncludeInResults(result) == false)
            {
                skippedResults.Value++;
                skippedResultsInCurrentLoop++;
                continue;
            }

            returnedResults++;

            yield return result;

            if (returnedResults == pageSize)
            {
                yield break;
            }
        }
    }
}
public IEnumerable <QueryResult> Query(IndexQueryServerSide query, QueryTimingsScope queryTimings, FieldsToFetch fieldsToFetch, Reference <int> totalResults, Reference <int> skippedResults, Reference <int> scannedDocuments, IQueryResultRetriever retriever, DocumentsOperationContext documentsContext, Func <string, SpatialField> getSpatialField, CancellationToken token) { ExplanationOptions explanationOptions = null; var pageSize = query.PageSize; var isDistinctCount = pageSize == 0 && query.Metadata.IsDistinct; if (isDistinctCount) { pageSize = int.MaxValue; } pageSize = GetPageSize(_searcher, pageSize); var docsToGet = pageSize; var position = query.Start; QueryTimingsScope luceneScope = null; QueryTimingsScope highlightingScope = null; QueryTimingsScope explanationsScope = null; if (queryTimings != null) { luceneScope = queryTimings.For(nameof(QueryTimingsScope.Names.Lucene), start: false); highlightingScope = query.Metadata.HasHighlightings ? queryTimings.For(nameof(QueryTimingsScope.Names.Highlightings), start: false) : null; explanationsScope = query.Metadata.HasExplanations ? 
queryTimings.For(nameof(QueryTimingsScope.Names.Explanations), start: false) : null; } var returnedResults = 0; var luceneQuery = GetLuceneQuery(documentsContext, query.Metadata, query.QueryParameters, _analyzer, _queryBuilderFactories); using (var queryFilter = GetQueryFilter(_index, query, documentsContext, skippedResults, scannedDocuments, retriever, queryTimings)) using (GetSort(query, _index, getSpatialField, documentsContext, out var sort)) using (var scope = new IndexQueryingScope(_indexType, query, fieldsToFetch, _searcher, retriever, _state)) { if (query.Metadata.HasHighlightings) { using (highlightingScope?.For(nameof(QueryTimingsScope.Names.Setup))) SetupHighlighter(query, luceneQuery, documentsContext); } while (true) { token.ThrowIfCancellationRequested(); TopDocs search; using (luceneScope?.Start()) search = ExecuteQuery(luceneQuery, query.Start, docsToGet, sort); totalResults.Value = search.TotalHits; scope.RecordAlreadyPagedItemsInPreviousPage(search, token); for (; position < search.ScoreDocs.Length && pageSize > 0; position++) { token.ThrowIfCancellationRequested(); var scoreDoc = search.ScoreDocs[position]; global::Lucene.Net.Documents.Document document; using (luceneScope?.Start()) document = _searcher.Doc(scoreDoc.Doc, _state); if (retriever.TryGetKey(document, _state, out string key) && scope.WillProbablyIncludeInResults(key) == false) { skippedResults.Value++; continue; } var filterResult = queryFilter?.Apply(document, key, _state); if (filterResult is not null and not FilterResult.Accepted) { if (filterResult is FilterResult.Skipped) { continue; } if (filterResult is FilterResult.LimitReached) { break; } } bool markedAsSkipped = false; var r = retriever.Get(document, scoreDoc, _state, token); if (r.Document != null) { var qr = CreateQueryResult(r.Document); if (qr.Result == null) { continue; } yield return(qr); } else if (r.List != null) { int numberOfProjectedResults = 0; foreach (Document item in r.List) { var qr = CreateQueryResult(item); 
if (qr.Result == null) { continue; } yield return(qr); numberOfProjectedResults++; } if (numberOfProjectedResults > 1) { totalResults.Value += numberOfProjectedResults - 1; } } else { skippedResults.Value++; } QueryResult CreateQueryResult(Document d) { if (scope.TryIncludeInResults(d) == false) { d?.Dispose(); if (markedAsSkipped == false) { skippedResults.Value++; markedAsSkipped = true; } return(default);
/// <summary>
/// Lazily executes <paramref name="query"/> and yields every document accepted by the querying scope,
/// stopping once <c>query.PageSize</c> results have been returned or the search has covered all hits.
/// </summary>
/// <param name="query">The query to execute; supplies the query text, paging and sort fields.</param>
/// <param name="fieldsToFetch">Passed to the querying scope.</param>
/// <param name="totalResults">Receives the total hits of the most recent search.</param>
/// <param name="skippedResults">Incremented for every hit that the querying scope rejects.</param>
/// <param name="retriever">Turns Lucene documents into results.</param>
/// <param name="token">Checked before every search and before every hit.</param>
public IEnumerable<Document> Query(IndexQueryServerSide query, FieldsToFetch fieldsToFetch, Reference<int> totalResults, Reference<int> skippedResults, IQueryResultRetriever retriever, CancellationToken token)
{
    var fetchCount = query.PageSize;
    var cursor = query.Start;

    var luceneQuery = GetLuceneQuery(query.Query, query.DefaultOperator, query.DefaultField, _analyzer);
    var sort = GetSort(query.SortedFields);

    var emitted = 0;

    using (var queryingScope = new IndexQueryingScope(_indexType, query, fieldsToFetch, _searcher, retriever, _maxIndexOutputsPerDocument, _actualMaxIndexOutputsPerDocument))
    {
        for (;;)
        {
            token.ThrowIfCancellationRequested();

            var hits = ExecuteQuery(luceneQuery, query.Start, fetchCount, sort);
            totalResults.Value = hits.TotalHits;

            queryingScope.RecordAlreadyPagedItemsInPreviousPage(hits);

            // `cursor` carries over between searches so a wider search resumes after the hits already examined.
            for (; cursor < hits.ScoreDocs.Length && query.PageSize > 0; cursor++)
            {
                token.ThrowIfCancellationRequested();

                var hit = hits.ScoreDocs[cursor];
                var luceneDocument = _searcher.Doc(hit.Doc);

                // Cheap key-based rejection before the document is materialized.
                string key;
                if (retriever.TryGetKey(luceneDocument, out key) && queryingScope.WillProbablyIncludeInResults(key) == false)
                {
                    skippedResults.Value++;
                    continue;
                }

                var candidate = retriever.Get(luceneDocument, hit.Score);
                if (queryingScope.TryIncludeInResults(candidate) == false)
                {
                    skippedResults.Value++;
                    continue;
                }

                emitted++;
                yield return candidate;

                if (emitted == query.PageSize)
                {
                    yield break;
                }
            }

            // Widen the next search by what is still missing, scaled when the scope reports multiple index outputs.
            var stillNeeded = query.PageSize - emitted;
            if (queryingScope.HasMultipleIndexOutputs)
            {
                fetchCount += stillNeeded * queryingScope.MaxNumberOfIndexOutputs;
            }
            else
            {
                fetchCount += stillNeeded;
            }

            // Stop when the search already covered every hit, or when the page is full.
            if (hits.TotalHits == hits.ScoreDocs.Length || emitted >= query.PageSize)
            {
                break;
            }
        }
    }
}
/// <summary>
/// Executes an INTERSECT query: the query text is split on <c>IntersectSeparators</c>, the first clause is run
/// as a regular sorted search, every further clause is run through an <c>IntersectionCollector</c>, and only
/// documents reported by the collector for all clauses are yielded (subject to the querying scope and paging).
/// </summary>
/// <param name="query">The query; its text must contain at least two intersect clauses.</param>
/// <param name="fieldsToFetch">Passed to the querying scope.</param>
/// <param name="totalResults">Receives the total hits of the base (first clause) search, not of the intersection.</param>
/// <param name="skippedResults">Receives the number of base-search slots not matched by the intersection, plus one per rejected result.</param>
/// <param name="retriever">Turns Lucene documents into results.</param>
/// <param name="token">Checked at the start of every widening attempt.</param>
/// <exception cref="InvalidOperationException">The query text contains fewer than two intersect clauses.</exception>
public IEnumerable<Document> IntersectQuery(IndexQueryServerSide query, FieldsToFetch fieldsToFetch, Reference<int> totalResults, Reference<int> skippedResults, IQueryResultRetriever retriever, CancellationToken token)
{
    var subQueries = query.Query.Split(IntersectSeparators, StringSplitOptions.RemoveEmptyEntries);
    if (subQueries.Length <= 1)
    {
        throw new InvalidOperationException("Invalid INTERSECT query, must have multiple intersect clauses.");
    }

    // Parse every clause exactly once; the parsed queries are reused on each widening attempt below
    // instead of being re-parsed inside the retry loop.
    var luceneSubQueries = Array.ConvertAll(subQueries, clause => GetLuceneQuery(clause, query.DefaultOperator, query.DefaultField, _analyzer));

    // Not sure how to select the page size here??? The problem is that only docs in this search can be part
    // of the final result because we're doing an intersection query (but we might exclude some of them).
    // The guess is computed in 64-bit and clamped: with 32-bit arithmetic a page size of int.MaxValue
    // wraps around to a negative number.
    int pageSizeBestGuess = (int)Math.Min(((long)query.Start + query.PageSize) * 2, int.MaxValue);
    int intersectMatches, skippedResultsInCurrentLoop = 0;
    int previousBaseQueryMatches = 0, currentBaseQueryMatches;

    var firstSubDocumentQuery = luceneSubQueries[0];
    var sort = GetSort(query.SortedFields);

    using (var scope = new IndexQueryingScope(_indexType, query, fieldsToFetch, _searcher, retriever, _maxIndexOutputsPerDocument, _actualMaxIndexOutputsPerDocument))
    {
        // Do the first sub-query in the normal way, so that sorting, filtering etc is accounted for
        var search = ExecuteQuery(firstSubDocumentQuery, 0, pageSizeBestGuess, sort);
        currentBaseQueryMatches = search.ScoreDocs.Length;
        var intersectionCollector = new IntersectionCollector(_searcher, search.ScoreDocs);

        do
        {
            token.ThrowIfCancellationRequested();

            if (skippedResultsInCurrentLoop > 0)
            {
                // We get here because our first attempt didn't get enough docs (after INTERSECTION was calculated).
                // Double in 64-bit and clamp so the guess can never wrap around to a negative int.
                pageSizeBestGuess = (int)Math.Min((long)pageSizeBestGuess * 2, int.MaxValue);

                search = ExecuteQuery(firstSubDocumentQuery, 0, pageSizeBestGuess, sort);
                previousBaseQueryMatches = currentBaseQueryMatches;
                currentBaseQueryMatches = search.ScoreDocs.Length;
                intersectionCollector = new IntersectionCollector(_searcher, search.ScoreDocs);
            }

            // The base query (index 0) seeded the collector; feed it every remaining clause.
            for (var i = 1; i < luceneSubQueries.Length; i++)
            {
                _searcher.Search(luceneSubQueries[i], null, intersectionCollector);
            }

            var currentIntersectResults = intersectionCollector.DocumentsIdsForCount(subQueries.Length).ToList();
            intersectMatches = currentIntersectResults.Count;
            skippedResultsInCurrentLoop = pageSizeBestGuess - intersectMatches;
        } while (intersectMatches < query.PageSize                // stop if we've got enough results to satisfy the pageSize
                 && currentBaseQueryMatches < search.TotalHits    // stop if increasing the page size wouldn't make any difference
                 && previousBaseQueryMatches < currentBaseQueryMatches); // stop if increasing the page size didn't result in any more "base query" results

        var intersectResults = intersectionCollector.DocumentsIdsForCount(subQueries.Length).ToList();

        // It's hard to know what to do here, the TotalHits from the base search isn't really the TotalSize,
        // because it's before the INTERSECTION has been applied, so only some of those results make it out.
        // Trying to give an accurate answer is going to be too costly, so we aren't going to try.
        totalResults.Value = search.TotalHits;
        skippedResults.Value = skippedResultsInCurrentLoop;

        // Using the final set of results in the intersectionCollector
        int returnedResults = 0;
        for (int i = query.Start; i < intersectResults.Count && (i - query.Start) < pageSizeBestGuess; i++)
        {
            var indexResult = intersectResults[i];
            var document = _searcher.Doc(indexResult.LuceneId);

            // Cheap key-based rejection before the document is materialized.
            string key;
            if (retriever.TryGetKey(document, out key) && scope.WillProbablyIncludeInResults(key) == false)
            {
                skippedResults.Value++;
                skippedResultsInCurrentLoop++;
                continue;
            }

            var result = retriever.Get(document, indexResult.Score);
            if (scope.TryIncludeInResults(result) == false)
            {
                skippedResults.Value++;
                skippedResultsInCurrentLoop++;
                continue;
            }

            returnedResults++;
            yield return result;

            if (returnedResults == query.PageSize)
            {
                yield break;
            }
        }
    }
}