/// <summary>
/// Runs the read triggers over a batch of documents and works out, for every index,
/// which of those documents the index has to process.
/// </summary>
/// <param name="indexesToWorkOn">The indexes a batch should be prepared for.</param>
/// <param name="jsonDocs">The fetched documents. The last element is read unconditionally, so the list must not be empty.</param>
/// <param name="highestETagInBatch">Passed to every <c>IndexingBatch</c> created here.</param>
/// <returns>
/// A lazily evaluated sequence with one entry per index that ended up with at least one document.
/// Indexes left with no documents get an <c>UpdateLastIndexed</c> call instead, all of them
/// issued from a single storage batch.
/// </returns>
private IEnumerable<IndexingBatchForIndex> FilterIndexes(IList<IndexToWorkOn> indexesToWorkOn, List<JsonDocument> jsonDocs, Etag highestETagInBatch)
{
    var tail = jsonDocs.Last();

    Debug.Assert(tail.Etag != null);
    Debug.Assert(tail.LastModified != null);

    var tailEtag = tail.Etag;
    var tailModified = tail.LastModified.Value;

    var documentRetriever = new DocumentRetriever(null, context.ReadTriggers, context.Database.InFlightTransactionalState);

    // A document rejected by the read triggers is kept, wrapped as a FilteredDocument.
    // NOTE(review): the expando is built from the original `doc`, not from the trigger
    // output - confirm this is intentional.
    var filteredDocs = BackgroundTaskExecuter.Instance.Apply(context, jsonDocs, doc =>
    {
        var filteredDoc = documentRetriever.ExecuteReadTriggers(doc, null, ReadOperation.Index);
        return filteredDoc == null
            ? new { Doc = doc, Json = (object)new FilteredDocument(doc) }
            : new { Doc = filteredDoc, Json = JsonToExpando.Convert(doc.ToJson()) };
    });

    Log.Debug("After read triggers executed, {0} documents remained", filteredDocs.Count);

    var results = new IndexingBatchForIndex[indexesToWorkOn.Count];
    var actions = new Action<IStorageActionsAccessor>[indexesToWorkOn.Count];

    BackgroundTaskExecuter.Instance.ExecuteAll(context, indexesToWorkOn, (workItem, slot) =>
    {
        var indexName = workItem.IndexName;
        var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexName);
        if (viewGenerator == null)
        {
            return; // probably deleted
        }

        var batch = new IndexingBatch(highestETagInBatch);

        foreach (var item in filteredDocs)
        {
            if (prefetchingBehavior.FilterDocuments(item.Doc) == false)
            {
                continue;
            }

            // did we already indexed this document in this index?
            var etag = item.Doc.Etag;
            if (etag == null)
            {
                continue;
            }

            // is the Raven-Entity-Name a match for the things the index executes on?
            if (viewGenerator.ForEntityNames.Count != 0)
            {
                var entityName = item.Doc.Metadata.Value<string>(Constants.RavenEntityName);
                if (viewGenerator.ForEntityNames.Contains(entityName) == false)
                {
                    continue;
                }
            }

            batch.Add(item.Doc, item.Json, prefetchingBehavior.ShouldSkipDeleteFromIndex(item.Doc));

            // Track the earliest LastModified among the documents added to the batch.
            if (batch.DateTime == null)
            {
                batch.DateTime = item.Doc.LastModified;
            }
            else
            {
                batch.DateTime = batch.DateTime > item.Doc.LastModified
                    ? item.Doc.LastModified
                    : batch.DateTime;
            }
        }

        if (batch.Docs.Count == 0)
        {
            Log.Debug("All documents have been filtered for {0}, no indexing will be performed, updating to {1}, {2}", indexName, tailEtag, tailModified);

            // we use it this way to batch all the updates together
            actions[slot] = accessor => accessor.Indexing.UpdateLastIndexed(indexName, tailEtag, tailModified);
            return;
        }

        if (Log.IsDebugEnabled)
        {
            Log.Debug("Going to index {0} documents in {1}: ({2})", batch.Ids.Count, workItem, string.Join(", ", batch.Ids));
        }

        results[slot] = new IndexingBatchForIndex
        {
            Batch = batch,
            IndexName = workItem.IndexName,
            Index = workItem.Index,
            LastIndexedEtag = workItem.LastIndexedEtag
        };
    });

    // Apply every deferred "nothing to index" update in one storage batch.
    transactionalStorage.Batch(actionsAccessor =>
    {
        foreach (var pendingUpdate in actions.Where(a => a != null))
        {
            pendingUpdate(actionsAccessor);
        }
    });

    return results.Where(x => x != null);
}
/// <summary>
/// Runs the read triggers over a batch of documents and pairs every index with the batch of
/// documents it still has to process.
/// </summary>
/// <param name="indexesToWorkOn">The indexes a batch should be prepared for.</param>
/// <param name="jsonDocs">The fetched documents. The last element is read unconditionally, so the array must not be empty.</param>
/// <returns>
/// A lazily evaluated sequence of (index, batch) pairs for indexes that have documents to process.
/// Indexes that are already at or past the last etag of the batch, or whose view generator is
/// missing, are skipped. Indexes for which every document was filtered out get an
/// <c>UpdateLastIndexed</c> call instead, all of them issued from one storage batch.
/// </returns>
private IEnumerable<Tuple<IndexToWorkOn, IndexingBatch>> FilterIndexes(IList<IndexToWorkOn> indexesToWorkOn, JsonDocument[] jsonDocs)
{
    var tail = jsonDocs.Last();

    Debug.Assert(tail.Etag != null);
    Debug.Assert(tail.LastModified != null);

    var tailEtag = tail.Etag.Value;
    var tailModified = tail.LastModified.Value;
    var batchUpperBound = new ComparableByteArray(tailEtag.ToByteArray());

    var documentRetriever = new DocumentRetriever(null, context.ReadTriggers);

    // NOTE(review): rejected documents map to null here and items are dereferenced below
    // without a null check - presumably Apply drops null results; verify.
    var filteredDocs = BackgroundTaskExecuter.Instance.Apply(jsonDocs, doc =>
    {
        var visible = documentRetriever.ExecuteReadTriggers(doc, null, ReadOperation.Index);
        return visible == null
            ? null
            : new { Doc = visible, Json = JsonToExpando.Convert(visible.ToJson()) };
    });

    log.Debug("After read triggers executed, {0} documents remained", filteredDocs.Count);

    var results = new Tuple<IndexToWorkOn, IndexingBatch>[indexesToWorkOn.Count];
    var actions = new Action<IStorageActionsAccessor>[indexesToWorkOn.Count];

    BackgroundTaskExecuter.Instance.ExecuteAll(context.Configuration, scheduler, indexesToWorkOn, (workItem, slot) =>
    {
        var indexedUpTo = new ComparableByteArray(workItem.LastIndexedEtag.ToByteArray());
        if (indexedUpTo.CompareTo(batchUpperBound) >= 0)
        {
            return;
        }

        var indexName = workItem.IndexName;
        var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexName);
        if (viewGenerator == null)
        {
            return; // probably deleted
        }

        var batch = new IndexingBatch();

        foreach (var item in filteredDocs)
        {
            // did we already indexed this document in this index?
            var docEtag = new ComparableByteArray(item.Doc.Etag.Value.ToByteArray());
            if (indexedUpTo.CompareTo(docEtag) >= 0)
            {
                continue;
            }

            // is the Raven-Entity-Name a match for the things the index executes on?
            if (viewGenerator.ForEntityNames.Count != 0)
            {
                var entityName = item.Doc.Metadata.Value<string>(Constants.RavenEntityName);
                if (viewGenerator.ForEntityNames.Contains(entityName) == false)
                {
                    continue;
                }
            }

            batch.Add(item.Doc, item.Json);

            // Track the earliest LastModified among the documents added to the batch.
            if (batch.DateTime == null)
            {
                batch.DateTime = item.Doc.LastModified;
            }
            else
            {
                batch.DateTime = batch.DateTime > item.Doc.LastModified
                    ? item.Doc.LastModified
                    : batch.DateTime;
            }
        }

        if (batch.Docs.Count == 0)
        {
            log.Debug("All documents have been filtered for {0}, no indexing will be performed, updating to {1}, {2}", indexName, tailEtag, tailModified);

            // we use it this way to batch all the updates together
            actions[slot] = accessor => accessor.Indexing.UpdateLastIndexed(indexName, tailEtag, tailModified);
            return;
        }

        log.Debug("Going to index {0} documents in {1}", batch.Ids.Count, workItem);
        results[slot] = Tuple.Create(workItem, batch);
    });

    // Apply every deferred "nothing to index" update in one storage batch.
    transactionalStorage.Batch(actionsAccessor =>
    {
        foreach (var pendingUpdate in actions.Where(a => a != null))
        {
            pendingUpdate(actionsAccessor);
        }
    });

    return results.Where(x => x != null);
}
/// <summary>
/// Indexes the given documents into a single index, after passing each of them through the
/// read triggers.
/// </summary>
/// <param name="actions">Storage accessor handed to the index storage; also used to recognise write conflicts.</param>
/// <param name="index">Name of the index to update. Nothing happens when no view generator exists for it.</param>
/// <param name="jsonDocs">The documents to index.</param>
/// <remarks>
/// Exceptions thrown while indexing are not propagated: write conflicts are ignored silently and
/// anything else is logged as a warning.
/// </remarks>
private void IndexDocuments(IStorageActionsAccessor actions, string index, JsonDocument[] jsonDocs)
{
    var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(index);
    if (viewGenerator == null)
    {
        return; // index was deleted, probably
    }

    // Earliest modification time in the batch, or DateTime.MinValue when none is available.
    var earliestModified = jsonDocs.Min(x => x.LastModified) ?? DateTime.MinValue;

    var documentRetriever = new DocumentRetriever(null, context.ReadTriggers);

    try
    {
        log.DebugFormat("Indexing {0} documents for index: {1}", jsonDocs.Length, index);

        // Built lazily: the read triggers run while the sequence is being enumerated.
        var visibleDocs = jsonDocs
            .Select(doc => documentRetriever.ExecuteReadTriggers(doc, null, ReadOperation.Index))
            .Where(doc => doc != null);
        var indexableDocs = visibleDocs.Select(x => JsonToExpando.Convert(x.ToJson()));

        context.IndexStorage.Index(index, viewGenerator, indexableDocs, context, actions, earliestModified);
    }
    catch (Exception e)
    {
        if (!actions.IsWriteConflict(e))
        {
            log.WarnFormat(e, "Failed to index documents for index: {0}", index);
        }
    }
}
/// <summary>
/// Feeds a page of documents to <paramref name="addDocument"/>, reading either in reverse
/// update order (when <paramref name="etag"/> is null) or the documents after the given etag.
/// </summary>
/// <param name="start">Offset used for the reverse-update-order read; advanced past pages that produced no output.</param>
/// <param name="pageSize">Maximum number of documents requested from storage per read.</param>
/// <param name="etag">When not null, documents after this etag are read and the etag follows the documents seen.</param>
/// <param name="token">Cancellation token, checked per document and now also observed by the storage read.</param>
/// <param name="addDocument">Receives the JSON of every document that passes the read triggers.</param>
/// <exception cref="OperationCanceledException">Thrown when <paramref name="token"/> is cancelled while documents are processed.</exception>
public void GetDocuments(int start, int pageSize, Etag etag, CancellationToken token, Action<RavenJObject> addDocument)
{
    // Only the work-context token used to reach the storage read, so cancelling `token`
    // was noticed only between documents. Linking both tokens keeps the previous
    // cancellation source and adds the caller's one.
    // NOTE(review): assumes TransactionalStorage.Batch runs the callback synchronously, so the
    // linked source outlives the enumeration - confirm.
    using (var linkedTokenSource = CancellationTokenSource.CreateLinkedTokenSource(token, WorkContext.CancellationToken))
    {
        var storageToken = linkedTokenSource.Token;

        TransactionalStorage.Batch(actions =>
        {
            bool returnedDocs = false;
            while (true)
            {
                var documents = etag == null
                    ? actions.Documents.GetDocumentsByReverseUpdateOrder(start, pageSize)
                    : actions.Documents.GetDocumentsAfter(etag, pageSize, storageToken);

                var documentRetriever = new DocumentRetriever(actions, Database.ReadTriggers, Database.InFlightTransactionalState);

                int docCount = 0;
                foreach (var doc in documents)
                {
                    docCount++;
                    token.ThrowIfCancellationRequested();

                    // When paging by etag, follow the documents so the next read continues after them.
                    if (etag != null)
                        etag = doc.Etag;

                    DocumentRetriever.EnsureIdInMetadata(doc);

                    var nonAuthoritativeInformationBehavior = Database.InFlightTransactionalState.GetNonAuthoritativeInformationBehavior<JsonDocument>(null, doc.Key);
                    var document = nonAuthoritativeInformationBehavior == null ? doc : nonAuthoritativeInformationBehavior(doc);

                    document = documentRetriever.ExecuteReadTriggers(document, null, ReadOperation.Load);
                    if (document == null)
                        continue; // rejected by a read trigger

                    addDocument(document.ToJson());
                    returnedDocs = true;
                }

                // Stop once something was returned or storage has nothing more to give;
                // otherwise the whole page was filtered out, so move on to the next one.
                if (returnedDocs || docCount == 0)
                    break;

                start += docCount;
            }
        });
    }
}
/// <summary>
/// Pages through the documents whose id starts with <paramref name="idPrefix"/>, filters them by
/// wildcard patterns and read triggers, optionally applies a stored transformer, and hands every
/// resulting JSON object to <paramref name="addDoc"/>.
/// </summary>
/// <param name="idPrefix">Id prefix to scan for; trimmed before use.</param>
/// <param name="matches">Wildcard pattern tested against the part of the key after the prefix.</param>
/// <param name="exclude">Wildcard exclusion pattern tested against the part of the key after the prefix.</param>
/// <param name="start">Number of matching documents to skip, or the storage offset when rapid pagination applies.</param>
/// <param name="pageSize">Maximum number of documents to hand to <paramref name="addDoc"/>.</param>
/// <param name="token">Cancellation token checked for every document.</param>
/// <param name="nextStart">
/// On input, a positive value equal to <paramref name="start"/> enables rapid pagination.
/// On output, the value to pass as the next start; set to <paramref name="start"/> when fewer than
/// <paramref name="pageSize"/> documents were added.
/// </param>
/// <param name="addDoc">Receives each resulting document as JSON.</param>
/// <param name="transformer">Optional name of a stored transformer to apply to every document.</param>
/// <param name="transformerParameters">Parameters passed on to the document retriever.</param>
/// <param name="skipAfter">Optional key passed to storage; null or empty means none.</param>
/// <exception cref="ArgumentNullException"><paramref name="idPrefix"/> is null.</exception>
/// <exception cref="InvalidOperationException">The named transformer does not exist, or it produced no result for a document.</exception>
public void GetDocumentsWithIdStartingWith(string idPrefix, string matches, string exclude, int start, int pageSize, CancellationToken token, ref int nextStart, Action<RavenJObject> addDoc, string transformer = null, Dictionary<string, RavenJToken> transformerParameters = null, string skipAfter = null)
{
    if (idPrefix == null)
    {
        throw new ArgumentNullException("idPrefix");
    }

    idPrefix = idPrefix.Trim();

    // Rapid pagination: the caller passes back the nextStart of the previous call, so the
    // storage offset can be used directly instead of re-counting matches from zero.
    var canPerformRapidPagination = nextStart > 0 && start == nextStart;
    var actualStart = canPerformRapidPagination ? start : 0;
    var addedDocs = 0;
    var matchedDocs = 0;

    TransactionalStorage.Batch(actions =>
    {
        var docsToSkip = canPerformRapidPagination ? 0 : start;
        var effectiveSkipAfter = string.IsNullOrEmpty(skipAfter) ? null : skipAfter;

        AbstractTransformer storedTransformer = null;
        if (transformer != null)
        {
            storedTransformer = IndexDefinitionStorage.GetTransformer(transformer);
            if (storedTransformer == null)
            {
                throw new InvalidOperationException("No transformer with the name: " + transformer);
            }
        }

        int docCount;
        do
        {
            docCount = 0;

            var docs = actions.Documents.GetDocumentsWithIdStartingWith(idPrefix, actualStart, pageSize, effectiveSkipAfter);
            var documentRetriever = new DocumentRetriever(actions, Database.ReadTriggers, Database.InFlightTransactionalState, transformerParameters);

            foreach (var doc in docs)
            {
                token.ThrowIfCancellationRequested();
                docCount++;

                // The wildcard patterns apply to whatever follows the prefix.
                var keyTest = doc.Key.Substring(idPrefix.Length);
                if (!WildcardMatcher.Matches(matches, keyTest) || WildcardMatcher.MatchesExclusion(exclude, keyTest))
                {
                    continue;
                }

                DocumentRetriever.EnsureIdInMetadata(doc);

                var nonAuthoritativeInformationBehavior = Database.InFlightTransactionalState.GetNonAuthoritativeInformationBehavior<JsonDocument>(null, doc.Key);
                var document = nonAuthoritativeInformationBehavior != null
                    ? nonAuthoritativeInformationBehavior(doc)
                    : doc;

                document = documentRetriever.ExecuteReadTriggers(document, null, ReadOperation.Load);
                if (document == null)
                {
                    continue; // rejected by a read trigger
                }

                matchedDocs++;
                if (matchedDocs <= docsToSkip)
                {
                    continue; // still inside the part of the result set the caller asked to skip
                }

                token.ThrowIfCancellationRequested();

                if (storedTransformer == null)
                {
                    addDoc(document.ToJson());
                }
                else
                {
                    using (new CurrentTransformationScope(Database, documentRetriever))
                    {
                        var transformed = storedTransformer
                            .TransformResultsDefinition(new[] { new DynamicJsonObject(document.ToJson()) })
                            .Select(x => JsonExtensions.ToJObject(x))
                            .ToArray();

                        if (transformed.Length == 0)
                        {
                            throw new InvalidOperationException("The transform results function failed on a document: " + document.Key);
                        }

                        // The etag of the result combines the document etag with the transformer's
                        // hash bytes and the retriever's etag.
                        var transformedJsonDocument = new JsonDocument
                        {
                            Etag = document.Etag.HashWith(storedTransformer.GetHashCodeBytes()).HashWith(documentRetriever.Etag),
                            NonAuthoritativeInformation = document.NonAuthoritativeInformation,
                            LastModified = document.LastModified,
                            DataAsJson = new RavenJObject { { "$values", new RavenJArray(transformed) } },
                        };

                        addDoc(transformedJsonDocument.ToJson());
                    }
                }

                addedDocs++;
                if (addedDocs >= pageSize)
                {
                    break;
                }
            }

            actualStart += pageSize;

            // The bounds on actualStart end the loop if the addition above wrapped around.
        } while (docCount > 0 && addedDocs < pageSize && actualStart > 0 && actualStart < int.MaxValue);
    });

    if (addedDocs == pageSize)
    {
        nextStart = canPerformRapidPagination ? start + matchedDocs : actualStart;
    }
    else
    {
        nextStart = start; // will mark as last page
    }
}
/// <summary>
/// Reads the documents after <paramref name="etag"/> whose id starts with
/// <paramref name="idPrefix"/> and passes those that survive the read triggers to
/// <paramref name="addDocument"/>, until something was returned or storage yields nothing more.
/// </summary>
/// <param name="idPrefix">Id prefix handed to the storage query.</param>
/// <param name="pageSize">Maximum number of documents requested from storage per read.</param>
/// <param name="etag">Etag to start after; it follows every document enumerated.</param>
/// <param name="token">Cancellation token, passed to storage and checked per document.</param>
/// <param name="addDocument">Receives each document; returning false stops the enumeration.</param>
/// <returns>
/// The etag reached: that of the last document enumerated, or, when a read produced no
/// documents, the last etag storage reported through its callback (if any).
/// </returns>
public Etag GetDocumentsWithIdStartingWith(string idPrefix, int pageSize, Etag etag, CancellationToken token, Func<JsonDocument, bool> addDocument)
{
    Etag lastDocumentReadEtag = null;

    TransactionalStorage.Batch(actions =>
    {
        var anyReturned = false;
        bool keepScanning;

        do
        {
            var documents = actions.Documents.GetDocumentsAfterWithIdStartingWith(
                etag,
                idPrefix,
                pageSize,
                token,
                timeout: TimeSpan.FromSeconds(2),
                lastProcessedDocument: x => lastDocumentReadEtag = x);

            var documentRetriever = new DocumentRetriever(Database.Configuration, actions, Database.ReadTriggers);

            var seen = 0;
            foreach (var doc in documents)
            {
                seen++;
                token.ThrowIfCancellationRequested();

                etag = doc.Etag;

                JsonDocument.EnsureIdInMetadata(doc);

                var nonAuthoritativeInformationBehavior = actions.InFlightStateSnapshot.GetNonAuthoritativeInformationBehavior<JsonDocument>(null, doc.Key);
                var candidate = nonAuthoritativeInformationBehavior == null
                    ? doc
                    : nonAuthoritativeInformationBehavior(doc);

                var document = documentRetriever.ExecuteReadTriggers(candidate, null, ReadOperation.Load);
                if (document == null)
                {
                    continue; // rejected by a read trigger
                }

                anyReturned = true;
                Database.WorkContext.UpdateFoundWork();

                if (!addDocument(document))
                {
                    break;
                }
            }

            if (anyReturned)
            {
                keepScanning = false;
            }
            else if (seen == 0)
            {
                // No document was found that matches the requested criteria.
                // If we had a failure happen, we update the etag as we don't need to process
                // those documents again (no matches there anyways).
                if (lastDocumentReadEtag != null)
                {
                    etag = lastDocumentReadEtag;
                }

                keepScanning = false;
            }
            else
            {
                // Everything in this read was filtered out; continue after the last etag seen.
                keepScanning = true;
            }
        } while (keepScanning);
    });

    return etag;
}
/// <summary>
/// Passes a page of documents to <paramref name="addDocument"/>, reading either in reverse
/// update order (when <paramref name="etag"/> is null) or the documents after the given etag.
/// Pages in which every document was rejected by the read triggers are skipped.
/// </summary>
/// <param name="start">Offset for the reverse-update-order read; advanced past fully filtered pages.</param>
/// <param name="pageSize">Maximum number of documents requested from storage per read.</param>
/// <param name="etag">When not null, documents after this etag are read and the etag follows the documents seen.</param>
/// <param name="token">Cancellation token, passed to the etag-based read and checked per document.</param>
/// <param name="addDocument">Receives each document; returning false stops the enumeration.</param>
/// <returns>
/// The etag of the last document <paramref name="addDocument"/> accepted, tracked only when
/// <paramref name="etag"/> was supplied; null otherwise.
/// </returns>
public Etag GetDocuments(int start, int pageSize, Etag etag, CancellationToken token, Func<JsonDocument, bool> addDocument)
{
    Etag lastDocumentReadEtag = null;

    TransactionalStorage.Batch(actions =>
    {
        var anyReturned = false;

        for (;;)
        {
            var documents = etag == null
                ? actions.Documents.GetDocumentsByReverseUpdateOrder(start, pageSize)
                : actions.Documents.GetDocumentsAfter(etag, pageSize, token);

            var documentRetriever = new DocumentRetriever(Database.Configuration, actions, Database.ReadTriggers, Database.InFlightTransactionalState);

            var seen = 0;
            foreach (var doc in documents)
            {
                seen++;
                token.ThrowIfCancellationRequested();

                // When paging by etag, follow the documents so the next read continues after them.
                if (etag != null)
                {
                    etag = doc.Etag;
                }

                JsonDocument.EnsureIdInMetadata(doc);

                var nonAuthoritativeInformationBehavior = Database.InFlightTransactionalState.GetNonAuthoritativeInformationBehavior<JsonDocument>(null, doc.Key);
                var candidate = nonAuthoritativeInformationBehavior == null
                    ? doc
                    : nonAuthoritativeInformationBehavior(doc);

                var document = documentRetriever.ExecuteReadTriggers(candidate, null, ReadOperation.Load);
                if (document == null)
                {
                    continue; // rejected by a read trigger
                }

                anyReturned = true;
                Database.WorkContext.UpdateFoundWork();

                if (!addDocument(document))
                {
                    break;
                }

                lastDocumentReadEtag = etag;
            }

            // Done once something was returned or storage has nothing more to give.
            if (anyReturned || seen == 0)
            {
                return;
            }

            start += seen;
        }
    });

    return lastDocumentReadEtag;
}
/// <summary>
/// Passes a page of documents to <paramref name="addDocument"/>, reading either in reverse
/// update order (when <paramref name="etag"/> is null) or the documents after the given etag.
/// Pages in which every document was rejected by the read triggers are skipped. The whole read
/// runs inside the scope returned by <c>DocumentCacher.SkipSetDocumentsInDocumentCache()</c>.
/// </summary>
/// <param name="start">Offset for the reverse-update-order read; advanced past fully filtered pages.</param>
/// <param name="pageSize">Maximum number of documents requested from storage per read.</param>
/// <param name="etag">When not null, documents after this etag are read and the etag follows the documents seen.</param>
/// <param name="token">Cancellation token, passed to the etag-based read and checked per document.</param>
/// <param name="addDocument">Receives each document; returning false stops the enumeration.</param>
/// <param name="maxSize">Optional limit forwarded to the etag-based storage read.</param>
/// <param name="timeout">Optional timeout forwarded to the etag-based storage read.</param>
/// <returns>
/// The etag of the last document <paramref name="addDocument"/> accepted, tracked only when
/// <paramref name="etag"/> was supplied; null otherwise.
/// </returns>
public Etag GetDocuments(int start, int pageSize, Etag etag, CancellationToken token, Func<JsonDocument, bool> addDocument, long? maxSize = null, TimeSpan? timeout = null)
{
    Etag lastDocumentReadEtag = null;

    using (DocumentCacher.SkipSetDocumentsInDocumentCache())
    {
        TransactionalStorage.Batch(actions =>
        {
            bool returnedDocs = false;
            while (true)
            {
                var documents = etag == null
                    ? actions.Documents.GetDocumentsByReverseUpdateOrder(start, pageSize)
                    : actions.Documents.GetDocumentsAfter(etag, pageSize, token, maxSize: maxSize, timeout: timeout);

                var documentRetriever = new DocumentRetriever(Database.Configuration, actions, Database.ReadTriggers);

                var docCount = 0;
                foreach (var doc in documents)
                {
                    docCount++;
                    token.ThrowIfCancellationRequested();

                    // When paging by etag, follow the documents so the next read continues after them.
                    if (etag != null)
                        etag = doc.Etag;

                    JsonDocument.EnsureIdInMetadata(doc);

                    var nonAuthoritativeInformationBehavior = actions.InFlightStateSnapshot.GetNonAuthoritativeInformationBehavior<JsonDocument>(null, doc.Key);
                    var document = nonAuthoritativeInformationBehavior == null ? doc : nonAuthoritativeInformationBehavior(doc);

                    document = documentRetriever.ExecuteReadTriggers(document, null, ReadOperation.Load);
                    if (document == null)
                        continue; // rejected by a read trigger

                    returnedDocs = true;
                    Database.WorkContext.UpdateFoundWork();

                    bool canContinue = addDocument(document);
                    if (!canContinue)
                        break;

                    lastDocumentReadEtag = etag;
                }

                if (returnedDocs || docCount == 0)
                    break;

                // Every document of this page was filtered out. lastDocumentReadEtag is only
                // assigned after returnedDocs becomes true, so it is always null here; the
                // former "etag = lastDocumentReadEtag" reset could never run and was removed.
                // The etag already follows the documents seen, so the next read continues after them.
                start += docCount;
            }
        });
    }

    return lastDocumentReadEtag;
}