/// <summary>
/// Builds, for every index in <paramref name="indexesToWorkOn"/>, the batch of documents from
/// <paramref name="jsonDocs"/> that the index should process.
/// </summary>
/// <remarks>
/// Read triggers are executed once per document up front. An index whose batch ends up empty produces
/// no result; instead its last-indexed marker is moved to the etag / last-modified of the last element
/// of <paramref name="jsonDocs"/>, and all such updates are applied inside one storage batch.
/// </remarks>
/// <param name="indexesToWorkOn">Indexes to build batches for.</param>
/// <param name="jsonDocs">
/// Documents to distribute. Must be non-empty; its last element is expected to carry an etag and a
/// last-modified value (asserted in debug builds).
/// </param>
/// <param name="highestETagInBatch">Passed to the constructor of every <see cref="IndexingBatch"/>.</param>
/// <returns>A lazily evaluated sequence holding one entry per index that has at least one document to index.</returns>
private IEnumerable<IndexingBatchForIndex> FilterIndexes(IList<IndexToWorkOn> indexesToWorkOn, List<JsonDocument> jsonDocs, Etag highestETagInBatch)
{
    var newestDoc = jsonDocs.Last();
    Debug.Assert(newestDoc.Etag != null);
    Debug.Assert(newestDoc.LastModified != null);

    var lastEtag = newestDoc.Etag;
    var lastModified = newestDoc.LastModified.Value;

    var retriever = new DocumentRetriever(null, null, context.ReadTriggers, context.Database.InFlightTransactionalState);
    var candidates = BackgroundTaskExecuter.Instance.Apply(context, jsonDocs, doc =>
    {
        var visibleDoc = retriever.ExecuteReadTriggers(doc, null, ReadOperation.Index);

        // A null result from the read triggers is represented by a FilteredDocument wrapper
        // around the original document.
        // NOTE(review): the JSON is built from the original 'doc', not from 'visibleDoc' - confirm this is intended.
        return visibleDoc != null
            ? new { Doc = visibleDoc, Json = JsonToExpando.Convert(doc.ToJson()) }
            : new { Doc = doc, Json = (object)new FilteredDocument(doc) };
    });

    Log.Debug("After read triggers executed, {0} documents remained", candidates.Count);

    var results = new IndexingBatchForIndex[indexesToWorkOn.Count];
    var actions = new Action<IStorageActionsAccessor>[indexesToWorkOn.Count];

    BackgroundTaskExecuter.Instance.ExecuteAll(context, indexesToWorkOn, (workItem, slot) =>
    {
        var indexName = workItem.Index.PublicName;
        var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexName);
        if (viewGenerator == null)
        {
            // probably deleted
            return;
        }

        var batch = new IndexingBatch(highestETagInBatch);
        foreach (var candidate in candidates)
        {
            var doc = candidate.Doc;

            // Skip documents for which FilterDocuments returns false, documents without an etag,
            // and documents whose Raven-Entity-Name is not one the index executes on.
            if (!defaultPrefetchingBehavior.FilterDocuments(doc) ||
                doc.Etag == null ||
                (viewGenerator.ForEntityNames.Count != 0 &&
                 !viewGenerator.ForEntityNames.Contains(doc.Metadata.Value<string>(Constants.RavenEntityName))))
            {
                continue;
            }

            batch.Add(doc, candidate.Json, defaultPrefetchingBehavior.ShouldSkipDeleteFromIndex(doc));

            // Keep the earliest LastModified seen among the documents added to the batch.
            batch.DateTime = batch.DateTime == null || batch.DateTime > doc.LastModified
                ? doc.LastModified
                : batch.DateTime;
        }

        if (batch.Docs.Count != 0)
        {
            if (Log.IsDebugEnabled)
            {
                Log.Debug("Going to index {0} documents in {1}: ({2})", batch.Ids.Count, workItem, string.Join(", ", batch.Ids));
            }

            results[slot] = new IndexingBatchForIndex
            {
                Batch = batch,
                IndexId = workItem.IndexId,
                Index = workItem.Index,
                LastIndexedEtag = workItem.LastIndexedEtag
            };
            return;
        }

        Log.Debug("All documents have been filtered for {0}, no indexing will be performed, updating to {1}, {2}", indexName, lastEtag, lastModified);

        // Deferred so that every index's update runs inside the single storage batch below.
        actions[slot] = accessor =>
        {
            accessor.Indexing.UpdateLastIndexed(workItem.Index.indexId, lastEtag, lastModified);
            accessor.AfterStorageCommit += () =>
            {
                workItem.Index.EnsureIndexWriter();
                workItem.Index.Flush(lastEtag);
            };
        };
    });

    transactionalStorage.Batch(storageAccessor =>
    {
        foreach (var pending in actions.Where(a => a != null))
        {
            pending(storageAccessor);
        }
    });

    return results.Where(entry => entry != null);
}
/// <summary>
/// Works out, per index, which of the supplied documents need to be indexed.
/// </summary>
/// <remarks>
/// Each document is first passed through the read triggers. For every index a batch is then assembled
/// from the documents that pass the prefetching filter, have an etag and match the index's entity names.
/// Indexes left with an empty batch only get their last-indexed etag / timestamp updated; those updates
/// are collected and executed together in one storage batch.
/// </remarks>
/// <param name="indexesToWorkOn">Indexes to build batches for.</param>
/// <param name="jsonDocs">
/// Documents to distribute. Must be non-empty; the last element is expected to have an etag and a
/// last-modified value (asserted in debug builds).
/// </param>
/// <param name="highestETagInBatch">Passed to the constructor of every <see cref="IndexingBatch"/>.</param>
/// <returns>A lazily evaluated sequence containing an entry for each index with a non-empty batch.</returns>
private IEnumerable<IndexingBatchForIndex> FilterIndexes(IList<IndexToWorkOn> indexesToWorkOn, List<JsonDocument> jsonDocs, Etag highestETagInBatch)
{
    var tail = jsonDocs.Last();
    Debug.Assert(tail.Etag != null);
    Debug.Assert(tail.LastModified != null);

    var lastEtag = tail.Etag;
    var lastModified = tail.LastModified.Value;

    var triggerRunner = new DocumentRetriever(null, context.ReadTriggers, context.Database.InFlightTransactionalState);
    var prepared = BackgroundTaskExecuter.Instance.Apply(context, jsonDocs, doc =>
    {
        var afterTriggers = triggerRunner.ExecuteReadTriggers(doc, null, ReadOperation.Index);

        // When the triggers yield null the original document is kept, wrapped in a FilteredDocument.
        // NOTE(review): the JSON is built from the original 'doc', not from 'afterTriggers' - confirm this is intended.
        return afterTriggers != null
            ? new { Doc = afterTriggers, Json = JsonToExpando.Convert(doc.ToJson()) }
            : new { Doc = doc, Json = (object)new FilteredDocument(doc) };
    });

    Log.Debug("After read triggers executed, {0} documents remained", prepared.Count);

    var batchesPerIndex = new IndexingBatchForIndex[indexesToWorkOn.Count];
    var markerUpdates = new Action<IStorageActionsAccessor>[indexesToWorkOn.Count];

    BackgroundTaskExecuter.Instance.ExecuteAll(context, indexesToWorkOn, (target, position) =>
    {
        var indexName = target.IndexName;
        var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexName);
        if (viewGenerator == null)
        {
            // probably deleted
            return;
        }

        var batch = new IndexingBatch(highestETagInBatch);
        foreach (var entry in prepared)
        {
            var doc = entry.Doc;

            if (!prefetchingBehavior.FilterDocuments(doc))
            {
                continue;
            }

            if (doc.Etag == null)
            {
                continue;
            }

            // Only documents whose Raven-Entity-Name is listed by the index are relevant,
            // unless the index declares no entity names at all.
            var restrictedToEntities = viewGenerator.ForEntityNames.Count != 0;
            if (restrictedToEntities &&
                !viewGenerator.ForEntityNames.Contains(doc.Metadata.Value<string>(Constants.RavenEntityName)))
            {
                continue;
            }

            batch.Add(doc, entry.Json, prefetchingBehavior.ShouldSkipDeleteFromIndex(doc));

            // Keep the earliest LastModified seen among the documents added to the batch.
            batch.DateTime = batch.DateTime == null || batch.DateTime > doc.LastModified
                ? doc.LastModified
                : batch.DateTime;
        }

        if (batch.Docs.Count != 0)
        {
            if (Log.IsDebugEnabled)
            {
                Log.Debug("Going to index {0} documents in {1}: ({2})", batch.Ids.Count, target, string.Join(", ", batch.Ids));
            }

            batchesPerIndex[position] = new IndexingBatchForIndex
            {
                Batch = batch,
                IndexName = target.IndexName,
                Index = target.Index,
                LastIndexedEtag = target.LastIndexedEtag
            };
            return;
        }

        Log.Debug("All documents have been filtered for {0}, no indexing will be performed, updating to {1}, {2}", indexName, lastEtag, lastModified);

        // Deferred so that all marker updates are executed together in the storage batch below.
        markerUpdates[position] = accessor => accessor.Indexing.UpdateLastIndexed(indexName, lastEtag, lastModified);
    });

    transactionalStorage.Batch(storageAccessor =>
    {
        foreach (var update in markerUpdates.Where(u => u != null))
        {
            update(storageAccessor);
        }
    });

    return batchesPerIndex.Where(b => b != null);
}
/// <summary>
/// Builds, for every index in <paramref name="indexesToWorkOn"/>, the batch of documents from
/// <paramref name="jsonDocs"/> that the index still has to process, and reports which indexes
/// had nothing to index.
/// </summary>
/// <remarks>
/// Read triggers are executed once per document up front. For each index, a document is added to the
/// batch only if it passes the prefetching filter, has an etag greater than the index's
/// <c>LastIndexedEtag</c>, and matches the index's entity names (when the index declares any).
/// An index whose batch is empty is marked as filtered out; if its last-indexed etag is behind the
/// etag of the last element of <paramref name="jsonDocs"/>, an update of its last-indexed marker is queued.
/// Queued updates are executed one storage batch each, retried up to 10 times on write conflicts.
/// </remarks>
/// <param name="indexesToWorkOn">Indexes to build batches for.</param>
/// <param name="jsonDocs">
/// Documents to distribute. Must be non-empty; its last element is expected to carry an etag and a
/// last-modified value (asserted in debug builds).
/// </param>
/// <param name="highestETagInBatch">Passed to the constructor of every <see cref="IndexingBatch"/>.</param>
/// <param name="filteredOutIndexes">Receives the indexes for which no document remained after filtering.</param>
/// <returns>A lazily evaluated sequence holding one entry per index that has at least one document to index.</returns>
private IEnumerable<IndexingBatchForIndex> FilterIndexes(IList<IndexToWorkOn> indexesToWorkOn, List<JsonDocument> jsonDocs, Etag highestETagInBatch, out List<IndexToWorkOn> filteredOutIndexes)
{
    var innerFilteredOutIndexes = new ConcurrentStack<IndexToWorkOn>();

    var last = jsonDocs.Last();
    Debug.Assert(last.Etag != null);
    Debug.Assert(last.LastModified != null);

    var lastEtag = last.Etag;
    var lastModified = last.LastModified.Value;

    var documentRetriever = new DocumentRetriever(null, null, context.ReadTriggers);
    var filteredDocs = BackgroundTaskExecuter.Instance.Apply(context, jsonDocs, doc =>
    {
        var filteredDoc = documentRetriever.ExecuteReadTriggers(doc, ReadOperation.Index);

        // A null result from the read triggers is represented by a FilteredDocument wrapper
        // around the original document.
        // NOTE(review): the JSON is built from the original 'doc', not from 'filteredDoc' - confirm this is intended.
        return filteredDoc == null
            ? new { Doc = doc, Json = (object)new FilteredDocument(doc) }
            : new { Doc = filteredDoc, Json = JsonToExpando.Convert(doc.ToJson()) };
    });

    if (Log.IsDebugEnabled)
    {
        Log.Debug("After read triggers executed, {0} documents remained", filteredDocs.Count);
    }

    var results = new ConcurrentQueue<IndexingBatchForIndex>();
    var actions = new ConcurrentQueue<Action<IStorageActionsAccessor>>();

    context.Database.MappingThreadPool.ExecuteBatch(indexesToWorkOn, indexToWorkOn =>
    {
        var indexName = indexToWorkOn.Index.PublicName;
        var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexName);
        if (viewGenerator == null)
        {
            // probably deleted
            return;
        }

        var batch = new IndexingBatch(highestETagInBatch);

        foreach (var filteredDoc in filteredDocs)
        {
            var doc = filteredDoc.Doc;
            var json = filteredDoc.Json;

            if (defaultPrefetchingBehavior.FilterDocuments(doc) == false)
            {
                continue;
            }

            // The null check must come before the etag comparison: previously the etag was
            // dereferenced first, so a document without an etag threw instead of being skipped.
            var etag = doc.Etag;
            if (etag == null)
            {
                continue;
            }

            // did we already index this document in this index?
            if (etag.CompareTo(indexToWorkOn.LastIndexedEtag) <= 0)
            {
                continue;
            }

            // is the Raven-Entity-Name a match for the things the index executes on?
            if (viewGenerator.ForEntityNames.Count != 0 &&
                viewGenerator.ForEntityNames.Contains(doc.Metadata.Value<string>(Constants.RavenEntityName)) == false)
            {
                continue;
            }

            batch.Add(doc, json, defaultPrefetchingBehavior.ShouldSkipDeleteFromIndex(doc));

            // Keep the earliest LastModified seen among the documents added to the batch.
            if (batch.DateTime == null)
            {
                batch.DateTime = doc.LastModified;
            }
            else
            {
                batch.DateTime = batch.DateTime > doc.LastModified
                    ? doc.LastModified
                    : batch.DateTime;
            }
        }

        if (batch.Docs.Count == 0)
        {
            if (Log.IsDebugEnabled)
            {
                Log.Debug("All documents have been filtered for {0}, no indexing will be performed, updating to {1}, {2}", indexName, lastEtag, lastModified);
            }

            // we use it this way to batch all the updates together
            if (indexToWorkOn.LastIndexedEtag.CompareTo(lastEtag) < 0)
            {
                actions.Enqueue(accessor =>
                {
                    accessor.Indexing.UpdateLastIndexed(indexToWorkOn.Index.indexId, lastEtag, lastModified);
                    accessor.AfterStorageCommit += () =>
                    {
                        indexToWorkOn.Index.EnsureIndexWriter();
                        indexToWorkOn.Index.Flush(lastEtag);
                    };
                });
            }

            innerFilteredOutIndexes.Push(indexToWorkOn);
            context.MarkIndexFilteredOut(indexName);
            return;
        }

        if (Log.IsDebugEnabled)
        {
            Log.Debug("Going to index {0} documents in {1}: ({2})", batch.Ids.Count, indexToWorkOn, string.Join(", ", batch.Ids));
        }

        results.Enqueue(new IndexingBatchForIndex
        {
            Batch = batch,
            IndexId = indexToWorkOn.IndexId,
            Index = indexToWorkOn.Index,
            LastIndexedEtag = indexToWorkOn.LastIndexedEtag
        });
    }, description: string.Format("Filtering documents for {0} indexes", indexesToWorkOn.Count));

    filteredOutIndexes = innerFilteredOutIndexes.ToList();

    foreach (var action in actions)
    {
        // Each update gets its own storage batch and is retried on write conflicts.
        // NOTE(review): if all 10 attempts hit a write conflict the update is dropped without any report.
        bool keepTrying = true;
        for (int i = 0; i < 10 && keepTrying; i++)
        {
            keepTrying = false;
            transactionalStorage.Batch(actionsAccessor =>
            {
                if (action != null)
                {
                    try
                    {
                        action(actionsAccessor);
                    }
                    catch (Exception e)
                    {
                        if (actionsAccessor.IsWriteConflict(e))
                        {
                            keepTrying = true;
                            return;
                        }

                        throw;
                    }
                }
            });

            if (keepTrying)
            {
                Thread.Sleep(11);
            }
        }
    }

    return results.Where(x => x != null);
}
/// <summary>
/// Pairs each index in <paramref name="indexesToWorkOn"/> with the batch of documents from
/// <paramref name="jsonDocs"/> it still has to index.
/// </summary>
/// <remarks>
/// Indexes whose last-indexed etag is already at or past the etag of the last element of
/// <paramref name="jsonDocs"/> are skipped entirely. For the remaining indexes, a document is added
/// to the batch when its etag is greater than the index's last-indexed etag and its Raven-Entity-Name
/// matches the index (when the index declares entity names). Indexes left with an empty batch only
/// get their last-indexed etag / timestamp updated; those updates run together in one storage batch.
/// </remarks>
/// <param name="indexesToWorkOn">Indexes to build batches for.</param>
/// <param name="jsonDocs">
/// Documents to distribute. Must be non-empty; the last element is expected to have an etag and a
/// last-modified value (asserted in debug builds).
/// </param>
/// <returns>A lazily evaluated sequence of (index, batch) pairs for indexes with a non-empty batch.</returns>
private IEnumerable<Tuple<IndexToWorkOn, IndexingBatch>> FilterIndexes(IList<IndexToWorkOn> indexesToWorkOn, JsonDocument[] jsonDocs)
{
    var finalDoc = jsonDocs.Last();
    Debug.Assert(finalDoc.Etag != null);
    Debug.Assert(finalDoc.LastModified != null);

    var lastEtag = finalDoc.Etag.Value;
    var lastModified = finalDoc.LastModified.Value;
    var highestEtagInDocs = new ComparableByteArray(lastEtag.ToByteArray());

    var retriever = new DocumentRetriever(null, context.ReadTriggers);
    var candidates = BackgroundTaskExecuter.Instance.Apply(jsonDocs, doc =>
    {
        var visible = retriever.ExecuteReadTriggers(doc, null, ReadOperation.Index);

        // presumably Apply drops null results before returning - verify against BackgroundTaskExecuter
        return visible != null
            ? new { Doc = visible, Json = JsonToExpando.Convert(visible.ToJson()) }
            : null;
    });

    log.Debug("After read triggers executed, {0} documents remained", candidates.Count);

    var results = new Tuple<IndexToWorkOn, IndexingBatch>[indexesToWorkOn.Count];
    var actions = new Action<IStorageActionsAccessor>[indexesToWorkOn.Count];

    BackgroundTaskExecuter.Instance.ExecuteAll(context.Configuration, scheduler, indexesToWorkOn, (workItem, slot) =>
    {
        var indexEtag = new ComparableByteArray(workItem.LastIndexedEtag.ToByteArray());
        if (indexEtag.CompareTo(highestEtagInDocs) >= 0)
        {
            // The index is already at or past the last document.
            return;
        }

        var indexName = workItem.IndexName;
        var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexName);
        if (viewGenerator == null)
        {
            // probably deleted
            return;
        }

        var batch = new IndexingBatch();
        foreach (var candidate in candidates)
        {
            var doc = candidate.Doc;

            // Skip documents this index has already covered.
            var docEtag = new ComparableByteArray(doc.Etag.Value.ToByteArray());
            if (indexEtag.CompareTo(docEtag) >= 0)
            {
                continue;
            }

            // Skip documents whose Raven-Entity-Name is not one the index executes on.
            if (viewGenerator.ForEntityNames.Count != 0 &&
                !viewGenerator.ForEntityNames.Contains(doc.Metadata.Value<string>(Constants.RavenEntityName)))
            {
                continue;
            }

            batch.Add(doc, candidate.Json);

            // Keep the earliest LastModified seen among the documents added to the batch.
            batch.DateTime = batch.DateTime == null || batch.DateTime > doc.LastModified
                ? doc.LastModified
                : batch.DateTime;
        }

        if (batch.Docs.Count != 0)
        {
            log.Debug("Going to index {0} documents in {1}", batch.Ids.Count, workItem);
            results[slot] = Tuple.Create(workItem, batch);
            return;
        }

        log.Debug("All documents have been filtered for {0}, no indexing will be performed, updating to {1}, {2}", indexName, lastEtag, lastModified);

        // Deferred so that every index's update runs inside the single storage batch below.
        actions[slot] = accessor => accessor.Indexing.UpdateLastIndexed(indexName, lastEtag, lastModified);
    });

    transactionalStorage.Batch(storageAccessor =>
    {
        foreach (var pending in actions.Where(a => a != null))
        {
            pending(storageAccessor);
        }
    });

    return results.Where(pair => pair != null);
}
/// <summary>
/// Determines, for each index, which of the given documents it still needs to index.
/// </summary>
/// <remarks>
/// An index is skipped when its last-indexed etag is not lower than the etag of the last element of
/// <paramref name="jsonDocs"/>. Otherwise a batch is built from the documents whose etag is greater
/// than the index's last-indexed etag and whose Raven-Entity-Name matches the index (when the index
/// declares entity names). If the batch is empty, only the index's last-indexed etag / timestamp is
/// updated; all such updates are executed together in a single storage batch.
/// </remarks>
/// <param name="indexesToWorkOn">Indexes to build batches for.</param>
/// <param name="jsonDocs">
/// Documents to distribute. Must be non-empty; the last element is expected to have an etag and a
/// last-modified value (asserted in debug builds).
/// </param>
/// <returns>A lazily evaluated sequence of (index, batch) pairs, one per index with a non-empty batch.</returns>
private IEnumerable<Tuple<IndexToWorkOn, IndexingBatch>> FilterIndexes(IList<IndexToWorkOn> indexesToWorkOn, JsonDocument[] jsonDocs)
{
    var tail = jsonDocs.Last();
    Debug.Assert(tail.Etag != null);
    Debug.Assert(tail.LastModified != null);

    var lastEtag = tail.Etag.Value;
    var lastModified = tail.LastModified.Value;
    var tailEtagBytes = new ComparableByteArray(lastEtag.ToByteArray());

    var triggerRunner = new DocumentRetriever(null, context.ReadTriggers);
    var prepared = BackgroundTaskExecuter.Instance.Apply(jsonDocs, doc =>
    {
        var afterTriggers = triggerRunner.ExecuteReadTriggers(doc, null, ReadOperation.Index);

        // presumably Apply drops null results before returning - verify against BackgroundTaskExecuter
        return afterTriggers != null
            ? new { Doc = afterTriggers, Json = JsonToExpando.Convert(afterTriggers.ToJson()) }
            : null;
    });

    log.Debug("After read triggers executed, {0} documents remained", prepared.Count);

    var pairs = new Tuple<IndexToWorkOn, IndexingBatch>[indexesToWorkOn.Count];
    var markerUpdates = new Action<IStorageActionsAccessor>[indexesToWorkOn.Count];

    BackgroundTaskExecuter.Instance.ExecuteAll(context.Configuration, scheduler, indexesToWorkOn, (target, position) =>
    {
        var targetEtagBytes = new ComparableByteArray(target.LastIndexedEtag.ToByteArray());
        if (targetEtagBytes.CompareTo(tailEtagBytes) >= 0)
        {
            // Nothing in jsonDocs is newer than what this index has already covered.
            return;
        }

        var indexName = target.IndexName;
        var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexName);
        if (viewGenerator == null)
        {
            // probably deleted
            return;
        }

        var batch = new IndexingBatch();
        foreach (var entry in prepared)
        {
            // Documents at or below the index's last-indexed etag were already handled.
            if (targetEtagBytes.CompareTo(new ComparableByteArray(entry.Doc.Etag.Value.ToByteArray())) >= 0)
            {
                continue;
            }

            // Only documents whose Raven-Entity-Name is listed by the index are relevant,
            // unless the index declares no entity names at all.
            var restrictedToEntities = viewGenerator.ForEntityNames.Count != 0;
            if (restrictedToEntities &&
                !viewGenerator.ForEntityNames.Contains(entry.Doc.Metadata.Value<string>(Constants.RavenEntityName)))
            {
                continue;
            }

            batch.Add(entry.Doc, entry.Json);

            // Keep the earliest LastModified seen among the documents added to the batch.
            batch.DateTime = batch.DateTime == null || batch.DateTime > entry.Doc.LastModified
                ? entry.Doc.LastModified
                : batch.DateTime;
        }

        if (batch.Docs.Count == 0)
        {
            log.Debug("All documents have been filtered for {0}, no indexing will be performed, updating to {1}, {2}", indexName, lastEtag, lastModified);

            // Deferred so that all marker updates are executed together in the storage batch below.
            markerUpdates[position] = accessor => accessor.Indexing.UpdateLastIndexed(indexName, lastEtag, lastModified);
        }
        else
        {
            log.Debug("Going to index {0} documents in {1}", batch.Ids.Count, target);
            pairs[position] = Tuple.Create(target, batch);
        }
    });

    transactionalStorage.Batch(storageAccessor =>
    {
        foreach (var update in markerUpdates)
        {
            if (update == null)
            {
                continue;
            }

            update(storageAccessor);
        }
    });

    return pairs.Where(p => p != null);
}