// Delete by query
private static long ApplyQueryDeletes(IEnumerable<QueryAndLimit> queriesIter, ReadersAndUpdates rld, SegmentReader reader)
{
    long delCount = 0;
    AtomicReaderContext readerContext = reader.AtomicContext;
    bool any = false;
    foreach (QueryAndLimit ent in queriesIter)
    {
        Query query = ent.Query;
        int? limit = ent.Limit;
        DocIdSet docs = new QueryWrapperFilter(query).GetDocIdSet(readerContext, reader.LiveDocs);
        if (docs != null)
        {
            DocIdSetIterator it = docs.GetIterator();
            if (it != null)
            {
                while (true)
                {
                    int doc = it.NextDoc();
                    // NO_MORE_DOCS is int.MaxValue, so this check also terminates
                    // the loop once the iterator is exhausted:
                    if (doc >= limit)
                    {
                        break;
                    }

                    if (!any)
                    {
                        rld.InitWritableLiveDocs();
                        any = true;
                    }

                    if (rld.Delete(doc))
                    {
                        delCount++;
                    }
                }
            }
        }
    }

    return delCount;
}
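For context, the query-delete packets consumed above are buffered on the application side through IndexWriter.DeleteDocuments(Query). A minimal sketch of that entry point, assuming the usual Lucene.NET 4.8 public API (names can differ slightly across snapshots of the port); the directory, analyzer, field name, and query value are illustrative only:

// Assumes: using Lucene.Net.Analysis.Standard; using Lucene.Net.Index;
//          using Lucene.Net.Search; using Lucene.Net.Store; using Lucene.Net.Util;
Directory dir = new RAMDirectory();
var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
var writer = new IndexWriter(dir, new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer));

// Buffers a delete-by-query packet; when deletes are applied, the query is
// turned into a per-segment DocIdSet and matching docs below the packet's
// limit are marked deleted (the loop in ApplyQueryDeletes above).
writer.DeleteDocuments(new TermQuery(new Term("category", "obsolete")));
writer.Commit(); // buffered deletes are resolved as part of flush/commit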
/// <summary>
/// Used by near real-time search </summary>
internal static DirectoryReader Open(IndexWriter writer, SegmentInfos infos, bool applyAllDeletes)
{
    // IndexWriter synchronizes externally before calling
    // us, which ensures infos will not change; so there's
    // no need to process segments in reverse order
    int numSegments = infos.Count;

    IList<SegmentReader> readers = new List<SegmentReader>();
    Directory dir = writer.Directory;
    SegmentInfos segmentInfos = (SegmentInfos)infos.Clone();
    int infosUpto = 0;
    bool success = false;
    try
    {
        for (int i = 0; i < numSegments; i++)
        {
            // NOTE: important that we use infos not
            // segmentInfos here, so that we are passing the
            // actual instance of SegmentInfoPerCommit in
            // IndexWriter's segmentInfos:
            SegmentCommitInfo info = infos.Info(i);
            Debug.Assert(info.Info.Dir == dir);
            ReadersAndUpdates rld = writer.readerPool.Get(info, true);
            try
            {
                SegmentReader reader = rld.GetReadOnlyClone(IOContext.READ);
                if (reader.NumDocs > 0 || writer.KeepFullyDeletedSegments)
                {
                    // Steal the ref:
                    readers.Add(reader);
                    infosUpto++;
                }
                else
                {
                    reader.DecRef();
                    segmentInfos.Remove(infosUpto);
                }
            }
            finally
            {
                writer.readerPool.Release(rld);
            }
        }

        writer.IncRefDeleter(segmentInfos);

        StandardDirectoryReader result = new StandardDirectoryReader(dir, readers.ToArray(), writer, segmentInfos, writer.Config.ReaderTermsIndexDivisor, applyAllDeletes);
        success = true;
        return result;
    }
    finally
    {
        if (!success)
        {
            foreach (SegmentReader r in readers)
            {
                try
                {
                    r.DecRef();
                }
                catch (Exception)
                {
                    // ignore any exception that is thrown here to not mask any original
                    // exception.
                }
            }
        }
    }
}
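Applications normally reach this internal overload through the public DirectoryReader.Open(IndexWriter, bool) entry point used for near-real-time search. A hedged sketch continuing the writer from the previous example; the document and field names are made up:

// Assumes `writer` is the open IndexWriter from the sketch above,
// plus: using Lucene.Net.Documents;
var doc = new Document();
doc.Add(new StringField("id", "1", Field.Store.YES));
writer.AddDocument(doc);

// Open a near-real-time reader without committing; passing true for
// applyAllDeletes forces buffered deletes to be resolved before the
// reader is handed back.
DirectoryReader nrtReader = DirectoryReader.Open(writer, true);
var searcher = new IndexSearcher(nrtReader);
TopDocs hits = searcher.Search(new TermQuery(new Term("id", "1")), 10);
// ... dispose nrtReader when a newer reader replaces it or the app shuts down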
// Delete by Term
private long ApplyTermDeletes(IEnumerable<Term> termsIter, ReadersAndUpdates rld, SegmentReader reader)
{
    lock (this)
    {
        long delCount = 0;
        Fields fields = reader.Fields;
        if (fields == null)
        {
            // this reader has no postings
            return 0;
        }

        TermsEnum termsEnum = null;

        string currentField = null;
        DocsEnum docs = null;

        Debug.Assert(CheckDeleteTerm(null));

        bool any = false;

        //System.out.println(Thread.currentThread().getName() + " del terms reader=" + reader);
        foreach (Term term in termsIter)
        {
            // Since we visit terms sorted, we gain performance
            // by re-using the same TermsEnum and seeking only
            // forwards
            if (!term.Field().Equals(currentField))
            {
                Debug.Assert(currentField == null || currentField.CompareTo(term.Field()) < 0);
                currentField = term.Field();
                Terms terms = fields.Terms(currentField);
                if (terms != null)
                {
                    termsEnum = terms.Iterator(termsEnum);
                }
                else
                {
                    termsEnum = null;
                }
            }

            if (termsEnum == null)
            {
                continue;
            }
            Debug.Assert(CheckDeleteTerm(term));

            // System.out.println(" term=" + term);

            if (termsEnum.SeekExact(term.Bytes()))
            {
                // we don't need term frequencies for this
                DocsEnum docsEnum = termsEnum.Docs(rld.LiveDocs, docs, DocsEnum.FLAG_NONE);
                //System.out.println("BDS: got docsEnum=" + docsEnum);

                if (docsEnum != null)
                {
                    while (true)
                    {
                        int docID = docsEnum.NextDoc();
                        //System.out.println(Thread.currentThread().getName() + " del term=" + term + " doc=" + docID);
                        if (docID == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }
                        if (!any)
                        {
                            rld.InitWritableLiveDocs();
                            any = true;
                        }
                        // NOTE: there is no limit check on the docID
                        // when deleting by Term (unlike by Query)
                        // because on flush we apply all Term deletes to
                        // each segment. So all Term deleting here is
                        // against prior segments:
                        if (rld.Delete(docID))
                        {
                            delCount++;
                        }
                    }
                }
            }
        }

        return delCount;
    }
}
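Term deletes follow the same path from the application via IndexWriter.DeleteDocuments(Term); the buffered terms arrive here sorted, which is why the single TermsEnum can be reused and only seeked forwards. A tiny sketch, again assuming the `writer` from the first example and illustrative field values:

// Assumes `writer` is the open IndexWriter from the first sketch.
// Each call buffers a Term delete; unlike query deletes, term deletes carry
// no per-segment doc limit in ApplyTermDeletes because flush already applied
// them to the flushing segment, so here they only hit prior segments.
writer.DeleteDocuments(new Term("id", "1"));
writer.DeleteDocuments(new Term("id", "2"));
writer.Commit();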
// DocValues updates
private void ApplyDocValuesUpdates<T1>(IEnumerable<T1> updates, ReadersAndUpdates rld, SegmentReader reader, DocValuesFieldUpdates.Container dvUpdatesContainer) where T1 : DocValuesUpdate
{
    lock (this)
    {
        Fields fields = reader.Fields;
        if (fields == null)
        {
            // this reader has no postings
            return;
        }

        // TODO: we can process the updates per DV field, from last to first so that
        // if multiple terms affect same document for the same field, we add an update
        // only once (that of the last term). To do that, we can keep a bitset which
        // marks which documents have already been updated. So e.g. if term T1
        // updates doc 7, and then we process term T2 and it updates doc 7 as well,
        // we don't apply the update since we know T1 came last and therefore wins
        // the update.
        // We can also use that bitset as 'liveDocs' to pass to TermEnum.docs(), so
        // that these documents aren't even returned.

        string currentField = null;
        TermsEnum termsEnum = null;
        DocsEnum docs = null;

        //System.out.println(Thread.currentThread().getName() + " numericDVUpdate reader=" + reader);
        foreach (DocValuesUpdate update in updates)
        {
            Term term = update.Term;
            int limit = update.DocIDUpto;

            // TODO: we traverse the terms in update order (not term order) so that we
            // apply the updates in the correct order, i.e. if two terms update the
            // same document, the last one that came in wins, irrespective of the
            // terms' lexical order.
            // we can apply the updates in terms order if we keep an updatesGen (and
            // increment it with every update) and attach it to each NumericUpdate. Note
            // that we cannot rely only on docIDUpto because an app may send two updates
            // which will get same docIDUpto, yet will still need to respect the order
            // those updates arrived.

            if (!term.Field().Equals(currentField))
            {
                // if we change the code to process updates in terms order, enable this assert
                // assert currentField == null || currentField.compareTo(term.field()) < 0;
                currentField = term.Field();
                Terms terms = fields.Terms(currentField);
                if (terms != null)
                {
                    termsEnum = terms.Iterator(termsEnum);
                }
                else
                {
                    termsEnum = null;
                    continue; // no terms in that field
                }
            }

            if (termsEnum == null)
            {
                continue;
            }
            // System.out.println(" term=" + term);

            if (termsEnum.SeekExact(term.Bytes()))
            {
                // we don't need term frequencies for this
                DocsEnum docsEnum = termsEnum.Docs(rld.LiveDocs, docs, DocsEnum.FLAG_NONE);
                //System.out.println("BDS: got docsEnum=" + docsEnum);

                DocValuesFieldUpdates dvUpdates = dvUpdatesContainer.GetUpdates(update.Field, update.Type);
                if (dvUpdates == null)
                {
                    dvUpdates = dvUpdatesContainer.NewUpdates(update.Field, update.Type, reader.MaxDoc);
                }
                int doc;
                while ((doc = docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    //System.out.println(Thread.currentThread().getName() + " numericDVUpdate term=" + term + " doc=" + docID);
                    if (doc >= limit)
                    {
                        break; // no more docs that can be updated for this term
                    }
                    dvUpdates.Add(doc, update.Value);
                }
            }
        }
    }
}
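The DocValues update packets resolved above originate from the in-place update API on IndexWriter. A minimal sketch, assuming the `writer` from the first example and that documents are indexed with a NumericDocValuesField; the "price" field and values are illustrative, and the binary variant (UpdateBinaryDocValue) follows the same pattern:

// Assumes `writer` from the first sketch, plus: using Lucene.Net.Documents;
var d = new Document();
d.Add(new StringField("id", "42", Field.Store.YES));
d.Add(new NumericDocValuesField("price", 10L));
writer.AddDocument(d);

// Buffers a numeric DocValues update for every document matching the term;
// ApplyDocValuesUpdates above resolves it per segment, honoring the packet's
// DocIDUpto limit so documents added after the update are not touched.
writer.UpdateNumericDocValue(new Term("id", "42"), "price", 99L);
writer.Commit();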
/// <summary>
/// Resolves the buffered deleted Term/Query/docIDs, into
/// actual deleted docIDs in the liveDocs MutableBits for
/// each SegmentReader.
/// </summary>
public virtual ApplyDeletesResult ApplyDeletesAndUpdates(IndexWriter.ReaderPool readerPool, IList<SegmentCommitInfo> infos)
{
    lock (this)
    {
        // Use a monotonic timer: DateTime.Now.Millisecond only yields the
        // millisecond component of the wall clock, not an elapsed-time baseline.
        Stopwatch timer = Stopwatch.StartNew();

        if (infos.Count == 0)
        {
            return new ApplyDeletesResult(false, NextGen_Renamed++, null);
        }

        Debug.Assert(CheckDeleteStats());

        if (!Any())
        {
            if (InfoStream.IsEnabled("BD"))
            {
                InfoStream.Message("BD", "applyDeletes: no deletes; skipping");
            }
            return new ApplyDeletesResult(false, NextGen_Renamed++, null);
        }

        if (InfoStream.IsEnabled("BD"))
        {
            InfoStream.Message("BD", "applyDeletes: infos=" + infos + " packetCount=" + Updates.Count);
        }

        long gen = NextGen_Renamed++;

        List<SegmentCommitInfo> infos2 = new List<SegmentCommitInfo>();
        infos2.AddRange(infos);
        infos2.Sort(sortSegInfoByDelGen);

        CoalescedUpdates coalescedUpdates = null;
        bool anyNewDeletes = false;

        int infosIDX = infos2.Count - 1;
        int delIDX = Updates.Count - 1;

        IList<SegmentCommitInfo> allDeleted = null;

        while (infosIDX >= 0)
        {
            //System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX);

            FrozenBufferedUpdates packet = delIDX >= 0 ? Updates[delIDX] : null;
            SegmentCommitInfo info = infos2[infosIDX];
            long segGen = info.BufferedDeletesGen;

            if (packet != null && segGen < packet.DelGen)
            {
                // System.out.println("  coalesce");
                if (coalescedUpdates == null)
                {
                    coalescedUpdates = new CoalescedUpdates();
                }
                if (!packet.IsSegmentPrivate)
                {
                    /*
                     * Only coalesce if we are NOT on a segment private del packet: the segment private del packet
                     * must only be applied to segments with the same delGen. Yet, if a segment is already deleted
                     * from the SI since it had no more documents remaining after some del packets younger than
                     * its segPrivate packet (higher delGen) have been applied, the segPrivate packet has not been
                     * removed.
                     */
                    coalescedUpdates.Update(packet);
                }

                delIDX--;
            }
            else if (packet != null && segGen == packet.DelGen)
            {
                Debug.Assert(packet.IsSegmentPrivate, "Packet and Segments deletegen can only match on a segment private del packet gen=" + segGen);
                //System.out.println("  eq");

                // Lock order: IW -> BD -> RP
                Debug.Assert(readerPool.InfoIsLive(info));
                ReadersAndUpdates rld = readerPool.Get(info, true);
                SegmentReader reader = rld.GetReader(IOContext.READ);
                int delCount = 0;
                bool segAllDeletes;
                try
                {
                    DocValuesFieldUpdates.Container dvUpdates = new DocValuesFieldUpdates.Container();
                    if (coalescedUpdates != null)
                    {
                        //System.out.println("    del coalesced");
                        delCount += (int)ApplyTermDeletes(coalescedUpdates.TermsIterable(), rld, reader);
                        delCount += (int)ApplyQueryDeletes(coalescedUpdates.QueriesIterable(), rld, reader);
                        ApplyDocValuesUpdates(coalescedUpdates.NumericDVUpdates, rld, reader, dvUpdates);
                        ApplyDocValuesUpdates(coalescedUpdates.BinaryDVUpdates, rld, reader, dvUpdates);
                    }
                    //System.out.println("    del exact");
                    // Don't delete by Term here; DocumentsWriterPerThread
                    // already did that on flush:
                    delCount += (int)ApplyQueryDeletes(packet.QueriesIterable(), rld, reader);
                    ApplyDocValuesUpdates(Arrays.AsList(packet.NumericDVUpdates), rld, reader, dvUpdates);
                    ApplyDocValuesUpdates(Arrays.AsList(packet.BinaryDVUpdates), rld, reader, dvUpdates);
                    if (dvUpdates.Any())
                    {
                        rld.WriteFieldUpdates(info.Info.Dir, dvUpdates);
                    }
                    int fullDelCount = rld.Info.DelCount + rld.PendingDeleteCount;
                    Debug.Assert(fullDelCount <= rld.Info.Info.DocCount);
                    segAllDeletes = fullDelCount == rld.Info.Info.DocCount;
                }
                finally
                {
                    rld.Release(reader);
                    readerPool.Release(rld);
                }
                anyNewDeletes |= delCount > 0;

                if (segAllDeletes)
                {
                    if (allDeleted == null)
                    {
                        allDeleted = new List<SegmentCommitInfo>();
                    }
                    allDeleted.Add(info);
                }

                if (InfoStream.IsEnabled("BD"))
                {
                    InfoStream.Message("BD", "seg=" + info + " segGen=" + segGen + " segDeletes=[" + packet + "]; coalesced deletes=[" + (coalescedUpdates == null ? "null" : coalescedUpdates.ToString()) + "] newDelCount=" + delCount + (segAllDeletes ? " 100% deleted" : ""));
                }

                if (coalescedUpdates == null)
                {
                    coalescedUpdates = new CoalescedUpdates();
                }

                /*
                 * Since we are on a segment private del packet we must not
                 * update the coalescedDeletes here! We can simply advance to the
                 * next packet and seginfo.
                 */
                delIDX--;
                infosIDX--;
                info.BufferedDeletesGen = gen;
            }
            else
            {
                //System.out.println("  gt");

                if (coalescedUpdates != null)
                {
                    // Lock order: IW -> BD -> RP
                    Debug.Assert(readerPool.InfoIsLive(info));
                    ReadersAndUpdates rld = readerPool.Get(info, true);
                    SegmentReader reader = rld.GetReader(IOContext.READ);
                    int delCount = 0;
                    bool segAllDeletes;
                    try
                    {
                        delCount += (int)ApplyTermDeletes(coalescedUpdates.TermsIterable(), rld, reader);
                        delCount += (int)ApplyQueryDeletes(coalescedUpdates.QueriesIterable(), rld, reader);
                        DocValuesFieldUpdates.Container dvUpdates = new DocValuesFieldUpdates.Container();
                        ApplyDocValuesUpdates(coalescedUpdates.NumericDVUpdates, rld, reader, dvUpdates);
                        ApplyDocValuesUpdates(coalescedUpdates.BinaryDVUpdates, rld, reader, dvUpdates);
                        if (dvUpdates.Any())
                        {
                            rld.WriteFieldUpdates(info.Info.Dir, dvUpdates);
                        }
                        int fullDelCount = rld.Info.DelCount + rld.PendingDeleteCount;
                        Debug.Assert(fullDelCount <= rld.Info.Info.DocCount);
                        segAllDeletes = fullDelCount == rld.Info.Info.DocCount;
                    }
                    finally
                    {
                        rld.Release(reader);
                        readerPool.Release(rld);
                    }
                    anyNewDeletes |= delCount > 0;

                    if (segAllDeletes)
                    {
                        if (allDeleted == null)
                        {
                            allDeleted = new List<SegmentCommitInfo>();
                        }
                        allDeleted.Add(info);
                    }

                    if (InfoStream.IsEnabled("BD"))
                    {
                        InfoStream.Message("BD", "seg=" + info + " segGen=" + segGen + " coalesced deletes=[" + coalescedUpdates + "] newDelCount=" + delCount + (segAllDeletes ? " 100% deleted" : ""));
                    }
                }
                info.BufferedDeletesGen = gen;

                infosIDX--;
            }
        }

        Debug.Assert(CheckDeleteStats());
        if (InfoStream.IsEnabled("BD"))
        {
            InfoStream.Message("BD", "applyDeletes took " + timer.ElapsedMilliseconds + " msec");
        }
        // assert infos != segmentInfos || !any() : "infos=" + infos + " segmentInfos=" + segmentInfos + " any=" + any;

        return new ApplyDeletesResult(anyNewDeletes, gen, allDeleted);
    }
}
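From the caller's side, this resolution runs whenever buffered deletes must be applied: at flush/commit time, or when an NRT reader is opened or reopened with applyAllDeletes=true. A hedged sketch, assuming `writer` is the open IndexWriter and `nrtReader` the open NRT DirectoryReader from the earlier sketches:

// Assumes `writer` and `nrtReader` from the earlier sketches.
writer.DeleteDocuments(new Term("id", "42"));

// OpenIfChanged returns a new reader, or null if nothing changed; passing
// true for applyAllDeletes makes the reopen wait for buffered deletes and
// DocValues updates to be resolved per segment (ApplyDeletesAndUpdates above).
DirectoryReader reopened = DirectoryReader.OpenIfChanged(nrtReader, writer, true);
if (reopened != null)
{
    nrtReader.Dispose();
    nrtReader = reopened;
}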
/// <summary>
/// Obtain a ReadersAndUpdates instance from the
/// readerPool. If create is true, you must later call
/// <seealso cref="#release(ReadersAndUpdates)"/>.
/// </summary>
public virtual ReadersAndUpdates Get(SegmentCommitInfo info, bool create)
{
    lock (this)
    {
        Debug.Assert(info.Info.Dir == OuterInstance.directory, "info.dir=" + info.Info.Dir + " vs " + OuterInstance.directory);

        ReadersAndUpdates rld;
        ReaderMap.TryGetValue(info, out rld);
        if (rld == null)
        {
            if (!create)
            {
                return null;
            }
            rld = new ReadersAndUpdates(OuterInstance, info);
            // Steal initial reference:
            ReaderMap[info] = rld;
        }
        else
        {
            Debug.Assert(rld.Info == info, "Infos are not equal"); //, "rld.info=" + rld.Info + " info=" + info + " isLive?=" + InfoIsLive(rld.Info) + " vs " + InfoIsLive(info));
        }

        if (create)
        {
            // Return ref to caller:
            rld.IncRef();
        }

        Debug.Assert(NoDups());

        return rld;
    }
}
public virtual void Release(ReadersAndUpdates rld, bool assertInfoLive)
{
    lock (this)
    {
        // Matches incRef in get:
        rld.DecRef();

        // Pool still holds a ref:
        Debug.Assert(rld.RefCount() >= 1);

        if (!OuterInstance.PoolReaders && rld.RefCount() == 1)
        {
            // this is the last ref to this RLD, and we're not
            // pooling, so remove it:
            // System.out.println("[" + Thread.currentThread().getName() + "] ReaderPool.release: " + rld.info);
            if (rld.WriteLiveDocs(OuterInstance.directory))
            {
                // Make sure we only write del docs for a live segment:
                Debug.Assert(assertInfoLive == false || InfoIsLive(rld.Info));
                // Must checkpoint because we just
                // created new _X_N.del and field updates files;
                // don't call IW.checkpoint because that also
                // increments SIS.version, which we do not want to
                // do here: it was done previously (after we
                // invoked BDS.applyDeletes), whereas here all we
                // did was move the state to disk:
                OuterInstance.CheckpointNoSIS();
            }
            //System.out.println("IW: done writeLiveDocs for info=" + rld.info);

            // System.out.println("[" + Thread.currentThread().getName() + "] ReaderPool.release: drop readers " + rld.info);
            rld.DropReaders();
            ReaderMap.Remove(rld.Info);
        }
    }
}
public virtual void Release(ReadersAndUpdates rld)
{
    lock (this)
    {
        Release(rld, true);
    }
}
internal void Init(ReaderPool readerPool, MergePolicy.OneMerge merge, MergeState mergeState, bool initWritableLiveDocs)
{
    if (MergedDeletesAndUpdates_Renamed == null)
    {
        MergedDeletesAndUpdates_Renamed = readerPool.Get(merge.Info_Renamed, true);
        DocMap = merge.GetDocMap(mergeState);
        Debug.Assert(DocMap.IsConsistent(merge.Info_Renamed.Info.DocCount));
    }
    if (initWritableLiveDocs && !InitializedWritableLiveDocs)
    {
        MergedDeletesAndUpdates_Renamed.InitWritableLiveDocs();
        this.InitializedWritableLiveDocs = true;
    }
}