Example #1
        // Delete by query
        private static long ApplyQueryDeletes(IEnumerable <QueryAndLimit> queriesIter, ReadersAndUpdates rld, SegmentReader reader)
        {
            long delCount = 0;
            AtomicReaderContext readerContext = reader.AtomicContext;
            bool any = false;

            foreach (QueryAndLimit ent in queriesIter)
            {
                Query    query = ent.Query;
                int?     limit = ent.Limit;
                DocIdSet docs  = (new QueryWrapperFilter(query)).GetDocIdSet(readerContext, reader.LiveDocs);
                if (docs != null)
                {
                    DocIdSetIterator it = docs.GetIterator();
                    if (it != null)
                    {
                        while (true)
                        {
                            int doc = it.NextDoc();
                            if (doc >= limit)
                            {
                                break;
                            }

                            if (!any)
                            {
                                rld.InitWritableLiveDocs();
                                any = true;
                            }

                            if (rld.Delete(doc))
                            {
                                delCount++;
                            }
                        }
                    }
                }
            }

            return(delCount);
        }
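The buffered query deletes resolved above originate from the public IndexWriter.DeleteDocuments(Query) call. A minimal sketch of that entry point, assuming a Lucene.Net 4.8-style API surface (LuceneVersion.LUCENE_48, StandardAnalyzer, and RAMDirectory are assumptions about the referenced packages):

    // Hypothetical sketch, not part of the code above: buffer a delete-by-query
    // that ApplyQueryDeletes later resolves against each segment's live docs.
    // Assumed namespaces: Lucene.Net.Analysis.Standard, Lucene.Net.Documents,
    // Lucene.Net.Index, Lucene.Net.Search, Lucene.Net.Store, Lucene.Net.Util.
    private static void DeleteByQuerySketch()
    {
        var dir = new RAMDirectory();
        var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48));
        using (var writer = new IndexWriter(dir, config))
        {
            var doc = new Document();
            doc.Add(new StringField("id", "1", Field.Store.YES));
            writer.AddDocument(doc);

            // Only buffered here; resolved when deletes are applied
            // (flush, commit, or opening a near-real-time reader).
            writer.DeleteDocuments(new TermQuery(new Term("id", "1")));
            writer.Commit();
        }
    }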
Example #2
        /// <summary>
        /// Used by near real-time search </summary>
        internal static DirectoryReader Open(IndexWriter writer, SegmentInfos infos, bool applyAllDeletes)
        {
            // IndexWriter synchronizes externally before calling
            // us, which ensures infos will not change; so there's
            // no need to process segments in reverse order
            int numSegments = infos.Count;

            IList <SegmentReader> readers = new List <SegmentReader>();
            Directory             dir     = writer.Directory;

            SegmentInfos segmentInfos = (SegmentInfos)infos.Clone();
            int          infosUpto    = 0;
            bool         success      = false;

            try
            {
                for (int i = 0; i < numSegments; i++)
                {
                    // NOTE: important that we use infos not
                    // segmentInfos here, so that we are passing the
                    // actual instance of SegmentInfoPerCommit in
                    // IndexWriter's segmentInfos:
                    SegmentCommitInfo info = infos.Info(i);
                    Debug.Assert(info.Info.Dir == dir);
                    ReadersAndUpdates rld = writer.readerPool.Get(info, true);
                    try
                    {
                        SegmentReader reader = rld.GetReadOnlyClone(IOContext.READ);
                        if (reader.NumDocs > 0 || writer.KeepFullyDeletedSegments)
                        {
                            // Steal the ref:
                            readers.Add(reader);
                            infosUpto++;
                        }
                        else
                        {
                            reader.DecRef();
                            segmentInfos.Remove(infosUpto);
                        }
                    }
                    finally
                    {
                        writer.readerPool.Release(rld);
                    }
                }

                writer.IncRefDeleter(segmentInfos);

                StandardDirectoryReader result = new StandardDirectoryReader(dir, readers.ToArray(), writer, segmentInfos, writer.Config.ReaderTermsIndexDivisor, applyAllDeletes);
                success = true;
                return(result);
            }
            finally
            {
                if (!success)
                {
                    foreach (SegmentReader r in readers)
                    {
                        try
                        {
                            r.DecRef();
                        }
                        catch (Exception)
                        {
                            // ignore any exception that is thrown here to not mask any original
                            // exception.
                        }
                    }
                }
            }
        }
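The internal Open above backs the public near-real-time entry point DirectoryReader.Open(IndexWriter, bool). A minimal usage sketch, assuming the same 4.8-style API and an already-open IndexWriter named writer:

    // Hypothetical sketch: open an NRT reader over the writer's current state.
    // Passing true applies all buffered deletes before the reader is returned.
    using (DirectoryReader nrtReader = DirectoryReader.Open(writer, true))
    {
        var searcher = new IndexSearcher(nrtReader);
        // Documents added or deleted through `writer` but not yet committed
        // are visible to this searcher.
    }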
Example #3
        // Delete by Term
        private long ApplyTermDeletes(IEnumerable <Term> termsIter, ReadersAndUpdates rld, SegmentReader reader)
        {
            lock (this)
            {
                long   delCount = 0;
                Fields fields   = reader.Fields;
                if (fields == null)
                {
                    // this reader has no postings
                    return(0);
                }

                TermsEnum termsEnum = null;

                string   currentField = null;
                DocsEnum docs         = null;

                Debug.Assert(CheckDeleteTerm(null));

                bool any = false;

                //System.out.println(Thread.currentThread().getName() + " del terms reader=" + reader);
                foreach (Term term in termsIter)
                {
                    // Since we visit terms sorted, we gain performance
                    // by re-using the same TermsEnum and seeking only
                    // forwards
                    if (!term.Field().Equals(currentField))
                    {
                        Debug.Assert(currentField == null || currentField.CompareTo(term.Field()) < 0);
                        currentField = term.Field();
                        Terms terms = fields.Terms(currentField);
                        if (terms != null)
                        {
                            termsEnum = terms.Iterator(termsEnum);
                        }
                        else
                        {
                            termsEnum = null;
                        }
                    }

                    if (termsEnum == null)
                    {
                        continue;
                    }
                    Debug.Assert(CheckDeleteTerm(term));

                    // System.out.println("  term=" + term);

                    if (termsEnum.SeekExact(term.Bytes()))
                    {
                        // we don't need term frequencies for this
                        DocsEnum docsEnum = termsEnum.Docs(rld.LiveDocs, docs, DocsEnum.FLAG_NONE);
                        //System.out.println("BDS: got docsEnum=" + docsEnum);

                        if (docsEnum != null)
                        {
                            while (true)
                            {
                                int docID = docsEnum.NextDoc();
                                //System.out.println(Thread.currentThread().getName() + " del term=" + term + " doc=" + docID);
                                if (docID == DocIdSetIterator.NO_MORE_DOCS)
                                {
                                    break;
                                }
                                if (!any)
                                {
                                    rld.InitWritableLiveDocs();
                                    any = true;
                                }
                                // NOTE: there is no limit check on the docID
                                // when deleting by Term (unlike by Query)
                                // because on flush we apply all Term deletes to
                                // each segment.  So all Term deleting here is
                                // against prior segments:
                                if (rld.Delete(docID))
                                {
                                    delCount++;
                                }
                            }
                        }
                    }
                }

                return(delCount);
            }
        }
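The Term deletes consumed by ApplyTermDeletes are buffered by IndexWriter.DeleteDocuments(Term) or, more commonly, by UpdateDocument, which deletes the old version by term and adds the replacement. A minimal sketch under the same assumptions (writer is an open IndexWriter; the "id" field is hypothetical):

    // Hypothetical sketch: buffer term deletes. As the NOTE in the loop above
    // explains, term deletes are applied to every prior segment on flush, so
    // no per-doc limit is needed when they are resolved.
    writer.DeleteDocuments(new Term("id", "42"));

    // Delete-then-add in one call: the usual way a document is "replaced".
    var newDoc = new Document();
    newDoc.Add(new StringField("id", "42", Field.Store.YES));
    writer.UpdateDocument(new Term("id", "42"), newDoc);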
Example #4
        // DocValues updates
        private void ApplyDocValuesUpdates <T1>(IEnumerable <T1> updates, ReadersAndUpdates rld, SegmentReader reader, DocValuesFieldUpdates.Container dvUpdatesContainer) where T1 : DocValuesUpdate
        {
            lock (this)
            {
                Fields fields = reader.Fields;
                if (fields == null)
                {
                    // this reader has no postings
                    return;
                }

                // TODO: we can process the updates per DV field, from last to first so that
                // if multiple terms affect same document for the same field, we add an update
                // only once (that of the last term). To do that, we can keep a bitset which
                // marks which documents have already been updated. So e.g. if term T1
                // updates doc 7, and then we process term T2 and it updates doc 7 as well,
                // we don't apply the update since we know T1 came last and therefore wins
                // the update.
                // We can also use that bitset as 'liveDocs' to pass to TermEnum.docs(), so
                // that these documents aren't even returned.

                string    currentField = null;
                TermsEnum termsEnum    = null;
                DocsEnum  docs         = null;

                //System.out.println(Thread.currentThread().getName() + " numericDVUpdate reader=" + reader);
                foreach (DocValuesUpdate update in updates)
                {
                    Term term  = update.Term;
                    int  limit = update.DocIDUpto;

                    // TODO: we traverse the terms in update order (not term order) so that we
                    // apply the updates in the correct order, i.e. if two terms update the
                    // same document, the last one that came in wins, irrespective of the
                    // terms lexical order.
                    // we can apply the updates in terms order if we keep an updatesGen (and
                    // increment it with every update) and attach it to each NumericUpdate. Note
                    // that we cannot rely only on docIDUpto because an app may send two updates
                    // which will get same docIDUpto, yet will still need to respect the order
                    // those updates arrived.

                    if (!term.Field().Equals(currentField))
                    {
                        // if we change the code to process updates in terms order, enable this assert
                        //        assert currentField == null || currentField.compareTo(term.field()) < 0;
                        currentField = term.Field();
                        Terms terms = fields.Terms(currentField);
                        if (terms != null)
                        {
                            termsEnum = terms.Iterator(termsEnum);
                        }
                        else
                        {
                            termsEnum = null;
                            continue; // no terms in that field
                        }
                    }

                    if (termsEnum == null)
                    {
                        continue;
                    }
                    // System.out.println("  term=" + term);

                    if (termsEnum.SeekExact(term.Bytes()))
                    {
                        // we don't need term frequencies for this
                        DocsEnum docsEnum = termsEnum.Docs(rld.LiveDocs, docs, DocsEnum.FLAG_NONE);

                        //System.out.println("BDS: got docsEnum=" + docsEnum);

                        DocValuesFieldUpdates dvUpdates = dvUpdatesContainer.GetUpdates(update.Field, update.Type);
                        if (dvUpdates == null)
                        {
                            dvUpdates = dvUpdatesContainer.NewUpdates(update.Field, update.Type, reader.MaxDoc);
                        }
                        int doc;
                        while ((doc = docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            //System.out.println(Thread.currentThread().getName() + " numericDVUpdate term=" + term + " doc=" + docID);
                            if (doc >= limit)
                            {
                                break; // no more docs that can be updated for this term
                            }
                            dvUpdates.Add(doc, update.Value);
                        }
                    }
                }
            }
        }
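The updates resolved here are buffered by the IndexWriter DocValues update calls. A minimal sketch, assuming Lucene.Net 4.8 method names (UpdateNumericDocValue / UpdateBinaryDocValue) and hypothetical fields that already exist as DocValues fields in the index:

    // Hypothetical sketch: buffer DocValues updates for every document matching
    // the term; ApplyDocValuesUpdates later resolves them per segment.
    // "views" and "payload" are made-up field names and must already be indexed
    // as NumericDocValuesField / BinaryDocValuesField respectively.
    writer.UpdateNumericDocValue(new Term("id", "42"), "views", 100L);
    writer.UpdateBinaryDocValue(new Term("id", "42"), "payload", new BytesRef("new-value"));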
Example #5
        /// <summary>
        /// Resolves the buffered deleted Term/Query/docIDs, into
        ///  actual deleted docIDs in the liveDocs MutableBits for
        ///  each SegmentReader.
        /// </summary>
        public virtual ApplyDeletesResult ApplyDeletesAndUpdates(IndexWriter.ReaderPool readerPool, IList <SegmentCommitInfo> infos)
        {
            lock (this)
            {
                long t0 = Environment.TickCount; // elapsed-time baseline in ms (DateTime.Now.Millisecond only yields the sub-second component)

                if (infos.Count == 0)
                {
                    return(new ApplyDeletesResult(false, NextGen_Renamed++, null));
                }

                Debug.Assert(CheckDeleteStats());

                if (!Any())
                {
                    if (InfoStream.IsEnabled("BD"))
                    {
                        InfoStream.Message("BD", "applyDeletes: no deletes; skipping");
                    }
                    return(new ApplyDeletesResult(false, NextGen_Renamed++, null));
                }

                if (InfoStream.IsEnabled("BD"))
                {
                    InfoStream.Message("BD", "applyDeletes: infos=" + infos + " packetCount=" + Updates.Count);
                }

                long gen = NextGen_Renamed++;

                List <SegmentCommitInfo> infos2 = new List <SegmentCommitInfo>();
                infos2.AddRange(infos);
                infos2.Sort(sortSegInfoByDelGen);

                CoalescedUpdates coalescedUpdates = null;
                bool             anyNewDeletes    = false;

                int infosIDX = infos2.Count - 1;
                int delIDX   = Updates.Count - 1;

                IList <SegmentCommitInfo> allDeleted = null;

                while (infosIDX >= 0)
                {
                    //System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX);

                    FrozenBufferedUpdates packet = delIDX >= 0 ? Updates[delIDX] : null;
                    SegmentCommitInfo     info   = infos2[infosIDX];
                    long segGen = info.BufferedDeletesGen;

                    if (packet != null && segGen < packet.DelGen)
                    {
                        //        System.out.println("  coalesce");
                        if (coalescedUpdates == null)
                        {
                            coalescedUpdates = new CoalescedUpdates();
                        }
                        if (!packet.IsSegmentPrivate)
                        {
                            /*
                             * Only coalesce if we are NOT on a segment private del packet: the segment private del packet
                             * must only be applied to segments with the same delGen.  Yet, if a segment is already deleted
                             * from the SI since it had no more documents remaining after some del packets younger than
                             * its segPrivate packet (higher delGen) have been applied, the segPrivate packet has not been
                             * removed.
                             */
                            coalescedUpdates.Update(packet);
                        }

                        delIDX--;
                    }
                    else if (packet != null && segGen == packet.DelGen)
                    {
                        Debug.Assert(packet.IsSegmentPrivate, "Packet and segment deletegen can only match on a segment private del packet gen=" + segGen);
                        //System.out.println("  eq");

                        // Lock order: IW -> BD -> RP
                        Debug.Assert(readerPool.InfoIsLive(info));
                        ReadersAndUpdates rld    = readerPool.Get(info, true);
                        SegmentReader     reader = rld.GetReader(IOContext.READ);
                        int  delCount            = 0;
                        bool segAllDeletes;
                        try
                        {
                            DocValuesFieldUpdates.Container dvUpdates = new DocValuesFieldUpdates.Container();
                            if (coalescedUpdates != null)
                            {
                                //System.out.println("    del coalesced");
                                delCount += (int)ApplyTermDeletes(coalescedUpdates.TermsIterable(), rld, reader);
                                delCount += (int)ApplyQueryDeletes(coalescedUpdates.QueriesIterable(), rld, reader);
                                ApplyDocValuesUpdates(coalescedUpdates.NumericDVUpdates, rld, reader, dvUpdates);
                                ApplyDocValuesUpdates(coalescedUpdates.BinaryDVUpdates, rld, reader, dvUpdates);
                            }
                            //System.out.println("    del exact");
                            // Don't delete by Term here; DocumentsWriterPerThread
                            // already did that on flush:
                            delCount += (int)ApplyQueryDeletes(packet.QueriesIterable(), rld, reader);
                            ApplyDocValuesUpdates(Arrays.AsList(packet.NumericDVUpdates), rld, reader, dvUpdates);
                            ApplyDocValuesUpdates(Arrays.AsList(packet.BinaryDVUpdates), rld, reader, dvUpdates);
                            if (dvUpdates.Any())
                            {
                                rld.WriteFieldUpdates(info.Info.Dir, dvUpdates);
                            }
                            int fullDelCount = rld.Info.DelCount + rld.PendingDeleteCount;
                            Debug.Assert(fullDelCount <= rld.Info.Info.DocCount);
                            segAllDeletes = fullDelCount == rld.Info.Info.DocCount;
                        }
                        finally
                        {
                            rld.Release(reader);
                            readerPool.Release(rld);
                        }
                        anyNewDeletes |= delCount > 0;

                        if (segAllDeletes)
                        {
                            if (allDeleted == null)
                            {
                                allDeleted = new List <SegmentCommitInfo>();
                            }
                            allDeleted.Add(info);
                        }

                        if (InfoStream.IsEnabled("BD"))
                        {
                            InfoStream.Message("BD", "seg=" + info + " segGen=" + segGen + " segDeletes=[" + packet + "]; coalesced deletes=[" + (coalescedUpdates == null ? "null" : coalescedUpdates.ToString()) + "] newDelCount=" + delCount + (segAllDeletes ? " 100% deleted" : ""));
                        }

                        if (coalescedUpdates == null)
                        {
                            coalescedUpdates = new CoalescedUpdates();
                        }

                        /*
                         * Since we are on a segment private del packet we must not
                         * update the coalescedDeletes here! We can simply advance to the
                         * next packet and seginfo.
                         */
                        delIDX--;
                        infosIDX--;
                        info.BufferedDeletesGen = gen;
                    }
                    else
                    {
                        //System.out.println("  gt");

                        if (coalescedUpdates != null)
                        {
                            // Lock order: IW -> BD -> RP
                            Debug.Assert(readerPool.InfoIsLive(info));
                            ReadersAndUpdates rld    = readerPool.Get(info, true);
                            SegmentReader     reader = rld.GetReader(IOContext.READ);
                            int  delCount            = 0;
                            bool segAllDeletes;
                            try
                            {
                                delCount += (int)ApplyTermDeletes(coalescedUpdates.TermsIterable(), rld, reader);
                                delCount += (int)ApplyQueryDeletes(coalescedUpdates.QueriesIterable(), rld, reader);
                                DocValuesFieldUpdates.Container dvUpdates = new DocValuesFieldUpdates.Container();
                                ApplyDocValuesUpdates(coalescedUpdates.NumericDVUpdates, rld, reader, dvUpdates);
                                ApplyDocValuesUpdates(coalescedUpdates.BinaryDVUpdates, rld, reader, dvUpdates);
                                if (dvUpdates.Any())
                                {
                                    rld.WriteFieldUpdates(info.Info.Dir, dvUpdates);
                                }
                                int fullDelCount = rld.Info.DelCount + rld.PendingDeleteCount;
                                Debug.Assert(fullDelCount <= rld.Info.Info.DocCount);
                                segAllDeletes = fullDelCount == rld.Info.Info.DocCount;
                            }
                            finally
                            {
                                rld.Release(reader);
                                readerPool.Release(rld);
                            }
                            anyNewDeletes |= delCount > 0;

                            if (segAllDeletes)
                            {
                                if (allDeleted == null)
                                {
                                    allDeleted = new List <SegmentCommitInfo>();
                                }
                                allDeleted.Add(info);
                            }

                            if (InfoStream.IsEnabled("BD"))
                            {
                                InfoStream.Message("BD", "seg=" + info + " segGen=" + segGen + " coalesced deletes=[" + coalescedUpdates + "] newDelCount=" + delCount + (segAllDeletes ? " 100% deleted" : ""));
                            }
                        }
                        info.BufferedDeletesGen = gen;

                        infosIDX--;
                    }
                }

                Debug.Assert(CheckDeleteStats());
                if (InfoStream.IsEnabled("BD"))
                {
                    InfoStream.Message("BD", "applyDeletes took " + (DateTime.Now.Millisecond - t0) + " msec");
                }
                // assert infos != segmentInfos || !any() : "infos=" + infos + " segmentInfos=" + segmentInfos + " any=" + any;

                return(new ApplyDeletesResult(anyNewDeletes, gen, allDeleted));
            }
        }
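The effect of ApplyDeletesAndUpdates shows up in the reader statistics: deleted documents are masked out of the live docs (NumDocs drops) while still occupying their slots (MaxDoc is unchanged) until a merge physically removes them. A small sketch under the earlier assumptions (dir and writer come from the sketches above):

    // Hypothetical sketch: observe applied deletes via NumDocs vs. MaxDoc.
    writer.DeleteDocuments(new Term("id", "1"));
    writer.Commit(); // buffered deletes are applied as part of the commit

    using (DirectoryReader reader = DirectoryReader.Open(dir))
    {
        Console.WriteLine("maxDoc=" + reader.MaxDoc + " numDocs=" + reader.NumDocs);
    }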
Example #6
            /// <summary>
            /// Obtain a ReadersAndUpdates instance from the
            /// readerPool.  If create is true, you must later call
            /// <see cref="Release(ReadersAndUpdates)"/>.
            /// </summary>
            public virtual ReadersAndUpdates Get(SegmentCommitInfo info, bool create)
            {
                lock (this)
                {
                    Debug.Assert(info.Info.Dir == OuterInstance.directory, "info.dir=" + info.Info.Dir + " vs " + OuterInstance.directory);

                    ReadersAndUpdates rld;
                    ReaderMap.TryGetValue(info, out rld);
                    if (rld == null)
                    {
                        if (!create)
                        {
                            return null;
                        }
                        rld = new ReadersAndUpdates(OuterInstance, info);
                        // Steal initial reference:
                        ReaderMap[info] = rld;
                    }
                    else
                    {
                        Debug.Assert(rld.Info == info, "Infos are not equal");//, "rld.info=" + rld.Info + " info=" + info + " isLive?=" + InfoIsLive(rld.Info) + " vs " + InfoIsLive(info));
                    }

                    if (create)
                    {
                        // Return ref to caller:
                        rld.IncRef();
                    }

                    Debug.Assert(NoDups());

                    return rld;
                }
            }
Example #7
            public virtual void Release(ReadersAndUpdates rld, bool assertInfoLive)
            {
                lock (this)
                {
                    // Matches incRef in get:
                    rld.DecRef();

                    // Pool still holds a ref:
                    Debug.Assert(rld.RefCount() >= 1);

                    if (!OuterInstance.PoolReaders && rld.RefCount() == 1)
                    {
                        // this is the last ref to this RLD, and we're not
                        // pooling, so remove it:
                        //        System.out.println("[" + Thread.currentThread().getName() + "] ReaderPool.release: " + rld.info);
                        if (rld.WriteLiveDocs(OuterInstance.directory))
                        {
                            // Make sure we only write del docs for a live segment:
                            Debug.Assert(assertInfoLive == false || InfoIsLive(rld.Info));
                            // Must checkpoint because we just
                            // created new _X_N.del and field updates files;
                            // don't call IW.checkpoint because that also
                            // increments SIS.version, which we do not want to
                            // do here: it was done previously (after we
                            // invoked BDS.applyDeletes), whereas here all we
                            // did was move the state to disk:
                            OuterInstance.CheckpointNoSIS();
                        }
                        //System.out.println("IW: done writeLiveDocs for info=" + rld.info);

                        //        System.out.println("[" + Thread.currentThread().getName() + "] ReaderPool.release: drop readers " + rld.info);
                        rld.DropReaders();
                        ReaderMap.Remove(rld.Info);
                    }
                }
            }
Example #8
 public virtual void Release(ReadersAndUpdates rld)
 {
     lock (this)
     {
         Release(rld, true);
     }
 }
Example #9
 internal void Init(ReaderPool readerPool, MergePolicy.OneMerge merge, MergeState mergeState, bool initWritableLiveDocs)
 {
     if (MergedDeletesAndUpdates_Renamed == null)
     {
         MergedDeletesAndUpdates_Renamed = readerPool.Get(merge.Info_Renamed, true);
         DocMap = merge.GetDocMap(mergeState);
         Debug.Assert(DocMap.IsConsistent(merge.Info_Renamed.Info.DocCount));
     }
     if (initWritableLiveDocs && !InitializedWritableLiveDocs)
     {
         MergedDeletesAndUpdates_Renamed.InitWritableLiveDocs();
         this.InitializedWritableLiveDocs = true;
     }
 }