public override void SetUp()
 {
     base.SetUp();
     Dir = NewDirectory();
     DocHelper.SetupDoc(TestDoc);
     Info = DocHelper.WriteDoc(Random(), Dir, TestDoc);
 }
        protected internal override long Size(SegmentCommitInfo info)
        {
            int hourOfDay = Calendar.Hour;
            // it's 5 o'clock somewhere
            if (hourOfDay < 6 || hourOfDay > 20 || Random.Next(23) == 5)
            {
                Drink.Drink_e[] values = Enum.GetValues(typeof(Drink.Drink_e)).Cast<Drink.Drink_e>().ToArray();
                // pick a random drink during the day; Random.Next's upper bound is
                // exclusive, so values.Length keeps every drink reachable
                Drink.Drink_e drink = values[Random.Next(values.Length)];
                return (long)drink * info.SizeInBytes();
            }

            return info.SizeInBytes();
        }
 internal FlushedSegment(SegmentCommitInfo segmentInfo, FieldInfos fieldInfos, BufferedUpdates segmentUpdates, MutableBits liveDocs, int delCount)
 {
     this.SegmentInfo = segmentInfo;
     this.FieldInfos = fieldInfos;
     this.SegmentUpdates = segmentUpdates != null && segmentUpdates.Any() ? new FrozenBufferedUpdates(segmentUpdates, true) : null;
     this.LiveDocs = liveDocs;
     this.DelCount = delCount;
 }
Example #4
 /// <summary>
 /// Returns true if a new segment (regardless of its origin) should use the
 /// compound file format. The default implementation returns <code>true</code>
 /// iff the size of the given mergedInfo is less than or equal to
 /// <seealso cref="#getMaxCFSSegmentSizeMB()"/> and the size is less than or equal to
 /// TotalIndexSize * <seealso cref="#getNoCFSRatio()"/>; otherwise <code>false</code>.
 /// </summary>
 public virtual bool UseCompoundFile(SegmentInfos infos, SegmentCommitInfo mergedInfo)
 {
     if (NoCFSRatio == 0.0)
     {
         return false;
     }
     long mergedInfoSize = Size(mergedInfo);
     if (mergedInfoSize > MaxCFSSegmentSize)
     {
         return false;
     }
     if (NoCFSRatio >= 1.0)
     {
         return true;
     }
     long totalSize = 0;
     foreach (SegmentCommitInfo info in infos.Segments)
     {
         totalSize += Size(info);
     }
     return mergedInfoSize <= NoCFSRatio * totalSize;
 }
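
A rough worked example of the rule above, with hypothetical sizes and plain arithmetic standing in for the actual MergePolicy fields:

 // Hypothetical numbers walking through the UseCompoundFile decision:
 double noCFSRatio = 0.10;                          // NoCFSRatio
 long maxCFSSegmentSize = 2L * 1024 * 1024 * 1024;  // MaxCFSSegmentSize (2 GB)
 long mergedInfoSize = 50L * 1024 * 1024;           // Size(mergedInfo): 50 MB
 long totalSize = 1024L * 1024 * 1024;              // sum of Size(info) over all segments: 1 GB

 bool useCFS = noCFSRatio > 0.0
               && mergedInfoSize <= maxCFSSegmentSize
               && (noCFSRatio >= 1.0 || mergedInfoSize <= noCFSRatio * totalSize);
 // 50 MB <= 0.1 * 1 GB (~102 MB), so this merged segment would use the compound format.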
Example #5
 /// <summary>
 /// Returns true if this single info is already fully merged (has no
 ///  pending deletes, is in the same dir as the
 ///  writer, and matches the current compound file setting).
 /// </summary>
 protected internal bool IsMerged(SegmentInfos infos, SegmentCommitInfo info)
 {
     IndexWriter w = Writer.Get();
     Debug.Assert(w != null);
     bool hasDeletions = w.NumDeletedDocs(info) > 0;
     return !hasDeletions && !info.Info.HasSeparateNorms() && info.Info.Dir == w.Directory && UseCompoundFile(infos, info) == info.Info.UseCompoundFile;
 }
Example #6
 /// <summary>
 /// Returns the index of the provided <see cref="SegmentCommitInfo"/>.
 ///
 /// <para/><b>WARNING</b>: O(N) cost.
 /// </summary>
 internal int IndexOf(SegmentCommitInfo si)
 {
     return segments.IndexOf(si);
 }
Example #7
 public override bool UseCompoundFile(SegmentInfos segments, SegmentCommitInfo newSegment)
 {
     return useCompoundFile;
 }
        public virtual ApplyDeletesResult ApplyDeletesAndUpdates(IndexWriter.ReaderPool readerPool, IList<SegmentCommitInfo> infos)
        {
            lock (this)
            {
                long t0 = Environment.TickCount;

                if (infos.Count == 0)
                {
                    return new ApplyDeletesResult(false, nextGen++, null);
                }

                Debug.Assert(CheckDeleteStats());

                if (!Any())
                {
                    if (infoStream.IsEnabled("BD"))
                    {
                        infoStream.Message("BD", "applyDeletes: no deletes; skipping");
                    }
                    return new ApplyDeletesResult(false, nextGen++, null);
                }

                if (infoStream.IsEnabled("BD"))
                {
                    infoStream.Message("BD", "applyDeletes: infos=" + Arrays.ToString(infos) + " packetCount=" + updates.Count);
                }

                long gen = nextGen++;

                List<SegmentCommitInfo> infos2 = new List<SegmentCommitInfo>();
                infos2.AddRange(infos);
                infos2.Sort(sortSegInfoByDelGen);

                CoalescedUpdates coalescedUpdates = null;
                bool             anyNewDeletes    = false;

                int infosIDX = infos2.Count - 1;
                int delIDX   = updates.Count - 1;

                IList<SegmentCommitInfo> allDeleted = null;

                while (infosIDX >= 0)
                {
                    //System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX);

                    FrozenBufferedUpdates packet = delIDX >= 0 ? updates[delIDX] : null;
                    SegmentCommitInfo     info   = infos2[infosIDX];
                    long segGen = info.BufferedDeletesGen;

                    if (packet != null && segGen < packet.DelGen)
                    {
                        //        System.out.println("  coalesce");
                        if (coalescedUpdates == null)
                        {
                            coalescedUpdates = new CoalescedUpdates();
                        }
                        if (!packet.isSegmentPrivate)
                        {
                            /*
                             * Only coalesce if we are NOT on a segment private del packet: the segment private del packet
                             * must only be applied to segments with the same delGen.  Yet, if a segment is already deleted
                             * from the SI since it had no more documents remaining after some del packets younger than
                             * its segPrivate packet (higher delGen) have been applied, the segPrivate packet has not been
                             * removed.
                             */
                            coalescedUpdates.Update(packet);
                        }

                        delIDX--;
                    }
                    else if (packet != null && segGen == packet.DelGen)
                    {
                        Debug.Assert(packet.isSegmentPrivate, "Packet and Segments deletegen can only match on a segment private del packet gen=" + segGen);
                        //System.out.println("  eq");

                        // Lock order: IW -> BD -> RP
                        Debug.Assert(readerPool.InfoIsLive(info));
                        ReadersAndUpdates rld    = readerPool.Get(info, true);
                        SegmentReader     reader = rld.GetReader(IOContext.READ);
                        int  delCount            = 0;
                        bool segAllDeletes;
                        try
                        {
                            DocValuesFieldUpdates.Container dvUpdates = new DocValuesFieldUpdates.Container();
                            if (coalescedUpdates != null)
                            {
                                //System.out.println("    del coalesced");
                                delCount += (int)ApplyTermDeletes(coalescedUpdates.TermsIterable(), rld, reader);
                                delCount += (int)ApplyQueryDeletes(coalescedUpdates.QueriesIterable(), rld, reader);
                                ApplyDocValuesUpdates(coalescedUpdates.numericDVUpdates, rld, reader, dvUpdates);
                                ApplyDocValuesUpdates(coalescedUpdates.binaryDVUpdates, rld, reader, dvUpdates);
                            }
                            //System.out.println("    del exact");
                            // Don't delete by Term here; DocumentsWriterPerThread
                            // already did that on flush:
                            delCount += (int)ApplyQueryDeletes(packet.GetQueriesEnumerable(), rld, reader);
                            ApplyDocValuesUpdates(Arrays.AsList(packet.numericDVUpdates), rld, reader, dvUpdates);
                            ApplyDocValuesUpdates(Arrays.AsList(packet.binaryDVUpdates), rld, reader, dvUpdates);
                            if (dvUpdates.Any())
                            {
                                rld.WriteFieldUpdates(info.Info.Dir, dvUpdates);
                            }
                            int fullDelCount = rld.Info.DelCount + rld.PendingDeleteCount;
                            Debug.Assert(fullDelCount <= rld.Info.Info.DocCount);
                            segAllDeletes = fullDelCount == rld.Info.Info.DocCount;
                        }
                        finally
                        {
                            rld.Release(reader);
                            readerPool.Release(rld);
                        }
                        anyNewDeletes |= delCount > 0;

                        if (segAllDeletes)
                        {
                            if (allDeleted == null)
                            {
                                allDeleted = new List<SegmentCommitInfo>();
                            }
                            allDeleted.Add(info);
                        }

                        if (infoStream.IsEnabled("BD"))
                        {
                            infoStream.Message("BD", "seg=" + info + " segGen=" + segGen + " segDeletes=[" + packet + "]; coalesced deletes=[" + (coalescedUpdates == null ? "null" : coalescedUpdates.ToString()) + "] newDelCount=" + delCount + (segAllDeletes ? " 100% deleted" : ""));
                        }

                        if (coalescedUpdates == null)
                        {
                            coalescedUpdates = new CoalescedUpdates();
                        }

                        /*
                         * Since we are on a segment private del packet we must not
                         * update the coalescedDeletes here! We can simply advance to the
                         * next packet and seginfo.
                         */
                        delIDX--;
                        infosIDX--;
                        info.SetBufferedDeletesGen(gen);
                    }
                    else
                    {
                        //System.out.println("  gt");

                        if (coalescedUpdates != null)
                        {
                            // Lock order: IW -> BD -> RP
                            Debug.Assert(readerPool.InfoIsLive(info));
                            ReadersAndUpdates rld    = readerPool.Get(info, true);
                            SegmentReader     reader = rld.GetReader(IOContext.READ);
                            int  delCount            = 0;
                            bool segAllDeletes;
                            try
                            {
                                delCount += (int)ApplyTermDeletes(coalescedUpdates.TermsIterable(), rld, reader);
                                delCount += (int)ApplyQueryDeletes(coalescedUpdates.QueriesIterable(), rld, reader);
                                DocValuesFieldUpdates.Container dvUpdates = new DocValuesFieldUpdates.Container();
                                ApplyDocValuesUpdates(coalescedUpdates.numericDVUpdates, rld, reader, dvUpdates);
                                ApplyDocValuesUpdates(coalescedUpdates.binaryDVUpdates, rld, reader, dvUpdates);
                                if (dvUpdates.Any())
                                {
                                    rld.WriteFieldUpdates(info.Info.Dir, dvUpdates);
                                }
                                int fullDelCount = rld.Info.DelCount + rld.PendingDeleteCount;
                                Debug.Assert(fullDelCount <= rld.Info.Info.DocCount);
                                segAllDeletes = fullDelCount == rld.Info.Info.DocCount;
                            }
                            finally
                            {
                                rld.Release(reader);
                                readerPool.Release(rld);
                            }
                            anyNewDeletes |= delCount > 0;

                            if (segAllDeletes)
                            {
                                if (allDeleted == null)
                                {
                                    allDeleted = new List<SegmentCommitInfo>();
                                }
                                allDeleted.Add(info);
                            }

                            if (infoStream.IsEnabled("BD"))
                            {
                                infoStream.Message("BD", "seg=" + info + " segGen=" + segGen + " coalesced deletes=[" + coalescedUpdates + "] newDelCount=" + delCount + (segAllDeletes ? " 100% deleted" : ""));
                            }
                        }
                        info.SetBufferedDeletesGen(gen);

                        infosIDX--;
                    }
                }

                Debug.Assert(CheckDeleteStats());
                if (infoStream.IsEnabled("BD"))
                {
                    infoStream.Message("BD", "applyDeletes took " + (Environment.TickCount - t0) + " msec");
                }
                // assert infos != segmentInfos || !any() : "infos=" + infos + " segmentInfos=" + segmentInfos + " any=" + any;

                return new ApplyDeletesResult(anyNewDeletes, gen, allDeleted);
            }
        }
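
The two branches above share one pattern: segments and delete packets are both ordered by generation and walked from the newest end with two pointers. A minimal, self-contained sketch of that pattern (simplified stand-in types, not the Lucene.NET API):

using System;
using System.Collections.Generic;

internal static class DelGenWalkSketch
{
    // stand-ins for SegmentCommitInfo.BufferedDeletesGen and FrozenBufferedUpdates.DelGen
    internal class Segment { public string Name; public long Gen; }
    internal class Packet { public long DelGen; public bool SegmentPrivate; }

    internal static void Walk(List<Segment> segments, List<Packet> packets)
    {
        // sort ascending by generation, then walk both lists from the newest end
        segments.Sort((a, b) => a.Gen.CompareTo(b.Gen));
        packets.Sort((a, b) => a.DelGen.CompareTo(b.DelGen));

        var coalesced = new List<Packet>(); // plays the role of CoalescedUpdates
        int segIdx = segments.Count - 1, pktIdx = packets.Count - 1;

        while (segIdx >= 0)
        {
            Packet packet = pktIdx >= 0 ? packets[pktIdx] : null;
            Segment seg = segments[segIdx];

            if (packet != null && seg.Gen < packet.DelGen)
            {
                // packet is newer than this segment: fold it into the coalesced set,
                // unless it is segment-private (those match exactly one segment)
                if (!packet.SegmentPrivate)
                {
                    coalesced.Add(packet);
                }
                pktIdx--;
            }
            else if (packet != null && seg.Gen == packet.DelGen)
            {
                // exact generation match: only a segment-private packet can do this
                Console.WriteLine(seg.Name + ": private packet gen=" + packet.DelGen + " plus " + coalesced.Count + " coalesced");
                pktIdx--;
                segIdx--;
            }
            else
            {
                // no packet at or above this segment's gen: only coalesced deletes apply
                Console.WriteLine(seg.Name + ": " + coalesced.Count + " coalesced packet(s)");
                segIdx--;
            }
        }
    }
}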
 public override bool UseCompoundFile(SegmentInfos segments, SegmentCommitInfo newSegment)
 {
     return @base.UseCompoundFile(segments, newSegment);
 }
Example #10
 public ReadersAndUpdates(IndexWriter writer, SegmentCommitInfo info)
 {
     this.Info      = info;
     this.writer    = writer;
     liveDocsShared = true;
 }
        public override void SetUp()
        {
            base.SetUp();

            /*
             * for (int i = 0; i < testFields.Length; i++) {
             * fieldInfos.Add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]);
             * }
             */

            Array.Sort(TestTerms);
            int tokenUpto = 0;
            Random rnd = new Random(1); // one seeded RNG for the whole loop; a fresh new Random(1) per token would repeat the same value

            for (int i = 0; i < TestTerms.Length; i++)
            {
                Positions[i] = new int[TERM_FREQ];
                // first position must be 0
                for (int j = 0; j < TERM_FREQ; j++)
                {
                    // positions are always sorted in increasing order
                    Positions[i][j] = (int)(j * 10 + rnd.NextDouble() * 10);
                    TestToken token = Tokens[tokenUpto++] = new TestToken(this);
                    token.Text        = TestTerms[i];
                    token.Pos         = Positions[i][j];
                    token.StartOffset = j * 10;
                    token.EndOffset   = j * 10 + TestTerms[i].Length;
                }
            }
            Array.Sort(Tokens);

            Dir = NewDirectory();
            IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MyAnalyzer(this)).SetMaxBufferedDocs(-1).SetMergePolicy(NewLogMergePolicy(false, 10)).SetUseCompoundFile(false));

            Document doc = new Document();

            for (int i = 0; i < TestFields.Length; i++)
            {
                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                if (TestFieldsStorePos[i] && TestFieldsStoreOff[i])
                {
                    customType.StoreTermVectors         = true;
                    customType.StoreTermVectorPositions = true;
                    customType.StoreTermVectorOffsets   = true;
                }
                else if (TestFieldsStorePos[i] && !TestFieldsStoreOff[i])
                {
                    customType.StoreTermVectors         = true;
                    customType.StoreTermVectorPositions = true;
                }
                else if (!TestFieldsStorePos[i] && TestFieldsStoreOff[i])
                {
                    customType.StoreTermVectors       = true;
                    customType.StoreTermVectorOffsets = true;
                }
                else
                {
                    customType.StoreTermVectors = true;
                }
                doc.Add(new Field(TestFields[i], "", customType));
            }

            //Create 5 documents for testing, they all have the same
            //terms
            for (int j = 0; j < 5; j++)
            {
                writer.AddDocument(doc);
            }
            writer.Commit();
            Seg = writer.NewestSegment();
            writer.Dispose();

            FieldInfos = SegmentReader.ReadFieldInfos(Seg);
        }
Example #12
        public virtual void TestIndexAndMerge()
        {
            MemoryStream sw   = new MemoryStream();
            StreamWriter @out = new StreamWriter(sw);

            Directory directory = NewFSDirectory(IndexDir, null);

            MockDirectoryWrapper wrapper = directory as MockDirectoryWrapper;

            if (wrapper != null)
            {
                // We create unreferenced files (we don't even write
                // a segments file):
                wrapper.AssertNoUnrefencedFilesOnClose = false;
            }

            IndexWriter writer = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.CREATE).SetMaxBufferedDocs(-1).SetMergePolicy(NewLogMergePolicy(10)));

            SegmentCommitInfo si1 = IndexDoc(writer, "test.txt");

            PrintSegment(@out, si1);

            SegmentCommitInfo si2 = IndexDoc(writer, "test2.txt");

            PrintSegment(@out, si2);
            writer.Dispose();

            SegmentCommitInfo siMerge = Merge(directory, si1, si2, "_merge", false);

            PrintSegment(@out, siMerge);

            SegmentCommitInfo siMerge2 = Merge(directory, si1, si2, "_merge2", false);

            PrintSegment(@out, siMerge2);

            SegmentCommitInfo siMerge3 = Merge(directory, siMerge, siMerge2, "_merge3", false);

            PrintSegment(@out, siMerge3);

            directory.Dispose();
            @out.Dispose();
            sw.Dispose();

            string multiFileOutput = Encoding.UTF8.GetString(sw.ToArray()); // MemoryStream.ToString() returns the type name, not the written text

            //System.out.println(multiFileOutput);

            sw   = new MemoryStream();
            @out = new StreamWriter(sw);

            directory = NewFSDirectory(IndexDir, null);

            wrapper = directory as MockDirectoryWrapper;
            if (wrapper != null)
            {
                // We create unreferenced files (we don't even write
                // a segments file):
                wrapper.AssertNoUnrefencedFilesOnClose = false;
            }

            writer = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.CREATE).SetMaxBufferedDocs(-1).SetMergePolicy(NewLogMergePolicy(10)));

            si1 = IndexDoc(writer, "test.txt");
            PrintSegment(@out, si1);

            si2 = IndexDoc(writer, "test2.txt");
            PrintSegment(@out, si2);
            writer.Dispose();

            siMerge = Merge(directory, si1, si2, "_merge", true);
            PrintSegment(@out, siMerge);

            siMerge2 = Merge(directory, si1, si2, "_merge2", true);
            PrintSegment(@out, siMerge2);

            siMerge3 = Merge(directory, siMerge, siMerge2, "_merge3", true);
            PrintSegment(@out, siMerge3);

            directory.Dispose();
            @out.Dispose();
            sw.Dispose();
            string singleFileOutput = Encoding.UTF8.GetString(sw.ToArray());

            Assert.AreEqual(multiFileOutput, singleFileOutput);
        }
Example #13
        internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
        {
            fieldsReaderLocal = new DisposableThreadLocal<StoredFieldsReader>(
                () => (StoredFieldsReader)fieldsReaderOrig.Clone());
            termVectorsLocal = new DisposableThreadLocal<TermVectorsReader>(
                () => (termVectorsReaderOrig == null) ? null : (TermVectorsReader)termVectorsReaderOrig.Clone());

            if (termsIndexDivisor == 0)
            {
                throw new ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
            }

            Codec     codec = si.Info.Codec;
            Directory cfsDir; // confusing name: if (cfs) it's the cfsDir, otherwise it's the segment's directory.

            bool success = false;

            try
            {
                if (si.Info.UseCompoundFile)
                {
                    cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
                }
                else
                {
                    cfsReader = null;
                    cfsDir    = dir;
                }

                FieldInfos fieldInfos = owner.FieldInfos;

                this.termsIndexDivisor = termsIndexDivisor;
                PostingsFormat   format           = codec.PostingsFormat;
                SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);
                // Ask codec for its Fields
                fields = format.FieldsProducer(segmentReadState);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(fields != null);
                }
                // ask codec for its Norms:
                // TODO: since we don't write any norms file if there are no norms,
                // kinda janky to assume the codec handles the case of no norms file at all gracefully?!

                if (fieldInfos.HasNorms)
                {
                    normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(normsProducer != null);
                    }
                }
                else
                {
                    normsProducer = null;
                }

                fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);

                if (fieldInfos.HasVectors) // open term vector files only as needed
                {
                    termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
                }
                else
                {
                    termVectorsReaderOrig = null;
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    DecRef();
                }
            }
        }
 public override bool UseCompoundFile(SegmentInfos infos, SegmentCommitInfo mergedInfo)
 {
     // 80% of the time we create CFS:
      return random.Next(5) != 1;
 }
Example #15
 /// <summary>
 /// Returns true if the given segment should be upgraded. The default implementation
 /// returns <c>!Constants.LUCENE_MAIN_VERSION.Equals(si.Info.Version)</c>,
 /// so all segments created with a different version number than this Lucene version will
 /// get upgraded.
 /// </summary>
 protected internal virtual bool ShouldUpgradeSegment(SegmentCommitInfo si)
 {
      return !Constants.LUCENE_MAIN_VERSION.Equals(si.Info.Version);
 }
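
A hypothetical subclass sketch showing how the hook above can narrow upgrades, e.g. to 3.x-era segments only (class name and version check are illustrative, not from the source):

 internal class Upgrade3xOnlyMergePolicy : UpgradeIndexMergePolicy
 {
     public Upgrade3xOnlyMergePolicy(MergePolicy @base)
         : base(@base)
     {
     }

     protected internal override bool ShouldUpgradeSegment(SegmentCommitInfo si)
     {
         // only rewrite segments written by Lucene 3.x; leave current-format segments alone
         return si.Info.Version != null && si.Info.Version.StartsWith("3.", StringComparison.Ordinal);
     }
 }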
Example #16
 public override bool UseCompoundFile(SegmentInfos segments, SegmentCommitInfo newSegment)
 {
      return @base.UseCompoundFile(segments, newSegment);
 }
Example #17
 public ReadersAndUpdates(IndexWriter writer, SegmentCommitInfo info)
 {
     this.Info = info;
     this.Writer = writer;
     LiveDocsShared = true;
 }
        /// <summary>
        /// Used by near real-time search </summary>
        internal static DirectoryReader Open(IndexWriter writer, SegmentInfos infos, bool applyAllDeletes)
        {
            // IndexWriter synchronizes externally before calling
            // us, which ensures infos will not change; so there's
            // no need to process segments in reverse order
            int numSegments = infos.Count;

            IList<SegmentReader> readers = new List<SegmentReader>();
            Directory            dir     = writer.Directory;

            SegmentInfos segmentInfos = (SegmentInfos)infos.Clone();
            int          infosUpto    = 0;
            bool         success      = false;

            try
            {
                for (int i = 0; i < numSegments; i++)
                {
                    // NOTE: important that we use infos not
                    // segmentInfos here, so that we are passing the
                    // actual instance of SegmentInfoPerCommit in
                    // IndexWriter's segmentInfos:
                    SegmentCommitInfo info = infos.Info(i);
                    Debug.Assert(info.Info.Dir == dir);
                    ReadersAndUpdates rld = writer.readerPool.Get(info, true);
                    try
                    {
                        SegmentReader reader = rld.GetReadOnlyClone(IOContext.READ);
                        if (reader.NumDocs > 0 || writer.KeepFullyDeletedSegments)
                        {
                            // Steal the ref:
                            readers.Add(reader);
                            infosUpto++;
                        }
                        else
                        {
                            reader.DecRef();
                            segmentInfos.Remove(infosUpto);
                        }
                    }
                    finally
                    {
                        writer.readerPool.Release(rld);
                    }
                }

                writer.IncRefDeleter(segmentInfos);

                StandardDirectoryReader result = new StandardDirectoryReader(dir, readers.ToArray(), writer, segmentInfos, writer.Config.ReaderTermsIndexDivisor, applyAllDeletes);
                success = true;
                return result;
            }
            finally
            {
                if (!success)
                {
                    foreach (SegmentReader r in readers)
                    {
                        try
                        {
                            r.DecRef();
                        }
#pragma warning disable 168
                        catch (Exception th)
#pragma warning restore 168
                        {
                            // ignore any exception that is thrown here to not mask any original
                            // exception.
                        }
                    }
                }
            }
        }
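
For context, application code normally reaches this near-real-time path through the public API; a minimal hedged sketch (assumes writer and doc are already set up as in the other examples):

 // DirectoryReader.Open(IndexWriter, bool) forwards to the internal Open above
 writer.AddDocument(doc);
 DirectoryReader nrtReader = DirectoryReader.Open(writer, true);
 // nrtReader sees the just-added document without a commit
 nrtReader.Dispose();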
Example #19
        /// <summary>
        /// Merges the provided indexes into this index.
        ///
        /// <para/>
        /// The provided IndexReaders are not closed.
        ///
        /// <para/>
        /// See <seealso cref="#addIndexes"/> for details on transactional semantics, temporary
        /// free space required in the Directory, and non-CFS segments on an Exception.
        ///
        /// <para/>
        /// <b>NOTE</b>: if this method hits an OutOfMemoryError you should immediately
        /// close the writer. See <a href="#OOME">above</a> for details.
        ///
        /// <para/>
        /// <b>NOTE:</b> empty segments are dropped by this method and not added to this
        /// index.
        ///
        /// <para/>
        /// <b>NOTE:</b> this method merges all given <seealso cref="IndexReader"/>s in one
        /// merge. If you intend to merge a large number of readers, it may be better
        /// to call this method multiple times, each time with a small set of readers.
        /// In principle, if you use a merge policy with a <c>mergeFactor</c> or
        /// <c>maxMergeAtOnce</c> parameter, you should pass that many readers in one
        /// call. Also, if the given readers are <seealso cref="DirectoryReader"/>s, they can be
        /// opened with <c>termIndexInterval=-1</c> to save RAM, since during merge
        /// the in-memory structure is not used. See
        /// <seealso cref="DirectoryReader#open(Directory, int)"/>.
        ///
        /// <para/>
        /// <b>NOTE</b>: if you call <seealso cref="#close(boolean)"/> with <c>false</c>, which
        /// aborts all running merges, then any thread still running this method might
        /// hit a <seealso cref="MergePolicy.MergeAbortedException"/>.
        /// </summary>
        /// <exception cref="CorruptIndexException">
        ///           if the index is corrupt </exception>
        /// <exception cref="IOException">
        ///           if there is a low-level IO error </exception>
        public virtual void AddIndexes(params IndexReader[] readers)
        {
            EnsureOpen();
            int numDocs = 0;

            try
            {
                if (infoStream.IsEnabled("IW"))
                {
                    infoStream.Message("IW", "flush at addIndexes(IndexReader...)");
                }
                Flush(false, true);

                string mergedName = NewSegmentName();
                IList<AtomicReader> mergeReaders = new List<AtomicReader>();
                foreach (IndexReader indexReader in readers)
                {
                    numDocs += indexReader.NumDocs();
                    foreach (AtomicReaderContext ctx in indexReader.Leaves())
                    {
                        mergeReaders.Add(ctx.AtomicReader);
                    }
                }

                IOContext context = new IOContext(new MergeInfo(numDocs, -1, true, -1));

                // TODO: somehow we should fix this merge so it's
                // abortable so that IW.close(false) is able to stop it
                TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);

                SegmentInfo info = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergedName, -1, false, Codec, null);

                SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir, Config_Renamed.TermIndexInterval, MergeState.CheckAbort.NONE, GlobalFieldNumberMap, context, Config_Renamed.CheckIntegrityAtMerge);

                if (!merger.ShouldMerge())
                {
                    return;
                }

                MergeState mergeState;
                bool success = false;
                try
                {
                    mergeState = merger.Merge(); // merge 'em
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        lock (this)
                        {
                            Deleter.Refresh(info.Name);
                        }
                    }
                }

                SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, 0, -1L, -1L);

                info.Files = new HashSet<string>(trackingDir.CreatedFiles);
                trackingDir.CreatedFiles.Clear();

                SetDiagnostics(info, SOURCE_ADDINDEXES_READERS);

                bool useCompoundFile;
                lock (this) // Guard segmentInfos
                {
                    if (StopMerges)
                    {
                        Deleter.DeleteNewFiles(infoPerCommit.Files());
                        return;
                    }
                    EnsureOpen();
                    useCompoundFile = mergePolicy.UseCompoundFile(segmentInfos, infoPerCommit);
                }

                // Now create the compound file if needed
                if (useCompoundFile)
                {
                    ICollection<string> filesToDelete = infoPerCommit.Files();
                    try
                    {
                        CreateCompoundFile(infoStream, directory, MergeState.CheckAbort.NONE, info, context);
                    }
                    finally
                    {
                        // delete new non cfs files directly: they were never
                        // registered with IFD
                        lock (this)
                        {
                            Deleter.DeleteNewFiles(filesToDelete);
                        }
                    }
                    info.UseCompoundFile = true;
                }

                // Have codec write SegmentInfo.  Must do this after
                // creating CFS so that 1) .si isn't slurped into CFS,
                // and 2) .si reflects useCompoundFile=true change
                // above:
                success = false;
                try
                {
                    Codec.SegmentInfoFormat().SegmentInfoWriter.Write(trackingDir, info, mergeState.FieldInfos, context);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        lock (this)
                        {
                            Deleter.Refresh(info.Name);
                        }
                    }
                }

                info.AddFiles(trackingDir.CreatedFiles);

                // Register the new segment
                lock (this)
                {
                    if (StopMerges)
                    {
                        Deleter.DeleteNewFiles(info.Files);
                        return;
                    }
                    EnsureOpen();
                    segmentInfos.Add(infoPerCommit);
                    Checkpoint();
                }
            }
            catch (System.OutOfMemoryException oom)
            {
                HandleOOM(oom, "addIndexes(IndexReader...)");
            }
        }
Example #20
 /// <summary>
 /// Create new <see cref="SegmentReader"/> sharing core from a previous
 /// <see cref="SegmentReader"/> and loading new live docs from a new
 /// deletes file. Used by <see cref="DirectoryReader.OpenIfChanged(DirectoryReader)"/>.
 /// </summary>
 internal SegmentReader(SegmentCommitInfo si, SegmentReader sr)
     : this(si, sr, si.Info.Codec.LiveDocsFormat.ReadLiveDocs(si.Info.Dir, si, IOContext.READ_ONCE), si.Info.DocCount - si.DelCount)
 {
 }
Example #21
        /// <summary>
        /// Obtain the number of deleted docs for a pooled reader.
        /// If the reader isn't being pooled, the segmentInfo's
        /// delCount is returned.
        /// </summary>
        public virtual int NumDeletedDocs(SegmentCommitInfo info)
        {
            EnsureOpen(false);
            int delCount = info.DelCount;

            ReadersAndUpdates rld = readerPool.Get(info, false);
            if (rld != null)
            {
                delCount += rld.PendingDeleteCount;
            }
            return delCount;
        }
        internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
        {
            fieldsReaderLocal = new AnonymousFieldsReaderLocal(this);
            termVectorsLocal  = new AnonymousTermVectorsLocal(this);

            if (termsIndexDivisor == 0)
            {
                throw new System.ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
            }

            Codec     codec = si.Info.Codec;
            Directory cfsDir; // confusing name: if (cfs) it's the cfsDir, otherwise it's the segment's directory.

            bool success = false;

            try
            {
                if (si.Info.UseCompoundFile)
                {
                    cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
                }
                else
                {
                    cfsReader = null;
                    cfsDir    = dir;
                }

                FieldInfos fieldInfos = owner.FieldInfos;

                this.termsIndexDivisor = termsIndexDivisor;
                PostingsFormat   format           = codec.PostingsFormat;
                SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);
                // Ask codec for its Fields
                fields = format.FieldsProducer(segmentReadState);
                Debug.Assert(fields != null);
                // ask codec for its Norms:
                // TODO: since we don't write any norms file if there are no norms,
                // kinda janky to assume the codec handles the case of no norms file at all gracefully?!

                if (fieldInfos.HasNorms)
                {
                    normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
                    Debug.Assert(normsProducer != null);
                }
                else
                {
                    normsProducer = null;
                }

                // LUCENENET TODO: EXCEPTIONS Not sure why this catch block is swallowing AccessViolationException,
                // because it didn't exist in Lucene. Is it really needed? AVE is for protected memory...could
                // this be needed because we are using unchecked??

#if !NETSTANDARD
                try
                {
#endif
                    fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);
#if !NETSTANDARD
                }
#pragma warning disable 168
                catch (System.AccessViolationException ave)
#pragma warning restore 168
                {
                }
#endif

                if (fieldInfos.HasVectors) // open term vector files only as needed
                {
                    termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
                }
                else
                {
                    termVectorsReaderOrig = null;
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    DecRef();
                }
            }
        }
Example #23
        internal virtual void SealFlushedSegment(FlushedSegment flushedSegment)
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(flushedSegment != null);
            }

            SegmentCommitInfo newSegment = flushedSegment.segmentInfo;

            IndexWriter.SetDiagnostics(newSegment.Info, IndexWriter.SOURCE_FLUSH);

            IOContext context = new IOContext(new FlushInfo(newSegment.Info.DocCount, newSegment.GetSizeInBytes()));

            bool success = false;

            try
            {
                if (indexWriterConfig.UseCompoundFile)
                {
                    filesToDelete.UnionWith(IndexWriter.CreateCompoundFile(infoStream, directory, CheckAbort.NONE, newSegment.Info, context));
                    newSegment.Info.UseCompoundFile = true;
                }

                // Have codec write SegmentInfo.  Must do this after
                // creating CFS so that 1) .si isn't slurped into CFS,
                // and 2) .si reflects useCompoundFile=true change
                // above:
                codec.SegmentInfoFormat.SegmentInfoWriter.Write(directory, newSegment.Info, flushedSegment.fieldInfos, context);

                // TODO: ideally we would freeze newSegment here!!
                // because any changes after writing the .si will be
                // lost...

                // Must write deleted docs after the CFS so we don't
                // slurp the del file into CFS:
                if (flushedSegment.liveDocs != null)
                {
                    int delCount = flushedSegment.delCount;
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(delCount > 0);
                    }
                    if (infoStream.IsEnabled("DWPT"))
                    {
                        infoStream.Message("DWPT", "flush: write " + delCount + " deletes gen=" + flushedSegment.segmentInfo.DelGen);
                    }

                    // TODO: we should prune the segment if it's 100%
                    // deleted... but merge will also catch it.

                    // TODO: in the NRT case it'd be better to hand
                    // this del vector over to the
                    // shortly-to-be-opened SegmentReader and let it
                    // carry the changes; there's no reason to use
                    // filesystem as intermediary here.

                    SegmentCommitInfo info  = flushedSegment.segmentInfo;
                    Codec             codec = info.Info.Codec;
                    codec.LiveDocsFormat.WriteLiveDocs(flushedSegment.liveDocs, directory, info, delCount, context);
                    newSegment.DelCount = delCount;
                    newSegment.AdvanceDelGen();
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    if (infoStream.IsEnabled("DWPT"))
                    {
                        infoStream.Message("DWPT", "hit exception creating compound file for newly flushed segment " + newSegment.Info.Name);
                    }
                }
            }
        }
Example #24
 protected override long Size(SegmentCommitInfo info)
 {
      return long.MaxValue;
 }
Example #25
        /// <summary>
        /// Read a particular segmentFileName.  Note that this may
        /// throw an IOException if a commit is in process.
        /// </summary>
        /// <param name="directory"> -- directory containing the segments file </param>
        /// <param name="segmentFileName"> -- segment file to load </param>
        /// <exception cref="CorruptIndexException"> if the index is corrupt </exception>
        /// <exception cref="IOException"> if there is a low-level IO error </exception>
        public void Read(Directory directory, string segmentFileName)
        {
            var success = false;

            // Clear any previous segments:
            this.Clear();

            _generation = GenerationFromSegmentsFileName(segmentFileName);

            _lastGeneration = _generation;

            var input = directory.OpenChecksumInput(segmentFileName, IOContext.READ);
            try
            {
                int format = input.ReadInt();
                int actualFormat;
                if (format == CodecUtil.CODEC_MAGIC)
                {
                    // 4.0+
                    actualFormat = CodecUtil.CheckHeaderNoMagic(input, "segments", VERSION_40, VERSION_48);
                    Version = input.ReadLong();
                    Counter = input.ReadInt();
                    int numSegments = input.ReadInt();
                    if (numSegments < 0)
                    {
                        throw new CorruptIndexException("invalid segment count: " + numSegments + " (resource: " + input + ")");
                    }
                    for (var seg = 0; seg < numSegments; seg++)
                    {
                        var segName = input.ReadString();
                        var codec = Codec.ForName(input.ReadString());
                        //System.out.println("SIS.read seg=" + seg + " codec=" + codec);
                        var info = codec.SegmentInfoFormat().SegmentInfoReader.Read(directory, segName, IOContext.READ);
                        info.Codec = codec;
                        long delGen = input.ReadLong();
                        int delCount = input.ReadInt();
                        if (delCount < 0 || delCount > info.DocCount)
                        {
                            throw new CorruptIndexException("invalid deletion count: " + delCount + " vs docCount=" + info.DocCount + " (resource: " + input + ")");
                        }
                        long fieldInfosGen = -1;
                        if (actualFormat >= VERSION_46)
                        {
                            fieldInfosGen = input.ReadLong();
                        }
                        var siPerCommit = new SegmentCommitInfo(info, delCount, delGen, fieldInfosGen);
                        if (actualFormat >= VERSION_46)
                        {
                            int numGensUpdatesFiles = input.ReadInt();
                            IDictionary<long, ISet<string>> genUpdatesFiles;
                            if (numGensUpdatesFiles == 0)
                            {
                                genUpdatesFiles = CollectionsHelper.EmptyMap<long, ISet<string>>();
                            }
                            else
                            {
                                genUpdatesFiles = new Dictionary<long, ISet<string>>(numGensUpdatesFiles);
                                for (int i = 0; i < numGensUpdatesFiles; i++)
                                {
                                    genUpdatesFiles[input.ReadLong()] = input.ReadStringSet();
                                }
                            }
                            siPerCommit.GenUpdatesFiles = genUpdatesFiles;
                        }
                        Add(siPerCommit);
                    }
                    _userData = input.ReadStringStringMap();
                }
                else
                {
                    actualFormat = -1;
                    Lucene3xSegmentInfoReader.ReadLegacyInfos(this, directory, input, format);
                    Codec codec = Codec.ForName("Lucene3x");
                    foreach (SegmentCommitInfo info in segments)
                    {
                        info.Info.Codec = codec;
                    }
                }

                if (actualFormat >= VERSION_48)
                {
                    CodecUtil.CheckFooter(input);
                }
                else
                {
                    long checksumNow = input.Checksum;
                    long checksumThen = input.ReadLong();
                    if (checksumNow != checksumThen)
                    {
                        throw new CorruptIndexException("checksum mismatch in segments file (resource: " + input + ")");
                    }
                    CodecUtil.CheckEOF(input);
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    // Clear any segment infos we had loaded so we
                    // have a clean slate on retry:
                    this.Clear();
                    IOUtils.CloseWhileHandlingException(input);
                }
                else
                {
                    input.Dispose();
                }
            }
        }
Example #26
 public override bool UseCompoundFile(SegmentInfos segments, SegmentCommitInfo newSegment)
 {
      return useCompoundFile;
 }
Example #27
 protected internal override long Size(SegmentCommitInfo info)
 {
     return long.MaxValue;
 }
Example #28
        public virtual object Clone()
        {
            SegmentCommitInfo other = new SegmentCommitInfo(Info, DelCount_Renamed, DelGen_Renamed, FieldInfosGen_Renamed);
            // Not clear that we need to carry over nextWriteDelGen
            // (i.e. do we ever clone after a failed write and
            // before the next successful write?), but just do it to
            // be safe:
            other.NextWriteDelGen = NextWriteDelGen;
            other.NextWriteFieldInfosGen = NextWriteFieldInfosGen;

            // deep clone
            foreach (KeyValuePair<long, ISet<string>> e in GenUpdatesFiles_Renamed)
            {
                other.GenUpdatesFiles_Renamed[e.Key] = new HashSet<string>(e.Value);
            }

            return other;
        }
Example #29
 /// <summary>
 /// Returns the byte size of the provided <see cref="SegmentCommitInfo"/>,
 /// pro-rated by the percentage of non-deleted documents.
 /// </summary>
 protected internal virtual long Size(SegmentCommitInfo info)
 {
     long byteSize = info.SizeInBytes();
     int delCount = Writer.Get().NumDeletedDocs(info);
     double delRatio = (info.Info.DocCount <= 0 ? 0.0f : ((float)delCount / (float)info.Info.DocCount));
     Debug.Assert(delRatio <= 1.0);
     return (info.Info.DocCount <= 0 ? byteSize : (long)(byteSize * (1.0 - delRatio)));
 }
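
A quick worked example of the pro-rating above, with hypothetical numbers:

 // A 100 MB segment with 25% of its documents deleted counts as ~75 MB
 // toward merge decisions.
 long byteSize = 100L * 1024 * 1024; // info.SizeInBytes()
 int docCount = 1000;                // info.Info.DocCount
 int delCount = 250;                 // Writer.Get().NumDeletedDocs(info)
 double delRatio = (double)delCount / docCount;        // 0.25
 long proRated = (long)(byteSize * (1.0 - delRatio));  // 78,643,200 bytes (~75 MB)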
Example #30
        private SegmentCommitInfo Merge(Directory dir, SegmentCommitInfo si1, SegmentCommitInfo si2, string merged, bool useCompoundFile)
        {
            IOContext context = NewIOContext(Random());
            SegmentReader r1 = new SegmentReader(si1, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context);
            SegmentReader r2 = new SegmentReader(si2, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context);

            Codec codec = Codec.Default;
            TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.Info.Dir);
            SegmentInfo si = new SegmentInfo(si1.Info.Dir, Constants.LUCENE_MAIN_VERSION, merged, -1, false, codec, null);

            SegmentMerger merger = new SegmentMerger(Arrays.AsList<AtomicReader>(r1, r2), si, InfoStream.Default, trackingDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), context, true);

            MergeState mergeState = merger.Merge();
            r1.Dispose();
            r2.Dispose();
            SegmentInfo info = new SegmentInfo(si1.Info.Dir, Constants.LUCENE_MAIN_VERSION, merged, si1.Info.DocCount + si2.Info.DocCount, false, codec, null);
            info.Files = new HashSet<string>(trackingDir.CreatedFiles);

            if (useCompoundFile)
            {
                ICollection<string> filesToDelete = IndexWriter.CreateCompoundFile(InfoStream.Default, dir, MergeState.CheckAbort.NONE, info, NewIOContext(Random()));
                info.UseCompoundFile = true;
                foreach (String fileToDelete in filesToDelete)
                {
                    si1.Info.Dir.DeleteFile(fileToDelete);
                }
            }

            return new SegmentCommitInfo(info, 0, -1L, -1L);
        }
        /// <summary>
        /// Flush all pending docs to a new segment </summary>
        internal virtual FlushedSegment Flush()
        {
            Debug.Assert(numDocsInRAM > 0);
            Debug.Assert(DeleteSlice.Empty, "all deletes must be applied in prepareFlush");
            SegmentInfo_Renamed.DocCount = numDocsInRAM;
            SegmentWriteState flushState = new SegmentWriteState(InfoStream, Directory, SegmentInfo_Renamed, FieldInfos.Finish(), IndexWriterConfig.TermIndexInterval, PendingUpdates, new IOContext(new FlushInfo(numDocsInRAM, BytesUsed())));
            double startMBUsed = BytesUsed() / 1024.0 / 1024.0;

            // Apply delete-by-docID now (delete-byDocID only
            // happens when an exception is hit processing that
            // doc, eg if analyzer has some problem w/ the text):
            if (PendingUpdates.DocIDs.Count > 0)
            {
                flushState.LiveDocs = Codec.LiveDocsFormat().NewLiveDocs(numDocsInRAM);
                foreach (int delDocID in PendingUpdates.DocIDs)
                {
                    flushState.LiveDocs.Clear(delDocID);
                }
                flushState.DelCountOnFlush = PendingUpdates.DocIDs.Count;
                PendingUpdates.BytesUsed.AddAndGet(-PendingUpdates.DocIDs.Count * BufferedUpdates.BYTES_PER_DEL_DOCID);
                PendingUpdates.DocIDs.Clear();
            }

            if (Aborting)
            {
                if (InfoStream.IsEnabled("DWPT"))
                {
                    InfoStream.Message("DWPT", "flush: skip because aborting is set");
                }
                return null;
            }

            if (InfoStream.IsEnabled("DWPT"))
            {
                InfoStream.Message("DWPT", "flush postings as segment " + flushState.SegmentInfo.Name + " numDocs=" + numDocsInRAM);
            }

            bool success = false;

            try
            {
                Consumer.Flush(flushState);
                PendingUpdates.Terms.Clear();
                SegmentInfo_Renamed.Files = new HashSet<string>(Directory.CreatedFiles);

                SegmentCommitInfo segmentInfoPerCommit = new SegmentCommitInfo(SegmentInfo_Renamed, 0, -1L, -1L);
                if (InfoStream.IsEnabled("DWPT"))
                {
                    InfoStream.Message("DWPT", "new segment has " + (flushState.LiveDocs == null ? 0 : (flushState.SegmentInfo.DocCount - flushState.DelCountOnFlush)) + " deleted docs");
                    InfoStream.Message("DWPT", "new segment has " + (flushState.FieldInfos.HasVectors() ? "vectors" : "no vectors") + "; " + (flushState.FieldInfos.HasNorms() ? "norms" : "no norms") + "; " + (flushState.FieldInfos.HasDocValues() ? "docValues" : "no docValues") + "; " + (flushState.FieldInfos.HasProx() ? "prox" : "no prox") + "; " + (flushState.FieldInfos.HasFreq() ? "freqs" : "no freqs"));
                    InfoStream.Message("DWPT", "flushedFiles=" + segmentInfoPerCommit.Files());
                    InfoStream.Message("DWPT", "flushed codec=" + Codec);
                }

                BufferedUpdates segmentDeletes;
                if (PendingUpdates.Queries.Count == 0 && PendingUpdates.NumericUpdates.Count == 0 && PendingUpdates.BinaryUpdates.Count == 0)
                {
                    PendingUpdates.Clear();
                    segmentDeletes = null;
                }
                else
                {
                    segmentDeletes = PendingUpdates;
                }

                if (InfoStream.IsEnabled("DWPT"))
                {
                    double newSegmentSize = segmentInfoPerCommit.SizeInBytes() / 1024.0 / 1024.0;
                    InfoStream.Message("DWPT", "flushed: segment=" + SegmentInfo_Renamed.Name + " ramUsed=" + startMBUsed.ToString(Nf) + " MB" + " newFlushedSize(includes docstores)=" + newSegmentSize.ToString(Nf) + " MB" + " docs/MB=" + (flushState.SegmentInfo.DocCount / newSegmentSize).ToString(Nf));
                }

                Debug.Assert(SegmentInfo_Renamed != null);

                FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.FieldInfos, segmentDeletes, flushState.LiveDocs, flushState.DelCountOnFlush);
                SealFlushedSegment(fs);
                success = true;

                return fs;
            }
            finally
            {
                if (!success)
                {
                    Abort(FilesToDelete);
                }
            }
        }
Example #32
        private void PrintSegment(StreamWriter @out, SegmentCommitInfo si)
        {
            SegmentReader reader = new SegmentReader(si, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random()));

            for (int i = 0; i < reader.NumDocs; i++)
            {
                @out.WriteLine(reader.Document(i));
            }

            Fields fields = reader.Fields;
            foreach (string field in fields)
            {
                Terms terms = fields.Terms(field);
                Assert.IsNotNull(terms);
                TermsEnum tis = terms.Iterator(null);
                while (tis.Next() != null)
                {
                    @out.Write("  term=" + field + ":" + tis.Term());
                    @out.WriteLine("    DF=" + tis.DocFreq());

                    DocsAndPositionsEnum positions = tis.DocsAndPositions(reader.LiveDocs, null);

                    while (positions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        @out.Write(" doc=" + positions.DocID());
                        @out.Write(" TF=" + positions.Freq());
                        @out.Write(" pos=");
                        @out.Write(positions.NextPosition());
                        for (int j = 1; j < positions.Freq(); j++)
                        {
                            @out.Write("," + positions.NextPosition());
                        }
                        @out.WriteLine("");
                    }
                }
            }
            reader.Dispose();
        }
 public override bool UseCompoundFile(SegmentInfos infos, SegmentCommitInfo mergedInfo)
 {
     // 80% of the time we create CFS:
     return Random.Next(5) != 1;
 }
Example #34
 // used only by asserts
 public virtual bool InfoIsLive(SegmentCommitInfo info)
 {
     lock (this)
     {
         int idx = OuterInstance.segmentInfos.IndexOf(info);
         Debug.Assert(idx != -1, "info=" + info + " isn't live");
         Debug.Assert(OuterInstance.segmentInfos.Info(idx) == info, "info=" + info + " doesn't match live info in segmentInfos");
         return true;
     }
 }
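Because InfoIsLive either returns true or trips its internal Debug.Assert, callers can fold it directly into their own assertions. A minimal sketch (the message text is illustrative):

Debug.Assert(InfoIsLive(newSegment), "segment must be registered in segmentInfos before pooling its readers");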
Example #35
        /// <summary>
        /// Atomically adds the segment-private delete packet and publishes the flushed
        /// segment's SegmentInfo to the index writer.
        /// </summary>
        internal virtual void PublishFlushedSegment(SegmentCommitInfo newSegment, FrozenBufferedUpdates packet, FrozenBufferedUpdates globalPacket)
        {
            try
            {
                lock (this)
                {
                    // Lock order IW -> BDS
                    lock (BufferedUpdatesStream)
                    {
                        if (infoStream.IsEnabled("IW"))
                        {
                            infoStream.Message("IW", "publishFlushedSegment");
                        }

                        if (globalPacket != null && globalPacket.Any())
                        {
                            BufferedUpdatesStream.Push(globalPacket);
                        }
                        // Publishing the segment must be synced on IW -> BDS to make sure
                        // that no merge prunes away the segment-private delete packet
                        long nextGen;
                        if (packet != null && packet.Any())
                        {
                            nextGen = BufferedUpdatesStream.Push(packet);
                        }
                        else
                        {
                            // Since we don't have a delete packet to apply we can get a new
                            // generation right away
                            nextGen = BufferedUpdatesStream.NextGen;
                        }
                        if (infoStream.IsEnabled("IW"))
                        {
                            infoStream.Message("IW", "publish sets newSegment delGen=" + nextGen + " seg=" + SegString(newSegment));
                        }
                        newSegment.BufferedDeletesGen = nextGen;
                        segmentInfos.Add(newSegment);
                        Checkpoint();
                    }
                }
            }
            finally
            {
                flushCount.IncrementAndGet();
                DoAfterFlush();
            }
        }
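A hedged caller-side sketch of how a flushed segment might reach PublishFlushedSegment. Here dwpt, writer, deleteQueue, and FreezeGlobalBuffer are assumptions standing in for the DocumentsWriter plumbing; the FlushedSegment fields match the constructor shown earlier in this listing:

FlushedSegment flushed = dwpt.Flush(); // hypothetical DWPT flush call
if (flushed != null)
{
    FrozenBufferedUpdates packet = flushed.SegmentUpdates;                     // segment-private deletes, may be null
    FrozenBufferedUpdates globalPacket = deleteQueue.FreezeGlobalBuffer(null); // assumed API for the global deletes
    writer.PublishFlushedSegment(flushed.SegmentInfo, packet, globalPacket);
}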
Example #36
 /// <summary>
 /// Returns a string description of the specified
 ///  segment, for debugging.
 ///
 /// @lucene.internal
 /// </summary>
 public virtual string SegString(SegmentCommitInfo info)
 {
     lock (this)
     {
         return info.ToString(info.Info.Dir, NumDeletedDocs(info) - info.DelCount);
     }
 }
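A minimal usage sketch, mirroring how the surrounding IndexWriter code feeds SegString into InfoStream diagnostics elsewhere in this listing (the message text is illustrative):

if (infoStream.IsEnabled("IW"))
{
    infoStream.Message("IW", "about to merge " + SegString(info));
}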
Example #37
        /// <summary>
        /// Copies the segment files as-is into the IndexWriter's directory. </summary>
        private SegmentCommitInfo CopySegmentAsIs(SegmentCommitInfo info, string segName, IDictionary<string, string> dsNames, ISet<string> dsFilesCopied, IOContext context, ISet<string> copiedFiles)
        {
            // Determine if the doc store of this segment needs to be copied. It's
            // only relevant for segments that share a doc store with others,
            // because the DS might have been copied already, in which case we
            // just want to update the DS name of this SegmentInfo.
            string dsName = Lucene3xSegmentInfoFormat.GetDocStoreSegment(info.Info);
            Debug.Assert(dsName != null);
            string newDsName;
            if (dsNames.ContainsKey(dsName))
            {
                newDsName = dsNames[dsName];
            }
            else
            {
                dsNames[dsName] = segName;
                newDsName = segName;
            }

            // note: we don't really need this fis (it's copied), but we load it up
            // so we don't pass a null value to the si writer
            FieldInfos fis = SegmentReader.ReadFieldInfos(info);

            ISet<string> docStoreFiles3xOnly = Lucene3xCodec.GetDocStoreFiles(info.Info);

            IDictionary<string, string> attributes;
            // copy the attributes map, we might modify it below.
            // also we need to ensure it's read-write, since we will invoke the SIwriter (which might want to set something).
            if (info.Info.Attributes() == null)
            {
                attributes = new Dictionary<string, string>();
            }
            else
            {
                attributes = new Dictionary<string, string>(info.Info.Attributes());
            }
            if (docStoreFiles3xOnly != null)
            {
                // only violate the codec this way if it's preflex &
                // shares doc stores
                // change docStoreSegment to newDsName
                attributes[Lucene3xSegmentInfoFormat.DS_NAME_KEY] = newDsName;
            }

            //System.out.println("copy seg=" + info.info.name + " version=" + info.info.getVersion());
            // Same SI as before but we change directory, name and docStoreSegment:
            SegmentInfo newInfo = new SegmentInfo(directory, info.Info.Version, segName, info.Info.DocCount, info.Info.UseCompoundFile, info.Info.Codec, info.Info.Diagnostics, attributes);
            SegmentCommitInfo newInfoPerCommit = new SegmentCommitInfo(newInfo, info.DelCount, info.DelGen, info.FieldInfosGen);

            HashSet<string> segFiles = new HashSet<string>();

            // Build up new segment's file names.  Must do this
            // before writing SegmentInfo:
            foreach (string file in info.Files())
            {
                string newFileName;
                if (docStoreFiles3xOnly != null && docStoreFiles3xOnly.Contains(file))
                {
                    newFileName = newDsName + Lucene.Net.Index.IndexFileNames.StripSegmentName(file);
                }
                else
                {
                    newFileName = segName + Lucene.Net.Index.IndexFileNames.StripSegmentName(file);
                }
                segFiles.Add(newFileName);
            }
            newInfo.Files = segFiles;

            // We must rewrite the SI file because it references
            // segment name (its own name, if it's 3.x, and doc
            // store segment name):
            TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
            Codec currentCodec = newInfo.Codec;
            try
            {
                currentCodec.SegmentInfoFormat().SegmentInfoWriter.Write(trackingDir, newInfo, fis, context);
            }
            catch (System.NotSupportedException)
            {
                if (currentCodec is Lucene3xCodec)
                {
                    // OK: 3x codec cannot write a new SI file;
                    // SegmentInfos will write this on commit
                }
                else
                {
                    // rethrow without resetting the original stack trace
                    throw;
                }
            }

            ICollection<string> siFiles = trackingDir.CreatedFiles;

            bool success = false;
            try
            {
                // Copy the segment's files
                foreach (string file in info.Files())
                {
                    string newFileName;
                    if (docStoreFiles3xOnly != null && docStoreFiles3xOnly.Contains(file))
                    {
                        newFileName = newDsName + Lucene.Net.Index.IndexFileNames.StripSegmentName(file);
                        if (dsFilesCopied.Contains(newFileName))
                        {
                            continue;
                        }
                        dsFilesCopied.Add(newFileName);
                    }
                    else
                    {
                        newFileName = segName + Lucene.Net.Index.IndexFileNames.StripSegmentName(file);
                    }

                    if (siFiles.Contains(newFileName))
                    {
                        // We already rewrote this above
                        continue;
                    }

                    Debug.Assert(!SlowFileExists(directory, newFileName), "file \"" + newFileName + "\" already exists; siFiles=" + siFiles);
                    Debug.Assert(!copiedFiles.Contains(file), "file \"" + file + "\" is being copied more than once");
                    copiedFiles.Add(file);
                    info.Info.Dir.Copy(directory, file, newFileName, context);
                }
                success = true;
            }
            finally
            {
                if (!success)
                {
                    foreach (string file in newInfo.Files)
                    {
                        try
                        {
                            directory.DeleteFile(file);
                        }
                        catch (Exception)
                        {
                            // best-effort cleanup; ignore failures while deleting partial files
                        }
                    }
                }
            }

            return newInfoPerCommit;
        }
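A hedged sketch of the driver loop, modeled on the AddIndexes(Directory[]) path; every name below (incomingInfos, copied, NewSegmentName) is an assumption rather than the actual IndexWriter code:

IDictionary<string, string> dsNames = new Dictionary<string, string>();
ISet<string> dsFilesCopied = new HashSet<string>();
ISet<string> copiedFiles = new HashSet<string>();
IOContext context = IOContext.DEFAULT; // real code would size this via a MergeInfo
List<SegmentCommitInfo> copied = new List<SegmentCommitInfo>();
foreach (SegmentCommitInfo info in incomingInfos.Segments)
{
    // the shared dsNames/dsFilesCopied state ensures a doc store shared by
    // several 3.x segments is copied only once
    copied.Add(CopySegmentAsIs(info, NewSegmentName(), dsNames, dsFilesCopied, context, copiedFiles));
}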
Example #38
 /// <summary>
 /// Returns <c>true</c> if the given segment should be upgraded. The default implementation
 /// will return <c>!Constants.LUCENE_MAIN_VERSION.Equals(si.Info.Version, StringComparison.Ordinal)</c>,
 /// so all segments created with a different version number than this Lucene version will
 /// get upgraded.
 /// </summary>
 protected virtual bool ShouldUpgradeSegment(SegmentCommitInfo si)
 {
     return !Constants.LUCENE_MAIN_VERSION.Equals(si.Info.Version, StringComparison.Ordinal);
 }
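A hedged sketch of narrowing this hook in a subclass so only ancient 3.x segments get rewritten; the "3." prefix check is an assumption about how such segments report their version string:

protected override bool ShouldUpgradeSegment(SegmentCommitInfo si)
{
    // upgrade only segments written by a 3.x codec, leave newer ones alone
    return si.Info.Version != null
        && si.Info.Version.StartsWith("3.", StringComparison.Ordinal);
}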
Example #39
 public virtual void Drop(SegmentCommitInfo info)
 {
     lock (this)
     {
         ReadersAndUpdates rld;
         ReaderMap.TryGetValue(info, out rld);
         if (rld != null)
         {
             Debug.Assert(info == rld.Info);
             //        System.out.println("[" + Thread.currentThread().getName() + "] ReaderPool.drop: " + info);
             ReaderMap.Remove(info);
             rld.DropReaders();
         }
     }
 }
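A hedged sketch of a typical caller: dropping pooled readers for segments a finished merge has replaced. merge.Segments and readerPool are assumptions based on the surrounding IndexWriter code:

foreach (SegmentCommitInfo merged in merge.Segments)
{
    readerPool.Drop(merged); // releases the pooled reader state for each merged-away segment
}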
Example #40
 internal virtual bool SegThere(SegmentCommitInfo info, SegmentInfos infos)
 {
     foreach (SegmentCommitInfo si in infos.Segments)
     {
         if (si.Info.Name.Equals(info.Info.Name))
         {
             return true;
         }
     }
     return false;
 }
Example #41
            /// <summary>
            /// Obtain a ReadersAndUpdates instance from the
            /// readerPool.  If create is true, you must later call
            /// <seealso cref="#release(ReadersAndUpdates)"/>.
            /// </summary>
            public virtual ReadersAndUpdates Get(SegmentCommitInfo info, bool create)
            {
                lock (this)
                {
                    Debug.Assert(info.Info.Dir == OuterInstance.directory, "info.dir=" + info.Info.Dir + " vs " + OuterInstance.directory);

                    ReadersAndUpdates rld;
                    ReaderMap.TryGetValue(info, out rld);
                    if (rld == null)
                    {
                        if (!create)
                        {
                            return null;
                        }
                        rld = new ReadersAndUpdates(OuterInstance, info);
                        // Steal initial reference:
                        ReaderMap[info] = rld;
                    }
                    else
                    {
                        Debug.Assert(rld.Info == info, "Infos are not equal");//, "rld.info=" + rld.Info + " info=" + info + " isLive?=" + InfoIsLive(rld.Info) + " vs " + InfoIsLive(info));
                    }

                    if (create)
                    {
                        // Return ref to caller:
                        rld.IncRef();
                    }

                    Debug.Assert(NoDups());

                    return rld;
                }
            }
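A hedged sketch of the ref-count contract the summary above spells out: a Get with create == true must be paired with a Release. readerPool and the Release signature are assumptions based on the surrounding code:

ReadersAndUpdates rld = readerPool.Get(info, true);
try
{
    // ... read or mutate live docs / doc-values updates for this segment ...
}
finally
{
    readerPool.Release(rld); // returns the reference taken by Get(..., create: true)
}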
Example #42
 /// <summary>
 /// Appends the provided <seealso cref="SegmentCommitInfo"/>. </summary>
 public void Add(SegmentCommitInfo si)
 {
     segments.Add(si);
 }
Example #43
 /// <summary>
 /// Remove the provided <seealso cref="SegmentCommitInfo"/>.
 ///
 /// <p><b>WARNING</b>: O(N) cost
 /// </summary>
 public void Remove(SegmentCommitInfo si)
 {
     segments.Remove(si);
 }
Example #44
 /// <summary>
 /// Return true if the provided {@link
 ///  SegmentCommitInfo} is contained.
 ///
 /// <p><b>WARNING</b>: O(N) cost
 /// </summary>
 internal bool Contains(SegmentCommitInfo si)
 {
     return segments.Contains(si);
 }
Example #45
 /// <summary>
 /// Returns <c>true</c> if the given segment should be upgraded. The default implementation
 /// will return <c>!Constants.LUCENE_MAIN_VERSION.Equals(si.Info.Version)</c>,
 /// so all segments created with a different version number than this Lucene version will
 /// get upgraded.
 /// </summary>
 protected internal virtual bool ShouldUpgradeSegment(SegmentCommitInfo si)
 {
     return !Constants.LUCENE_MAIN_VERSION.Equals(si.Info.Version);
 }
Example #46
        internal virtual FlushedSegment Flush()
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(numDocsInRAM > 0);
                Debugging.Assert(deleteSlice.IsEmpty, "all deletes must be applied in prepareFlush");
            }
            segmentInfo.DocCount = numDocsInRAM;
            SegmentWriteState flushState = new SegmentWriteState(infoStream, directory, segmentInfo, fieldInfos.Finish(), indexWriterConfig.TermIndexInterval, pendingUpdates, new IOContext(new FlushInfo(numDocsInRAM, BytesUsed)));
            double startMBUsed = BytesUsed / 1024.0 / 1024.0;

            // Apply delete-by-docID now (delete-by-docID only
            // happens when an exception is hit processing that
            // doc, e.g. if the analyzer has some problem w/ the text):
            if (pendingUpdates.docIDs.Count > 0)
            {
                flushState.LiveDocs = codec.LiveDocsFormat.NewLiveDocs(numDocsInRAM);
                foreach (int delDocID in pendingUpdates.docIDs)
                {
                    flushState.LiveDocs.Clear(delDocID);
                }
                flushState.DelCountOnFlush = pendingUpdates.docIDs.Count;
                pendingUpdates.bytesUsed.AddAndGet(-pendingUpdates.docIDs.Count * BufferedUpdates.BYTES_PER_DEL_DOCID);
                pendingUpdates.docIDs.Clear();
            }

            if (aborting)
            {
                if (infoStream.IsEnabled("DWPT"))
                {
                    infoStream.Message("DWPT", "flush: skip because aborting is set");
                }
                return null;
            }

            if (infoStream.IsEnabled("DWPT"))
            {
                infoStream.Message("DWPT", "flush postings as segment " + flushState.SegmentInfo.Name + " numDocs=" + numDocsInRAM);
            }

            bool success = false;

            try
            {
                consumer.Flush(flushState);
                pendingUpdates.terms.Clear();
                segmentInfo.SetFiles(new JCG.HashSet<string>(directory.CreatedFiles));

                SegmentCommitInfo segmentInfoPerCommit = new SegmentCommitInfo(segmentInfo, 0, -1L, -1L);
                if (infoStream.IsEnabled("DWPT"))
                {
                    infoStream.Message("DWPT", "new segment has " + (flushState.LiveDocs == null ? 0 : (flushState.SegmentInfo.DocCount - flushState.DelCountOnFlush)) + " deleted docs");
                    infoStream.Message("DWPT", "new segment has " + (flushState.FieldInfos.HasVectors ? "vectors" : "no vectors") + "; " + (flushState.FieldInfos.HasNorms ? "norms" : "no norms") + "; " + (flushState.FieldInfos.HasDocValues ? "docValues" : "no docValues") + "; " + (flushState.FieldInfos.HasProx ? "prox" : "no prox") + "; " + (flushState.FieldInfos.HasFreq ? "freqs" : "no freqs"));
                    infoStream.Message("DWPT", "flushedFiles=" + string.Format(J2N.Text.StringFormatter.InvariantCulture, "{0}", segmentInfoPerCommit.GetFiles()));
                    infoStream.Message("DWPT", "flushed codec=" + codec);
                }

                BufferedUpdates segmentDeletes;
                if (pendingUpdates.queries.Count == 0 && pendingUpdates.numericUpdates.Count == 0 && pendingUpdates.binaryUpdates.Count == 0)
                {
                    pendingUpdates.Clear();
                    segmentDeletes = null;
                }
                else
                {
                    segmentDeletes = pendingUpdates;
                }

                if (infoStream.IsEnabled("DWPT"))
                {
                    double newSegmentSize = segmentInfoPerCommit.GetSizeInBytes() / 1024.0 / 1024.0;
                    infoStream.Message("DWPT", "flushed: segment=" + segmentInfo.Name + " ramUsed=" + startMBUsed.ToString(nf) + " MB" + " newFlushedSize(includes docstores)=" + newSegmentSize.ToString(nf) + " MB" + " docs/MB=" + (flushState.SegmentInfo.DocCount / newSegmentSize).ToString(nf));
                }

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(segmentInfo != null);
                }

                FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.FieldInfos, segmentDeletes, flushState.LiveDocs, flushState.DelCountOnFlush);
                SealFlushedSegment(fs);
                success = true;

                return fs;
            }
            finally
            {
                if (!success)
                {
                    Abort(filesToDelete);
                }
            }
        }