public override void SetUp() { base.SetUp(); Dir = NewDirectory(); DocHelper.SetupDoc(TestDoc); Info = DocHelper.WriteDoc(Random(), Dir, TestDoc); }
protected internal override long Size(SegmentCommitInfo info) { int hourOfDay = Calendar.Hour; if (hourOfDay < 6 || hourOfDay > 20 || Random.Next(23) == 5) // it's 5 o'clock somewhere { Drink.Drink_e[] values = Enum.GetValues(typeof(Drink.Drink_e)).Cast<Drink.Drink_e>().ToArray(); // pick a random drink during the day (Random.Next's upper bound is exclusive, so use values.Length to include every drink) Drink.Drink_e drink = values[Random.Next(values.Length)]; return (long)drink * info.SizeInBytes(); } return info.SizeInBytes(); }
internal FlushedSegment(SegmentCommitInfo segmentInfo, FieldInfos fieldInfos, BufferedUpdates segmentUpdates, MutableBits liveDocs, int delCount) { this.SegmentInfo = segmentInfo; this.FieldInfos = fieldInfos; this.SegmentUpdates = segmentUpdates != null && segmentUpdates.Any() ? new FrozenBufferedUpdates(segmentUpdates, true) : null; this.LiveDocs = liveDocs; this.DelCount = delCount; }
/// <summary> /// Returns true if a new segment (regardless of its origin) should use the /// compound file format. The default implementation returns <c>true</c> /// iff the size of the given mergedInfo is less than or equal to /// <see cref="MaxCFSSegmentSizeMB"/> and less than or equal to /// TotalIndexSize * <see cref="NoCFSRatio"/>; otherwise <c>false</c>. /// </summary> public virtual bool UseCompoundFile(SegmentInfos infos, SegmentCommitInfo mergedInfo) { if (NoCFSRatio == 0.0) { return false; } long mergedInfoSize = Size(mergedInfo); if (mergedInfoSize > MaxCFSSegmentSize) { return false; } if (NoCFSRatio >= 1.0) { return true; } long totalSize = 0; foreach (SegmentCommitInfo info in infos.Segments) { totalSize += Size(info); } return mergedInfoSize <= NoCFSRatio * totalSize; }
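// A hedged usage sketch (not from the original source) of the two knobs the default
// implementation reads; the property names (NoCFSRatio, MaxCFSSegmentSizeMB) follow
// Lucene 4.8's MergePolicy and should be verified against your Lucene.NET version.
MergePolicy mp = new TieredMergePolicy();
mp.NoCFSRatio = 0.1;            // CFS only when the merged segment is <= 10% of the total index size
mp.MaxCFSSegmentSizeMB = 512.0; // and never when it exceeds 512 MB
// Example: with a 1 GB index, a 50 MB merged segment gets CFS (50 <= 0.1 * 1024);
// a 200 MB one does not.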
/// <summary> /// Returns true if this single info is already fully merged (has no /// pending deletes, is in the same dir as the /// writer, and matches the current compound file setting). /// </summary> protected internal bool IsMerged(SegmentInfos infos, SegmentCommitInfo info) { IndexWriter w = Writer.Get(); Debug.Assert(w != null); bool hasDeletions = w.NumDeletedDocs(info) > 0; return !hasDeletions && !info.Info.HasSeparateNorms() && info.Info.Dir == w.Directory && UseCompoundFile(infos, info) == info.Info.UseCompoundFile; }
/// <summary> /// Returns the index of the provided <see cref="SegmentCommitInfo"/>. /// /// <b>WARNING</b>: O(N) cost /// </summary> internal int IndexOf(SegmentCommitInfo si) { return segments.IndexOf(si); }
public override bool UseCompoundFile(SegmentInfos segments, SegmentCommitInfo newSegment) { return useCompoundFile; }
public virtual ApplyDeletesResult ApplyDeletesAndUpdates(IndexWriter.ReaderPool readerPool, IList<SegmentCommitInfo> infos) { lock (this) { long t0 = Environment.TickCount; if (infos.Count == 0) { return new ApplyDeletesResult(false, nextGen++, null); } Debug.Assert(CheckDeleteStats()); if (!Any()) { if (infoStream.IsEnabled("BD")) { infoStream.Message("BD", "applyDeletes: no deletes; skipping"); } return new ApplyDeletesResult(false, nextGen++, null); } if (infoStream.IsEnabled("BD")) { infoStream.Message("BD", "applyDeletes: infos=" + Arrays.ToString(infos) + " packetCount=" + updates.Count); } long gen = nextGen++; List<SegmentCommitInfo> infos2 = new List<SegmentCommitInfo>(); infos2.AddRange(infos); infos2.Sort(sortSegInfoByDelGen); CoalescedUpdates coalescedUpdates = null; bool anyNewDeletes = false; int infosIDX = infos2.Count - 1; int delIDX = updates.Count - 1; IList<SegmentCommitInfo> allDeleted = null; while (infosIDX >= 0) { //System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX); FrozenBufferedUpdates packet = delIDX >= 0 ? updates[delIDX] : null; SegmentCommitInfo info = infos2[infosIDX]; long segGen = info.BufferedDeletesGen; if (packet != null && segGen < packet.DelGen) { // System.out.println(" coalesce"); if (coalescedUpdates == null) { coalescedUpdates = new CoalescedUpdates(); } if (!packet.isSegmentPrivate) { /* * Only coalesce if we are NOT on a segment private del packet: the segment private del packet * must only be applied to segments with the same delGen. Yet, if a segment is already deleted * from the SI since it had no more documents remaining after some del packets younger than * its segPrivate packet (higher delGen) have been applied, the segPrivate packet has not been * removed. */ coalescedUpdates.Update(packet); } delIDX--; } else if (packet != null && segGen == packet.DelGen) { Debug.Assert(packet.isSegmentPrivate, "Packet and Segments deletegen can only match on a segment private del packet gen=" + segGen); //System.out.println(" eq"); // Lock order: IW -> BD -> RP Debug.Assert(readerPool.InfoIsLive(info)); ReadersAndUpdates rld = readerPool.Get(info, true); SegmentReader reader = rld.GetReader(IOContext.READ); int delCount = 0; bool segAllDeletes; try { DocValuesFieldUpdates.Container dvUpdates = new DocValuesFieldUpdates.Container(); if (coalescedUpdates != null) { //System.out.println(" del coalesced"); delCount += (int)ApplyTermDeletes(coalescedUpdates.TermsIterable(), rld, reader); delCount += (int)ApplyQueryDeletes(coalescedUpdates.QueriesIterable(), rld, reader); ApplyDocValuesUpdates(coalescedUpdates.numericDVUpdates, rld, reader, dvUpdates); ApplyDocValuesUpdates(coalescedUpdates.binaryDVUpdates, rld, reader, dvUpdates); } //System.out.println(" del exact"); // Don't delete by Term here; DocumentsWriterPerThread // already did that on flush: delCount += (int)ApplyQueryDeletes(packet.GetQueriesEnumerable(), rld, reader); ApplyDocValuesUpdates(Arrays.AsList(packet.numericDVUpdates), rld, reader, dvUpdates); ApplyDocValuesUpdates(Arrays.AsList(packet.binaryDVUpdates), rld, reader, dvUpdates); if (dvUpdates.Any()) { rld.WriteFieldUpdates(info.Info.Dir, dvUpdates); } int fullDelCount = rld.Info.DelCount + rld.PendingDeleteCount; Debug.Assert(fullDelCount <= rld.Info.Info.DocCount); segAllDeletes = fullDelCount == rld.Info.Info.DocCount; } finally { rld.Release(reader); readerPool.Release(rld); } anyNewDeletes |= delCount > 0; if (segAllDeletes) { if (allDeleted == null) { allDeleted = new List<SegmentCommitInfo>(); }
allDeleted.Add(info); } if (infoStream.IsEnabled("BD")) { infoStream.Message("BD", "seg=" + info + " segGen=" + segGen + " segDeletes=[" + packet + "]; coalesced deletes=[" + (coalescedUpdates == null ? "null" : coalescedUpdates.ToString()) + "] newDelCount=" + delCount + (segAllDeletes ? " 100% deleted" : "")); } if (coalescedUpdates == null) { coalescedUpdates = new CoalescedUpdates(); } /* * Since we are on a segment private del packet we must not * update the coalescedDeletes here! We can simply advance to the * next packet and seginfo. */ delIDX--; infosIDX--; info.SetBufferedDeletesGen(gen); } else { //System.out.println(" gt"); if (coalescedUpdates != null) { // Lock order: IW -> BD -> RP Debug.Assert(readerPool.InfoIsLive(info)); ReadersAndUpdates rld = readerPool.Get(info, true); SegmentReader reader = rld.GetReader(IOContext.READ); int delCount = 0; bool segAllDeletes; try { delCount += (int)ApplyTermDeletes(coalescedUpdates.TermsIterable(), rld, reader); delCount += (int)ApplyQueryDeletes(coalescedUpdates.QueriesIterable(), rld, reader); DocValuesFieldUpdates.Container dvUpdates = new DocValuesFieldUpdates.Container(); ApplyDocValuesUpdates(coalescedUpdates.numericDVUpdates, rld, reader, dvUpdates); ApplyDocValuesUpdates(coalescedUpdates.binaryDVUpdates, rld, reader, dvUpdates); if (dvUpdates.Any()) { rld.WriteFieldUpdates(info.Info.Dir, dvUpdates); } int fullDelCount = rld.Info.DelCount + rld.PendingDeleteCount; Debug.Assert(fullDelCount <= rld.Info.Info.DocCount); segAllDeletes = fullDelCount == rld.Info.Info.DocCount; } finally { rld.Release(reader); readerPool.Release(rld); } anyNewDeletes |= delCount > 0; if (segAllDeletes) { if (allDeleted == null) { allDeleted = new List<SegmentCommitInfo>(); } allDeleted.Add(info); } if (infoStream.IsEnabled("BD")) { infoStream.Message("BD", "seg=" + info + " segGen=" + segGen + " coalesced deletes=[" + coalescedUpdates + "] newDelCount=" + delCount + (segAllDeletes ? " 100% deleted" : "")); } } info.SetBufferedDeletesGen(gen); infosIDX--; } } Debug.Assert(CheckDeleteStats()); if (infoStream.IsEnabled("BD")) { infoStream.Message("BD", "applyDeletes took " + (Environment.TickCount - t0) + " msec"); } // assert infos != segmentInfos || !any() : "infos=" + infos + " segmentInfos=" + segmentInfos + " any=" + any; return new ApplyDeletesResult(anyNewDeletes, gen, allDeleted); } }
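// Self-contained sketch (hypothetical generation numbers, not the real API) of the
// reverse two-pointer walk above: both lists are ordered by delGen. A packet newer
// than the current segment is coalesced for the older segments still to come; a
// packet whose delGen matches is the segment-private packet and is applied exactly
// once; otherwise only the already-coalesced packets apply to the segment.
long[] segGens = { 3, 5, 7 };  // infos2 after the ascending sort by BufferedDeletesGen
long[] delGens = { 4, 6, 7 };  // frozen packets, oldest to newest (7 = private packet of segGen 7)
int infosIDX = segGens.Length - 1, delIDX = delGens.Length - 1;
while (infosIDX >= 0)
{
    long segGen = segGens[infosIDX];
    if (delIDX >= 0 && segGen < delGens[delIDX])
    {
        delIDX--;             // packet newer than segment: coalesce it for older segments
    }
    else if (delIDX >= 0 && segGen == delGens[delIDX])
    {
        delIDX--; infosIDX--; // segment-private packet: apply exactly, advance both
    }
    else
    {
        infosIDX--;           // only coalesced packets apply to this segment
    }
}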
public override bool UseCompoundFile(SegmentInfos segments, SegmentCommitInfo newSegment) { return @base.UseCompoundFile(segments, newSegment); }
public ReadersAndUpdates(IndexWriter writer, SegmentCommitInfo info) { this.Info = info; this.writer = writer; liveDocsShared = true; }
public override void SetUp() { base.SetUp(); /* * for (int i = 0; i < testFields.Length; i++) { * fieldInfos.Add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]); * } */ Array.Sort(TestTerms); int tokenUpto = 0; Random random = new Random(1); // seed once; constructing new Random(1) inside the loop would produce the same value every iteration for (int i = 0; i < TestTerms.Length; i++) { Positions[i] = new int[TERM_FREQ]; // first position must be 0 for (int j = 0; j < TERM_FREQ; j++) { // positions are always sorted in increasing order Positions[i][j] = (int)(j * 10 + random.NextDouble() * 10); TestToken token = Tokens[tokenUpto++] = new TestToken(this); token.Text = TestTerms[i]; token.Pos = Positions[i][j]; token.StartOffset = j * 10; token.EndOffset = j * 10 + TestTerms[i].Length; } } Array.Sort(Tokens); Dir = NewDirectory(); IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MyAnalyzer(this)).SetMaxBufferedDocs(-1).SetMergePolicy(NewLogMergePolicy(false, 10)).SetUseCompoundFile(false)); Document doc = new Document(); for (int i = 0; i < TestFields.Length; i++) { FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); if (TestFieldsStorePos[i] && TestFieldsStoreOff[i]) { customType.StoreTermVectors = true; customType.StoreTermVectorPositions = true; customType.StoreTermVectorOffsets = true; } else if (TestFieldsStorePos[i] && !TestFieldsStoreOff[i]) { customType.StoreTermVectors = true; customType.StoreTermVectorPositions = true; } else if (!TestFieldsStorePos[i] && TestFieldsStoreOff[i]) { customType.StoreTermVectors = true; customType.StoreTermVectorOffsets = true; } else { customType.StoreTermVectors = true; } doc.Add(new Field(TestFields[i], "", customType)); } // Create 5 documents for testing; they all have the same terms for (int j = 0; j < 5; j++) { writer.AddDocument(doc); } writer.Commit(); Seg = writer.NewestSegment(); writer.Dispose(); FieldInfos = SegmentReader.ReadFieldInfos(Seg); }
public virtual void TestIndexAndMerge() { MemoryStream sw = new MemoryStream(); StreamWriter @out = new StreamWriter(sw); Directory directory = NewFSDirectory(IndexDir, null); MockDirectoryWrapper wrapper = directory as MockDirectoryWrapper; if (wrapper != null) { // We create unreferenced files (we don't even write // a segments file): wrapper.AssertNoUnrefencedFilesOnClose = false; } IndexWriter writer = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.CREATE).SetMaxBufferedDocs(-1).SetMergePolicy(NewLogMergePolicy(10))); SegmentCommitInfo si1 = IndexDoc(writer, "test.txt"); PrintSegment(@out, si1); SegmentCommitInfo si2 = IndexDoc(writer, "test2.txt"); PrintSegment(@out, si2); writer.Dispose(); SegmentCommitInfo siMerge = Merge(directory, si1, si2, "_merge", false); PrintSegment(@out, siMerge); SegmentCommitInfo siMerge2 = Merge(directory, si1, si2, "_merge2", false); PrintSegment(@out, siMerge2); SegmentCommitInfo siMerge3 = Merge(directory, siMerge, siMerge2, "_merge3", false); PrintSegment(@out, siMerge3); directory.Dispose(); @out.Dispose(); // MemoryStream.ToString() would return the type name, not the buffered text, so decode the bytes (requires System.Text): string multiFileOutput = Encoding.UTF8.GetString(sw.ToArray()); sw.Dispose(); //System.out.println(multiFileOutput); sw = new MemoryStream(); @out = new StreamWriter(sw); directory = NewFSDirectory(IndexDir, null); wrapper = directory as MockDirectoryWrapper; if (wrapper != null) { // We create unreferenced files (we don't even write // a segments file): wrapper.AssertNoUnrefencedFilesOnClose = false; } writer = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.CREATE).SetMaxBufferedDocs(-1).SetMergePolicy(NewLogMergePolicy(10))); si1 = IndexDoc(writer, "test.txt"); PrintSegment(@out, si1); si2 = IndexDoc(writer, "test2.txt"); PrintSegment(@out, si2); writer.Dispose(); siMerge = Merge(directory, si1, si2, "_merge", true); PrintSegment(@out, siMerge); siMerge2 = Merge(directory, si1, si2, "_merge2", true); PrintSegment(@out, siMerge2); siMerge3 = Merge(directory, siMerge, siMerge2, "_merge3", true); PrintSegment(@out, siMerge3); directory.Dispose(); @out.Dispose(); string singleFileOutput = Encoding.UTF8.GetString(sw.ToArray()); sw.Dispose(); Assert.AreEqual(multiFileOutput, singleFileOutput); }
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor) { fieldsReaderLocal = new DisposableThreadLocal<StoredFieldsReader>(() => (StoredFieldsReader)fieldsReaderOrig.Clone()); termVectorsLocal = new DisposableThreadLocal<TermVectorsReader>(() => (termVectorsReaderOrig == null) ? null : (TermVectorsReader)termVectorsReaderOrig.Clone()); if (termsIndexDivisor == 0) { throw new ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)"); } Codec codec = si.Info.Codec; Directory cfsDir; // confusing name: if CFS, it's the CFS dir; otherwise it's the segment's directory. bool success = false; try { if (si.Info.UseCompoundFile) { cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false); } else { cfsReader = null; cfsDir = dir; } FieldInfos fieldInfos = owner.FieldInfos; this.termsIndexDivisor = termsIndexDivisor; PostingsFormat format = codec.PostingsFormat; SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor); // Ask codec for its Fields fields = format.FieldsProducer(segmentReadState); if (Debugging.AssertsEnabled) { Debugging.Assert(fields != null); } // ask codec for its Norms: // TODO: since we don't write any norms file if there are no norms, // kinda shaky to assume the codec handles the case of no norms file at all gracefully?! if (fieldInfos.HasNorms) { normsProducer = codec.NormsFormat.NormsProducer(segmentReadState); if (Debugging.AssertsEnabled) { Debugging.Assert(normsProducer != null); } } else { normsProducer = null; } fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context); if (fieldInfos.HasVectors) // open term vector files only as needed { termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context); } else { termVectorsReaderOrig = null; } success = true; } finally { if (!success) { DecRef(); } } }
public override bool UseCompoundFile(SegmentInfos infos, SegmentCommitInfo mergedInfo) { // 80% of the time we create CFS: return random.Next(5) != 1; }
/// <summary> /// Returns true if the given segment should be upgraded. The default implementation /// returns <c>!Constants.LUCENE_MAIN_VERSION.Equals(si.Info.Version)</c>, /// so all segments created with a different version number than this Lucene version will /// get upgraded. /// </summary> protected internal virtual bool ShouldUpgradeSegment(SegmentCommitInfo si) { return !Constants.LUCENE_MAIN_VERSION.Equals(si.Info.Version); }
public override bool UseCompoundFile(SegmentInfos segments, SegmentCommitInfo newSegment) { return(@base.UseCompoundFile(segments, newSegment)); }
public ReadersAndUpdates(IndexWriter writer, SegmentCommitInfo info) { this.Info = info; this.Writer = writer; LiveDocsShared = true; }
/// <summary> /// Used by near real-time search </summary> internal static DirectoryReader Open(IndexWriter writer, SegmentInfos infos, bool applyAllDeletes) { // IndexWriter synchronizes externally before calling // us, which ensures infos will not change; so there's // no need to process segments in reverse order int numSegments = infos.Count; IList<SegmentReader> readers = new List<SegmentReader>(); Directory dir = writer.Directory; SegmentInfos segmentInfos = (SegmentInfos)infos.Clone(); int infosUpto = 0; bool success = false; try { for (int i = 0; i < numSegments; i++) { // NOTE: important that we use infos not // segmentInfos here, so that we are passing the // actual instance of SegmentInfoPerCommit in // IndexWriter's segmentInfos: SegmentCommitInfo info = infos.Info(i); Debug.Assert(info.Info.Dir == dir); ReadersAndUpdates rld = writer.readerPool.Get(info, true); try { SegmentReader reader = rld.GetReadOnlyClone(IOContext.READ); if (reader.NumDocs > 0 || writer.KeepFullyDeletedSegments) { // Steal the ref: readers.Add(reader); infosUpto++; } else { reader.DecRef(); segmentInfos.Remove(infosUpto); } } finally { writer.readerPool.Release(rld); } } writer.IncRefDeleter(segmentInfos); StandardDirectoryReader result = new StandardDirectoryReader(dir, readers.ToArray(), writer, segmentInfos, writer.Config.ReaderTermsIndexDivisor, applyAllDeletes); success = true; return result; } finally { if (!success) { foreach (SegmentReader r in readers) { try { r.DecRef(); } catch (Exception) { // ignore any exception that is thrown here to not mask any original // exception. } } } } }
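// Hedged usage sketch of the public near-real-time entry point that funnels into
// this internal Open(...); types come from Lucene.Net.Index / .Store / .Documents /
// .Analysis.Standard, and the index path is hypothetical.
using (var dir = FSDirectory.Open("/tmp/nrt-index"))
using (var writer = new IndexWriter(dir, new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48))))
{
    writer.AddDocument(new Document());
    // The NRT reader sees the uncommitted document; deletes are applied up front:
    using (DirectoryReader reader = DirectoryReader.Open(writer, applyAllDeletes: true))
    {
        int visible = reader.NumDocs;
    }
}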
/// <summary> /// Merges the provided indexes into this index. /// /// <para/> /// The provided IndexReaders are not closed. /// /// <para/> /// See <see cref="AddIndexes(Directory[])"/> for details on transactional semantics, temporary /// free space required in the Directory, and non-CFS segments on an Exception. /// /// <para/> /// <b>NOTE</b>: if this method hits an OutOfMemoryError you should immediately /// close the writer. See <a href="#OOME">above</a> for details. /// /// <para/> /// <b>NOTE:</b> empty segments are dropped by this method and not added to this /// index. /// /// <para/> /// <b>NOTE:</b> this method merges all given <see cref="IndexReader"/>s in one /// merge. If you intend to merge a large number of readers, it may be better /// to call this method multiple times, each time with a small set of readers. /// In principle, if you use a merge policy with a <c>mergeFactor</c> or /// <c>maxMergeAtOnce</c> parameter, you should pass that many readers in one /// call. Also, if the given readers are <see cref="DirectoryReader"/>s, they can be /// opened with <c>termIndexInterval=-1</c> to save RAM, since during merge /// the in-memory structure is not used. See /// <see cref="DirectoryReader.Open(Directory, int)"/>. /// /// <para/> /// <b>NOTE</b>: if you call <see cref="Dispose(bool)"/> with <c>false</c>, which /// aborts all running merges, then any thread still running this method might /// hit a <see cref="MergePolicy.MergeAbortedException"/>. /// </summary> /// <exception cref="CorruptIndexException"> /// if the index is corrupt </exception> /// <exception cref="IOException"> /// if there is a low-level IO error </exception> public virtual void AddIndexes(params IndexReader[] readers) { EnsureOpen(); int numDocs = 0; try { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "flush at addIndexes(IndexReader...)"); } Flush(false, true); string mergedName = NewSegmentName(); IList<AtomicReader> mergeReaders = new List<AtomicReader>(); foreach (IndexReader indexReader in readers) { numDocs += indexReader.NumDocs(); foreach (AtomicReaderContext ctx in indexReader.Leaves()) { mergeReaders.Add(ctx.AtomicReader); } } IOContext context = new IOContext(new MergeInfo(numDocs, -1, true, -1)); // TODO: somehow we should fix this merge so it's // abortable so that IW.close(false) is able to stop it TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory); SegmentInfo info = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergedName, -1, false, Codec, null); SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir, Config_Renamed.TermIndexInterval, MergeState.CheckAbort.NONE, GlobalFieldNumberMap, context, Config_Renamed.CheckIntegrityAtMerge); if (!merger.ShouldMerge()) { return; } MergeState mergeState; bool success = false; try { mergeState = merger.Merge(); // merge 'em success = true; } finally { if (!success) { lock (this) { Deleter.Refresh(info.Name); } } } SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, 0, -1L, -1L); info.Files = new HashSet<string>(trackingDir.CreatedFiles); trackingDir.CreatedFiles.Clear(); SetDiagnostics(info, SOURCE_ADDINDEXES_READERS); bool useCompoundFile; lock (this) // Guard segmentInfos { if (StopMerges) { Deleter.DeleteNewFiles(infoPerCommit.Files()); return; } EnsureOpen(); useCompoundFile = mergePolicy.UseCompoundFile(segmentInfos, infoPerCommit); } // Now create the compound file if needed if (useCompoundFile) { ICollection<string> filesToDelete = infoPerCommit.Files();
try { CreateCompoundFile(infoStream, directory, MergeState.CheckAbort.NONE, info, context); } finally { // delete new non cfs files directly: they were never // registered with IFD lock (this) { Deleter.DeleteNewFiles(filesToDelete); } } info.UseCompoundFile = true; } // Have codec write SegmentInfo. Must do this after // creating CFS so that 1) .si isn't slurped into CFS, // and 2) .si reflects useCompoundFile=true change // above: success = false; try { Codec.SegmentInfoFormat().SegmentInfoWriter.Write(trackingDir, info, mergeState.FieldInfos, context); success = true; } finally { if (!success) { lock (this) { Deleter.Refresh(info.Name); } } } info.AddFiles(trackingDir.CreatedFiles); // Register the new segment lock (this) { if (StopMerges) { Deleter.DeleteNewFiles(info.Files); return; } EnsureOpen(); segmentInfos.Add(infoPerCommit); Checkpoint(); } } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "addIndexes(IndexReader...)"); } }
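// Hedged usage sketch for AddIndexes(params IndexReader[]); "writer" is an already
// open IndexWriter and the source path is hypothetical. AddIndexes leaves the source
// reader open, so the using blocks below are what dispose it.
using (var srcDir = FSDirectory.Open("/tmp/src-index"))
using (var srcReader = DirectoryReader.Open(srcDir))
{
    writer.AddIndexes(srcReader); // every leaf of srcReader is merged in a single merge
    writer.Commit();
}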
/// <summary> /// Create new <see cref="SegmentReader"/> sharing core from a previous /// <see cref="SegmentReader"/> and loading new live docs from a new /// deletes file. Used by <see cref="DirectoryReader.OpenIfChanged(DirectoryReader)"/>. /// </summary> internal SegmentReader(SegmentCommitInfo si, SegmentReader sr) : this(si, sr, si.Info.Codec.LiveDocsFormat.ReadLiveDocs(si.Info.Dir, si, IOContext.READ_ONCE), si.Info.DocCount - si.DelCount) { }
/// <summary> /// Obtain the number of deleted docs for a pooled reader. /// If the reader isn't being pooled, the segmentInfo's /// delCount is returned. /// </summary> public virtual int NumDeletedDocs(SegmentCommitInfo info) { EnsureOpen(false); int delCount = info.DelCount; ReadersAndUpdates rld = readerPool.Get(info, false); if (rld != null) { delCount += rld.PendingDeleteCount; } return delCount; }
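// Illustrative only ("writer" and "info" are assumed to be in scope): for a pooled
// segment the live count can exceed the committed one, because pending (not yet
// written) deletes are added on top of info.DelCount.
int committedDeletes = info.DelCount;          // deletes already written to disk
int liveDeletes = writer.NumDeletedDocs(info); // committed + pending, when pooled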
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor) { fieldsReaderLocal = new AnonymousFieldsReaderLocal(this); termVectorsLocal = new AnonymousTermVectorsLocal(this); if (termsIndexDivisor == 0) { throw new System.ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)"); } Codec codec = si.Info.Codec; Directory cfsDir; // confusing name: if CFS, it's the CFS dir; otherwise it's the segment's directory. bool success = false; try { if (si.Info.UseCompoundFile) { cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false); } else { cfsReader = null; cfsDir = dir; } FieldInfos fieldInfos = owner.FieldInfos; this.termsIndexDivisor = termsIndexDivisor; PostingsFormat format = codec.PostingsFormat; SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor); // Ask codec for its Fields fields = format.FieldsProducer(segmentReadState); Debug.Assert(fields != null); // ask codec for its Norms: // TODO: since we don't write any norms file if there are no norms, // kinda shaky to assume the codec handles the case of no norms file at all gracefully?! if (fieldInfos.HasNorms) { normsProducer = codec.NormsFormat.NormsProducer(segmentReadState); Debug.Assert(normsProducer != null); } else { normsProducer = null; } // LUCENENET TODO: EXCEPTIONS Not sure why this catch block is swallowing AccessViolationException, // because it didn't exist in Lucene. Is it really needed? AVE is for protected memory...could // this be needed because we are using unchecked?? #if !NETSTANDARD try { #endif fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context); #if !NETSTANDARD } #pragma warning disable 168 catch (System.AccessViolationException ave) #pragma warning restore 168 { } #endif if (fieldInfos.HasVectors) // open term vector files only as needed { termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context); } else { termVectorsReaderOrig = null; } success = true; } finally { if (!success) { DecRef(); } } }
internal virtual void SealFlushedSegment(FlushedSegment flushedSegment) { if (Debugging.AssertsEnabled) { Debugging.Assert(flushedSegment != null); } SegmentCommitInfo newSegment = flushedSegment.segmentInfo; IndexWriter.SetDiagnostics(newSegment.Info, IndexWriter.SOURCE_FLUSH); IOContext context = new IOContext(new FlushInfo(newSegment.Info.DocCount, newSegment.GetSizeInBytes())); bool success = false; try { if (indexWriterConfig.UseCompoundFile) { filesToDelete.UnionWith(IndexWriter.CreateCompoundFile(infoStream, directory, CheckAbort.NONE, newSegment.Info, context)); newSegment.Info.UseCompoundFile = true; } // Have codec write SegmentInfo. Must do this after // creating CFS so that 1) .si isn't slurped into CFS, // and 2) .si reflects useCompoundFile=true change // above: codec.SegmentInfoFormat.SegmentInfoWriter.Write(directory, newSegment.Info, flushedSegment.fieldInfos, context); // TODO: ideally we would freeze newSegment here!! // because any changes after writing the .si will be // lost... // Must write deleted docs after the CFS so we don't // slurp the del file into CFS: if (flushedSegment.liveDocs != null) { int delCount = flushedSegment.delCount; if (Debugging.AssertsEnabled) { Debugging.Assert(delCount > 0); } if (infoStream.IsEnabled("DWPT")) { infoStream.Message("DWPT", "flush: write " + delCount + " deletes gen=" + flushedSegment.segmentInfo.DelGen); } // TODO: we should prune the segment if it's 100% // deleted... but merge will also catch it. // TODO: in the NRT case it'd be better to hand // this del vector over to the // shortly-to-be-opened SegmentReader and let it // carry the changes; there's no reason to use // filesystem as intermediary here. SegmentCommitInfo info = flushedSegment.segmentInfo; Codec codec = info.Info.Codec; codec.LiveDocsFormat.WriteLiveDocs(flushedSegment.liveDocs, directory, info, delCount, context); newSegment.DelCount = delCount; newSegment.AdvanceDelGen(); } success = true; } finally { if (!success) { if (infoStream.IsEnabled("DWPT")) { infoStream.Message("DWPT", "hit exception creating compound file for newly flushed segment " + newSegment.Info.Name); } } } }
protected override long Size(SegmentCommitInfo info) { return long.MaxValue; }
/// <summary> /// Read a particular segmentFileName. Note that this may /// throw an IOException if a commit is in process. /// </summary> /// <param name="directory"> -- directory containing the segments file </param> /// <param name="segmentFileName"> -- segment file to load </param> /// <exception cref="CorruptIndexException"> if the index is corrupt </exception> /// <exception cref="IOException"> if there is a low-level IO error </exception> public void Read(Directory directory, string segmentFileName) { var success = false; // Clear any previous segments: this.Clear(); _generation = GenerationFromSegmentsFileName(segmentFileName); _lastGeneration = _generation; var input = directory.OpenChecksumInput(segmentFileName, IOContext.READ); try { int format = input.ReadInt(); int actualFormat; if (format == CodecUtil.CODEC_MAGIC) { // 4.0+ actualFormat = CodecUtil.CheckHeaderNoMagic(input, "segments", VERSION_40, VERSION_48); Version = input.ReadLong(); Counter = input.ReadInt(); int numSegments = input.ReadInt(); if (numSegments < 0) { throw new CorruptIndexException("invalid segment count: " + numSegments + " (resource: " + input + ")"); } for (var seg = 0; seg < numSegments; seg++) { var segName = input.ReadString(); var codec = Codec.ForName(input.ReadString()); //System.out.println("SIS.read seg=" + seg + " codec=" + codec); var info = codec.SegmentInfoFormat().SegmentInfoReader.Read(directory, segName, IOContext.READ); info.Codec = codec; long delGen = input.ReadLong(); int delCount = input.ReadInt(); if (delCount < 0 || delCount > info.DocCount) { throw new CorruptIndexException("invalid deletion count: " + delCount + " vs docCount=" + info.DocCount + " (resource: " + input + ")"); } long fieldInfosGen = -1; if (actualFormat >= VERSION_46) { fieldInfosGen = input.ReadLong(); } var siPerCommit = new SegmentCommitInfo(info, delCount, delGen, fieldInfosGen); if (actualFormat >= VERSION_46) { int numGensUpdatesFiles = input.ReadInt(); IDictionary<long, ISet<string>> genUpdatesFiles; if (numGensUpdatesFiles == 0) { genUpdatesFiles = CollectionsHelper.EmptyMap<long, ISet<string>>(); } else { genUpdatesFiles = new Dictionary<long, ISet<string>>(numGensUpdatesFiles); for (int i = 0; i < numGensUpdatesFiles; i++) { genUpdatesFiles[input.ReadLong()] = input.ReadStringSet(); } } siPerCommit.GenUpdatesFiles = genUpdatesFiles; } Add(siPerCommit); } _userData = input.ReadStringStringMap(); } else { actualFormat = -1; Lucene3xSegmentInfoReader.ReadLegacyInfos(this, directory, input, format); Codec codec = Codec.ForName("Lucene3x"); foreach (SegmentCommitInfo info in segments) { info.Info.Codec = codec; } } if (actualFormat >= VERSION_48) { CodecUtil.CheckFooter(input); } else { long checksumNow = input.Checksum; long checksumThen = input.ReadLong(); if (checksumNow != checksumThen) { throw new CorruptIndexException("checksum mismatch in segments file (resource: " + input + ")"); } CodecUtil.CheckEOF(input); } success = true; } finally { if (!success) { // Clear any segment infos we had loaded so we // have a clean slate on retry: this.Clear(); IOUtils.CloseWhileHandlingException(input); } else { input.Dispose(); } } }
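// Hedged usage sketch: loading the most recent commit point. The static helper name
// follows the Java original (getLastCommitSegmentsFileName); treat it as an
// assumption and verify it against your Lucene.NET version.
var sis = new SegmentInfos();
string segmentsFile = SegmentInfos.GetLastCommitSegmentsFileName(directory);
sis.Read(directory, segmentsFile);
// sis now holds one SegmentCommitInfo per segment in that commit.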
public override bool UseCompoundFile(SegmentInfos segments, SegmentCommitInfo newSegment) { return(useCompoundFile); }
protected internal override long Size(SegmentCommitInfo info) { return long.MaxValue; }
public virtual object Clone() { SegmentCommitInfo other = new SegmentCommitInfo(Info, DelCount_Renamed, DelGen_Renamed, FieldInfosGen_Renamed); // Not clear that we need to carry over nextWriteDelGen // (i.e. do we ever clone after a failed write and // before the next successful write?), but just do it to // be safe: other.NextWriteDelGen = NextWriteDelGen; other.NextWriteFieldInfosGen = NextWriteFieldInfosGen; // deep clone foreach (KeyValuePair<long, ISet<string>> e in GenUpdatesFiles_Renamed) { other.GenUpdatesFiles_Renamed[e.Key] = new HashSet<string>(e.Value); } return other; }
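// Self-contained illustration (hypothetical file names; requires System.Linq and
// System.Collections.Generic) of why Clone() deep-copies each ISet<string>: the
// per-generation file sets are mutable, so a shallow copy would let a mutation of
// the clone leak into the original.
var original = new Dictionary<long, ISet<string>> { [1L] = new HashSet<string> { "_0_1.del" } };
var deep = original.ToDictionary(e => e.Key, e => (ISet<string>)new HashSet<string>(e.Value));
deep[1L].Add("_0_2.del"); // original[1L] still contains only "_0_1.del"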
/// <summary> /// Returns the byte size of the provided <see cref="SegmentCommitInfo"/>, /// pro-rated by the percentage of non-deleted documents. /// </summary> protected internal virtual long Size(SegmentCommitInfo info) { long byteSize = info.SizeInBytes(); int delCount = Writer.Get().NumDeletedDocs(info); double delRatio = (info.Info.DocCount <= 0 ? 0.0f : ((float)delCount / (float)info.Info.DocCount)); Debug.Assert(delRatio <= 1.0); return (info.Info.DocCount <= 0 ? byteSize : (long)(byteSize * (1.0 - delRatio))); }
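// Worked example with hypothetical numbers: a 100 MB segment in which a quarter of
// the documents are deleted counts as 75 MB for merge selection.
long byteSize = 100L * 1024 * 1024;                   // reported segment size
int docCount = 1000, delCount = 250;
double delRatio = (double)delCount / docCount;        // 0.25
long proRated = (long)(byteSize * (1.0 - delRatio));  // 78,643,200 bytes (75 MB)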
private SegmentCommitInfo Merge(Directory dir, SegmentCommitInfo si1, SegmentCommitInfo si2, string merged, bool useCompoundFile) { IOContext context = NewIOContext(Random()); SegmentReader r1 = new SegmentReader(si1, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context); SegmentReader r2 = new SegmentReader(si2, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context); Codec codec = Codec.Default; TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.Info.Dir); SegmentInfo si = new SegmentInfo(si1.Info.Dir, Constants.LUCENE_MAIN_VERSION, merged, -1, false, codec, null); SegmentMerger merger = new SegmentMerger(Arrays.AsList<AtomicReader>(r1, r2), si, InfoStream.Default, trackingDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), context, true); MergeState mergeState = merger.Merge(); r1.Dispose(); r2.Dispose(); SegmentInfo info = new SegmentInfo(si1.Info.Dir, Constants.LUCENE_MAIN_VERSION, merged, si1.Info.DocCount + si2.Info.DocCount, false, codec, null); info.Files = new HashSet<string>(trackingDir.CreatedFiles); if (useCompoundFile) { ICollection<string> filesToDelete = IndexWriter.CreateCompoundFile(InfoStream.Default, dir, MergeState.CheckAbort.NONE, info, NewIOContext(Random())); info.UseCompoundFile = true; foreach (String fileToDelete in filesToDelete) { si1.Info.Dir.DeleteFile(fileToDelete); } } return new SegmentCommitInfo(info, 0, -1L, -1L); }
/// <summary> /// Flush all pending docs to a new segment </summary> internal virtual FlushedSegment Flush() { Debug.Assert(numDocsInRAM > 0); Debug.Assert(DeleteSlice.Empty, "all deletes must be applied in prepareFlush"); SegmentInfo_Renamed.DocCount = numDocsInRAM; SegmentWriteState flushState = new SegmentWriteState(InfoStream, Directory, SegmentInfo_Renamed, FieldInfos.Finish(), IndexWriterConfig.TermIndexInterval, PendingUpdates, new IOContext(new FlushInfo(numDocsInRAM, BytesUsed()))); double startMBUsed = BytesUsed() / 1024.0 / 1024.0; // Apply delete-by-docID now (delete-byDocID only // happens when an exception is hit processing that // doc, eg if analyzer has some problem w/ the text): if (PendingUpdates.DocIDs.Count > 0) { flushState.LiveDocs = Codec.LiveDocsFormat().NewLiveDocs(numDocsInRAM); foreach (int delDocID in PendingUpdates.DocIDs) { flushState.LiveDocs.Clear(delDocID); } flushState.DelCountOnFlush = PendingUpdates.DocIDs.Count; PendingUpdates.BytesUsed.AddAndGet(-PendingUpdates.DocIDs.Count * BufferedUpdates.BYTES_PER_DEL_DOCID); PendingUpdates.DocIDs.Clear(); } if (Aborting) { if (InfoStream.IsEnabled("DWPT")) { InfoStream.Message("DWPT", "flush: skip because aborting is set"); } return null; } if (InfoStream.IsEnabled("DWPT")) { InfoStream.Message("DWPT", "flush postings as segment " + flushState.SegmentInfo.Name + " numDocs=" + numDocsInRAM); } bool success = false; try { Consumer.Flush(flushState); PendingUpdates.Terms.Clear(); SegmentInfo_Renamed.Files = new HashSet<string>(Directory.CreatedFiles); SegmentCommitInfo segmentInfoPerCommit = new SegmentCommitInfo(SegmentInfo_Renamed, 0, -1L, -1L); if (InfoStream.IsEnabled("DWPT")) { InfoStream.Message("DWPT", "new segment has " + (flushState.LiveDocs == null ? 0 : (flushState.SegmentInfo.DocCount - flushState.DelCountOnFlush)) + " deleted docs"); InfoStream.Message("DWPT", "new segment has " + (flushState.FieldInfos.HasVectors() ? "vectors" : "no vectors") + "; " + (flushState.FieldInfos.HasNorms() ? "norms" : "no norms") + "; " + (flushState.FieldInfos.HasDocValues() ? "docValues" : "no docValues") + "; " + (flushState.FieldInfos.HasProx() ? "prox" : "no prox") + "; " + (flushState.FieldInfos.HasFreq() ? "freqs" : "no freqs")); InfoStream.Message("DWPT", "flushedFiles=" + segmentInfoPerCommit.Files()); InfoStream.Message("DWPT", "flushed codec=" + Codec); } BufferedUpdates segmentDeletes; if (PendingUpdates.Queries.Count == 0 && PendingUpdates.NumericUpdates.Count == 0 && PendingUpdates.BinaryUpdates.Count == 0) { PendingUpdates.Clear(); segmentDeletes = null; } else { segmentDeletes = PendingUpdates; } if (InfoStream.IsEnabled("DWPT")) { double newSegmentSize = segmentInfoPerCommit.SizeInBytes() / 1024.0 / 1024.0; InfoStream.Message("DWPT", "flushed: segment=" + SegmentInfo_Renamed.Name + " ramUsed=" + startMBUsed.ToString(Nf) + " MB" + " newFlushedSize(includes docstores)=" + newSegmentSize.ToString(Nf) + " MB" + " docs/MB=" + (flushState.SegmentInfo.DocCount / newSegmentSize).ToString(Nf)); } Debug.Assert(SegmentInfo_Renamed != null); FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.FieldInfos, segmentDeletes, flushState.LiveDocs, flushState.DelCountOnFlush); SealFlushedSegment(fs); success = true; return fs; } finally { if (!success) { Abort(FilesToDelete); } } }
private void PrintSegment(StreamWriter @out, SegmentCommitInfo si) { SegmentReader reader = new SegmentReader(si, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random())); for (int i = 0; i < reader.NumDocs; i++) { @out.WriteLine(reader.Document(i)); } Fields fields = reader.Fields; foreach (string field in fields) { Terms terms = fields.Terms(field); Assert.IsNotNull(terms); TermsEnum tis = terms.Iterator(null); while (tis.Next() != null) { @out.Write(" term=" + field + ":" + tis.Term()); @out.WriteLine(" DF=" + tis.DocFreq()); DocsAndPositionsEnum positions = tis.DocsAndPositions(reader.LiveDocs, null); while (positions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { @out.Write(" doc=" + positions.DocID()); @out.Write(" TF=" + positions.Freq()); @out.Write(" pos="); @out.Write(positions.NextPosition()); for (int j = 1; j < positions.Freq(); j++) { @out.Write("," + positions.NextPosition()); } @out.WriteLine(""); } } } reader.Dispose(); }
public override bool UseCompoundFile(SegmentInfos infos, SegmentCommitInfo mergedInfo) { // 80% of the time we create CFS: return Random.Next(5) != 1; }
// used only by asserts public virtual bool InfoIsLive(SegmentCommitInfo info) { lock (this) { int idx = OuterInstance.segmentInfos.IndexOf(info); Debug.Assert(idx != -1, "info=" + info + " isn't live"); Debug.Assert(OuterInstance.segmentInfos.Info(idx) == info, "info=" + info + " doesn't match live info in segmentInfos"); return true; } }
/// <summary> /// Atomically adds the segment private delete packet and publishes the flushed /// segment's SegmentInfo to the index writer. /// </summary> internal virtual void PublishFlushedSegment(SegmentCommitInfo newSegment, FrozenBufferedUpdates packet, FrozenBufferedUpdates globalPacket) { try { lock (this) { // Lock order IW -> BDS lock (BufferedUpdatesStream) { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "publishFlushedSegment"); } if (globalPacket != null && globalPacket.Any()) { BufferedUpdatesStream.Push(globalPacket); } // Publishing the segment must be synced on IW -> BDS to make sure // that no merge prunes away the seg. private delete packet long nextGen; if (packet != null && packet.Any()) { nextGen = BufferedUpdatesStream.Push(packet); } else { // Since we don't have a delete packet to apply we can get a new // generation right away nextGen = BufferedUpdatesStream.NextGen; } if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "publish sets newSegment delGen=" + nextGen + " seg=" + SegString(newSegment)); } newSegment.BufferedDeletesGen = nextGen; segmentInfos.Add(newSegment); Checkpoint(); } } } finally { flushCount.IncrementAndGet(); DoAfterFlush(); } }
/// <summary> /// Returns a string description of the specified /// segment, for debugging. /// /// @lucene.internal /// </summary> public virtual string SegString(SegmentCommitInfo info) { lock (this) { return info.ToString(info.Info.Dir, NumDeletedDocs(info) - info.DelCount); } }
/// <summary> /// Copies the segment files as-is into the IndexWriter's directory. </summary> private SegmentCommitInfo CopySegmentAsIs(SegmentCommitInfo info, string segName, IDictionary<string, string> dsNames, ISet<string> dsFilesCopied, IOContext context, ISet<string> copiedFiles) { // Determine if the doc store of this segment needs to be copied. It's // only relevant for segments that share doc store with others, // because the DS might have been copied already, in which case we // just want to update the DS name of this SegmentInfo. string dsName = Lucene3xSegmentInfoFormat.GetDocStoreSegment(info.Info); Debug.Assert(dsName != null); string newDsName; if (dsNames.ContainsKey(dsName)) { newDsName = dsNames[dsName]; } else { dsNames[dsName] = segName; newDsName = segName; } // note: we don't really need this fis (it's copied), but we load it up // so we don't pass a null value to the si writer FieldInfos fis = SegmentReader.ReadFieldInfos(info); ISet<string> docStoreFiles3xOnly = Lucene3xCodec.GetDocStoreFiles(info.Info); IDictionary<string, string> attributes; // copy the attributes map, we might modify it below. // also we need to ensure it's read-write, since we will invoke the SIwriter (which might want to set something). if (info.Info.Attributes() == null) { attributes = new Dictionary<string, string>(); } else { attributes = new Dictionary<string, string>(info.Info.Attributes()); } if (docStoreFiles3xOnly != null) { // only violate the codec this way if it's preflex & // shares doc stores // change docStoreSegment to newDsName attributes[Lucene3xSegmentInfoFormat.DS_NAME_KEY] = newDsName; } //System.out.println("copy seg=" + info.info.name + " version=" + info.info.getVersion()); // Same SI as before but we change directory, name and docStoreSegment: SegmentInfo newInfo = new SegmentInfo(directory, info.Info.Version, segName, info.Info.DocCount, info.Info.UseCompoundFile, info.Info.Codec, info.Info.Diagnostics, attributes); SegmentCommitInfo newInfoPerCommit = new SegmentCommitInfo(newInfo, info.DelCount, info.DelGen, info.FieldInfosGen); HashSet<string> segFiles = new HashSet<string>(); // Build up new segment's file names.
// Must do this before writing SegmentInfo: foreach (string file in info.Files()) { string newFileName; if (docStoreFiles3xOnly != null && docStoreFiles3xOnly.Contains(file)) { newFileName = newDsName + Lucene.Net.Index.IndexFileNames.StripSegmentName(file); } else { newFileName = segName + Lucene.Net.Index.IndexFileNames.StripSegmentName(file); } segFiles.Add(newFileName); } newInfo.Files = segFiles; // We must rewrite the SI file because it references // segment name (its own name, if it's 3.x, and doc // store segment name): TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory); Codec currentCodec = newInfo.Codec; try { currentCodec.SegmentInfoFormat().SegmentInfoWriter.Write(trackingDir, newInfo, fis, context); } catch (System.NotSupportedException) { if (currentCodec is Lucene3xCodec) { // OK: 3x codec cannot write a new SI file; // SegmentInfos will write this on commit } else { throw; // rethrow without resetting the stack trace } } ICollection<string> siFiles = trackingDir.CreatedFiles; bool success = false; try { // Copy the segment's files foreach (string file in info.Files()) { string newFileName; if (docStoreFiles3xOnly != null && docStoreFiles3xOnly.Contains(file)) { newFileName = newDsName + Lucene.Net.Index.IndexFileNames.StripSegmentName(file); if (dsFilesCopied.Contains(newFileName)) { continue; } dsFilesCopied.Add(newFileName); } else { newFileName = segName + Lucene.Net.Index.IndexFileNames.StripSegmentName(file); } if (siFiles.Contains(newFileName)) { // We already rewrote this above continue; } Debug.Assert(!SlowFileExists(directory, newFileName), "file \"" + newFileName + "\" already exists; siFiles=" + siFiles); Debug.Assert(!copiedFiles.Contains(file), "file \"" + file + "\" is being copied more than once"); copiedFiles.Add(file); info.Info.Dir.Copy(directory, file, newFileName, context); } success = true; } finally { if (!success) { foreach (string file in newInfo.Files) { try { directory.DeleteFile(file); } catch (Exception) { } } } } return newInfoPerCommit; }
/// <summary> /// Returns <c>true</c> if the given segment should be upgraded. The default implementation /// will return <c>!Constants.LUCENE_MAIN_VERSION.Equals(si.Info.Version, StringComparison.Ordinal)</c>, /// so all segments created with a different version number than this Lucene version will /// get upgraded. /// </summary> protected virtual bool ShouldUpgradeSegment(SegmentCommitInfo si) { return !Constants.LUCENE_MAIN_VERSION.Equals(si.Info.Version, StringComparison.Ordinal); }
public virtual void Drop(SegmentCommitInfo info) { lock (this) { ReadersAndUpdates rld; ReaderMap.TryGetValue(info, out rld); if (rld != null) { Debug.Assert(info == rld.Info); // System.out.println("[" + Thread.currentThread().getName() + "] ReaderPool.drop: " + info); ReaderMap.Remove(info); rld.DropReaders(); } } }
internal virtual bool SegThere(SegmentCommitInfo info, SegmentInfos infos) { foreach (SegmentCommitInfo si in infos.Segments) { if (si.Info.Name.Equals(info.Info.Name)) { return true; } } return false; }
/// <summary> /// Obtain a ReadersAndUpdates instance from the /// readerPool. If create is true, you must later call /// <see cref="Release(ReadersAndUpdates)"/>. /// </summary> public virtual ReadersAndUpdates Get(SegmentCommitInfo info, bool create) { lock (this) { Debug.Assert(info.Info.Dir == OuterInstance.directory, "info.dir=" + info.Info.Dir + " vs " + OuterInstance.directory); ReadersAndUpdates rld; ReaderMap.TryGetValue(info, out rld); if (rld == null) { if (!create) { return null; } rld = new ReadersAndUpdates(OuterInstance, info); // Steal initial reference: ReaderMap[info] = rld; } else { Debug.Assert(rld.Info == info, "Infos are not equal");//, "rld.info=" + rld.Info + " info=" + info + " isLive?=" + InfoIsLive(rld.Info) + " vs " + InfoIsLive(info)); } if (create) { // Return ref to caller: rld.IncRef(); } Debug.Assert(NoDups()); return rld; } }
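// Hedged sketch of the contract documented above (internal API, illustrative only):
// a Get with create == true takes a reference that must be returned via Release.
ReadersAndUpdates rld = readerPool.Get(info, true); // takes a ref
try
{
    SegmentReader reader = rld.GetReader(IOContext.READ);
    try { /* use the reader */ }
    finally { rld.Release(reader); }
}
finally
{
    readerPool.Release(rld); // gives the ref back
}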
/// <summary> /// Appends the provided <see cref="SegmentCommitInfo"/>. </summary> public void Add(SegmentCommitInfo si) { segments.Add(si); }
/// <summary> /// Removes the provided <see cref="SegmentCommitInfo"/>. /// /// <b>WARNING</b>: O(N) cost /// </summary> public void Remove(SegmentCommitInfo si) { segments.Remove(si); }
/// <summary> /// Returns true if the provided <see cref="SegmentCommitInfo"/> is contained. /// /// <b>WARNING</b>: O(N) cost /// </summary> internal bool Contains(SegmentCommitInfo si) { return segments.Contains(si); }
/// <summary> /// Returns true if the given segment should be upgraded. The default implementation /// returns <c>!Constants.LUCENE_MAIN_VERSION.Equals(si.Info.Version)</c>, /// so all segments created with a different version number than this Lucene version will /// get upgraded. /// </summary> protected internal virtual bool ShouldUpgradeSegment(SegmentCommitInfo si) { return !Constants.LUCENE_MAIN_VERSION.Equals(si.Info.Version); }
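// Hedged sketch of overriding the hook above to force-upgrade every segment; the
// UpgradeIndexMergePolicy base type and its wrapped-policy constructor follow
// Lucene 4.8, so adjust the override signature to whichever variant of
// ShouldUpgradeSegment your version declares.
internal sealed class UpgradeEverythingMergePolicy : UpgradeIndexMergePolicy
{
    public UpgradeEverythingMergePolicy(MergePolicy @base) : base(@base) { }

    protected internal override bool ShouldUpgradeSegment(SegmentCommitInfo si)
    {
        return true; // skip the version check; upgrade all segments
    }
}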
internal virtual FlushedSegment Flush() { if (Debugging.AssertsEnabled) { Debugging.Assert(numDocsInRAM > 0); Debugging.Assert(deleteSlice.IsEmpty, "all deletes must be applied in prepareFlush"); } segmentInfo.DocCount = numDocsInRAM; SegmentWriteState flushState = new SegmentWriteState(infoStream, directory, segmentInfo, fieldInfos.Finish(), indexWriterConfig.TermIndexInterval, pendingUpdates, new IOContext(new FlushInfo(numDocsInRAM, BytesUsed))); double startMBUsed = BytesUsed / 1024.0 / 1024.0; // Apply delete-by-docID now (delete-byDocID only // happens when an exception is hit processing that // doc, eg if analyzer has some problem w/ the text): if (pendingUpdates.docIDs.Count > 0) { flushState.LiveDocs = codec.LiveDocsFormat.NewLiveDocs(numDocsInRAM); foreach (int delDocID in pendingUpdates.docIDs) { flushState.LiveDocs.Clear(delDocID); } flushState.DelCountOnFlush = pendingUpdates.docIDs.Count; pendingUpdates.bytesUsed.AddAndGet(-pendingUpdates.docIDs.Count * BufferedUpdates.BYTES_PER_DEL_DOCID); pendingUpdates.docIDs.Clear(); } if (aborting) { if (infoStream.IsEnabled("DWPT")) { infoStream.Message("DWPT", "flush: skip because aborting is set"); } return(null); } if (infoStream.IsEnabled("DWPT")) { infoStream.Message("DWPT", "flush postings as segment " + flushState.SegmentInfo.Name + " numDocs=" + numDocsInRAM); } bool success = false; try { consumer.Flush(flushState); pendingUpdates.terms.Clear(); segmentInfo.SetFiles(new JCG.HashSet <string>(directory.CreatedFiles)); SegmentCommitInfo segmentInfoPerCommit = new SegmentCommitInfo(segmentInfo, 0, -1L, -1L); if (infoStream.IsEnabled("DWPT")) { infoStream.Message("DWPT", "new segment has " + (flushState.LiveDocs == null ? 0 : (flushState.SegmentInfo.DocCount - flushState.DelCountOnFlush)) + " deleted docs"); infoStream.Message("DWPT", "new segment has " + (flushState.FieldInfos.HasVectors ? "vectors" : "no vectors") + "; " + (flushState.FieldInfos.HasNorms ? "norms" : "no norms") + "; " + (flushState.FieldInfos.HasDocValues ? "docValues" : "no docValues") + "; " + (flushState.FieldInfos.HasProx ? "prox" : "no prox") + "; " + (flushState.FieldInfos.HasFreq ? "freqs" : "no freqs")); infoStream.Message("DWPT", "flushedFiles=" + string.Format(J2N.Text.StringFormatter.InvariantCulture, "{0}", segmentInfoPerCommit.GetFiles())); infoStream.Message("DWPT", "flushed codec=" + codec); } BufferedUpdates segmentDeletes; if (pendingUpdates.queries.Count == 0 && pendingUpdates.numericUpdates.Count == 0 && pendingUpdates.binaryUpdates.Count == 0) { pendingUpdates.Clear(); segmentDeletes = null; } else { segmentDeletes = pendingUpdates; } if (infoStream.IsEnabled("DWPT")) { double newSegmentSize = segmentInfoPerCommit.GetSizeInBytes() / 1024.0 / 1024.0; infoStream.Message("DWPT", "flushed: segment=" + segmentInfo.Name + " ramUsed=" + startMBUsed.ToString(nf) + " MB" + " newFlushedSize(includes docstores)=" + newSegmentSize.ToString(nf) + " MB" + " docs/MB=" + (flushState.SegmentInfo.DocCount / newSegmentSize).ToString(nf)); } if (Debugging.AssertsEnabled) { Debugging.Assert(segmentInfo != null); } FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.FieldInfos, segmentDeletes, flushState.LiveDocs, flushState.DelCountOnFlush); SealFlushedSegment(fs); success = true; return(fs); } finally { if (!success) { Abort(filesToDelete); } } }