/// <summary>Creates a merger that writes into the given writer's directory
/// under the segment name <c>name</c>.</summary>
internal SegmentMerger(IndexWriter writer, System.String name)
{
    InitBlock();
    segment = name;
    directory = writer.GetDirectory();
    termIndexInterval = writer.GetTermIndexInterval();
}
// Simulates a crash of the underlying (mock) directory: mark it crashed,
// let any in-flight background merges drain, then clear the crashed state
// so the directory is usable again.
private void Crash(IndexWriter writer)
{
    MockRAMDirectory mockDir = (MockRAMDirectory) writer.GetDirectory();
    ConcurrentMergeScheduler scheduler = (ConcurrentMergeScheduler) writer.GetMergeScheduler();
    mockDir.Crash();
    scheduler.Sync();
    mockDir.ClearCrash();
}
// Verifies LogMergePolicy invariants on the writer's current segments:
// buffered docs stay below maxBufferedDocs, each doc-count "level" holds
// fewer than mergeFactor segments (unless capped by maxMergeDocs), and
// there is exactly one .cfs file per segment.
private void CheckInvariants(IndexWriter writer)
{
    _TestUtil.SyncConcurrentMerges(writer);
    int maxBufferedDocs = writer.GetMaxBufferedDocs();
    int mergeFactor = writer.GetMergeFactor();
    int maxMergeDocs = writer.GetMaxMergeDocs();
    int ramSegmentCount = writer.GetNumBufferedDocuments();
    Assert.IsTrue(ramSegmentCount < maxBufferedDocs);
    int lowerBound = -1;
    int upperBound = maxBufferedDocs;
    int numSegments = 0;
    int segmentCount = writer.GetSegmentCount();
    // Walk segments newest-to-oldest, counting how many fall in the
    // current (lowerBound, upperBound] doc-count level.
    for (int i = segmentCount - 1; i >= 0; i--)
    {
        int docCount = writer.GetDocCount(i);
        // Segments must be ordered by non-decreasing size going backwards.
        Assert.IsTrue(docCount > lowerBound);
        if (docCount <= upperBound)
        {
            numSegments++;
        }
        else
        {
            // Crossed into a larger level: the level just finished must
            // hold fewer than mergeFactor segments, unless merging at this
            // level is disabled by maxMergeDocs.
            if (upperBound * mergeFactor <= maxMergeDocs)
            {
                Assert.IsTrue(numSegments < mergeFactor);
            }
            // Advance level bounds until this segment fits.
            do
            {
                lowerBound = upperBound;
                upperBound *= mergeFactor;
            }while (docCount > upperBound);
            numSegments = 1;
        }
    }
    // Also check the final (largest) level.
    if (upperBound * mergeFactor <= maxMergeDocs)
    {
        Assert.IsTrue(numSegments < mergeFactor);
    }
    // Every segment should be stored as a compound (.cfs) file.
    System.String[] files = writer.GetDirectory().ListAll();
    int segmentCfsCount = 0;
    for (int i = 0; i < files.Length; i++)
    {
        if (files[i].EndsWith(".cfs"))
        {
            segmentCfsCount++;
        }
    }
    Assert.AreEqual(segmentCount, segmentCfsCount);
}
// Crashes the writer's mock directory, waits for the concurrent merge
// scheduler to finish its running merges, and resets the crash flag.
private void Crash(IndexWriter writer)
{
    MockRAMDirectory crashedDir = (MockRAMDirectory) writer.GetDirectory();
    ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) writer.GetMergeScheduler();
    crashedDir.Crash();
    cms.Sync();
    crashedDir.ClearCrash();
}
// Crashing mid-indexing must lose some (but not necessarily all) of the
// 157 documents added by InitIndex; the index must still open cleanly.
public virtual void TestCrashWhileIndexing()
{
    IndexWriter writer = InitIndex();
    MockRAMDirectory crashDir = (MockRAMDirectory) writer.GetDirectory();
    Crash(writer);
    IndexReader reader = IndexReader.Open(crashDir);
    Assert.IsTrue(reader.NumDocs() < 157);
}
/// <summary>Creates a merger for the given segment name; when a merge is
/// supplied, aborts are tracked via a CheckAbort on the writer's directory.</summary>
internal SegmentMerger(IndexWriter writer, System.String name, MergePolicy.OneMerge merge)
{
    InitBlock();
    segment = name;
    directory = writer.GetDirectory();
    // Only wire up abort checking when this merger serves an actual merge.
    if (merge != null)
    {
        checkAbort = new CheckAbort(merge, directory);
    }
    termIndexInterval = writer.GetTermIndexInterval();
}
// After a crash, a new writer on the same directory must still be able to
// open, add documents and close; some of the 314 total docs may be lost.
public virtual void TestWriterAfterCrash()
{
    IndexWriter writer = InitIndex();
    MockRAMDirectory crashDir = (MockRAMDirectory) writer.GetDirectory();
    // Allow re-writing files that the crashed session already wrote.
    crashDir.SetPreventDoubleWrite(false);
    Crash(writer);
    writer = InitIndex(crashDir);
    writer.Close();
    IndexReader reader = IndexReader.Open(crashDir);
    Assert.IsTrue(reader.NumDocs() < 314);
}
// A crash that happens after Close(false) (close without waiting for
// merges) must not lose committed documents: all 157 docs survive.
public virtual void TestCrashAfterCloseNoWait()
{
    IndexWriter writer = InitIndex();
    MockRAMDirectory crashDir = (MockRAMDirectory) writer.GetDirectory();
    writer.Close(false);
    crashDir.Crash();
    // (Debug aid from the original Java test, left out: listing every file
    // in the directory with its length.)
    IndexReader reader = IndexReader.Open(crashDir);
    Assert.AreEqual(157, reader.NumDocs());
}
// Close a first writer (157 docs committed), reopen and add 157 more,
// then crash: at least the first commit's documents must survive.
public virtual void TestCrashAfterReopen()
{
    IndexWriter writer = InitIndex();
    MockRAMDirectory crashDir = (MockRAMDirectory) writer.GetDirectory();
    writer.Close();
    writer = InitIndex(crashDir);
    Assert.AreEqual(314, writer.DocCount());
    Crash(writer);
    // (Debug aid from the original Java test, left out: listing every file
    // in the directory with its length.)
    IndexReader reader = IndexReader.Open(crashDir);
    Assert.IsTrue(reader.NumDocs() >= 157);
}
/// <summary>Pulls pending merges from the IndexWriter's queue and runs each
/// on its own background MergeThread, stalling the caller while
/// maxThreadCount merge threads are already running.</summary>
public override void Merge(IndexWriter writer)
{
    // TODO: enable this once we are on JRE 1.5
    // assert !Thread.holdsLock(writer);
    this.writer = writer;
    InitMergeThreadPriority();
    dir = writer.GetDirectory();
    // First, quickly run through the newly proposed merges
    // and add any orthogonal merges (ie a merge not
    // involving segments already pending to be merged) to
    // the queue. If we are way behind on merging, many of
    // these newly proposed merges will likely already be
    // registered.
    if (Verbose())
    {
        Message("now merge");
        Message(" index: " + writer.SegString());
    }
    // Iterate, pulling from the IndexWriter's queue of
    // pending merges, until it's empty:
    while (true)
    {
        // TODO: we could be careful about which merges to do in
        // the BG (eg maybe the "biggest" ones) vs FG, which
        // merges to do first (the easiest ones?), etc.
        MergePolicy.OneMerge merge = writer.GetNextMerge();
        if (merge == null)
        {
            if (Verbose())
                Message(" no more merges pending; now return");
            return ;
        }
        // We do this w/ the primary thread to keep
        // deterministic assignment of segment names
        writer.MergeInit(merge);
        bool success = false;
        try
        {
            lock (this)
            {
                MergeThread merger;
                // Stall until a merge-thread slot frees up; merge threads
                // are expected to pulse this monitor when they finish.
                while (MergeThreadCount(true) >= maxThreadCount)
                {
                    if (Verbose())
                        Message(" too many merge threads running; stalling...");
                    try
                    {
                        System.Threading.Monitor.Wait(this);
                    }
                    catch (System.Threading.ThreadInterruptedException ie)
                    {
                        // In 3.0 we will change this to throw
                        // InterruptedException instead
                        Support.ThreadClass.Current().Interrupt();
                        throw new System.SystemException(ie.Message, ie);
                    }
                }
                if (Verbose())
                    Message(" consider merge " + merge.SegString(dir));
                // OK to spawn a new merge thread to handle this
                // merge:
                merger = GetMergeThread(writer, merge);
                mergeThreads.Add(merger);
                if (Verbose())
                    Message(" launch new thread [" + merger.Name + "]");
                merger.Start();
                success = true;
            }
        }
        finally
        {
            // If the thread could not be spawned (exception above), make
            // sure the writer releases this merge's bookkeeping.
            if (!success)
            {
                writer.MergeFinish(merge);
            }
        }
    }
}
/// <summary>Returns true if this single info is optimized: it has no
/// pending norms or deletes, lives in the same directory as the writer,
/// and matches the current compound-file setting.</summary>
private bool IsOptimized(IndexWriter writer, SegmentInfo info)
{
    // Early-out guards preserve the original short-circuit order.
    if (info.HasDeletions())
        return false;
    if (info.HasSeparateNorms())
        return false;
    if (info.dir != writer.GetDirectory())
        return false;
    return info.GetUseCompoundFile() == useCompoundFile;
}
/// <summary>Checks if any merges are now necessary and returns a
/// {@link MergePolicy.MergeSpecification} if so. A merge
/// is necessary when there are more than {@link
/// #setMergeFactor} segments at a given level. When
/// multiple levels have too many segments, this method
/// will return multiple merges, allowing the {@link
/// MergeScheduler} to use concurrency.
/// </summary>
public override MergeSpecification FindMerges(SegmentInfos infos, IndexWriter writer)
{
    int numSegments = infos.Count;
    this.writer = writer;
    Message("findMerges: " + numSegments + " segments");
    // Compute levels, which is just log (base mergeFactor)
    // of the size of each segment
    float[] levels = new float[numSegments];
    float norm = (float)System.Math.Log(mergeFactor);
    // NOTE(review): 'directory' is computed but never read below — appears
    // vestigial; confirm before removing.
    Directory directory = writer.GetDirectory();
    for (int i = 0; i < numSegments; i++)
    {
        SegmentInfo info = infos.Info(i);
        long size = Size(info);
        // Floor tiny segments
        if (size < 1)
        {
            size = 1;
        }
        levels[i] = (float)System.Math.Log(size) / norm;
    }
    // Segments at or below minMergeSize all collapse into one floor level.
    float levelFloor;
    if (minMergeSize <= 0)
    {
        levelFloor = (float)0.0;
    }
    else
    {
        levelFloor = (float)(System.Math.Log(minMergeSize) / norm);
    }
    // Now, we quantize the log values into levels. The
    // first level is any segment whose log size is within
    // LEVEL_LOG_SPAN of the max size, or, who has such as
    // segment "to the right". Then, we find the max of all
    // other segments and use that to define the next level
    // segment, etc.
    MergeSpecification spec = null;
    int start = 0;
    while (start < numSegments)
    {
        // Find max level of all segments not already
        // quantized.
        float maxLevel = levels[start];
        for (int i = 1 + start; i < numSegments; i++)
        {
            float level = levels[i];
            if (level > maxLevel)
            {
                maxLevel = level;
            }
        }
        // Now search backwards for the rightmost segment that
        // falls into this level:
        float levelBottom;
        if (maxLevel < levelFloor)
        {
            // All remaining segments fall into the min level
            levelBottom = -1.0F;
        }
        else
        {
            levelBottom = (float)(maxLevel - LEVEL_LOG_SPAN);
            // Force a boundary at the level floor
            if (levelBottom < levelFloor && maxLevel >= levelFloor)
            {
                levelBottom = levelFloor;
            }
        }
        int upto = numSegments - 1;
        while (upto >= start)
        {
            if (levels[upto] >= levelBottom)
            {
                break;
            }
            upto--;
        }
        Message(" level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments");
        // Finally, record all merges that are viable at this level:
        int end = start + mergeFactor;
        while (end <= 1 + upto)
        {
            bool anyTooLarge = false;
            for (int i = start; i < end; i++)
            {
                SegmentInfo info = infos.Info(i);
                anyTooLarge |= (Size(info) >= maxMergeSize || info.docCount >= maxMergeDocs);
            }
            if (!anyTooLarge)
            {
                if (spec == null)
                {
                    spec = new MergeSpecification();
                }
                Message(" " + start + " to " + end + ": add this merge");
                spec.Add(new OneMerge(infos.Range(start, end), useCompoundFile));
            }
            else
            {
                Message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
            }
            start = end;
            end = start + mergeFactor;
        }
        start = 1 + upto;
    }
    // Null when no level produced a viable mergeFactor-sized window.
    return(spec);
}
/// <summary>
/// Creates the index for the bz2 file on a separate thread.
/// Decompresses the file block by block, feeds the text to tokenizer
/// threads via IndexString, merges the in-memory index into the on-disk
/// one, then optimizes. On failure or abort the on-disk index directory
/// is deleted.
/// </summary>
private void CreateIndexAsync()
{
    bool failed = false;
    try
    {
        // Close any searchers
        if (searcher != null)
        {
            searcher.Close();
            searcher = null;
        }
        indexExists = false;
        // Create the index writer (on-disk) and a RAM-backed one that
        // absorbs documents first and is merged into the disk index later.
        indexer = new IndexWriter(indexPath, textAnalyzer, true);
        memoryIndexer = new IndexWriter(new RAMDirectory(), textAnalyzer, true);
        memoryIndexer.SetMaxBufferedDocs(1000);
        memoryIndexer.SetMergeFactor(100);
        indexer.SetMaxBufferedDocs(1000);
        indexer.SetMergeFactor(100);
        // Locate the bzip2 blocks in the file
        LocateBlocks();
        // Two times more than the first block but not less than 100 bytes
        long bufSize = ((ends[0] - beginnings[0]) / 8) * 2 + 100;
        // Buffers for the current and next block
        blockBuf = new byte[bufSize];
        charBuf = new char[bufSize];
        // Whether there was a Wiki topic carryover from current block to the next one
        char[] charCarryOver = new char[0];
        // The length of the currently loaded data
        long loadedLength = 0;
        StringBuilder sb = new StringBuilder();
        // Starting indexing
        ReportProgress(0, IndexingProgress.State.Running, "Indexing");
        for (long i = 0; i < totalBlocks && !abortIndexing; i++)
        {
            ReportProgress((int)((double)(i * 100) / (double)totalBlocks), IndexingProgress.State.Running, String.Empty);
            #region Indexing logic
            loadedLength = LoadBlock(beginnings[i], ends[i], ref blockBuf);
            // LoadBlock may have grown blockBuf; keep charBuf in step.
            if (charBuf.Length < blockBuf.Length)
            {
                charBuf = new char[blockBuf.Length];
            }
            int bytesUsed = 0;
            int charsUsed = 0;
            bool completed = false;
            // Convert the text to UTF8 (flush on the final block).
            utf8.Convert(blockBuf, 0, (int)loadedLength, charBuf, 0, charBuf.Length, i == totalBlocks - 1, out bytesUsed, out charsUsed, out completed);
            if (!completed)
            {
                throw new Exception("UTF8 decoder could not complete the conversion");
            }
            // Construct a current string: carried-over tail of the
            // previous block, then this block's characters.
            sb.Length = 0;
            if (charCarryOver.Length > 0)
            {
                sb.Append(charCarryOver);
            }
            sb.Append(charBuf, 0, charsUsed);
            int carryOverLength = charCarryOver.Length;
            int charsMatched = IndexString(sb.ToString(), beginnings[i], ends[i], carryOverLength, i == totalBlocks - 1);
            // There's a Wiki topic carryover, let's store the characters which need to be carried over
            if (charsMatched > 0)
            {
                charCarryOver = new char[charsMatched];
                sb.CopyTo(charsUsed + carryOverLength - charsMatched, charCarryOver, 0, charsMatched);
            }
            else
            {
                charCarryOver = new char[0];
            }
            #endregion
        }
        // Wait till all the threads finish
        while (activeThreads != 0)
        {
            ReportProgress(0, IndexingProgress.State.Running, "Waiting for tokenizer threads to finish");
            Thread.Sleep(TimeSpan.FromSeconds(5));
        }
        ReportProgress(0, IndexingProgress.State.Running, "Flushing documents to disk");
        Lucene.Net.Store.Directory dir = memoryIndexer.GetDirectory();
        memoryIndexer.Close();
        indexer.AddIndexes(new Lucene.Net.Store.Directory[] { dir });
        memoryIndexer = null;
        ReportProgress(0, IndexingProgress.State.Running, "Optimizing index");
        indexer.Optimize();
        indexExists = true;
    }
    catch (Exception ex)
    {
        // Broad catch is deliberate: any failure is reported to the UI and
        // flips 'failed' so the partial index is deleted below.
        ReportProgress(0, IndexingProgress.State.Failure, ex.ToString());
        failed = true;
    }
    // Try to release some memory
    if (indexer != null)
    {
        indexer.Close();
        indexer = null;
    }
    if (failed || abortIndexing)
    {
        // Remove the partial on-disk index.
        Directory.Delete(indexPath, true);
        indexExists = false;
    }
    else
    {
        if (indexExists)
        {
            searcher = new IndexSearcher(indexPath);
        }
    }
    ReportProgress(0, IndexingProgress.State.Finished, String.Empty);
}
/// <summary>Pulls pending merges from the IndexWriter's queue and runs each
/// on its own background MergeThread, stalling the caller while
/// maxThreadCount merge threads are already running.</summary>
public override void Merge(IndexWriter writer)
{
    this.writer = writer;
    InitMergeThreadPriority();
    dir = writer.GetDirectory();
    // First, quickly run through the newly proposed merges
    // and add any orthogonal merges (ie a merge not
    // involving segments already pending to be merged) to
    // the queue. If we are way behind on merging, many of
    // these newly proposed merges will likely already be
    // registered.
    Message("now merge");
    Message(" index: " + writer.SegString());
    // Iterate, pulling from the IndexWriter's queue of
    // pending merges, until it's empty:
    while (true)
    {
        // TODO: we could be careful about which merges to do in
        // the BG (eg maybe the "biggest" ones) vs FG, which
        // merges to do first (the easiest ones?), etc.
        MergePolicy.OneMerge merge = writer.GetNextMerge();
        if (merge == null)
        {
            Message(" no more merges pending; now return");
            return;
        }
        // We do this w/ the primary thread to keep
        // deterministic assignment of segment names
        writer.MergeInit(merge);
        lock (this)
        {
            // Stall until a merge-thread slot frees up; merge threads
            // pulse this monitor when they finish.
            while (MergeThreadCount() >= maxThreadCount)
            {
                // BUGFIX: message previously read "too may merge threads".
                Message(" too many merge threads running; stalling...");
                try
                {
                    System.Threading.Monitor.Wait(this);
                }
                catch (System.Threading.ThreadInterruptedException)
                {
                    // Restore the interrupt flag and keep waiting; this
                    // version deliberately does not propagate.
                    SupportClass.ThreadClass.Current().Interrupt();
                }
            }
            Message(" consider merge " + merge.SegString(dir));
            System.Diagnostics.Debug.Assert(MergeThreadCount() < maxThreadCount);
            // OK to spawn a new merge thread to handle this
            // merge:
            MergeThread merger = GetMergeThread(writer, merge);
            mergeThreads.Add(merger);
            Message(" launch new thread [" + merger.Name + "]");
            merger.Start();
        }
    }
}
// Stress-thread body: for ~3 seconds, repeatedly UpdateDocument with a
// randomly-chosen id while a fault-injection hook (doFail) is armed.
// Expected SystemExceptions trigger a CheckIndex; any other exception is
// recorded in 'failure' and stops the thread.
override public void Run()
{
    // One reusable document covering the stored/indexed/term-vector
    // field combinations under test.
    Document doc = new Document();
    doc.Add(new Field("content1", "aaa bbb ccc ddd", Field.Store.YES, Field.Index.ANALYZED));
    doc.Add(new Field("content6", "aaa bbb ccc ddd", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    doc.Add(new Field("content2", "aaa bbb ccc ddd", Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.Add(new Field("content3", "aaa bbb ccc ddd", Field.Store.YES, Field.Index.NO));
    doc.Add(new Field("content4", "aaa bbb ccc ddd", Field.Store.NO, Field.Index.ANALYZED));
    doc.Add(new Field("content5", "aaa bbb ccc ddd", Field.Store.NO, Field.Index.NOT_ANALYZED));
    doc.Add(new Field("content7", "aaa bbb ccc ddd", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    Field idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
    doc.Add(idField);
    // Run for roughly 3000 ms of wall-clock time.
    long stopTime = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) + 3000;
    while ((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) < stopTime)
    {
        // Arm fault injection for this thread.
        System.Threading.Thread.SetData(Enclosing_Instance.doFail, this);
        System.String id = "" + r.Next(50);
        idField.SetValue(id);
        Term idTerm = new Term("id", id);
        try
        {
            writer.UpdateDocument(idTerm, doc);
        }
        catch (System.SystemException re)
        {
            // Expected injected failure: optionally dump it, then verify
            // the index was left consistent.
            if (Lucene.Net.Index.TestIndexWriterExceptions.DEBUG)
            {
                System.Console.Out.WriteLine("EXC: ");
                System.Console.Out.WriteLine(re.StackTrace);
            }
            try
            {
                _TestUtil.CheckIndex(writer.GetDirectory());
            }
            catch (System.IO.IOException ioe)
            {
                System.Console.Out.WriteLine(SupportClass.ThreadClass.Current().Name + ": unexpected exception1");
                System.Console.Out.WriteLine(ioe.StackTrace);
                failure = ioe;
                break;
            }
        }
        catch (System.Exception t)
        {
            // Anything else is a real failure; record it and stop.
            System.Console.Out.WriteLine(SupportClass.ThreadClass.Current().Name + ": unexpected exception2");
            System.Console.Out.WriteLine(t.StackTrace);
            failure = t;
            break;
        }
        // Disarm fault injection.
        System.Threading.Thread.SetData(Enclosing_Instance.doFail, null);
        // After a possible exception (above) I should be able
        // to add a new document without hitting an
        // exception:
        try
        {
            writer.UpdateDocument(idTerm, doc);
        }
        catch (System.Exception t)
        {
            System.Console.Out.WriteLine(SupportClass.ThreadClass.Current().Name + ": unexpected exception3");
            System.Console.Out.WriteLine(t.StackTrace);
            failure = t;
            break;
        }
    }
}
/// <summary>Pulls pending merges from the IndexWriter's queue; each merge
/// runs on a background MergeThread when a slot is free, otherwise (or for
/// merges touching an external directory) in the calling thread.</summary>
public override void Merge(IndexWriter writer)
{
    this.writer = writer;
    InitMergeThreadPriority();
    dir = writer.GetDirectory();
    // First, quickly run through the newly proposed merges
    // and add any orthogonal merges (ie a merge not
    // involving segments already pending to be merged) to
    // the queue. If we are way behind on merging, many of
    // these newly proposed merges will likely already be
    // registered.
    Message("now merge");
    Message(" index: " + writer.SegString());
    // Iterate, pulling from the IndexWriter's queue of
    // pending merges, until its empty:
    while (true)
    {
        // TODO: we could be careful about which merges to do in
        // the BG (eg maybe the "biggest" ones) vs FG, which
        // merges to do first (the easiest ones?), etc.
        MergePolicy.OneMerge merge = writer.GetNextMerge();
        if (merge == null)
        {
            Message(" no more merges pending; now return");
            return ;
        }
        // We do this w/ the primary thread to keep
        // deterministic assignment of segment names
        writer.MergeInit(merge);
        Message(" consider merge " + merge.SegString(dir));
        if (merge.isExternal)
        {
            // External-directory merges are forced to the foreground.
            Message(" merge involves segments from an external directory; now run in foreground");
        }
        else
        {
            lock (this)
            {
                if (MergeThreadCount() < maxThreadCount)
                {
                    // OK to spawn a new merge thread to handle this
                    // merge:
                    MergeThread merger = new MergeThread(this, writer, merge);
                    mergeThreads.Add(merger);
                    Message(" launch new thread [" + merger.Name + "]");
                    merger.SetThreadPriority(mergeThreadPriority);
                    merger.IsBackground = true;
                    merger.Start();
                    // Handed off to the background thread; fetch the next merge.
                    continue;
                }
                else
                    Message(" too many merge threads running; run merge in foreground");
            }
        }
        // Too many merge threads already running, so we do
        // this in the foreground of the calling thread
        writer.Merge(merge);
    }
}
// Verifies LogMergePolicy invariants on the writer's current segments:
// buffered docs stay below maxBufferedDocs, each doc-count "level" holds
// fewer than mergeFactor segments (unless capped by maxMergeDocs), and
// there is exactly one .cfs file per segment.
private void CheckInvariants(IndexWriter writer)
{
    _TestUtil.SyncConcurrentMerges(writer);
    int maxBufferedDocs = writer.GetMaxBufferedDocs();
    int mergeFactor = writer.GetMergeFactor();
    int maxMergeDocs = writer.GetMaxMergeDocs();
    int ramSegmentCount = writer.GetNumBufferedDocuments();
    Assert.IsTrue(ramSegmentCount < maxBufferedDocs);
    int lowerBound = - 1;
    int upperBound = maxBufferedDocs;
    int numSegments = 0;
    int segmentCount = writer.GetSegmentCount();
    // Walk segments newest-to-oldest, counting how many fall in the
    // current (lowerBound, upperBound] doc-count level.
    for (int i = segmentCount - 1; i >= 0; i--)
    {
        int docCount = writer.GetDocCount(i);
        // Segments must be ordered by non-decreasing size going backwards.
        Assert.IsTrue(docCount > lowerBound);
        if (docCount <= upperBound)
        {
            numSegments++;
        }
        else
        {
            // Crossed into a larger level: the level just finished must
            // hold fewer than mergeFactor segments, unless merging at this
            // level is disabled by maxMergeDocs.
            if (upperBound * mergeFactor <= maxMergeDocs)
            {
                Assert.IsTrue(numSegments < mergeFactor);
            }
            // Advance level bounds until this segment fits.
            do
            {
                lowerBound = upperBound;
                upperBound *= mergeFactor;
            }
            while (docCount > upperBound);
            numSegments = 1;
        }
    }
    // Also check the final (largest) level.
    if (upperBound * mergeFactor <= maxMergeDocs)
    {
        Assert.IsTrue(numSegments < mergeFactor);
    }
    // Every segment should be stored as a compound (.cfs) file.
    System.String[] files = writer.GetDirectory().ListAll();
    int segmentCfsCount = 0;
    for (int i = 0; i < files.Length; i++)
    {
        if (files[i].EndsWith(".cfs"))
        {
            segmentCfsCount++;
        }
    }
    Assert.AreEqual(segmentCount, segmentCfsCount);
}
/// <summary>Pulls pending merges from the IndexWriter's queue and runs each
/// on its own background MergeThread, stalling the caller while
/// maxThreadCount merge threads are already running.</summary>
public override void Merge(IndexWriter writer)
{
    // TODO: enable this once we are on JRE 1.5
    // assert !Thread.holdsLock(writer);
    this.writer = writer;
    InitMergeThreadPriority();
    dir = writer.GetDirectory();
    // First, quickly run through the newly proposed merges
    // and add any orthogonal merges (ie a merge not
    // involving segments already pending to be merged) to
    // the queue. If we are way behind on merging, many of
    // these newly proposed merges will likely already be
    // registered.
    if (Verbose())
    {
        Message("now merge");
        Message(" index: " + writer.SegString());
    }
    // Iterate, pulling from the IndexWriter's queue of
    // pending merges, until it's empty:
    while (true)
    {
        // TODO: we could be careful about which merges to do in
        // the BG (eg maybe the "biggest" ones) vs FG, which
        // merges to do first (the easiest ones?), etc.
        MergePolicy.OneMerge merge = writer.GetNextMerge();
        if (merge == null)
        {
            if (Verbose())
            {
                Message(" no more merges pending; now return");
            }
            return;
        }
        // We do this w/ the primary thread to keep
        // deterministic assignment of segment names
        writer.MergeInit(merge);
        bool success = false;
        try
        {
            lock (this)
            {
                MergeThread merger;
                // Stall until a merge-thread slot frees up; merge threads
                // are expected to pulse this monitor when they finish.
                while (MergeThreadCount() >= maxThreadCount)
                {
                    if (Verbose())
                    {
                        Message(" too many merge threads running; stalling...");
                    }
                    try
                    {
                        System.Threading.Monitor.Wait(this);
                    }
                    catch (System.Threading.ThreadInterruptedException ie)
                    {
                        // In 3.0 we will change this to throw
                        // InterruptedException instead
                        SupportClass.ThreadClass.Current().Interrupt();
                        throw new System.SystemException(ie.Message, ie);
                    }
                }
                if (Verbose())
                {
                    Message(" consider merge " + merge.SegString(dir));
                }
                System.Diagnostics.Debug.Assert(MergeThreadCount() < maxThreadCount);
                // OK to spawn a new merge thread to handle this
                // merge:
                merger = GetMergeThread(writer, merge);
                mergeThreads.Add(merger);
                if (Verbose())
                {
                    Message(" launch new thread [" + merger.Name + "]");
                }
                merger.Start();
                success = true;
            }
        }
        finally
        {
            // If the thread could not be spawned (exception above), make
            // sure the writer releases this merge's bookkeeping.
            if (!success)
            {
                writer.MergeFinish(merge);
            }
        }
    }
}
/// <summary>Creates a merger for the given segment name. With a real merge
/// the abort check tracks it; otherwise a no-op CheckAbort is installed so
/// callers never see a null checkAbort.</summary>
internal SegmentMerger(IndexWriter writer, System.String name, MergePolicy.OneMerge merge)
{
    InitBlock();
    segment = name;
    directory = writer.GetDirectory();
    checkAbort = merge != null
        ? new CheckAbort(merge, directory)
        : new CheckAbort(null, null, (d) => {/*Do nothing*/ });
    termIndexInterval = writer.GetTermIndexInterval();
}
/// <summary>Checks if any merges are now necessary and returns a
/// {@link MergePolicy.MergeSpecification} if so. A merge
/// is necessary when there are more than {@link
/// #setMergeFactor} segments at a given level. When
/// multiple levels have too many segments, this method
/// will return multiple merges, allowing the {@link
/// MergeScheduler} to use concurrency.
/// </summary>
public override MergeSpecification FindMerges(SegmentInfos infos, IndexWriter writer)
{
    int numSegments = infos.Count;
    this.writer = writer;
    Message("findMerges: " + numSegments + " segments");
    // Compute levels, which is just log (base mergeFactor)
    // of the size of each segment
    float[] levels = new float[numSegments];
    float norm = (float) System.Math.Log(mergeFactor);
    // NOTE(review): 'directory' is computed but never read below — appears
    // vestigial; confirm before removing.
    Directory directory = writer.GetDirectory();
    for (int i = 0; i < numSegments; i++)
    {
        SegmentInfo info = infos.Info(i);
        long size = Size(info);
        // Floor tiny segments
        if (size < 1)
            size = 1;
        levels[i] = (float) System.Math.Log(size) / norm;
    }
    // Segments at or below minMergeSize all collapse into one floor level.
    float levelFloor;
    if (minMergeSize <= 0)
        levelFloor = (float) 0.0;
    else
    {
        levelFloor = (float) (System.Math.Log(minMergeSize) / norm);
    }
    // Now, we quantize the log values into levels. The
    // first level is any segment whose log size is within
    // LEVEL_LOG_SPAN of the max size, or, who has such as
    // segment "to the right". Then, we find the max of all
    // other segments and use that to define the next level
    // segment, etc.
    MergeSpecification spec = null;
    int start = 0;
    while (start < numSegments)
    {
        // Find max level of all segments not already
        // quantized.
        float maxLevel = levels[start];
        for (int i = 1 + start; i < numSegments; i++)
        {
            float level = levels[i];
            if (level > maxLevel)
                maxLevel = level;
        }
        // Now search backwards for the rightmost segment that
        // falls into this level:
        float levelBottom;
        if (maxLevel < levelFloor)
            // All remaining segments fall into the min level
            levelBottom = - 1.0F;
        else
        {
            levelBottom = (float) (maxLevel - LEVEL_LOG_SPAN);
            // Force a boundary at the level floor
            if (levelBottom < levelFloor && maxLevel >= levelFloor)
                levelBottom = levelFloor;
        }
        int upto = numSegments - 1;
        while (upto >= start)
        {
            if (levels[upto] >= levelBottom)
            {
                break;
            }
            upto--;
        }
        Message(" level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments");
        // Finally, record all merges that are viable at this level:
        int end = start + mergeFactor;
        while (end <= 1 + upto)
        {
            bool anyTooLarge = false;
            for (int i = start; i < end; i++)
            {
                SegmentInfo info = infos.Info(i);
                anyTooLarge |= (Size(info) >= maxMergeSize || info.docCount >= maxMergeDocs);
            }
            if (!anyTooLarge)
            {
                if (spec == null)
                    spec = new MergeSpecification();
                Message(" " + start + " to " + end + ": add this merge");
                spec.Add(new OneMerge(infos.Range(start, end), useCompoundFile));
            }
            else
                Message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
            start = end;
            end = start + mergeFactor;
        }
        start = 1 + upto;
    }
    // Null when no level produced a viable mergeFactor-sized window.
    return spec;
}
/// <summary>Creates a merger for the given segment name. With a real merge
/// the abort check tracks it; otherwise a no-op anonymous CheckAbort is
/// installed so callers never see a null checkAbort.</summary>
internal SegmentMerger(IndexWriter writer, System.String name, MergePolicy.OneMerge merge)
{
    InitBlock();
    segment = name;
    directory = writer.GetDirectory();
    checkAbort = merge != null
        ? new CheckAbort(merge, directory)
        : new AnonymousClassCheckAbort1(this, null, null);
    termIndexInterval = writer.GetTermIndexInterval();
}
/// <summary>Creates a merger for the given segment name; when a merge is
/// supplied, aborts are tracked via a CheckAbort on the writer's directory
/// (otherwise checkAbort is left unset).</summary>
internal SegmentMerger(IndexWriter writer, System.String name, MergePolicy.OneMerge merge)
{
    InitBlock();
    segment = name;
    directory = writer.GetDirectory();
    // Only wire up abort checking when this merger serves an actual merge.
    if (merge != null)
    {
        checkAbort = new CheckAbort(merge, directory);
    }
    termIndexInterval = writer.GetTermIndexInterval();
}
// Used by near real-time search: builds a read-only DirectoryReader over
// the writer's in-flight SegmentInfos, cloning per-segment readers from
// the writer's reader pool. Segments living in a foreign Directory are
// skipped.
internal DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor)
{
    this.directory = writer.GetDirectory();
    this.readOnly = true;
    this.segmentInfos = infos;
    // Snapshot so later changes to 'infos' can be detected.
    segmentInfosStart = (SegmentInfos) infos.Clone();
    this.termInfosIndexDivisor = termInfosIndexDivisor;
    // NOTE(review): readOnly was just set to true, so this branch is dead
    // in this constructor — confirm whether that is intentional.
    if (!readOnly)
    {
        // We assume that this segments_N was previously
        // properly sync'd:
        SupportClass.CollectionsHelper.AddAllIfNotContains(synced, infos.Files(directory, true));
    }
    // IndexWriter synchronizes externally before calling
    // us, which ensures infos will not change; so there's
    // no need to process segments in reverse order
    int numSegments = infos.Count;
    SegmentReader[] readers = new SegmentReader[numSegments];
    Directory dir = writer.GetDirectory();
    int upto = 0;
    for (int i = 0; i < numSegments; i++)
    {
        bool success = false;
        try
        {
            SegmentInfo info = infos.Info(upto);
            // Only take segments that live in the writer's own directory.
            if (info.dir == dir)
            {
                readers[upto++] = writer.readerPool.GetReadOnlyClone(info, true, termInfosIndexDivisor);
            }
            success = true;
        }
        finally
        {
            if (!success)
            {
                // Close all readers we had opened:
                for (upto--; upto >= 0; upto--)
                {
                    try
                    {
                        readers[upto].Close();
                    }
                    catch (System.Exception ignore)
                    {
                        // keep going - we want to clean up as much as possible
                    }
                }
            }
        }
    }
    this.writer = writer;
    if (upto < readers.Length)
    {
        // This means some segments were in a foreign Directory;
        // shrink the array to the readers actually opened.
        SegmentReader[] newReaders = new SegmentReader[upto];
        Array.Copy(readers, 0, newReaders, 0, upto);
        readers = newReaders;
    }
    Initialize(readers);
}
/// <summary>Pulls pending merges from the IndexWriter's queue; each merge
/// runs on a background MergeThread when a slot is free, otherwise (or for
/// merges touching an external directory) in the calling thread.</summary>
public override void Merge(IndexWriter writer)
{
    this.writer = writer;
    InitMergeThreadPriority();
    dir = writer.GetDirectory();
    // First, quickly run through the newly proposed merges
    // and add any orthogonal merges (ie a merge not
    // involving segments already pending to be merged) to
    // the queue. If we are way behind on merging, many of
    // these newly proposed merges will likely already be
    // registered.
    Message("now merge");
    Message(" index: " + writer.SegString());
    // Iterate, pulling from the IndexWriter's queue of
    // pending merges, until its empty:
    while (true)
    {
        // TODO: we could be careful about which merges to do in
        // the BG (eg maybe the "biggest" ones) vs FG, which
        // merges to do first (the easiest ones?), etc.
        MergePolicy.OneMerge merge = writer.GetNextMerge();
        if (merge == null)
        {
            Message(" no more merges pending; now return");
            return;
        }
        // We do this w/ the primary thread to keep
        // deterministic assignment of segment names
        writer.MergeInit(merge);
        Message(" consider merge " + merge.SegString(dir));
        if (merge.isExternal)
        {
            // External-directory merges are forced to the foreground.
            Message(" merge involves segments from an external directory; now run in foreground");
        }
        else
        {
            lock (this)
            {
                if (MergeThreadCount() < maxThreadCount)
                {
                    // OK to spawn a new merge thread to handle this
                    // merge:
                    MergeThread merger = new MergeThread(this, writer, merge);
                    mergeThreads.Add(merger);
                    Message(" launch new thread [" + merger.Name + "]");
                    merger.SetThreadPriority(mergeThreadPriority);
                    merger.IsBackground = true;
                    merger.Start();
                    // Handed off to the background thread; fetch the next merge.
                    continue;
                }
                else
                {
                    Message(" too many merge threads running; run merge in foreground");
                }
            }
        }
        // Too many merge threads already running, so we do
        // this in the foreground of the calling thread
        writer.Merge(merge);
    }
}
/// <summary>Returns true if this single info is optimized: it has no
/// pending norms or deletes, lives in the same directory as the writer,
/// and matches the current compound-file setting.</summary>
private bool IsOptimized(IndexWriter writer, SegmentInfo info)
{
    // Guard clauses preserve the original short-circuit evaluation order.
    if (info.HasDeletions())
    {
        return false;
    }
    if (info.HasSeparateNorms())
    {
        return false;
    }
    if (info.dir != writer.GetDirectory())
    {
        return false;
    }
    return info.GetUseCompoundFile() == useCompoundFile;
}