private SegmentCommitInfo Merge(Directory dir, SegmentCommitInfo si1, SegmentCommitInfo si2, string merged, bool useCompoundFile) { IOContext context = NewIOContext(Random); SegmentReader r1 = new SegmentReader(si1, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context); SegmentReader r2 = new SegmentReader(si2, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context); Codec codec = Codec.Default; TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.Info.Dir); SegmentInfo si = new SegmentInfo(si1.Info.Dir, Constants.LUCENE_MAIN_VERSION, merged, -1, false, codec, null); SegmentMerger merger = new SegmentMerger(new List <AtomicReader> { r1, r2 }, si, InfoStream.Default, trackingDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, CheckAbort.NONE, new FieldInfos.FieldNumbers(), context, true); MergeState mergeState = merger.Merge(); r1.Dispose(); r2.Dispose(); SegmentInfo info = new SegmentInfo(si1.Info.Dir, Constants.LUCENE_MAIN_VERSION, merged, si1.Info.DocCount + si2.Info.DocCount, false, codec, null); info.SetFiles(new JCG.HashSet <string>(trackingDir.CreatedFiles)); if (useCompoundFile) { ICollection <string> filesToDelete = IndexWriter.CreateCompoundFile(InfoStream.Default, dir, CheckAbort.NONE, info, NewIOContext(Random)); info.UseCompoundFile = true; foreach (String fileToDelete in filesToDelete) { si1.Info.Dir.DeleteFile(fileToDelete); } } return(new SegmentCommitInfo(info, 0, -1L, -1L)); }
public virtual void TestMerge() { //System.out.println("----------------TestMerge------------------"); SegmentMerger merger = new SegmentMerger(mergedDir, mergedSegment, false); merger.Add(reader1); merger.Add(reader2); try { int docsMerged = merger.Merge(); merger.CloseReaders(); Assert.IsTrue(docsMerged == 2); //Should be able to open a new SegmentReader against the new directory SegmentReader mergedReader = new SegmentReader(new SegmentInfo(mergedSegment, docsMerged, mergedDir)); Assert.IsTrue(mergedReader != null); Assert.IsTrue(mergedReader.NumDocs() == 2); Document newDoc1 = mergedReader.Document(0); Assert.IsTrue(newDoc1 != null); //There are 2 unstored fields on the document Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(doc1) - 2); Document newDoc2 = mergedReader.Document(1); Assert.IsTrue(newDoc2 != null); Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(doc2) - 2); TermDocs termDocs = mergedReader.TermDocs(new Term(DocHelper.TEXT_FIELD_2_KEY, "Field")); Assert.IsTrue(termDocs != null); Assert.IsTrue(termDocs.Next() == true); System.Collections.ICollection stored = mergedReader.GetIndexedFieldNames(true); Assert.IsTrue(stored != null); //System.out.println("stored size: " + stored.size()); Assert.IsTrue(stored.Count == 2); TermFreqVector vector = mergedReader.GetTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY); Assert.IsTrue(vector != null); System.String[] terms = vector.GetTerms(); Assert.IsTrue(terms != null); //System.out.println("Terms size: " + terms.length); Assert.IsTrue(terms.Length == 3); int[] freqs = vector.GetTermFrequencies(); Assert.IsTrue(freqs != null); //System.out.println("Freqs size: " + freqs.length); for (int i = 0; i < terms.Length; i++) { System.String term = terms[i]; int freq = freqs[i]; //System.out.println("Term: " + term + " Freq: " + freq); Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term) != -1); Assert.IsTrue(DocHelper.FIELD_2_FREQS[i] == freq); } } catch (System.IO.IOException e) { System.Console.Error.WriteLine(e.StackTrace); Assert.IsTrue(false); } //System.out.println("---------------------end TestMerge-------------------"); }
public virtual void TestMerge() { SegmentMerger merger = new SegmentMerger(mergedDir, mergedSegment); merger.Add(reader1); merger.Add(reader2); int docsMerged = merger.Merge(); merger.CloseReaders(); Assert.IsTrue(docsMerged == 2); //Should be able to open a new SegmentReader against the new directory SegmentReader mergedReader = SegmentReader.Get(new SegmentInfo(mergedSegment, docsMerged, mergedDir, false, true)); Assert.IsTrue(mergedReader != null); Assert.IsTrue(mergedReader.NumDocs() == 2); Document newDoc1 = mergedReader.Document(0); Assert.IsTrue(newDoc1 != null); //There are 2 unstored fields on the document Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(doc1) - DocHelper.unstored.Count); Document newDoc2 = mergedReader.Document(1); Assert.IsTrue(newDoc2 != null); Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(doc2) - DocHelper.unstored.Count); TermDocs termDocs = mergedReader.TermDocs(new Term(DocHelper.TEXT_FIELD_2_KEY, "field")); Assert.IsTrue(termDocs != null); Assert.IsTrue(termDocs.Next() == true); System.Collections.Generic.ICollection <string> stored = mergedReader.GetFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR); Assert.IsTrue(stored != null); //System.out.println("stored size: " + stored.size()); Assert.IsTrue(stored.Count == 4, "We do not have 4 fields that were indexed with term vector"); TermFreqVector vector = mergedReader.GetTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY); Assert.IsTrue(vector != null); System.String[] terms = vector.GetTerms(); Assert.IsTrue(terms != null); //System.out.println("Terms size: " + terms.length); Assert.IsTrue(terms.Length == 3); int[] freqs = vector.GetTermFrequencies(); Assert.IsTrue(freqs != null); //System.out.println("Freqs size: " + freqs.length); Assert.IsTrue(vector is TermPositionVector == true); for (int i = 0; i < terms.Length; i++) { System.String term = terms[i]; int freq = freqs[i]; //System.out.println("Term: " + term + " Freq: " + freq); Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term) != -1); Assert.IsTrue(DocHelper.FIELD_2_FREQS[i] == freq); } TestSegmentReader.CheckNorms(mergedReader); }
public virtual void TestMerge() { //System.out.println("----------------TestMerge------------------"); SegmentMerger merger = new SegmentMerger(mergedDir, mergedSegment, false); merger.Add(reader1); merger.Add(reader2); try { int docsMerged = merger.Merge(); merger.CloseReaders(); Assert.IsTrue(docsMerged == 2); //Should be able to open a new SegmentReader against the new directory SegmentReader mergedReader = new SegmentReader(new SegmentInfo(mergedSegment, docsMerged, mergedDir)); Assert.IsTrue(mergedReader != null); Assert.IsTrue(mergedReader.NumDocs() == 2); Document newDoc1 = mergedReader.Document(0); Assert.IsTrue(newDoc1 != null); //There are 2 unstored fields on the document Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(doc1) - 2); Document newDoc2 = mergedReader.Document(1); Assert.IsTrue(newDoc2 != null); Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(doc2) - 2); TermDocs termDocs = mergedReader.TermDocs(new Term(DocHelper.TEXT_FIELD_2_KEY, "Field")); Assert.IsTrue(termDocs != null); Assert.IsTrue(termDocs.Next() == true); System.Collections.ICollection stored = mergedReader.GetIndexedFieldNames(true); Assert.IsTrue(stored != null); //System.out.println("stored size: " + stored.size()); Assert.IsTrue(stored.Count == 2); TermFreqVector vector = mergedReader.GetTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY); Assert.IsTrue(vector != null); System.String[] terms = vector.GetTerms(); Assert.IsTrue(terms != null); //System.out.println("Terms size: " + terms.length); Assert.IsTrue(terms.Length == 3); int[] freqs = vector.GetTermFrequencies(); Assert.IsTrue(freqs != null); //System.out.println("Freqs size: " + freqs.length); for (int i = 0; i < terms.Length; i++) { System.String term = terms[i]; int freq = freqs[i]; //System.out.println("Term: " + term + " Freq: " + freq); Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term) != - 1); Assert.IsTrue(DocHelper.FIELD_2_FREQS[i] == freq); } } catch (System.IO.IOException e) { System.Console.Error.WriteLine(e.StackTrace); Assert.IsTrue(false); } //System.out.println("---------------------end TestMerge-------------------"); }
/// <summary>Merges the named range of segments, replacing them in the stack with a /// single segment. /// </summary> private void MergeSegments(int minSegment, int end) { System.String mergedName = NewSegmentName(); if (infoStream != null) { infoStream.Write("merging segments"); } SegmentMerger merger = new SegmentMerger(this, mergedName); System.Collections.ArrayList segmentsToDelete = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10)); for (int i = minSegment; i < end; i++) { SegmentInfo si = segmentInfos.Info(i); if (infoStream != null) { infoStream.Write(" " + si.name + " (" + si.docCount + " docs)"); } IndexReader reader = SegmentReader.Get(si); merger.Add(reader); if ((reader.Directory() == this.directory) || (reader.Directory() == this.ramDirectory)) { segmentsToDelete.Add(reader); // queue segment for deletion } } int mergedDocCount = merger.Merge(); if (infoStream != null) { infoStream.WriteLine(" into " + mergedName + " (" + mergedDocCount + " docs)"); } for (int i = end - 1; i >= minSegment; i--) { // remove old infos & add new segmentInfos.RemoveAt(i); } segmentInfos.Add(new SegmentInfo(mergedName, mergedDocCount, directory)); // close readers before we attempt to delete now-obsolete segments merger.CloseReaders(); lock (directory) { // in- & inter-process sync new AnonymousClassWith3(segmentsToDelete, this, directory.MakeLock(COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT).Run(); } if (useCompoundFile) { System.Collections.ArrayList filesToDelete = merger.CreateCompoundFile(mergedName + ".tmp"); lock (directory) { // in- & inter-process sync new AnonymousClassWith4(mergedName, filesToDelete, this, directory.MakeLock(COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT).Run(); } } }
/// <summary>Merges the provided indexes into this index. /// <p>After this completes, the index is optimized. </p> /// <p>The provided IndexReaders are not closed.</p> /// </summary> public virtual void AddIndexes(IndexReader[] readers) { lock (this) { Optimize(); // start with zero or 1 seg System.String mergedName = NewSegmentName(); SegmentMerger merger = new SegmentMerger(this, mergedName); System.Collections.ArrayList segmentsToDelete = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10)); IndexReader sReader = null; if (segmentInfos.Count == 1) { // add existing index, if any sReader = SegmentReader.Get(segmentInfos.Info(0)); merger.Add(sReader); segmentsToDelete.Add(sReader); // queue segment for deletion } for (int i = 0; i < readers.Length; i++) { // add new indexes merger.Add(readers[i]); } int docCount = merger.Merge(); // merge 'em segmentInfos.RemoveRange(0, segmentInfos.Count); // pop old infos & add new segmentInfos.Add(new SegmentInfo(mergedName, docCount, directory)); if (sReader != null) { sReader.Close(); } lock (directory) { // in- & inter-process sync new AnonymousClassWith1(this, directory.MakeLock(COMMIT_LOCK_NAME), commitLockTimeout).Run(); } DeleteSegments(segmentsToDelete); // delete now-unused segments if (useCompoundFile) { System.Collections.ArrayList filesToDelete = merger.CreateCompoundFile(mergedName + ".tmp"); lock (directory) { // in- & inter-process sync new AnonymousClassWith2(mergedName, this, directory.MakeLock(COMMIT_LOCK_NAME), commitLockTimeout).Run(); } // delete now unused files of segment DeleteFiles(filesToDelete); } } }
internal static void Merge(System.String seg1, System.String seg2, System.String merged) { Directory directory = FSDirectory.GetDirectory("test", false); SegmentReader r1 = new SegmentReader(new SegmentInfo(seg1, 1, directory)); SegmentReader r2 = new SegmentReader(new SegmentInfo(seg2, 1, directory)); SegmentMerger merger = new SegmentMerger(directory, merged, false); merger.Add(r1); merger.Add(r2); merger.Merge(); merger.CloseReaders(); directory.Close(); }
private void Merge(System.String seg1, System.String seg2, System.String merged, bool useCompoundFile) { Directory directory = FSDirectory.GetDirectory(indexDir, false); SegmentReader r1 = new SegmentReader(new SegmentInfo(seg1, 1, directory)); SegmentReader r2 = new SegmentReader(new SegmentInfo(seg2, 1, directory)); SegmentMerger merger = new SegmentMerger(directory, merged, useCompoundFile); merger.Add(r1); merger.Add(r2); merger.Merge(); merger.CloseReaders(); directory.Close(); }
private SegmentInfo Merge(SegmentInfo si1, SegmentInfo si2, System.String merged, bool useCompoundFile) { SegmentReader r1 = SegmentReader.Get(true, si1, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, null); SegmentReader r2 = SegmentReader.Get(true, si2, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, null); SegmentMerger merger = new SegmentMerger(si1.dir, merged); merger.Add(r1); merger.Add(r2); merger.Merge(null); merger.CloseReaders(); if (useCompoundFile) { System.Collections.Generic.ICollection <string> filesToDelete = merger.CreateCompoundFile(merged + ".cfs"); for (System.Collections.IEnumerator iter = filesToDelete.GetEnumerator(); iter.MoveNext();) { si1.dir.DeleteFile((System.String)iter.Current, null); } } return(new SegmentInfo(merged, si1.docCount + si2.docCount, si1.dir, useCompoundFile, true)); }
private SegmentInfo Merge(SegmentInfo si1, SegmentInfo si2, System.String merged, bool useCompoundFile) { SegmentReader r1 = SegmentReader.Get(si1); SegmentReader r2 = SegmentReader.Get(si2); SegmentMerger merger = new SegmentMerger(si1.dir, merged); merger.Add(r1); merger.Add(r2); merger.Merge(); merger.CloseReaders(); if (useCompoundFile) { System.Collections.IList filesToDelete = merger.CreateCompoundFile(merged + ".cfs"); for (System.Collections.IEnumerator iter = filesToDelete.GetEnumerator(); iter.MoveNext();) { si1.dir.DeleteFile((System.String)iter.Current); } } return(new SegmentInfo(merged, si1.docCount + si2.docCount, si1.dir, useCompoundFile, true)); }
private SegmentInfo Merge(SegmentInfo si1, SegmentInfo si2, System.String merged, bool useCompoundFile) { SegmentReader r1 = SegmentReader.Get(si1); SegmentReader r2 = SegmentReader.Get(si2); SegmentMerger merger = new SegmentMerger(si1.dir, merged); merger.Add(r1); merger.Add(r2); merger.Merge(); merger.CloseReaders(); if (useCompoundFile) { System.Collections.Generic.ICollection<string> filesToDelete = merger.CreateCompoundFile(merged + ".cfs"); for (System.Collections.IEnumerator iter = filesToDelete.GetEnumerator(); iter.MoveNext(); ) { si1.dir.DeleteFile((System.String) iter.Current); } } return new SegmentInfo(merged, si1.docCount + si2.docCount, si1.dir, useCompoundFile, true); }
/// <summary>Does the actual (time-consuming) work of the merge, /// but without holding synchronized lock on IndexWriter /// instance /// </summary> private int MergeMiddle(MergePolicy.OneMerge merge) { merge.CheckAborted(directory); System.String mergedName = merge.info.name; SegmentMerger merger = null; int mergedDocCount = 0; SegmentInfos sourceSegments = merge.segments; int numSegments = sourceSegments.Count; if (infoStream != null) Message("merging " + merge.SegString(directory)); merger = new SegmentMerger(this, mergedName, merge); merge.readers = new SegmentReader[numSegments]; merge.readersClone = new SegmentReader[numSegments]; bool mergeDocStores = false; System.Collections.Hashtable dss = new System.Collections.Hashtable(); // This is try/finally to make sure merger's readers are // closed: bool success = false; try { int totDocCount = 0; for (int i = 0; i < numSegments; i++) { SegmentInfo info = sourceSegments.Info(i); // Hold onto the "live" reader; we will use this to // commit merged deletes SegmentReader reader = merge.readers[i] = readerPool.Get(info, merge.mergeDocStores, MERGE_READ_BUFFER_SIZE, - 1); // We clone the segment readers because other // deletes may come in while we're merging so we // need readers that will not change SegmentReader clone = merge.readersClone[i] = (SegmentReader) reader.Clone(true); merger.Add(clone); if (clone.HasDeletions()) { mergeDocStores = true; } if (info.GetDocStoreOffset() != - 1) { dss[info.GetDocStoreSegment()] = info.GetDocStoreSegment(); } totDocCount += clone.NumDocs(); } if (infoStream != null) { Message("merge: total " + totDocCount + " docs"); } merge.CheckAborted(directory); // If deletions have arrived and it has now become // necessary to merge doc stores, go and open them: if (mergeDocStores && !merge.mergeDocStores) { merge.mergeDocStores = true; lock (this) { String key = docWriter.GetDocStoreSegment(); if (key!=null && dss.Contains(key)) { if (infoStream != null) Message("now flush at mergeMiddle"); DoFlush(true, false); } } for (int i = 0; i < numSegments; i++) { merge.readersClone[i].OpenDocStores(); } // Clear DSS lock (this) { merge.info.SetDocStore(- 1, null, false); } } // This is where all the work happens: mergedDocCount = merge.info.docCount = merger.Merge(merge.mergeDocStores); System.Diagnostics.Debug.Assert(mergedDocCount == totDocCount); // TODO: in the non-realtime case, we may want to only // keep deletes (it's costly to open entire reader // when we just need deletes) SegmentReader mergedReader = readerPool.Get(merge.info, false, BufferedIndexInput.BUFFER_SIZE, - 1); try { if (poolReaders && mergedSegmentWarmer != null) { mergedSegmentWarmer.Warm(mergedReader); } if (!CommitMerge(merge, merger, mergedDocCount, mergedReader)) // commitMerge will return false if this merge was aborted return 0; } finally { lock (this) { readerPool.Release(mergedReader); } } success = true; } finally { lock (this) { if (!success) { // Suppress any new exceptions so we throw the // original cause for (int i = 0; i < numSegments; i++) { if (merge.readers[i] != null) { try { readerPool.Release(merge.readers[i], true); } catch (System.Exception t) { } } if (merge.readersClone[i] != null) { try { merge.readersClone[i].Close(); } catch (System.Exception t) { } // This was a private clone and we had the only reference System.Diagnostics.Debug.Assert(merge.readersClone[i].GetRefCount() == 0); } } } else { for (int i = 0; i < numSegments; i++) { if (merge.readers[i] != null) { readerPool.Release(merge.readers[i], true); } if (merge.readersClone[i] != null) { merge.readersClone[i].Close(); // This was a private clone and we had the only reference System.Diagnostics.Debug.Assert(merge.readersClone[i].GetRefCount() == 0); } } } } } // Must checkpoint before decrefing so any newly // referenced files in the new merge.info are incref'd // first: lock (this) { deleter.Checkpoint(segmentInfos, false); } DecrefMergeSegments(merge); if (merge.useCompoundFile) { // Maybe force a sync here to allow reclaiming of the // disk space used by the segments we just merged: if (autoCommit && DoCommitBeforeMergeCFS(merge)) { long size; lock (this) { size = merge.info.SizeInBytes(); } Commit(size); } success = false; System.String compoundFileName = mergedName + "." + IndexFileNames.COMPOUND_FILE_EXTENSION; try { merger.CreateCompoundFile(compoundFileName); success = true; } catch (System.IO.IOException ioe) { lock (this) { if (merge.IsAborted()) { // This can happen if rollback or close(false) // is called -- fall through to logic below to // remove the partially created CFS: success = true; } else HandleMergeException(ioe, merge); } } catch (System.Exception t) { HandleMergeException(t, merge); } finally { if (!success) { if (infoStream != null) Message("hit exception creating compound file during merge"); lock (this) { deleter.DeleteFile(compoundFileName); } } } if (merge.IsAborted()) { if (infoStream != null) Message("abort merge after building CFS"); deleter.DeleteFile(compoundFileName); return 0; } lock (this) { if (segmentInfos.IndexOf(merge.info) == - 1 || merge.IsAborted()) { // Our segment (committed in non-compound // format) got merged away while we were // building the compound format. deleter.DeleteFile(compoundFileName); } else { merge.info.SetUseCompoundFile(true); Checkpoint(); } } } // Force a sync after commiting the merge. Once this // sync completes then all index files referenced by the // current segmentInfos are on stable storage so if the // OS/machine crashes, or power cord is yanked, the // index will be intact. Note that this is just one // (somewhat arbitrary) policy; we could try other // policies like only sync if it's been > X minutes or // more than Y bytes have been written, etc. if (autoCommit) { long size; lock (this) { size = merge.info.SizeInBytes(); } Commit(size); } return mergedDocCount; }
/// <summary> /// Does the actual (time-consuming) work of the merge, /// but without holding synchronized lock on IndexWriter /// instance /// </summary> private int MergeMiddle(MergePolicy.OneMerge merge) { merge.CheckAborted(directory); string mergedName = merge.Info_Renamed.Info.Name; IList<SegmentCommitInfo> sourceSegments = merge.Segments; IOContext context = new IOContext(merge.MergeInfo); MergeState.CheckAbort checkAbort = new MergeState.CheckAbort(merge, directory); TrackingDirectoryWrapper dirWrapper = new TrackingDirectoryWrapper(directory); if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "merging " + SegString(merge.Segments)); } merge.Readers = new List<SegmentReader>(); // this is try/finally to make sure merger's readers are // closed: bool success = false; try { int segUpto = 0; while (segUpto < sourceSegments.Count) { SegmentCommitInfo info = sourceSegments[segUpto]; // Hold onto the "live" reader; we will use this to // commit merged deletes ReadersAndUpdates rld = readerPool.Get(info, true); // Carefully pull the most recent live docs and reader SegmentReader reader; Bits liveDocs; int delCount; lock (this) { // Must sync to ensure BufferedDeletesStream cannot change liveDocs, // pendingDeleteCount and field updates while we pull a copy: reader = rld.GetReaderForMerge(context); liveDocs = rld.ReadOnlyLiveDocs; delCount = rld.PendingDeleteCount + info.DelCount; Debug.Assert(reader != null); Debug.Assert(rld.VerifyDocCounts()); if (infoStream.IsEnabled("IW")) { if (rld.PendingDeleteCount != 0) { infoStream.Message("IW", "seg=" + SegString(info) + " delCount=" + info.DelCount + " pendingDelCount=" + rld.PendingDeleteCount); } else if (info.DelCount != 0) { infoStream.Message("IW", "seg=" + SegString(info) + " delCount=" + info.DelCount); } else { infoStream.Message("IW", "seg=" + SegString(info) + " no deletes"); } } } // Deletes might have happened after we pulled the merge reader and // before we got a read-only copy of the segment's actual live docs // (taking pending deletes into account). In that case we need to // make a new reader with updated live docs and del count. if (reader.NumDeletedDocs() != delCount) { // fix the reader's live docs and del count Debug.Assert(delCount > reader.NumDeletedDocs()); // beware of zombies SegmentReader newReader = new SegmentReader(info, reader, liveDocs, info.Info.DocCount - delCount); bool released = false; try { rld.Release(reader); released = true; } finally { if (!released) { newReader.DecRef(); } } reader = newReader; } merge.Readers.Add(reader); Debug.Assert(delCount <= info.Info.DocCount, "delCount=" + delCount + " info.docCount=" + info.Info.DocCount + " rld.pendingDeleteCount=" + rld.PendingDeleteCount + " info.getDelCount()=" + info.DelCount); segUpto++; } // System.out.println("[" + Thread.currentThread().getName() + "] IW.mergeMiddle: merging " + merge.getMergeReaders()); // we pass merge.getMergeReaders() instead of merge.readers to allow the // OneMerge to return a view over the actual segments to merge SegmentMerger merger = new SegmentMerger(merge.MergeReaders, merge.Info_Renamed.Info, infoStream, dirWrapper, Config_Renamed.TermIndexInterval, checkAbort, GlobalFieldNumberMap, context, Config_Renamed.CheckIntegrityAtMerge); merge.CheckAborted(directory); // this is where all the work happens: MergeState mergeState; bool success3 = false; try { if (!merger.ShouldMerge()) { // would result in a 0 document segment: nothing to merge! mergeState = new MergeState(new List<AtomicReader>(), merge.Info_Renamed.Info, infoStream, checkAbort); } else { mergeState = merger.Merge(); } success3 = true; } finally { if (!success3) { lock (this) { Deleter.Refresh(merge.Info_Renamed.Info.Name); } } } Debug.Assert(mergeState.SegmentInfo == merge.Info_Renamed.Info); merge.Info_Renamed.Info.Files = new HashSet<string>(dirWrapper.CreatedFiles); // Record which codec was used to write the segment if (infoStream.IsEnabled("IW")) { if (merge.Info_Renamed.Info.DocCount == 0) { infoStream.Message("IW", "merge away fully deleted segments"); } else { infoStream.Message("IW", "merge codec=" + Codec + " docCount=" + merge.Info_Renamed.Info.DocCount + "; merged segment has " + (mergeState.FieldInfos.HasVectors() ? "vectors" : "no vectors") + "; " + (mergeState.FieldInfos.HasNorms() ? "norms" : "no norms") + "; " + (mergeState.FieldInfos.HasDocValues() ? "docValues" : "no docValues") + "; " + (mergeState.FieldInfos.HasProx() ? "prox" : "no prox") + "; " + (mergeState.FieldInfos.HasProx() ? "freqs" : "no freqs")); } } // Very important to do this before opening the reader // because codec must know if prox was written for // this segment: //System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + merge.info.name); bool useCompoundFile; lock (this) // Guard segmentInfos { useCompoundFile = mergePolicy.UseCompoundFile(segmentInfos, merge.Info_Renamed); } if (useCompoundFile) { success = false; ICollection<string> filesToRemove = merge.Info_Renamed.Files(); try { filesToRemove = CreateCompoundFile(infoStream, directory, checkAbort, merge.Info_Renamed.Info, context); success = true; } catch (System.IO.IOException ioe) { lock (this) { if (merge.Aborted) { // this can happen if rollback or close(false) // is called -- fall through to logic below to // remove the partially created CFS: } else { HandleMergeException(ioe, merge); } } } catch (Exception t) { HandleMergeException(t, merge); } finally { if (!success) { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "hit exception creating compound file during merge"); } lock (this) { Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_EXTENSION)); Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); Deleter.DeleteNewFiles(merge.Info_Renamed.Files()); } } } // So that, if we hit exc in deleteNewFiles (next) // or in commitMerge (later), we close the // per-segment readers in the finally clause below: success = false; lock (this) { // delete new non cfs files directly: they were never // registered with IFD Deleter.DeleteNewFiles(filesToRemove); if (merge.Aborted) { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "abort merge after building CFS"); } Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_EXTENSION)); Deleter.DeleteFile(Lucene.Net.Index.IndexFileNames.SegmentFileName(mergedName, "", Lucene.Net.Index.IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); return 0; } } merge.Info_Renamed.Info.UseCompoundFile = true; } else { // So that, if we hit exc in commitMerge (later), // we close the per-segment readers in the finally // clause below: success = false; } // Have codec write SegmentInfo. Must do this after // creating CFS so that 1) .si isn't slurped into CFS, // and 2) .si reflects useCompoundFile=true change // above: bool success2 = false; try { Codec.SegmentInfoFormat().SegmentInfoWriter.Write(directory, merge.Info_Renamed.Info, mergeState.FieldInfos, context); success2 = true; } finally { if (!success2) { lock (this) { Deleter.DeleteNewFiles(merge.Info_Renamed.Files()); } } } // TODO: ideally we would freeze merge.info here!! // because any changes after writing the .si will be // lost... if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", string.Format(CultureInfo.InvariantCulture, "merged segment size=%.3f MB vs estimate=%.3f MB", merge.Info_Renamed.SizeInBytes() / 1024.0 / 1024.0, merge.EstimatedMergeBytes / 1024 / 1024.0)); } IndexReaderWarmer mergedSegmentWarmer = Config_Renamed.MergedSegmentWarmer; if (PoolReaders && mergedSegmentWarmer != null && merge.Info_Renamed.Info.DocCount != 0) { ReadersAndUpdates rld = readerPool.Get(merge.Info_Renamed, true); SegmentReader sr = rld.GetReader(IOContext.READ); try { mergedSegmentWarmer.Warm(sr); } finally { lock (this) { rld.Release(sr); readerPool.Release(rld); } } } // Force READ context because we merge deletes onto // this reader: if (!CommitMerge(merge, mergeState)) { // commitMerge will return false if this merge was // aborted return 0; } success = true; } finally { // Readers are already closed in commitMerge if we didn't hit // an exc: if (!success) { CloseMergeReaders(merge, true); } } return merge.Info_Renamed.Info.DocCount; }
/// <summary> /// Merges the provided indexes into this index. /// /// <p> /// The provided IndexReaders are not closed. /// /// <p> /// See <seealso cref="#addIndexes"/> for details on transactional semantics, temporary /// free space required in the Directory, and non-CFS segments on an Exception. /// /// <p> /// <b>NOTE</b>: if this method hits an OutOfMemoryError you should immediately /// close the writer. See <a href="#OOME">above</a> for details. /// /// <p> /// <b>NOTE:</b> empty segments are dropped by this method and not added to this /// index. /// /// <p> /// <b>NOTE:</b> this method merges all given <seealso cref="IndexReader"/>s in one /// merge. If you intend to merge a large number of readers, it may be better /// to call this method multiple times, each time with a small set of readers. /// In principle, if you use a merge policy with a {@code mergeFactor} or /// {@code maxMergeAtOnce} parameter, you should pass that many readers in one /// call. Also, if the given readers are <seealso cref="DirectoryReader"/>s, they can be /// opened with {@code termIndexInterval=-1} to save RAM, since during merge /// the in-memory structure is not used. See /// <seealso cref="DirectoryReader#open(Directory, int)"/>. /// /// <p> /// <b>NOTE</b>: if you call <seealso cref="#close(boolean)"/> with <tt>false</tt>, which /// aborts all running merges, then any thread still running this method might /// hit a <seealso cref="MergePolicy.MergeAbortedException"/>. /// </summary> /// <exception cref="CorruptIndexException"> /// if the index is corrupt </exception> /// <exception cref="IOException"> /// if there is a low-level IO error </exception> public virtual void AddIndexes(params IndexReader[] readers) { EnsureOpen(); int numDocs = 0; try { if (infoStream.IsEnabled("IW")) { infoStream.Message("IW", "flush at addIndexes(IndexReader...)"); } Flush(false, true); string mergedName = NewSegmentName(); IList<AtomicReader> mergeReaders = new List<AtomicReader>(); foreach (IndexReader indexReader in readers) { numDocs += indexReader.NumDocs(); foreach (AtomicReaderContext ctx in indexReader.Leaves()) { mergeReaders.Add(ctx.AtomicReader); } } IOContext context = new IOContext(new MergeInfo(numDocs, -1, true, -1)); // TODO: somehow we should fix this merge so it's // abortable so that IW.close(false) is able to stop it TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory); SegmentInfo info = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergedName, -1, false, Codec, null); SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir, Config_Renamed.TermIndexInterval, MergeState.CheckAbort.NONE, GlobalFieldNumberMap, context, Config_Renamed.CheckIntegrityAtMerge); if (!merger.ShouldMerge()) { return; } MergeState mergeState; bool success = false; try { mergeState = merger.Merge(); // merge 'em success = true; } finally { if (!success) { lock (this) { Deleter.Refresh(info.Name); } } } SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, 0, -1L, -1L); info.Files = new HashSet<string>(trackingDir.CreatedFiles); trackingDir.CreatedFiles.Clear(); SetDiagnostics(info, SOURCE_ADDINDEXES_READERS); bool useCompoundFile; lock (this) // Guard segmentInfos { if (StopMerges) { Deleter.DeleteNewFiles(infoPerCommit.Files()); return; } EnsureOpen(); useCompoundFile = mergePolicy.UseCompoundFile(segmentInfos, infoPerCommit); } // Now create the compound file if needed if (useCompoundFile) { ICollection<string> filesToDelete = infoPerCommit.Files(); try { CreateCompoundFile(infoStream, directory, MergeState.CheckAbort.NONE, info, context); } finally { // delete new non cfs files directly: they were never // registered with IFD lock (this) { Deleter.DeleteNewFiles(filesToDelete); } } info.UseCompoundFile = true; } // Have codec write SegmentInfo. Must do this after // creating CFS so that 1) .si isn't slurped into CFS, // and 2) .si reflects useCompoundFile=true change // above: success = false; try { Codec.SegmentInfoFormat().SegmentInfoWriter.Write(trackingDir, info, mergeState.FieldInfos, context); success = true; } finally { if (!success) { lock (this) { Deleter.Refresh(info.Name); } } } info.AddFiles(trackingDir.CreatedFiles); // Register the new segment lock (this) { if (StopMerges) { Deleter.DeleteNewFiles(info.Files); return; } EnsureOpen(); segmentInfos.Add(infoPerCommit); Checkpoint(); } } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "addIndexes(IndexReader...)"); } }
/// <summary>Does the actual (time-consuming) work of the merge, /// but without holding synchronized lock on IndexWriter /// instance /// </summary> private int MergeMiddle(MergePolicy.OneMerge merge) { merge.CheckAborted(directory); System.String mergedName = merge.info.name; SegmentMerger merger = null; int mergedDocCount = 0; SegmentInfos sourceSegments = merge.segments; int numSegments = sourceSegments.Count; if (infoStream != null) Message("merging " + merge.SegString(directory)); merger = new SegmentMerger(this, mergedName, merge); merge.readers = new SegmentReader[numSegments]; merge.readersClone = new SegmentReader[numSegments]; bool mergeDocStores = false; System.Collections.Hashtable dss = new System.Collections.Hashtable(); String currentDocStoreSegment; lock(this) { currentDocStoreSegment = docWriter.GetDocStoreSegment(); } bool currentDSSMerged = false; // This is try/finally to make sure merger's readers are // closed: bool success = false; try { int totDocCount = 0; for (int i = 0; i < numSegments; i++) { SegmentInfo info = sourceSegments.Info(i); // Hold onto the "live" reader; we will use this to // commit merged deletes SegmentReader reader = merge.readers[i] = readerPool.Get(info, merge.mergeDocStores, MERGE_READ_BUFFER_SIZE, -1); // We clone the segment readers because other // deletes may come in while we're merging so we // need readers that will not change SegmentReader clone = merge.readersClone[i] = (SegmentReader)reader.Clone(true); merger.Add(clone); if (clone.HasDeletions()) { mergeDocStores = true; } if (info.GetDocStoreOffset() != -1 && currentDocStoreSegment != null) { currentDSSMerged |= currentDocStoreSegment.Equals(info.GetDocStoreSegment()); } totDocCount += clone.NumDocs(); } if (infoStream != null) { Message("merge: total " + totDocCount + " docs"); } merge.CheckAborted(directory); // If deletions have arrived and it has now become // necessary to merge doc stores, go and open them: if (mergeDocStores && !merge.mergeDocStores) { merge.mergeDocStores = true; lock (this) { if (currentDSSMerged) { if (infoStream != null) { Message("now flush at mergeMiddle"); } DoFlush(true, false); } } for (int i = 0; i < numSegments; i++) { merge.readersClone[i].OpenDocStores(); } // Clear DSS merge.info.SetDocStore(-1, null, false); } // This is where all the work happens: mergedDocCount = merge.info.docCount = merger.Merge(merge.mergeDocStores); System.Diagnostics.Debug.Assert(mergedDocCount == totDocCount); if (merge.useCompoundFile) { success = false; string compoundFileName = IndexFileNames.SegmentFileName(mergedName, IndexFileNames.COMPOUND_FILE_EXTENSION); try { if (infoStream != null) { Message("create compound file " + compoundFileName); } merger.CreateCompoundFile(compoundFileName); success = true; } catch (System.IO.IOException ioe) { lock (this) { if (merge.IsAborted()) { // This can happen if rollback or close(false) // is called -- fall through to logic below to // remove the partially created CFS: } else { HandleMergeException(ioe, merge); } } } catch (Exception t) { HandleMergeException(t, merge); } finally { if (!success) { if (infoStream != null) { Message("hit exception creating compound file during merge"); } lock (this) { deleter.DeleteFile(compoundFileName); deleter.DeleteNewFiles(merger.GetMergedFiles()); } } } success = false; lock (this) { // delete new non cfs files directly: they were never // registered with IFD deleter.DeleteNewFiles(merger.GetMergedFiles()); if (merge.IsAborted()) { if (infoStream != null) { Message("abort merge after building CFS"); } deleter.DeleteFile(compoundFileName); return 0; } } merge.info.SetUseCompoundFile(true); } int termsIndexDivisor; bool loadDocStores; // if the merged segment warmer was not installed when // this merge was started, causing us to not force // the docStores to close, we can't warm it now bool canWarm = merge.info.GetDocStoreSegment() == null || currentDocStoreSegment == null || !merge.info.GetDocStoreSegment().Equals(currentDocStoreSegment); if (poolReaders && mergedSegmentWarmer != null && canWarm) { // Load terms index & doc stores so the segment // warmer can run searches, load documents/term // vectors termsIndexDivisor = readerTermsIndexDivisor; loadDocStores = true; } else { termsIndexDivisor = -1; loadDocStores = false; } // TODO: in the non-realtime case, we may want to only // keep deletes (it's costly to open entire reader // when we just need deletes) SegmentReader mergedReader = readerPool.Get(merge.info, loadDocStores, BufferedIndexInput.BUFFER_SIZE, termsIndexDivisor); try { if (poolReaders && mergedSegmentWarmer != null) { mergedSegmentWarmer.Warm(mergedReader); } if (!CommitMerge(merge, merger, mergedDocCount, mergedReader)) { // commitMerge will return false if this merge was aborted return 0; } } finally { lock (this) { readerPool.Release(mergedReader); } } success = true; } finally { // Readers are already closed in commitMerge if we didn't hit // an exc: if (!success) { CloseMergeReaders(merge, true); } } merge.mergeDone = true; lock (mergeScheduler) { System.Threading.Monitor.PulseAll(mergeScheduler); } // Force a sync after commiting the merge. Once this // sync completes then all index files referenced by the // current segmentInfos are on stable storage so if the // OS/machine crashes, or power cord is yanked, the // index will be intact. Note that this is just one // (somewhat arbitrary) policy; we could try other // policies like only sync if it's been > X minutes or // more than Y bytes have been written, etc. if (autoCommit) { long size; lock (this) { size = merge.info.SizeInBytes(); } Commit(size); } return mergedDocCount; }
/// <summary>Merges the provided indexes into this index. /// <p/>After this completes, the index is optimized. <p/> /// <p/>The provided IndexReaders are not closed.<p/> /// /// <p/><b>NOTE:</b> while this is running, any attempts to /// add or delete documents (with another thread) will be /// paused until this method completes. /// /// <p/>See {@link #AddIndexesNoOptimize(Directory[])} for /// details on transactional semantics, temporary free /// space required in the Directory, and non-CFS segments /// on an Exception.<p/> /// /// <p/><b>NOTE</b>: if this method hits an OutOfMemoryError /// you should immediately close the writer. See <a /// href="#OOME">above</a> for details.<p/> /// /// </summary> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> public virtual void AddIndexes(IndexReader[] readers) { EnsureOpen(); // Do not allow add docs or deletes while we are running: docWriter.PauseAllThreads(); // We must pre-acquire a read lock here (and upgrade to // write lock in startTransaction below) so that no // other addIndexes is allowed to start up after we have // flushed & optimized but before we then start our // transaction. This is because the merging below // requires that only one segment is present in the // index: AcquireRead(); try { SegmentInfo info = null; System.String mergedName = null; SegmentMerger merger = null; bool success = false; try { Flush(true, false, true); Optimize(); // start with zero or 1 seg success = true; } finally { // Take care to release the read lock if we hit an // exception before starting the transaction if (!success) ReleaseRead(); } // true means we already have a read lock; if this // call hits an exception it will release the write // lock: StartTransaction(true); try { mergedName = NewSegmentName(); merger = new SegmentMerger(this, mergedName, null); SegmentReader sReader = null; lock (this) { if (segmentInfos.Count == 1) { // add existing index, if any sReader = readerPool.Get(segmentInfos.Info(0), true, BufferedIndexInput.BUFFER_SIZE, - 1); } } success = false; try { if (sReader != null) merger.Add(sReader); for (int i = 0; i < readers.Length; i++) // add new indexes merger.Add(readers[i]); int docCount = merger.Merge(); // merge 'em lock (this) { segmentInfos.Clear(); // pop old infos & add new info = new SegmentInfo(mergedName, docCount, directory, false, true, - 1, null, false, merger.HasProx()); SetDiagnostics(info, "addIndexes(IndexReader[])"); segmentInfos.Add(info); } // Notify DocumentsWriter that the flushed count just increased docWriter.UpdateFlushedDocCount(docCount); success = true; } finally { if (sReader != null) { readerPool.Release(sReader); } } } finally { if (!success) { if (infoStream != null) Message("hit exception in addIndexes during merge"); RollbackTransaction(); } else { CommitTransaction(); } } if (mergePolicy is LogMergePolicy && GetUseCompoundFile()) { System.Collections.Generic.IList<string> files = null; lock (this) { // Must incRef our files so that if another thread // is running merge/optimize, it doesn't delete our // segment's files before we have a change to // finish making the compound file. if (segmentInfos.Contains(info)) { files = info.Files(); deleter.IncRef(files); } } if (files != null) { success = false; StartTransaction(false); try { merger.CreateCompoundFile(mergedName + ".cfs"); lock (this) { info.SetUseCompoundFile(true); } success = true; } finally { lock (this) { deleter.DecRef(files); } if (!success) { if (infoStream != null) Message("hit exception building compound file in addIndexes during merge"); RollbackTransaction(); } else { CommitTransaction(); } } } } } catch (System.OutOfMemoryException oom) { HandleOOM(oom, "addIndexes(IndexReader[])"); } finally { if (docWriter != null) { docWriter.ResumeAllThreads(); } } }
public virtual void TestMerge() { Codec codec = Codec.Default; SegmentInfo si = new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, -1, false, codec, null); SegmentMerger merger = new SegmentMerger(Arrays.AsList<AtomicReader>(Reader1, Reader2), si, InfoStream.Default, MergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), NewIOContext(Random()), true); MergeState mergeState = merger.Merge(); int docsMerged = mergeState.SegmentInfo.DocCount; Assert.IsTrue(docsMerged == 2); //Should be able to open a new SegmentReader against the new directory SegmentReader mergedReader = new SegmentReader(new SegmentCommitInfo(new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, docsMerged, false, codec, null), 0, -1L, -1L), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random())); Assert.IsTrue(mergedReader != null); Assert.IsTrue(mergedReader.NumDocs == 2); Document newDoc1 = mergedReader.Document(0); Assert.IsTrue(newDoc1 != null); //There are 2 unstored fields on the document Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(Doc1) - DocHelper.Unstored.Count); Document newDoc2 = mergedReader.Document(1); Assert.IsTrue(newDoc2 != null); Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(Doc2) - DocHelper.Unstored.Count); DocsEnum termDocs = TestUtil.Docs(Random(), mergedReader, DocHelper.TEXT_FIELD_2_KEY, new BytesRef("field"), MultiFields.GetLiveDocs(mergedReader), null, 0); Assert.IsTrue(termDocs != null); Assert.IsTrue(termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); int tvCount = 0; foreach (FieldInfo fieldInfo in mergedReader.FieldInfos) { if (fieldInfo.HasVectors()) { tvCount++; } } //System.out.println("stored size: " + stored.Size()); Assert.AreEqual(3, tvCount, "We do not have 3 fields that were indexed with term vector"); Terms vector = mergedReader.GetTermVectors(0).Terms(DocHelper.TEXT_FIELD_2_KEY); Assert.IsNotNull(vector); Assert.AreEqual(3, vector.Size()); TermsEnum termsEnum = vector.Iterator(null); int i = 0; while (termsEnum.Next() != null) { string term = termsEnum.Term().Utf8ToString(); int freq = (int)termsEnum.TotalTermFreq(); //System.out.println("Term: " + term + " Freq: " + freq); Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term) != -1); Assert.IsTrue(DocHelper.FIELD_2_FREQS[i] == freq); i++; } TestSegmentReader.CheckNorms(mergedReader); mergedReader.Dispose(); }
/// <summary>Merges the provided indexes into this index. /// <p>After this completes, the index is optimized. </p> /// <p>The provided IndexReaders are not closed.</p> /// /// <p><b>NOTE:</b> the index in each Directory must not be /// changed (opened by a writer) while this method is /// running. Thiw method does not acquire a write lock in /// each input Directory, so it is up to the caller to /// enforce this. /// </p> /// /// <p><b>NOTE:</b> while this is running, any attempts to /// add or delete documents (with another thread) will be /// paused until this method completes.</p> /// /// <p>See {@link #AddIndexes(Directory[])} for /// details on transactional semantics, temporary free /// space required in the Directory, and non-CFS segments /// on an Exception.</p> /// </summary> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> public virtual void AddIndexes(IndexReader[] readers) { EnsureOpen(); // Do not allow add docs or deletes while we are running: docWriter.PauseAllThreads(); try { Optimize(); // start with zero or 1 seg System.String mergedName = NewSegmentName(); SegmentMerger merger = new SegmentMerger(this, mergedName, null); SegmentInfo info; IndexReader sReader = null; try { lock (this) { if (segmentInfos.Count == 1) { // add existing index, if any sReader = SegmentReader.Get(segmentInfos.Info(0)); merger.Add(sReader); } } for (int i = 0; i < readers.Length; i++) // add new indexes merger.Add(readers[i]); bool success = false; StartTransaction(); try { int docCount = merger.Merge(); // merge 'em if (sReader != null) { sReader.Close(); sReader = null; } lock (this) { segmentInfos.RemoveRange(0, segmentInfos.Count); // pop old infos & add new info = new SegmentInfo(mergedName, docCount, directory, false, true, -1, null, false); segmentInfos.Add(info); } success = true; } finally { if (!success) { if (infoStream != null) Message("hit exception in addIndexes during merge"); RollbackTransaction(); } else { CommitTransaction(); } } } finally { if (sReader != null) { sReader.Close(); } } if (mergePolicy is LogMergePolicy && GetUseCompoundFile()) { bool success = false; StartTransaction(); try { merger.CreateCompoundFile(mergedName + ".cfs"); lock (this) { info.SetUseCompoundFile(true); } } finally { if (!success) { if (infoStream != null) Message("hit exception building compound file in addIndexes during merge"); RollbackTransaction(); } else { CommitTransaction(); } } } } catch (OutOfMemoryException oom) { hitOOM = true; throw oom; } finally { docWriter.ResumeAllThreads(); } }
/// <summary>Merges the provided indexes into this index. /// <p>After this completes, the index is optimized. </p> /// <p>The provided IndexReaders are not closed.</p> /// </summary> public virtual void AddIndexes(IndexReader[] readers) { lock (this) { Optimize(); // start with zero or 1 seg System.String mergedName = NewSegmentName(); SegmentMerger merger = new SegmentMerger(this, mergedName); System.Collections.ArrayList segmentsToDelete = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10)); IndexReader sReader = null; if (segmentInfos.Count == 1) { // add existing index, if any sReader = SegmentReader.Get(segmentInfos.Info(0)); merger.Add(sReader); segmentsToDelete.Add(sReader); // queue segment for deletion } for (int i = 0; i < readers.Length; i++) // add new indexes merger.Add(readers[i]); int docCount = merger.Merge(); // merge 'em segmentInfos.RemoveRange(0, segmentInfos.Count - 0); // pop old infos & add new segmentInfos.Add(new SegmentInfo(mergedName, docCount, directory)); if (sReader != null) sReader.Close(); lock (directory) { // in- & inter-process sync new AnonymousClassWith1(segmentsToDelete, this, directory.MakeLock(COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT).Run(); } if (useCompoundFile) { System.Collections.ArrayList filesToDelete = merger.CreateCompoundFile(mergedName + ".tmp"); lock (directory) { // in- & inter-process sync new AnonymousClassWith2(mergedName, filesToDelete, this, directory.MakeLock(COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT).Run(); } } } }
/** Does the actual (time-consuming) work of the merge, * but without holding synchronized lock on IndexWriter * instance */ private int MergeMiddle(MergePolicy.OneMerge merge) { merge.CheckAborted(directory); string mergedName = merge.info.name; SegmentMerger merger = null; int mergedDocCount = 0; SegmentInfos sourceSegments = merge.segments; SegmentInfos sourceSegmentsClone = merge.segmentsClone; int numSegments = sourceSegments.Count; if (infoStream != null) Message("merging " + merge.SegString(directory)); merger = new SegmentMerger(this, mergedName, merge); bool success = false; // This is try/finally to make sure merger's readers are // closed: try { int totDocCount = 0; for (int i = 0; i < numSegments; i++) { SegmentInfo si = sourceSegmentsClone.Info(i); IndexReader reader = SegmentReader.Get(true, si, MERGE_READ_BUFFER_SIZE, merge.mergeDocStores); // no need to set deleter (yet) merger.Add(reader); totDocCount += reader.NumDocs(); } if (infoStream != null) { Message("merge: total " + totDocCount + " docs"); } merge.CheckAborted(directory); // This is where all the work happens: mergedDocCount = merge.info.docCount = merger.Merge(merge.mergeDocStores); System.Diagnostics.Debug.Assert(mergedDocCount == totDocCount); success = true; } finally { // close readers before we attempt to delete // now-obsolete segments if (merger != null) { merger.CloseReaders(); } } if (!CommitMerge(merge, merger, mergedDocCount)) // commitMerge will return false if this merge was aborted return 0; if (merge.useCompoundFile) { // Maybe force a sync here to allow reclaiming of the // disk space used by the segments we just merged: if (autoCommit && DoCommitBeforeMergeCFS(merge)) { long size; lock (this) { size = merge.info.SizeInBytes(); } Commit(size); } success = false; string compoundFileName = mergedName + "." + IndexFileNames.COMPOUND_FILE_EXTENSION; try { merger.CreateCompoundFile(compoundFileName); success = true; } catch (System.IO.IOException ioe) { lock (this) { if (merge.IsAborted()) { // This can happen if rollback or close(false) // is called -- fall through to logic below to // remove the partially created CFS: success = true; } else HandleMergeException(ioe, merge); } } catch (System.Exception t) { HandleMergeException(t, merge); } finally { if (!success) { if (infoStream != null) Message("hit exception creating compound file during merge"); lock (this) { deleter.DeleteFile(compoundFileName); } } } if (merge.IsAborted()) { if (infoStream != null) Message("abort merge after building CFS"); deleter.DeleteFile(compoundFileName); return 0; } lock (this) { if (segmentInfos.IndexOf(merge.info) == -1 || merge.IsAborted()) { // Our segment (committed in non-compound // format) got merged away while we were // building the compound format. deleter.DeleteFile(compoundFileName); } else { merge.info.SetUseCompoundFile(true); Checkpoint(); } } } // Force a sync after commiting the merge. Once this // sync completes then all index files referenced by the // current segmentInfos are on stable storage so if the // OS/machine crashes, or power cord is yanked, the // index will be intact. Note that this is just one // (somewhat arbitrary) policy; we could try other // policies like only sync if it's been > X minutes or // more than Y bytes have been written, etc. if (autoCommit) { long size; lock (this) { size = merge.info.SizeInBytes(); } Commit(size); } return mergedDocCount; }
private SegmentInfo Merge(SegmentInfo si1, SegmentInfo si2, System.String merged, bool useCompoundFile) { Directory directory = FSDirectory.GetDirectory(indexDir, false); SegmentReader r1 = SegmentReader.Get(si1); SegmentReader r2 = SegmentReader.Get(si2); SegmentMerger merger = new SegmentMerger(directory, merged); merger.Add(r1); merger.Add(r2); merger.Merge(); merger.CloseReaders(); if (useCompoundFile) { System.Collections.ArrayList filesToDelete = merger.CreateCompoundFile(merged + ".cfs"); for (System.Collections.IEnumerator iter = filesToDelete.GetEnumerator(); iter.MoveNext(); ) { directory.DeleteFile((System.String) iter.Current); } } directory.Close(); return new SegmentInfo(merged, si1.docCount + si2.docCount, directory, useCompoundFile, true); }
private SegmentCommitInfo Merge(Directory dir, SegmentCommitInfo si1, SegmentCommitInfo si2, string merged, bool useCompoundFile) { IOContext context = NewIOContext(Random()); SegmentReader r1 = new SegmentReader(si1, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context); SegmentReader r2 = new SegmentReader(si2, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context); Codec codec = Codec.Default; TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.Info.Dir); SegmentInfo si = new SegmentInfo(si1.Info.Dir, Constants.LUCENE_MAIN_VERSION, merged, -1, false, codec, null); SegmentMerger merger = new SegmentMerger(Arrays.AsList<AtomicReader>(r1, r2), si, InfoStream.Default, trackingDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), context, true); MergeState mergeState = merger.Merge(); r1.Dispose(); r2.Dispose(); SegmentInfo info = new SegmentInfo(si1.Info.Dir, Constants.LUCENE_MAIN_VERSION, merged, si1.Info.DocCount + si2.Info.DocCount, false, codec, null); info.Files = new HashSet<string>(trackingDir.CreatedFiles); if (useCompoundFile) { ICollection<string> filesToDelete = IndexWriter.CreateCompoundFile(InfoStream.Default, dir, MergeState.CheckAbort.NONE, info, NewIOContext(Random())); info.UseCompoundFile = true; foreach (String fileToDelete in filesToDelete) { si1.Info.Dir.DeleteFile(fileToDelete); } } return new SegmentCommitInfo(info, 0, -1L, -1L); }
public virtual void TestMerge() { Codec codec = Codec.Default; SegmentInfo si = new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, -1, false, codec, null); SegmentMerger merger = new SegmentMerger(Arrays.AsList <AtomicReader>(Reader1, Reader2), si, InfoStream.Default, MergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, CheckAbort.NONE, new FieldInfos.FieldNumbers(), NewIOContext(Random()), true); MergeState mergeState = merger.Merge(); int docsMerged = mergeState.SegmentInfo.DocCount; Assert.IsTrue(docsMerged == 2); //Should be able to open a new SegmentReader against the new directory SegmentReader mergedReader = new SegmentReader(new SegmentCommitInfo(new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, docsMerged, false, codec, null), 0, -1L, -1L), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random())); Assert.IsTrue(mergedReader != null); Assert.IsTrue(mergedReader.NumDocs == 2); Document newDoc1 = mergedReader.Document(0); Assert.IsTrue(newDoc1 != null); //There are 2 unstored fields on the document Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(Doc1) - DocHelper.Unstored.Count); Document newDoc2 = mergedReader.Document(1); Assert.IsTrue(newDoc2 != null); Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(Doc2) - DocHelper.Unstored.Count); DocsEnum termDocs = TestUtil.Docs(Random(), mergedReader, DocHelper.TEXT_FIELD_2_KEY, new BytesRef("field"), MultiFields.GetLiveDocs(mergedReader), null, 0); Assert.IsTrue(termDocs != null); Assert.IsTrue(termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); int tvCount = 0; foreach (FieldInfo fieldInfo in mergedReader.FieldInfos) { if (fieldInfo.HasVectors) { tvCount++; } } //System.out.println("stored size: " + stored.Size()); Assert.AreEqual(3, tvCount, "We do not have 3 fields that were indexed with term vector"); Terms vector = mergedReader.GetTermVectors(0).GetTerms(DocHelper.TEXT_FIELD_2_KEY); Assert.IsNotNull(vector); Assert.AreEqual(3, vector.Count); TermsEnum termsEnum = vector.GetIterator(null); int i = 0; while (termsEnum.Next() != null) { string term = termsEnum.Term.Utf8ToString(); int freq = (int)termsEnum.TotalTermFreq; //System.out.println("Term: " + term + " Freq: " + freq); Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term) != -1); Assert.IsTrue(DocHelper.FIELD_2_FREQS[i] == freq); i++; } TestSegmentReader.CheckNorms(mergedReader); mergedReader.Dispose(); }
/// <summary>Does the actual (time-consuming) work of the merge, /// but without holding synchronized lock on IndexWriter /// instance /// </summary> private int MergeMiddle(MergePolicy.OneMerge merge) { merge.CheckAborted(directory); System.String mergedName = merge.info.name; SegmentMerger merger = null; int mergedDocCount = 0; SegmentInfos sourceSegments = merge.segments; SegmentInfos sourceSegmentsClone = merge.segmentsClone; int numSegments = sourceSegments.Count; if (infoStream != null) Message("merging " + merge.SegString(directory)); merger = new SegmentMerger(this, mergedName, merge); // This is try/finally to make sure merger's readers are // closed: bool success = false; try { int totDocCount = 0; for (int i = 0; i < numSegments; i++) { SegmentInfo si = sourceSegmentsClone.Info(i); IndexReader reader = SegmentReader.Get(si, MERGE_READ_BUFFER_SIZE, merge.mergeDocStores); // no need to set deleter (yet) merger.Add(reader); totDocCount += reader.NumDocs(); } if (infoStream != null) { Message("merge: total " + totDocCount + " docs"); } merge.CheckAborted(directory); mergedDocCount = merge.info.docCount = merger.Merge(merge.mergeDocStores); System.Diagnostics.Debug.Assert(mergedDocCount == totDocCount); success = true; } finally { // close readers before we attempt to delete // now-obsolete segments if (merger != null) { merger.CloseReaders(); } if (!success) { if (infoStream != null) Message("hit exception during merge; now refresh deleter on segment " + mergedName); lock (this) { AddMergeException(merge); deleter.Refresh(mergedName); } } } if (!CommitMerge(merge)) // commitMerge will return false if this merge was aborted return 0; if (merge.useCompoundFile) { success = false; bool skip = false; System.String compoundFileName = mergedName + "." + IndexFileNames.COMPOUND_FILE_EXTENSION; try { try { merger.CreateCompoundFile(compoundFileName); success = true; } catch (System.IO.IOException ioe) { lock (this) { if (segmentInfos.IndexOf(merge.info) == - 1) { // If another merge kicked in and merged our // new segment away while we were trying to // build the compound file, we can hit a // FileNotFoundException and possibly // IOException over NFS. We can tell this has // happened because our SegmentInfo is no // longer in the segments; if this has // happened it is safe to ignore the exception // & skip finishing/committing our compound // file creating. if (infoStream != null) Message("hit exception creating compound file; ignoring it because our info (segment " + merge.info.name + ") has been merged away"); skip = true; } else throw ioe; } } } finally { if (!success) { if (infoStream != null) Message("hit exception creating compound file during merge: skip=" + skip); lock (this) { if (!skip) AddMergeException(merge); deleter.DeleteFile(compoundFileName); } } } if (!skip) { lock (this) { if (skip || segmentInfos.IndexOf(merge.info) == - 1 || merge.IsAborted()) { // Our segment (committed in non-compound // format) got merged away while we were // building the compound format. deleter.DeleteFile(compoundFileName); } else { success = false; try { merge.info.SetUseCompoundFile(true); Checkpoint(); success = true; } finally { if (!success) { if (infoStream != null) Message("hit exception checkpointing compound file during merge"); // Must rollback: AddMergeException(merge); merge.info.SetUseCompoundFile(false); DeletePartialSegmentsFile(); deleter.DeleteFile(compoundFileName); } } // Give deleter a chance to remove files now. deleter.Checkpoint(segmentInfos, autoCommit); } } } } return mergedDocCount; }
/// <summary>Merges the named range of segments, replacing them in the stack with a /// single segment. /// </summary> private void MergeSegments(int minSegment, int end) { System.String mergedName = NewSegmentName(); if (infoStream != null) infoStream.Write("merging segments"); SegmentMerger merger = new SegmentMerger(this, mergedName); System.Collections.ArrayList segmentsToDelete = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10)); for (int i = minSegment; i < end; i++) { SegmentInfo si = segmentInfos.Info(i); if (infoStream != null) infoStream.Write(" " + si.name + " (" + si.docCount + " docs)"); IndexReader reader = SegmentReader.Get(si); merger.Add(reader); if ((reader.Directory() == this.directory) || (reader.Directory() == this.ramDirectory)) segmentsToDelete.Add(reader); // queue segment for deletion } int mergedDocCount = merger.Merge(); if (infoStream != null) { infoStream.WriteLine(" into " + mergedName + " (" + mergedDocCount + " docs)"); } for (int i = end - 1; i >= minSegment; i--) // remove old infos & add new segmentInfos.RemoveAt(i); segmentInfos.Add(new SegmentInfo(mergedName, mergedDocCount, directory)); // close readers before we attempt to delete now-obsolete segments merger.CloseReaders(); lock (directory) { // in- & inter-process sync new AnonymousClassWith3(segmentsToDelete, this, directory.MakeLock(COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT).Run(); } if (useCompoundFile) { System.Collections.ArrayList filesToDelete = merger.CreateCompoundFile(mergedName + ".tmp"); lock (directory) { // in- & inter-process sync new AnonymousClassWith4(mergedName, filesToDelete, this, directory.MakeLock(COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT).Run(); } } }
public virtual void TestMerge() { SegmentMerger merger = new SegmentMerger(mergedDir, mergedSegment); merger.Add(reader1); merger.Add(reader2); int docsMerged = merger.Merge(); merger.CloseReaders(); Assert.IsTrue(docsMerged == 2); //Should be able to open a new SegmentReader against the new directory SegmentReader mergedReader = SegmentReader.Get(new SegmentInfo(mergedSegment, docsMerged, mergedDir, false, true)); Assert.IsTrue(mergedReader != null); Assert.IsTrue(mergedReader.NumDocs() == 2); Lucene.Net.Documents.Document newDoc1 = mergedReader.Document(0); Assert.IsTrue(newDoc1 != null); //There are 2 unstored fields on the document Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(doc1) - DocHelper.unstored.Count); Lucene.Net.Documents.Document newDoc2 = mergedReader.Document(1); Assert.IsTrue(newDoc2 != null); Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(doc2) - DocHelper.unstored.Count); TermDocs termDocs = mergedReader.TermDocs(new Term(DocHelper.TEXT_FIELD_2_KEY, "field")); Assert.IsTrue(termDocs != null); Assert.IsTrue(termDocs.Next() == true); System.Collections.ICollection stored = mergedReader.GetFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR); Assert.IsTrue(stored != null); //System.out.println("stored size: " + stored.size()); Assert.IsTrue(stored.Count == 4, "We do not have 4 fields that were indexed with term vector"); TermFreqVector vector = mergedReader.GetTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY); Assert.IsTrue(vector != null); System.String[] terms = vector.GetTerms(); Assert.IsTrue(terms != null); //System.out.println("Terms size: " + terms.length); Assert.IsTrue(terms.Length == 3); int[] freqs = vector.GetTermFrequencies(); Assert.IsTrue(freqs != null); //System.out.println("Freqs size: " + freqs.length); Assert.IsTrue(vector is TermPositionVector == true); for (int i = 0; i < terms.Length; i++) { System.String term = terms[i]; int freq = freqs[i]; //System.out.println("Term: " + term + " Freq: " + freq); Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term) != - 1); Assert.IsTrue(DocHelper.FIELD_2_FREQS[i] == freq); } TestSegmentReader.CheckNorms(mergedReader); }
/// <summary> Merges the provided indexes into this index. /// <p> /// After this completes, the index is optimized. /// </p> /// <p> /// The provided IndexReaders are not closed. /// </p> /// /// <p> /// See {@link #AddIndexes(Directory[])} for details on transactional /// semantics, temporary free space required in the Directory, and non-CFS /// segments on an Exception. /// </p> /// </summary> public virtual void AddIndexes(IndexReader[] readers) { lock (this) { Optimize(); // start with zero or 1 seg System.String mergedName = NewSegmentName(); SegmentMerger merger = new SegmentMerger(this, mergedName); System.Collections.ArrayList segmentsToDelete = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10)); IndexReader sReader = null; if (segmentInfos.Count == 1) { // add existing index, if any sReader = SegmentReader.Get(segmentInfos.Info(0)); merger.Add(sReader); segmentsToDelete.Add(sReader); // queue segment for // deletion } for (int i = 0; i < readers.Length; i++) // add new indexes merger.Add(readers[i]); SegmentInfo info; System.String segmentsInfosFileName = segmentInfos.GetCurrentSegmentFileName(); bool success = false; StartTransaction(); try { int docCount = merger.Merge(); // merge 'em segmentInfos.RemoveRange(0, segmentInfos.Count); // pop old infos & add new info = new SegmentInfo(mergedName, docCount, directory, false, true); segmentInfos.Add(info); commitPending = true; if (sReader != null) sReader.Close(); success = true; } finally { if (!success) { RollbackTransaction(); } else { CommitTransaction(); } } deleter.DeleteFile(segmentsInfosFileName); // delete old segments_N // file deleter.DeleteSegments(segmentsToDelete); // delete now-unused // segments if (useCompoundFile) { success = false; segmentsInfosFileName = segmentInfos.GetCurrentSegmentFileName(); System.Collections.ArrayList filesToDelete; StartTransaction(); try { filesToDelete = merger.CreateCompoundFile(mergedName + ".cfs"); info.SetUseCompoundFile(true); commitPending = true; success = true; } finally { if (!success) { RollbackTransaction(); } else { CommitTransaction(); } } deleter.DeleteFile(segmentsInfosFileName); // delete old segments_N // file deleter.DeleteFiles(filesToDelete); // delete now unused files of // segment } } }