public virtual FieldInfos CreateAndWriteFieldInfos(Directory dir, string filename)
{
    // Positive test of FieldInfos
    Assert.IsTrue(TestDoc != null);
    FieldInfos.Builder builder = new FieldInfos.Builder();
    foreach (IIndexableField field in TestDoc)
    {
        builder.AddOrUpdate(field.Name, field.IndexableFieldType);
    }
    FieldInfos fieldInfos = builder.Finish();
    // Since the complement is stored as well in the fields map
    Assert.IsTrue(fieldInfos.Count == DocHelper.All.Count);
    // this is all b/c we are using the no-arg constructor
    IndexOutput output = dir.CreateOutput(filename, NewIOContext(Random));
    Assert.IsTrue(output != null);
    // Use a RAMOutputStream
    FieldInfosWriter writer = Codec.Default.FieldInfosFormat.FieldInfosWriter;
    writer.Write(dir, filename, "", fieldInfos, IOContext.DEFAULT);
    output.Dispose();
    return fieldInfos;
}
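// A hedged counterpart to the writer call above: read the field infos back from the
// directory through the codec's FieldInfosReader. "ReadFieldInfos" is a hypothetical
// helper name, not part of the library; the empty segment suffix matches the
// Write(...) call in CreateAndWriteFieldInfos.
public static FieldInfos ReadFieldInfos(Directory dir, string filename)
{
    FieldInfosReader reader = Codec.Default.FieldInfosFormat.FieldInfosReader;
    return reader.Read(dir, filename, "", IOContext.DEFAULT);
}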
/// <summary>
/// Call this to get the (merged) <see cref="FieldInfos"/> for a
/// composite reader.
/// <para/>
/// NOTE: the returned field numbers will likely not
/// correspond to the actual field numbers in the underlying
/// readers, and codec metadata (<see cref="FieldInfo.GetAttribute(string)"/>)
/// will be unavailable.
/// </summary>
public static FieldInfos GetMergedFieldInfos(IndexReader reader)
{
    var builder = new FieldInfos.Builder();
    foreach (AtomicReaderContext ctx in reader.Leaves)
    {
        builder.Add(ctx.AtomicReader.FieldInfos);
    }
    return builder.Finish();
}
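// The helper above comes from MultiFields, so from user code the merged view can be
// obtained as MultiFields.GetMergedFieldInfos(reader). A minimal sketch, assuming
// "dir" holds an existing index:
public static void PrintMergedFieldInfos(Lucene.Net.Store.Directory dir)
{
    using (DirectoryReader reader = DirectoryReader.Open(dir))
    {
        FieldInfos fieldInfos = MultiFields.GetMergedFieldInfos(reader);
        foreach (FieldInfo fi in fieldInfos)
        {
            Console.WriteLine(fi.Name + " (vectors: " + fi.HasVectors + ")");
        }
    }
}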
public virtual void TestRandomPostings()
{
    FieldInfos.Builder builder = new FieldInfos.Builder();

    FieldData[] fields = new FieldData[NUM_FIELDS];
    for (int i = 0; i < NUM_FIELDS; i++)
    {
        bool omitTF = 0 == (i % 3);
        bool storePayloads = 1 == (i % 3);
        fields[i] = new FieldData(this, fieldNames[i], builder, this.MakeRandomTerms(omitTF, storePayloads), omitTF, storePayloads);
    }

    // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
    using (Directory dir = NewDirectory())
    {
        FieldInfos fieldInfos = builder.Finish();

        if (Verbose)
        {
            Console.WriteLine("TEST: now write postings");
        }

        this.Write(fieldInfos, dir, fields, false);

        Codec codec = Codec.Default;
        SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);

        if (Verbose)
        {
            Console.WriteLine("TEST: now read postings");
        }

        // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
        using (FieldsProducer terms = codec.PostingsFormat.FieldsProducer(new SegmentReadState(dir, si, fieldInfos, NewIOContext(Random), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR)))
        {
            Verify[] threads = new Verify[NUM_TEST_THREADS - 1];
            for (int i = 0; i < NUM_TEST_THREADS - 1; i++)
            {
                threads[i] = new Verify(this, si, fields, terms);
                threads[i].IsBackground = true;
                threads[i].Start();
            }

            (new Verify(this, si, fields, terms)).Run();

            for (int i = 0; i < NUM_TEST_THREADS - 1; i++)
            {
                threads[i].Join();
                if (Debugging.AssertsEnabled) Debugging.Assert(!threads[i].failed);
            }
        }
    }
}
public void MergeFieldInfos()
{
    foreach (AtomicReader reader in mergeState.Readers)
    {
        FieldInfos readerFieldInfos = reader.FieldInfos;
        foreach (FieldInfo fi in readerFieldInfos)
        {
            fieldInfosBuilder.Add(fi);
        }
    }
    mergeState.FieldInfos = fieldInfosBuilder.Finish();
}
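// A small, hedged companion to the merge loop above: it prints the per-segment
// FieldInfos that such a merge consumes. "reader" is an assumed open DirectoryReader;
// field numbers may differ between segments, which is why the builder re-maps them
// into one merged FieldInfos.
public static void DumpPerSegmentFieldInfos(DirectoryReader reader)
{
    foreach (AtomicReaderContext ctx in reader.Leaves)
    {
        AtomicReader leaf = ctx.AtomicReader;
        foreach (FieldInfo fi in leaf.FieldInfos)
        {
            Console.WriteLine("leaf docBase=" + ctx.DocBase + ": field #" + fi.Number + " = " + fi.Name);
        }
    }
}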
public virtual void TestRandomPostings()
{
    FieldInfos.Builder builder = new FieldInfos.Builder();

    FieldData[] fields = new FieldData[NUM_FIELDS];
    for (int i = 0; i < NUM_FIELDS; i++)
    {
        bool omitTF = 0 == (i % 3);
        bool storePayloads = 1 == (i % 3);
        fields[i] = new FieldData(this, FieldNames[i], builder, this.MakeRandomTerms(omitTF, storePayloads), omitTF, storePayloads);
    }

    Directory dir = NewDirectory();
    FieldInfos fieldInfos = builder.Finish();

    if (VERBOSE)
    {
        Console.WriteLine("TEST: now write postings");
    }

    this.Write(fieldInfos, dir, fields, false);

    Codec codec = Codec.Default;
    SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: now read postings");
    }

    FieldsProducer terms = codec.PostingsFormat().FieldsProducer(new SegmentReadState(dir, si, fieldInfos, NewIOContext(Random()), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR));

    Verify[] threads = new Verify[NUM_TEST_THREADS - 1];
    for (int i = 0; i < NUM_TEST_THREADS - 1; i++)
    {
        threads[i] = new Verify(this, si, fields, terms);
        threads[i].SetDaemon(true);
        threads[i].Start();
    }

    (new Verify(this, si, fields, terms)).Run();

    for (int i = 0; i < NUM_TEST_THREADS - 1; i++)
    {
        threads[i].Join();
        Debug.Assert(!threads[i].Failed);
    }

    terms.Dispose();
    dir.Dispose();
}
internal virtual FlushedSegment Flush()
{
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(numDocsInRAM > 0);
        Debugging.Assert(deleteSlice.IsEmpty, "all deletes must be applied in prepareFlush");
    }
    segmentInfo.DocCount = numDocsInRAM;
    SegmentWriteState flushState = new SegmentWriteState(infoStream, directory, segmentInfo, fieldInfos.Finish(), indexWriterConfig.TermIndexInterval, pendingUpdates, new IOContext(new FlushInfo(numDocsInRAM, BytesUsed)));
    double startMBUsed = BytesUsed / 1024.0 / 1024.0;

    // Apply delete-by-docID now (delete-by-docID only
    // happens when an exception is hit processing that
    // doc, eg if analyzer has some problem w/ the text):
    if (pendingUpdates.docIDs.Count > 0)
    {
        flushState.LiveDocs = codec.LiveDocsFormat.NewLiveDocs(numDocsInRAM);
        foreach (int delDocID in pendingUpdates.docIDs)
        {
            flushState.LiveDocs.Clear(delDocID);
        }
        flushState.DelCountOnFlush = pendingUpdates.docIDs.Count;
        pendingUpdates.bytesUsed.AddAndGet(-pendingUpdates.docIDs.Count * BufferedUpdates.BYTES_PER_DEL_DOCID);
        pendingUpdates.docIDs.Clear();
    }

    if (aborting)
    {
        if (infoStream.IsEnabled("DWPT"))
        {
            infoStream.Message("DWPT", "flush: skip because aborting is set");
        }
        return null;
    }

    if (infoStream.IsEnabled("DWPT"))
    {
        infoStream.Message("DWPT", "flush postings as segment " + flushState.SegmentInfo.Name + " numDocs=" + numDocsInRAM);
    }

    bool success = false;
    try
    {
        consumer.Flush(flushState);
        pendingUpdates.terms.Clear();
        segmentInfo.SetFiles(new JCG.HashSet<string>(directory.CreatedFiles));

        SegmentCommitInfo segmentInfoPerCommit = new SegmentCommitInfo(segmentInfo, 0, -1L, -1L);
        if (infoStream.IsEnabled("DWPT"))
        {
            infoStream.Message("DWPT", "new segment has " + (flushState.LiveDocs == null ? 0 : (flushState.SegmentInfo.DocCount - flushState.DelCountOnFlush)) + " deleted docs");
            infoStream.Message("DWPT", "new segment has " + (flushState.FieldInfos.HasVectors ? "vectors" : "no vectors") + "; " + (flushState.FieldInfos.HasNorms ? "norms" : "no norms") + "; " + (flushState.FieldInfos.HasDocValues ? "docValues" : "no docValues") + "; " + (flushState.FieldInfos.HasProx ? "prox" : "no prox") + "; " + (flushState.FieldInfos.HasFreq ? "freqs" : "no freqs"));
            infoStream.Message("DWPT", "flushedFiles=" + string.Format(J2N.Text.StringFormatter.InvariantCulture, "{0}", segmentInfoPerCommit.GetFiles()));
            infoStream.Message("DWPT", "flushed codec=" + codec);
        }

        BufferedUpdates segmentDeletes;
        if (pendingUpdates.queries.Count == 0 && pendingUpdates.numericUpdates.Count == 0 && pendingUpdates.binaryUpdates.Count == 0)
        {
            pendingUpdates.Clear();
            segmentDeletes = null;
        }
        else
        {
            segmentDeletes = pendingUpdates;
        }

        if (infoStream.IsEnabled("DWPT"))
        {
            double newSegmentSize = segmentInfoPerCommit.GetSizeInBytes() / 1024.0 / 1024.0;
            infoStream.Message("DWPT", "flushed: segment=" + segmentInfo.Name + " ramUsed=" + startMBUsed.ToString(nf) + " MB" + " newFlushedSize(includes docstores)=" + newSegmentSize.ToString(nf) + " MB" + " docs/MB=" + (flushState.SegmentInfo.DocCount / newSegmentSize).ToString(nf));
        }

        if (Debugging.AssertsEnabled) Debugging.Assert(segmentInfo != null);

        FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.FieldInfos, segmentDeletes, flushState.LiveDocs, flushState.DelCountOnFlush);
        SealFlushedSegment(fs);
        success = true;
        return fs;
    }
    finally
    {
        if (!success)
        {
            Abort(filesToDelete);
        }
    }
}
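// The flush above is DocumentsWriterPerThread-internal. A minimal, hedged sketch of
// how user code triggers it indirectly: with MaxBufferedDocs set, buffered documents
// are flushed into a new segment every N added docs. The analyzer, RAMDirectory and
// LuceneVersion.LUCENE_48 constant are assumptions, not taken from the code above.
public static void FlushEveryTenDocs()
{
    using (Lucene.Net.Store.Directory dir = new RAMDirectory())
    {
        var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48))
        {
            MaxBufferedDocs = 10 // flush a new segment every 10 buffered documents
        };
        using (var writer = new IndexWriter(dir, config))
        {
            for (int i = 0; i < 25; i++)
            {
                var doc = new Document();
                doc.Add(new StringField("id", i.ToString(), Field.Store.YES));
                writer.AddDocument(doc);
            }
            writer.Commit(); // makes the flushed segments visible to readers
        }
    }
}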
public virtual void TestFixedPostings()
{
    const int NUM_TERMS = 100;
    TermData[] terms = new TermData[NUM_TERMS];
    for (int i = 0; i < NUM_TERMS; i++)
    {
        int[] docs = new int[] { i };
        string text = Convert.ToString(i);
        terms[i] = new TermData(this, text, docs, null);
    }

    FieldInfos.Builder builder = new FieldInfos.Builder();

    FieldData field = new FieldData(this, "field", builder, terms, true, false);
    FieldData[] fields = new FieldData[] { field };
    FieldInfos fieldInfos = builder.Finish();

    // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
    using (Directory dir = NewDirectory())
    {
        this.Write(fieldInfos, dir, fields, true);
        Codec codec = Codec.Default;
        SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);

        // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
        using (FieldsProducer reader = codec.PostingsFormat.FieldsProducer(new SegmentReadState(dir, si, fieldInfos, NewIOContext(Random), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR)))
        {
            IEnumerator<string> fieldsEnum = reader.GetEnumerator();
            fieldsEnum.MoveNext();
            string fieldName = fieldsEnum.Current;
            Assert.IsNotNull(fieldName);
            Terms terms2 = reader.GetTerms(fieldName);
            Assert.IsNotNull(terms2);

            TermsEnum termsEnum = terms2.GetIterator(null);

            DocsEnum docsEnum = null;
            for (int i = 0; i < NUM_TERMS; i++)
            {
                BytesRef term = termsEnum.Next();
                Assert.IsNotNull(term);
                Assert.AreEqual(terms[i].text2, term.Utf8ToString());

                // do this twice to stress test the codec's reuse, ie,
                // make sure it properly fully resets (rewinds) its
                // internal state:
                for (int iter = 0; iter < 2; iter++)
                {
                    docsEnum = TestUtil.Docs(Random, termsEnum, null, docsEnum, DocsFlags.NONE);
                    Assert.AreEqual(terms[i].docs[0], docsEnum.NextDoc());
                    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc());
                }
            }
            Assert.IsNull(termsEnum.Next());

            for (int i = 0; i < NUM_TERMS; i++)
            {
                Assert.AreEqual(termsEnum.SeekCeil(new BytesRef(terms[i].text2)), TermsEnum.SeekStatus.FOUND);
            }

            Assert.IsFalse(fieldsEnum.MoveNext());
        }
    }
}
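// The test above drives the codec layer directly. A hedged public-API sketch of the
// same idea: enumerate the terms and postings of a field through an IndexReader.
// The reader and the field name passed in are assumptions, not taken from the test.
public static void DumpPostings(IndexReader reader, string fieldName)
{
    Terms fieldTerms = MultiFields.GetTerms(reader, fieldName);
    if (fieldTerms == null)
    {
        return; // field not indexed
    }
    TermsEnum termsEnum = fieldTerms.GetIterator(null);
    DocsEnum docsEnum = null;
    BytesRef term;
    while ((term = termsEnum.Next()) != null)
    {
        // Reuse the DocsEnum between terms, mirroring the reuse stress-test above.
        docsEnum = termsEnum.Docs(MultiFields.GetLiveDocs(reader), docsEnum, DocsFlags.NONE);
        int doc;
        while ((doc = docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
        {
            Console.WriteLine(term.Utf8ToString() + " -> doc " + doc);
        }
    }
}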
/// <summary>
/// Expert: create a <see cref="ParallelAtomicReader"/> based on the provided
/// <paramref name="readers"/> and <paramref name="storedFieldsReaders"/>; when a document is
/// loaded, only <paramref name="storedFieldsReaders"/> will be used.
/// </summary>
public ParallelAtomicReader(bool closeSubReaders, AtomicReader[] readers, AtomicReader[] storedFieldsReaders)
{
    this.closeSubReaders = closeSubReaders;
    if (readers.Length == 0 && storedFieldsReaders.Length > 0)
    {
        throw new ArgumentException("There must be at least one main reader if storedFieldsReaders are used.");
    }
    this.parallelReaders = (AtomicReader[])readers.Clone();
    this.storedFieldsReaders = (AtomicReader[])storedFieldsReaders.Clone();
    if (parallelReaders.Length > 0)
    {
        AtomicReader first = parallelReaders[0];
        this.maxDoc = first.MaxDoc;
        this.numDocs = first.NumDocs;
        this.hasDeletions = first.HasDeletions;
    }
    else
    {
        this.maxDoc = this.numDocs = 0;
        this.hasDeletions = false;
    }
    completeReaderSet.UnionWith(this.parallelReaders);
    completeReaderSet.UnionWith(this.storedFieldsReaders);

    // check compatibility:
    foreach (AtomicReader reader in completeReaderSet)
    {
        if (reader.MaxDoc != maxDoc)
        {
            throw new ArgumentException("All readers must have same MaxDoc: " + maxDoc + "!=" + reader.MaxDoc);
        }
    }

    // TODO: make this read-only in a cleaner way?
    FieldInfos.Builder builder = new FieldInfos.Builder();
    // build FieldInfos and fieldToReader map:
    foreach (AtomicReader reader in this.parallelReaders)
    {
        FieldInfos readerFieldInfos = reader.FieldInfos;
        foreach (FieldInfo fieldInfo in readerFieldInfos)
        {
            // NOTE: first reader having a given field "wins":
            if (!fieldToReader.ContainsKey(fieldInfo.Name))
            {
                builder.Add(fieldInfo);
                fieldToReader[fieldInfo.Name] = reader;
                if (fieldInfo.HasVectors)
                {
                    tvFieldToReader[fieldInfo.Name] = reader;
                }
            }
        }
    }
    fieldInfos = builder.Finish();

    // build Fields instance
    foreach (AtomicReader reader in this.parallelReaders)
    {
        Fields readerFields = reader.Fields;
        if (readerFields != null)
        {
            foreach (string field in readerFields)
            {
                // only add if the reader responsible for that field name is the current:
                if (fieldToReader[field].Equals(reader))
                {
                    this.fields.AddField(field, readerFields.GetTerms(field));
                }
            }
        }
    }

    // do this finally so any Exceptions occurred before don't affect refcounts:
    foreach (AtomicReader reader in completeReaderSet)
    {
        if (!closeSubReaders)
        {
            reader.IncRef();
        }
        reader.RegisterParentReader(this);
    }
}
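// A hedged usage sketch for the constructor above: combine two indexes that were
// built with the same documents in the same order, so their doc IDs line up.
// "dirA" and "dirB" are assumed directories; SlowCompositeReaderWrapper turns the
// composite readers into AtomicReaders.
public static void CombineParallelIndexes(Lucene.Net.Store.Directory dirA, Lucene.Net.Store.Directory dirB)
{
    using (DirectoryReader readerA = DirectoryReader.Open(dirA))
    using (DirectoryReader readerB = DirectoryReader.Open(dirB))
    {
        AtomicReader a = SlowCompositeReaderWrapper.Wrap(readerA);
        AtomicReader b = SlowCompositeReaderWrapper.Wrap(readerB);

        // closeSubReaders: false, so the outer using blocks stay responsible for closing.
        // For duplicate field names the first reader "wins", per the NOTE in the ctor above.
        using (var parallel = new ParallelAtomicReader(false, a, b))
        {
            Console.WriteLine("merged field count: " + parallel.FieldInfos.Count);
        }
    }
}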
public virtual void WriteFieldUpdates(Directory dir, DocValuesFieldUpdates.Container dvUpdates)
{
    UninterruptableMonitor.Enter(this);
    try
    {
        if (Debugging.AssertsEnabled) Debugging.Assert(UninterruptableMonitor.IsEntered(writer));
        //System.out.println("rld.writeFieldUpdates: seg=" + info + " numericFieldUpdates=" + numericFieldUpdates);

        if (Debugging.AssertsEnabled) Debugging.Assert(dvUpdates.Any());

        // Do this so we can delete any created files on
        // exception; this saves all codecs from having to do
        // it:
        TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);

        FieldInfos fieldInfos = null;
        bool success = false;
        try
        {
            Codec codec = Info.Info.Codec;

            // reader could be null e.g. for a just merged segment (from
            // IndexWriter.commitMergedDeletes).
            SegmentReader reader = this.reader ?? new SegmentReader(Info, writer.Config.ReaderTermsIndexDivisor, IOContext.READ_ONCE);
            try
            {
                // clone FieldInfos so that we can update their dvGen separately from
                // the reader's infos and write them to a new fieldInfos_gen file
                FieldInfos.Builder builder = new FieldInfos.Builder(writer.globalFieldNumberMap);
                // cannot use builder.add(reader.getFieldInfos()) because it does not
                // clone FI.attributes as well as FI.dvGen
                foreach (FieldInfo fi in reader.FieldInfos)
                {
                    FieldInfo clone = builder.Add(fi);
                    // copy the stuff FieldInfos.Builder doesn't copy
                    if (fi.Attributes != null)
                    {
                        foreach (KeyValuePair<string, string> e in fi.Attributes)
                        {
                            clone.PutAttribute(e.Key, e.Value);
                        }
                    }
                    clone.DocValuesGen = fi.DocValuesGen;
                }
                // create new fields or update existing ones to have NumericDV type
                foreach (string f in dvUpdates.numericDVUpdates.Keys)
                {
                    builder.AddOrUpdate(f, NumericDocValuesField.TYPE);
                }
                // create new fields or update existing ones to have BinaryDV type
                foreach (string f in dvUpdates.binaryDVUpdates.Keys)
                {
                    builder.AddOrUpdate(f, BinaryDocValuesField.TYPE);
                }

                fieldInfos = builder.Finish();
                long nextFieldInfosGen = Info.NextFieldInfosGen;
                // LUCENENET specific: We created the segments names wrong in 4.8.0-beta00001 - 4.8.0-beta00015,
                // so we added a switch to be able to read these indexes in later versions. This logic as well as an
                // optimization on the first 100 segment values is implemented in SegmentInfos.SegmentNumberToString().
                string segmentSuffix = SegmentInfos.SegmentNumberToString(nextFieldInfosGen);
                SegmentWriteState state = new SegmentWriteState(null, trackingDir, Info.Info, fieldInfos, writer.Config.TermIndexInterval, null, IOContext.DEFAULT, segmentSuffix);
                DocValuesFormat docValuesFormat = codec.DocValuesFormat;
                DocValuesConsumer fieldsConsumer = docValuesFormat.FieldsConsumer(state);
                bool fieldsConsumerSuccess = false;
                try
                {
                    // System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeFieldUpdates: applying numeric updates; seg=" + info + " updates=" + numericFieldUpdates);
                    foreach (KeyValuePair<string, NumericDocValuesFieldUpdates> e in dvUpdates.numericDVUpdates)
                    {
                        string field = e.Key;
                        NumericDocValuesFieldUpdates fieldUpdates = e.Value;

                        FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                        if (Debugging.AssertsEnabled) Debugging.Assert(fieldInfo != null);

                        fieldInfo.DocValuesGen = nextFieldInfosGen;
                        // write the numeric updates to a new gen'd docvalues file
                        fieldsConsumer.AddNumericField(fieldInfo, GetInt64Enumerable(reader, field, fieldUpdates));
                    }

                    // System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " updates=" + dvUpdates.binaryDVUpdates);
                    foreach (KeyValuePair<string, BinaryDocValuesFieldUpdates> e in dvUpdates.binaryDVUpdates)
                    {
                        string field = e.Key;
                        BinaryDocValuesFieldUpdates dvFieldUpdates = e.Value;

                        FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                        if (Debugging.AssertsEnabled) Debugging.Assert(fieldInfo != null);

                        // System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " f=" + dvFieldUpdates + ", updates=" + dvFieldUpdates);

                        fieldInfo.DocValuesGen = nextFieldInfosGen;
                        // write the binary updates to a new gen'd docvalues file
                        fieldsConsumer.AddBinaryField(fieldInfo, GetBytesRefEnumerable(reader, field, dvFieldUpdates));
                    }

                    codec.FieldInfosFormat.FieldInfosWriter.Write(trackingDir, Info.Info.Name, segmentSuffix, fieldInfos, IOContext.DEFAULT);
                    fieldsConsumerSuccess = true;
                }
                finally
                {
                    if (fieldsConsumerSuccess)
                    {
                        fieldsConsumer.Dispose();
                    }
                    else
                    {
                        IOUtils.DisposeWhileHandlingException(fieldsConsumer);
                    }
                }
            }
            finally
            {
                if (reader != this.reader)
                {
                    // System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: closeReader " + reader);
                    reader.Dispose();
                }
            }

            success = true;
        }
        finally
        {
            if (!success)
            {
                // Advance only the nextWriteDocValuesGen so that a 2nd
                // attempt to write will write to a new file
                Info.AdvanceNextWriteFieldInfosGen();

                // Delete any partially created file(s):
                foreach (string fileName in trackingDir.CreatedFiles)
                {
                    try
                    {
                        dir.DeleteFile(fileName);
                    }
                    catch (Exception t) when (t.IsThrowable())
                    {
                        // Ignore so we throw only the first exc
                    }
                }
            }
        }

        Info.AdvanceFieldInfosGen();
        // copy all the updates to mergingUpdates, so they can later be applied to the merged segment
        if (isMerging)
        {
            foreach (KeyValuePair<string, NumericDocValuesFieldUpdates> e in dvUpdates.numericDVUpdates)
            {
                if (!mergingDVUpdates.TryGetValue(e.Key, out DocValuesFieldUpdates updates))
                {
                    mergingDVUpdates[e.Key] = e.Value;
                }
                else
                {
                    updates.Merge(e.Value);
                }
            }
            foreach (KeyValuePair<string, BinaryDocValuesFieldUpdates> e in dvUpdates.binaryDVUpdates)
            {
                if (!mergingDVUpdates.TryGetValue(e.Key, out DocValuesFieldUpdates updates))
                {
                    mergingDVUpdates[e.Key] = e.Value;
                }
                else
                {
                    updates.Merge(e.Value);
                }
            }
        }

        // create a new map, keeping only the gens that are in use
        IDictionary<long, ISet<string>> genUpdatesFiles = Info.UpdatesFiles;
        IDictionary<long, ISet<string>> newGenUpdatesFiles = new Dictionary<long, ISet<string>>();
        long fieldInfosGen = Info.FieldInfosGen;
        foreach (FieldInfo fi in fieldInfos)
        {
            long dvGen = fi.DocValuesGen;
            if (dvGen != -1 && !newGenUpdatesFiles.ContainsKey(dvGen))
            {
                if (dvGen == fieldInfosGen)
                {
                    newGenUpdatesFiles[fieldInfosGen] = trackingDir.CreatedFiles;
                }
                else
                {
                    newGenUpdatesFiles[dvGen] = genUpdatesFiles[dvGen];
                }
            }
        }

        Info.SetGenUpdatesFiles(newGenUpdatesFiles);

        // wrote new files, should checkpoint()
        writer.Checkpoint();

        // if there is a reader open, reopen it to reflect the updates
        if (reader != null)
        {
            SegmentReader newReader = new SegmentReader(Info, reader, liveDocs, Info.Info.DocCount - Info.DelCount - pendingDeleteCount);
            bool reopened = false;
            try
            {
                reader.DecRef();
                reader = newReader;
                reopened = true;
            }
            finally
            {
                if (!reopened)
                {
                    newReader.DecRef();
                }
            }
        }
    }
    finally
    {
        UninterruptableMonitor.Exit(this);
    }
}
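// The method above is internal plumbing that persists doc-values updates as new
// _X_N generation files. A hedged sketch of the public entry points that create
// such updates; the directory, config, field and term names are illustrative
// assumptions, not taken from the code above.
public static void ApplyDocValuesUpdates(Lucene.Net.Store.Directory dir, IndexWriterConfig config)
{
    using (var writer = new IndexWriter(dir, config))
    {
        // Update a numeric doc-values field on every document matching the term.
        writer.UpdateNumericDocValue(new Term("id", "42"), "popularity", 100L);

        // Binary doc-values can be updated the same way.
        writer.UpdateBinaryDocValue(new Term("id", "42"), "payload", new BytesRef(new byte[] { 1, 2, 3 }));

        writer.Commit(); // the updates are flushed as new docvalues/fieldinfos generations
    }
}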
// Writes field updates (new _X_N updates files) to the directory
public virtual void WriteFieldUpdates(Directory dir, DocValuesFieldUpdates.Container dvUpdates)
{
    lock (this)
    {
        //Debug.Assert(Thread.holdsLock(Writer));
        //System.out.println("rld.writeFieldUpdates: seg=" + info + " numericFieldUpdates=" + numericFieldUpdates);

        Debug.Assert(dvUpdates.Any());

        // Do this so we can delete any created files on
        // exception; this saves all codecs from having to do
        // it:
        TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);

        FieldInfos fieldInfos = null;
        bool success = false;
        try
        {
            Codec codec = Info.Info.Codec;

            // reader could be null e.g. for a just merged segment (from
            // IndexWriter.commitMergedDeletes).
            SegmentReader reader = this.Reader == null ? new SegmentReader(Info, Writer.Config.ReaderTermsIndexDivisor, IOContext.READONCE) : this.Reader;
            try
            {
                // clone FieldInfos so that we can update their dvGen separately from
                // the reader's infos and write them to a new fieldInfos_gen file
                FieldInfos.Builder builder = new FieldInfos.Builder(Writer.GlobalFieldNumberMap);
                // cannot use builder.add(reader.getFieldInfos()) because it does not
                // clone FI.attributes as well as FI.dvGen
                foreach (FieldInfo fi in reader.FieldInfos)
                {
                    FieldInfo clone = builder.Add(fi);
                    // copy the stuff FieldInfos.Builder doesn't copy
                    if (fi.Attributes() != null)
                    {
                        foreach (KeyValuePair<string, string> e in fi.Attributes())
                        {
                            clone.PutAttribute(e.Key, e.Value);
                        }
                    }
                    clone.DocValuesGen = fi.DocValuesGen;
                }
                // create new fields or update existing ones to have NumericDV type
                foreach (string f in dvUpdates.NumericDVUpdates.Keys)
                {
                    builder.AddOrUpdate(f, NumericDocValuesField.TYPE);
                }
                // create new fields or update existing ones to have BinaryDV type
                foreach (string f in dvUpdates.BinaryDVUpdates.Keys)
                {
                    builder.AddOrUpdate(f, BinaryDocValuesField.fType);
                }

                fieldInfos = builder.Finish();
                long nextFieldInfosGen = Info.NextFieldInfosGen;
                string segmentSuffix = nextFieldInfosGen.ToString(CultureInfo.InvariantCulture); //Convert.ToString(nextFieldInfosGen, Character.MAX_RADIX));
                SegmentWriteState state = new SegmentWriteState(null, trackingDir, Info.Info, fieldInfos, Writer.Config.TermIndexInterval, null, IOContext.DEFAULT, segmentSuffix);
                DocValuesFormat docValuesFormat = codec.DocValuesFormat();
                DocValuesConsumer fieldsConsumer = docValuesFormat.FieldsConsumer(state);
                bool fieldsConsumerSuccess = false;
                try
                {
                    // System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeFieldUpdates: applying numeric updates; seg=" + info + " updates=" + numericFieldUpdates);
                    foreach (KeyValuePair<string, NumericDocValuesFieldUpdates> e in dvUpdates.NumericDVUpdates)
                    {
                        string field = e.Key;
                        NumericDocValuesFieldUpdates fieldUpdates = e.Value;

                        FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                        Debug.Assert(fieldInfo != null);

                        fieldInfo.DocValuesGen = nextFieldInfosGen;
                        // write the numeric updates to a new gen'd docvalues file
                        fieldsConsumer.AddNumericField(fieldInfo, GetLongEnumerable(reader, field, fieldUpdates));
                    }

                    // System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " updates=" + dvUpdates.binaryDVUpdates);
                    foreach (KeyValuePair<string, BinaryDocValuesFieldUpdates> e in dvUpdates.BinaryDVUpdates)
                    {
                        string field = e.Key;
                        BinaryDocValuesFieldUpdates dvFieldUpdates = e.Value;

                        FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                        Debug.Assert(fieldInfo != null);

                        // System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " f=" + dvFieldUpdates + ", updates=" + dvFieldUpdates);

                        fieldInfo.DocValuesGen = nextFieldInfosGen;
                        // write the binary updates to a new gen'd docvalues file
                        fieldsConsumer.AddBinaryField(fieldInfo, GetBytesRefEnumerable(reader, field, dvFieldUpdates));
                    }

                    codec.FieldInfosFormat().FieldInfosWriter.Write(trackingDir, Info.Info.Name, segmentSuffix, fieldInfos, IOContext.DEFAULT);
                    fieldsConsumerSuccess = true;
                }
                finally
                {
                    if (fieldsConsumerSuccess)
                    {
                        fieldsConsumer.Dispose();
                    }
                    else
                    {
                        IOUtils.CloseWhileHandlingException(fieldsConsumer);
                    }
                }
            }
            finally
            {
                if (reader != this.Reader)
                {
                    // System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: closeReader " + reader);
                    reader.Dispose();
                }
            }

            success = true;
        }
        finally
        {
            if (!success)
            {
                // Advance only the nextWriteDocValuesGen so that a 2nd
                // attempt to write will write to a new file
                Info.AdvanceNextWriteFieldInfosGen();

                // Delete any partially created file(s):
                foreach (string fileName in trackingDir.CreatedFiles)
                {
                    try
                    {
                        dir.DeleteFile(fileName);
                    }
                    catch (Exception)
                    {
                        // Ignore so we throw only the first exc
                    }
                }
            }
        }

        Info.AdvanceFieldInfosGen();
        // copy all the updates to mergingUpdates, so they can later be applied to the merged segment
        if (IsMerging)
        {
            foreach (KeyValuePair<string, NumericDocValuesFieldUpdates> e in dvUpdates.NumericDVUpdates)
            {
                DocValuesFieldUpdates updates;
                if (!MergingDVUpdates.TryGetValue(e.Key, out updates))
                {
                    MergingDVUpdates[e.Key] = e.Value;
                }
                else
                {
                    updates.Merge(e.Value);
                }
            }
            foreach (KeyValuePair<string, BinaryDocValuesFieldUpdates> e in dvUpdates.BinaryDVUpdates)
            {
                DocValuesFieldUpdates updates;
                if (!MergingDVUpdates.TryGetValue(e.Key, out updates))
                {
                    MergingDVUpdates[e.Key] = e.Value;
                }
                else
                {
                    updates.Merge(e.Value);
                }
            }
        }

        // create a new map, keeping only the gens that are in use
        IDictionary<long, ISet<string>> genUpdatesFiles = Info.UpdatesFiles;
        IDictionary<long, ISet<string>> newGenUpdatesFiles = new Dictionary<long, ISet<string>>();
        long fieldInfosGen = Info.FieldInfosGen;
        foreach (FieldInfo fi in fieldInfos)
        {
            long dvGen = fi.DocValuesGen;
            if (dvGen != -1 && !newGenUpdatesFiles.ContainsKey(dvGen))
            {
                if (dvGen == fieldInfosGen)
                {
                    newGenUpdatesFiles[fieldInfosGen] = trackingDir.CreatedFiles;
                }
                else
                {
                    newGenUpdatesFiles[dvGen] = genUpdatesFiles[dvGen];
                }
            }
        }

        Info.GenUpdatesFiles = newGenUpdatesFiles;

        // wrote new files, should checkpoint()
        Writer.Checkpoint();

        // if there is a reader open, reopen it to reflect the updates
        if (Reader != null)
        {
            SegmentReader newReader = new SegmentReader(Info, Reader, LiveDocs_Renamed, Info.Info.DocCount - Info.DelCount - PendingDeleteCount_Renamed);
            bool reopened = false;
            try
            {
                Reader.DecRef();
                Reader = newReader;
                reopened = true;
            }
            finally
            {
                if (!reopened)
                {
                    newReader.DecRef();
                }
            }
        }
    }
}
/// <summary>
/// Flush all pending docs to a new segment </summary>
internal virtual FlushedSegment Flush()
{
    Debug.Assert(numDocsInRAM > 0);
    Debug.Assert(DeleteSlice.Empty, "all deletes must be applied in prepareFlush");
    SegmentInfo_Renamed.DocCount = numDocsInRAM;
    SegmentWriteState flushState = new SegmentWriteState(InfoStream, Directory, SegmentInfo_Renamed, FieldInfos.Finish(), IndexWriterConfig.TermIndexInterval, PendingUpdates, new IOContext(new FlushInfo(numDocsInRAM, BytesUsed())));
    double startMBUsed = BytesUsed() / 1024.0 / 1024.0;

    // Apply delete-by-docID now (delete-by-docID only
    // happens when an exception is hit processing that
    // doc, eg if analyzer has some problem w/ the text):
    if (PendingUpdates.DocIDs.Count > 0)
    {
        flushState.LiveDocs = Codec.LiveDocsFormat().NewLiveDocs(numDocsInRAM);
        foreach (int delDocID in PendingUpdates.DocIDs)
        {
            flushState.LiveDocs.Clear(delDocID);
        }
        flushState.DelCountOnFlush = PendingUpdates.DocIDs.Count;
        PendingUpdates.BytesUsed.AddAndGet(-PendingUpdates.DocIDs.Count * BufferedUpdates.BYTES_PER_DEL_DOCID);
        PendingUpdates.DocIDs.Clear();
    }

    if (Aborting)
    {
        if (InfoStream.IsEnabled("DWPT"))
        {
            InfoStream.Message("DWPT", "flush: skip because aborting is set");
        }
        return null;
    }

    if (InfoStream.IsEnabled("DWPT"))
    {
        InfoStream.Message("DWPT", "flush postings as segment " + flushState.SegmentInfo.Name + " numDocs=" + numDocsInRAM);
    }

    bool success = false;
    try
    {
        Consumer.Flush(flushState);
        PendingUpdates.Terms.Clear();
        SegmentInfo_Renamed.Files = new HashSet<string>(Directory.CreatedFiles);

        SegmentCommitInfo segmentInfoPerCommit = new SegmentCommitInfo(SegmentInfo_Renamed, 0, -1L, -1L);
        if (InfoStream.IsEnabled("DWPT"))
        {
            InfoStream.Message("DWPT", "new segment has " + (flushState.LiveDocs == null ? 0 : (flushState.SegmentInfo.DocCount - flushState.DelCountOnFlush)) + " deleted docs");
            InfoStream.Message("DWPT", "new segment has " + (flushState.FieldInfos.HasVectors() ? "vectors" : "no vectors") + "; " + (flushState.FieldInfos.HasNorms() ? "norms" : "no norms") + "; " + (flushState.FieldInfos.HasDocValues() ? "docValues" : "no docValues") + "; " + (flushState.FieldInfos.HasProx() ? "prox" : "no prox") + "; " + (flushState.FieldInfos.HasFreq() ? "freqs" : "no freqs"));
            InfoStream.Message("DWPT", "flushedFiles=" + segmentInfoPerCommit.Files());
            InfoStream.Message("DWPT", "flushed codec=" + Codec);
        }

        BufferedUpdates segmentDeletes;
        if (PendingUpdates.Queries.Count == 0 && PendingUpdates.NumericUpdates.Count == 0 && PendingUpdates.BinaryUpdates.Count == 0)
        {
            PendingUpdates.Clear();
            segmentDeletes = null;
        }
        else
        {
            segmentDeletes = PendingUpdates;
        }

        if (InfoStream.IsEnabled("DWPT"))
        {
            double newSegmentSize = segmentInfoPerCommit.SizeInBytes() / 1024.0 / 1024.0;
            InfoStream.Message("DWPT", "flushed: segment=" + SegmentInfo_Renamed.Name + " ramUsed=" + startMBUsed.ToString(Nf) + " MB" + " newFlushedSize(includes docstores)=" + newSegmentSize.ToString(Nf) + " MB" + " docs/MB=" + (flushState.SegmentInfo.DocCount / newSegmentSize).ToString(Nf));
        }

        Debug.Assert(SegmentInfo_Renamed != null);

        FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.FieldInfos, segmentDeletes, flushState.LiveDocs, flushState.DelCountOnFlush);
        SealFlushedSegment(fs);
        success = true;
        return fs;
    }
    finally
    {
        if (!success)
        {
            Abort(FilesToDelete);
        }
    }
}