Exemple #1
        /// <summary>
        /// Initializes the per-thread writer state for the segment named <paramref name="segmentName"/>:
        /// stores the collaborators passed in, sets up the memory-tracking allocators, takes a
        /// slice of the delete queue, creates the <see cref="SegmentInfo"/>, and finally asks the
        /// configured indexing chain for the consumer.
        /// </summary>
        /// <param name="segmentName">Name given to the new <see cref="SegmentInfo"/> and printed in the init message.</param>
        /// <param name="directory">Kept unchanged as <c>directoryOrig</c> and additionally wrapped in a <see cref="TrackingDirectoryWrapper"/>.</param>
        /// <param name="indexWriterConfig">Supplies the codec, the similarity and the indexing chain.</param>
        /// <param name="infoStream">Stream used for the optional "DWPT" init message; also handed to the <c>DocState</c>.</param>
        /// <param name="deleteQueue">Delete queue from which a new slice is taken.</param>
        /// <param name="fieldInfos">Field-infos builder stored on this instance.</param>
        // NOTE(review): the braces of this constructor appear to have been stripped from this excerpt.
        public DocumentsWriterPerThread(string segmentName, Directory directory, LiveIndexWriterConfig indexWriterConfig, InfoStream infoStream, DocumentsWriterDeleteQueue deleteQueue, FieldInfos.Builder fieldInfos)
            // Keep both the caller's directory and a tracking wrapper around it.
            this.directoryOrig       = directory;
            this.directory           = new TrackingDirectoryWrapper(directory);
            this.fieldInfos          = fieldInfos;
            this.indexWriterConfig   = indexWriterConfig;
            this.infoStream          = infoStream;
            this.codec               = indexWriterConfig.Codec;
            this.docState            = new DocState(this, infoStream);
            this.docState.similarity = indexWriterConfig.Similarity;
            // A single counter is shared by the byte-block and int-block allocators below.
            bytesUsed          = Counter.NewCounter();
            byteBlockAllocator = new DirectTrackingAllocator(bytesUsed);
            pendingUpdates     = new BufferedUpdates();
            intBlockAllocator  = new Int32BlockAllocator(bytesUsed);
            this.deleteQueue   = deleteQueue;
            Debug.Assert(numDocsInRAM == 0, "num docs " + numDocsInRAM);
            deleteSlice = deleteQueue.NewSlice();

            // The SegmentInfo is created against the original (unwrapped) directory.
            segmentInfo = new SegmentInfo(directoryOrig, Constants.LUCENE_MAIN_VERSION, segmentName, -1, false, codec, null);
            Debug.Assert(numDocsInRAM == 0);
            if (INFO_VERBOSE && infoStream.IsEnabled("DWPT"))
                infoStream.Message("DWPT", Thread.CurrentThread.Name + " init seg=" + segmentName + " delQueue=" + deleteQueue);
            // this should be the last call in the ctor
            // it really sucks that we need to pull this within the ctor and pass this ref to the chain!
            consumer = indexWriterConfig.IndexingChain.GetChain(this);
Exemple #2
 /// <summary>
 /// Lazily creates the <see cref="DocumentsWriterPerThread"/> of <paramref name="state"/>:
 /// when the state is active and has no <c>dwpt</c> yet, a new one is built with a fresh
 /// segment name from the writer and a <see cref="FieldInfos.Builder"/> over the writer's
 /// global field number map.
 /// </summary>
 /// <param name="state">Thread state whose <c>dwpt</c> is populated if needed.</param>
 // NOTE(review): braces appear to have been stripped from this excerpt.
 private void EnsureInitialized(ThreadState state)
     if (state.IsActive && state.dwpt == null)
         FieldInfos.Builder infos = new FieldInfos.Builder(writer.globalFieldNumberMap);
         state.dwpt = new DocumentsWriterPerThread(writer.NewSegmentName(), directory, config, infoStream, deleteQueue, infos);
        /// <summary>
        /// Call this to get the (merged) <see cref="FieldInfos"/> for a
        /// composite reader.
        /// <para/>
        /// NOTE: the returned field numbers will likely not
        /// correspond to the actual field numbers in the underlying
        /// readers, and codec metadata (<see cref="FieldInfo.GetAttribute(string)"/>)
        /// will be unavailable.
        /// </summary>
        /// <param name="reader">Reader whose <c>Leaves</c> are enumerated.</param>
        public static FieldInfos GetMergedFieldInfos(IndexReader reader)
            // Starts from an empty builder (no shared field-number map is passed in).
            var builder = new FieldInfos.Builder();

            // NOTE(review): the loop body and the return statement are not visible in this
            // excerpt; presumably each leaf's field infos are added to the builder - confirm.
            foreach (AtomicReaderContext ctx in reader.Leaves)
 // note, just like in codec apis Directory 'dir' is NOT the same as segmentInfo.dir!!
 //
 // Sets up a merge over 'readers': builds the MergeState, stores the target directory,
 // term index interval and IO context, takes the codec from 'segmentInfo', creates a
 // FieldInfos.Builder on top of 'fieldNumbers', and stores the value returned by
 // SetDocMaps() as the merged segment's DocCount.
 // NOTE(review): braces appear to have been stripped from this excerpt.
 internal SegmentMerger(IList <AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir, int termIndexInterval, CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context, bool validate)
     // validate incoming readers
     if (validate)
         // NOTE(review): the per-reader validation call is not visible in this excerpt.
         foreach (AtomicReader reader in readers)
     mergeState                      = new MergeState(readers, segmentInfo, infoStream, checkAbort);
     directory                       = dir;
     this.termIndexInterval          = termIndexInterval;
     this.codec                      = segmentInfo.Codec;
     this.context                    = context;
     this.fieldInfosBuilder          = new FieldInfos.Builder(fieldNumbers);
     // SetDocMaps() is called last here and its result becomes the merged doc count.
     mergeState.SegmentInfo.DocCount = SetDocMaps();
Exemple #5
        /// <summary>
        /// Writes the pending doc-values updates in <paramref name="dvUpdates"/> for this segment.
        /// <para/>
        /// The visible steps are: clone the reader's <see cref="FieldInfos"/> into a new builder,
        /// register every updated field as numeric or binary doc values, write the numeric and
        /// binary updates through the codec's <see cref="DocValuesFormat"/> consumer, write the new
        /// field infos, remember the updates for a running merge, rebuild the gen-to-files map,
        /// and reopen the segment reader if one is currently held.
        /// </summary>
        /// <param name="dir">Target directory; it is wrapped in a <see cref="TrackingDirectoryWrapper"/> so created files can be enumerated.</param>
        /// <param name="dvUpdates">Container exposing <c>numericDVUpdates</c> and <c>binaryDVUpdates</c>, keyed by field name.</param>
        // NOTE(review): braces and several control-flow lines (try/finally, else, assertion and
        // cleanup bodies) appear to have been stripped from this excerpt; the notes below flag
        // the places where a statement is visibly missing.
        public virtual void WriteFieldUpdates(Directory dir, DocValuesFieldUpdates.Container dvUpdates)
                // NOTE(review): the assertion guarded by this check is not visible here.
                if (Debugging.AssertsEnabled)
                //System.out.println("rld.writeFieldUpdates: seg=" + info + " numericFieldUpdates=" + numericFieldUpdates);

                // NOTE(review): the assertion guarded by this check is not visible here.
                if (Debugging.AssertsEnabled)

                // Do this so we can delete any created files on
                // exception; this saves all codecs from having to do
                // it:
                TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);

                FieldInfos fieldInfos = null;
                bool       success    = false;
                    Codec codec = Info.Info.Codec;

                    // reader could be null e.g. for a just merged segment (from
                    // IndexWriter.commitMergedDeletes).
                    SegmentReader reader = this.reader ?? new SegmentReader(Info, writer.Config.ReaderTermsIndexDivisor, IOContext.READ_ONCE);
                        // clone FieldInfos so that we can update their dvGen separately from
                        // the reader's infos and write them to a new fieldInfos_gen file
                        FieldInfos.Builder builder = new FieldInfos.Builder(writer.globalFieldNumberMap);
                        // cannot use builder.add(reader.getFieldInfos()) because it does not
                        // clone FI.attributes as well FI.dvGen
                        foreach (FieldInfo fi in reader.FieldInfos)
                            FieldInfo clone = builder.Add(fi);
                            // copy the stuff FieldInfos.Builder doesn't copy
                            if (fi.Attributes != null)
                                foreach (KeyValuePair <string, string> e in fi.Attributes)
                                    clone.PutAttribute(e.Key, e.Value);
                            clone.DocValuesGen = fi.DocValuesGen;
                        // create new fields or update existing ones to have NumericDV type
                        foreach (string f in dvUpdates.numericDVUpdates.Keys)
                            builder.AddOrUpdate(f, NumericDocValuesField.TYPE);
                        // create new fields or update existing ones to have BinaryDV type
                        foreach (string f in dvUpdates.binaryDVUpdates.Keys)
                            builder.AddOrUpdate(f, BinaryDocValuesField.TYPE);

                        fieldInfos = builder.Finish();
                        // This generation is used both as the file suffix and as the new
                        // DocValuesGen of every field updated below.
                        long nextFieldInfosGen = Info.NextFieldInfosGen;
                        // LUCENENET specific: We created the segments names wrong in 4.8.0-beta00001 - 4.8.0-beta00015,
                        // so we added a switch to be able to read these indexes in later versions. This logic as well as an
                        // optimization on the first 100 segment values is implemented in SegmentInfos.SegmentNumberToString().
                        string            segmentSuffix   = SegmentInfos.SegmentNumberToString(nextFieldInfosGen);
                        SegmentWriteState state           = new SegmentWriteState(null, trackingDir, Info.Info, fieldInfos, writer.Config.TermIndexInterval, null, IOContext.DEFAULT, segmentSuffix);
                        DocValuesFormat   docValuesFormat = codec.DocValuesFormat;
                        DocValuesConsumer fieldsConsumer  = docValuesFormat.FieldsConsumer(state);
                        bool fieldsConsumerSuccess        = false;
                            //          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeFieldUpdates: applying numeric updates; seg=" + info + " updates=" + numericFieldUpdates);
                            foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.numericDVUpdates)
                                string field = e.Key;
                                NumericDocValuesFieldUpdates fieldUpdates = e.Value;
                                FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                                if (Debugging.AssertsEnabled)
                                    Debugging.Assert(fieldInfo != null);

                                fieldInfo.DocValuesGen = nextFieldInfosGen;
                                // write the numeric updates to a new gen'd docvalues file
                                fieldsConsumer.AddNumericField(fieldInfo, GetInt64Enumerable(reader, field, fieldUpdates));

                            //        System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " updates=" + dvUpdates.binaryDVUpdates);
                            foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.binaryDVUpdates)
                                string field = e.Key;
                                BinaryDocValuesFieldUpdates dvFieldUpdates = e.Value;
                                FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                                if (Debugging.AssertsEnabled)
                                    Debugging.Assert(fieldInfo != null);

                                //          System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " f=" + dvFieldUpdates + ", updates=" + dvFieldUpdates);

                                fieldInfo.DocValuesGen = nextFieldInfosGen;
                                // write the binary updates to a new gen'd docvalues file
                                fieldsConsumer.AddBinaryField(fieldInfo, GetBytesRefEnumerable(reader, field, dvFieldUpdates));

                            // The updated field infos are written under the same suffix as the doc-values files.
                            codec.FieldInfosFormat.FieldInfosWriter.Write(trackingDir, Info.Info.Name, segmentSuffix, fieldInfos, IOContext.DEFAULT);
                            fieldsConsumerSuccess = true;
                            // NOTE(review): the statements run on success/failure of the consumer
                            // (presumably disposing fieldsConsumer) are not visible here.
                            if (fieldsConsumerSuccess)
                        // Only a reader created locally above differs from this.reader.
                        // NOTE(review): the statement guarded by this check is not visible here.
                        if (reader != this.reader)
                            //          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: closeReader " + reader);

                    success = true;
                    if (!success)
                        // Advance only the nextWriteDocValuesGen so that a 2nd
                        // attempt to write will write to a new file

                        // Delete any partially created file(s):
                        foreach (string fileName in trackingDir.CreatedFiles)
                            // NOTE(review): the delete call and its enclosing try are not visible here.
                            catch (Exception t) when(t.IsThrowable())
                                // Ignore so we throw only the first exc

                // copy all the updates to mergingUpdates, so they can later be applied to the merged segment
                if (isMerging)
                    // NOTE(review): in both loops only the "key not yet present" arm is visible;
                    // `updates` is otherwise unused, so an else arm is presumably missing - confirm.
                    foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.numericDVUpdates)
                        if (!mergingDVUpdates.TryGetValue(e.Key, out DocValuesFieldUpdates updates))
                            mergingDVUpdates[e.Key] = e.Value;
                    foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.binaryDVUpdates)
                        if (!mergingDVUpdates.TryGetValue(e.Key, out DocValuesFieldUpdates updates))
                            mergingDVUpdates[e.Key] = e.Value;

                // create a new map, keeping only the gens that are in use
                IDictionary <long, ISet <string> > genUpdatesFiles    = Info.UpdatesFiles;
                IDictionary <long, ISet <string> > newGenUpdatesFiles = new Dictionary <long, ISet <string> >();
                long fieldInfosGen = Info.FieldInfosGen;
                foreach (FieldInfo fi in fieldInfos)
                    long dvGen = fi.DocValuesGen;
                    // Each generation is recorded once; -1 is skipped.
                    if (dvGen != -1 && !newGenUpdatesFiles.ContainsKey(dvGen))
                        // NOTE(review): the two assignments below look like the if/else arms
                        // (files just created vs. files carried over); the `else` is presumably
                        // missing from this excerpt - confirm.
                        if (dvGen == fieldInfosGen)
                            newGenUpdatesFiles[fieldInfosGen] = trackingDir.CreatedFiles;
                            newGenUpdatesFiles[dvGen] = genUpdatesFiles[dvGen];


                // wrote new files, should checkpoint()

                // if there is a reader open, reopen it to reflect the updates
                if (reader != null)
                    SegmentReader newReader = new SegmentReader(Info, reader, liveDocs, Info.Info.DocCount - Info.DelCount - pendingDeleteCount);
                    bool          reopened  = false;
                        reader   = newReader;
                        reopened = true;
                        // NOTE(review): the cleanup performed when the reopen fails is not visible here.
                        if (!reopened)
Exemple #6
        /// <summary>
        /// Expert: create a <see cref="ParallelAtomicReader"/> based on the provided
        /// <paramref name="readers"/> and <paramref name="storedFieldsReaders"/>; when a document is
        /// loaded, only <paramref name="storedFieldsReaders"/> will be used.
        /// </summary>
        /// <param name="closeSubReaders">Stored on the instance and consulted for each sub-reader at the end of construction.</param>
        /// <param name="readers">Main readers; the array is cloned, and the first element supplies <c>maxDoc</c>, <c>numDocs</c> and <c>hasDeletions</c>.</param>
        /// <param name="storedFieldsReaders">Readers used for stored fields; the array is cloned.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="readers"/> is empty while <paramref name="storedFieldsReaders"/> is not,
        /// or a reader's <c>MaxDoc</c> differs from that of the first main reader.
        /// </exception>
        // NOTE(review): braces and at least one `else` appear to have been stripped from this excerpt.
        public ParallelAtomicReader(bool closeSubReaders, AtomicReader[] readers, AtomicReader[] storedFieldsReaders)
            this.closeSubReaders = closeSubReaders;
            if (readers.Length == 0 && storedFieldsReaders.Length > 0)
                throw new ArgumentException("There must be at least one main reader if storedFieldsReaders are used.");
            // Defensive copies: later changes to the caller's arrays do not affect this reader.
            this.parallelReaders     = (AtomicReader[])readers.Clone();
            this.storedFieldsReaders = (AtomicReader[])storedFieldsReaders.Clone();
            if (parallelReaders.Length > 0)
                AtomicReader first = parallelReaders[0];
                this.maxDoc       = first.MaxDoc;
                this.numDocs      = first.NumDocs;
                this.hasDeletions = first.HasDeletions;
                // NOTE(review): the two lines below look like the else arm (no main readers);
                // the `else` is presumably missing from this excerpt - confirm.
                this.maxDoc       = this.numDocs = 0;
                this.hasDeletions = false;

            // NOTE(review): completeReaderSet is not populated in the visible lines;
            // presumably it is filled from both reader arrays before this loop - confirm.
            // check compatibility:
            foreach (AtomicReader reader in completeReaderSet)
                if (reader.MaxDoc != maxDoc)
                    throw new ArgumentException("All readers must have same MaxDoc: " + maxDoc + "!=" + reader.MaxDoc);

            // TODO: make this read-only in a cleaner way?
            FieldInfos.Builder builder = new FieldInfos.Builder();
            // build FieldInfos and fieldToReader map:
            foreach (AtomicReader reader in this.parallelReaders)
                FieldInfos readerFieldInfos = reader.FieldInfos;
                foreach (FieldInfo fieldInfo in readerFieldInfos)
                    // NOTE: first reader having a given field "wins":
                    // NOTE(review): no builder.Add(...) call is visible in this loop, although
                    // builder.Finish() is used below; a line may be missing - confirm.
                    if (!fieldToReader.ContainsKey(fieldInfo.Name))
                        fieldToReader[fieldInfo.Name] = reader;
                        // Fields with term vectors are additionally tracked in tvFieldToReader.
                        if (fieldInfo.HasVectors)
                            tvFieldToReader[fieldInfo.Name] = reader;
            fieldInfos = builder.Finish();

            // build Fields instance
            foreach (AtomicReader reader in this.parallelReaders)
                Fields readerFields = reader.Fields;
                if (readerFields != null)
                    foreach (string field in readerFields)
                        // only add if the reader responsible for that field name is the current:
                        if (fieldToReader[field].Equals(reader))
                            this.fields.AddField(field, readerFields.GetTerms(field));

            // do this finally so any Exceptions occurred before don't affect refcounts:
            foreach (AtomicReader reader in completeReaderSet)
                // NOTE(review): the statement guarded by this check is not visible here.
                if (!closeSubReaders)
Exemple #7
 /// <summary>
 /// Processes the current document; implementations receive the field-infos builder
 /// through which fields are registered.
 /// </summary>
 /// <param name="fieldInfos">Builder handed to the implementation.</param>
 public abstract void ProcessDocument(FieldInfos.Builder fieldInfos);
        /// <summary>
        /// Processes every field of <c>docState.doc</c>: finds or creates the per-field processor
        /// in the <c>fieldHash</c> chains, registers the field with <paramref name="fieldInfos"/>,
        /// collects the distinct per-field processors seen in this document, forwards each field
        /// to the stored-fields consumer, and finally runs each per-field consumer in sorted order.
        /// </summary>
        /// <param name="fieldInfos">Builder used to add or update the <see cref="FieldInfo"/> of each field.</param>
        // NOTE(review): braces, an `else`, and the body of one `if` appear to have been
        // stripped from this excerpt; see the notes below.
        public override void ProcessDocument(FieldInfos.Builder fieldInfos)

            fieldCount = 0;

            // Generation stamp for this document; compared against fp.lastGen below to
            // detect the first occurrence of a field within the document.
            int thisFieldGen = fieldGen++;

            // Absorb any new fields first seen in this document.
            // Also absorb any changes to fields we had already
            // seen before (eg suddenly turning on norms or
            // vectors, etc.):

            foreach (IIndexableField field in docState.doc)
                string fieldName = field.Name;

                // Make sure we have a PerField allocated
                int hashPos = fieldName.GetHashCode() & hashMask;
                DocFieldProcessorPerField fp = fieldHash[hashPos];
                // Walk the collision chain until the entry with this exact name is found.
                while (fp != null && !fp.fieldInfo.Name.Equals(fieldName, StringComparison.Ordinal))
                    fp = fp.next;

                if (fp == null)
                    // TODO FI: we need to genericize the "flags" that a
                    // field holds, and, how these flags are merged; it
                    // needs to be more "pluggable" such that if I want
                    // to have a new "thing" my Fields can do, I can
                    // easily add it
                    FieldInfo fi = fieldInfos.AddOrUpdate(fieldName, field.IndexableFieldType);

                    // The new entry is inserted at the head of its hash chain.
                    fp                 = new DocFieldProcessorPerField(this, fi);
                    fp.next            = fieldHash[hashPos];
                    fieldHash[hashPos] = fp;

                    // NOTE(review): neither the increment of totalFieldCount nor the statement
                    // guarded by this check (presumably a rehash) is visible here; the lines
                    // that follow look like the else arm of `if (fp == null)` - confirm.
                    if (totalFieldCount >= fieldHash.Length / 2)
                    // need to addOrUpdate so that FieldInfos can update globalFieldNumbers
                    // with the correct DocValue type (LUCENE-5192)
                    FieldInfo fi = fieldInfos.AddOrUpdate(fieldName, field.IndexableFieldType);
                    Debug.Assert(fi == fp.fieldInfo, "should only have updated an existing FieldInfo instance");

                if (thisFieldGen != fp.lastGen)
                    // First time we're seeing this field for this doc
                    fp.fieldCount = 0;

                    // Double the per-document array when it is full.
                    if (fieldCount == fields.Length)
                        int newSize = fields.Length * 2;
                        DocFieldProcessorPerField[] newArray = new DocFieldProcessorPerField[newSize];
                        Array.Copy(fields, 0, newArray, 0, fieldCount);
                        fields = newArray;

                    fields[fieldCount++] = fp;
                    fp.lastGen           = thisFieldGen;

                // NOTE(review): no line adding `field` to fp.fields / bumping fp.fieldCount is
                // visible here, although both are read in the final loop - confirm.
                storedConsumer.AddField(docState.docID, field, fp.fieldInfo);

            // If we are writing vectors then we must visit
            // fields in sorted order so they are written in
            // sorted order.  TODO: we actually only need to
            // sort the subset of fields that have vectors
            // enabled; we could save [small amount of] CPU
            // here.
            ArrayUtil.IntroSort(fields, 0, fieldCount, fieldsComp);
            for (int i = 0; i < fieldCount; i++)
                DocFieldProcessorPerField perField = fields[i];
                perField.consumer.ProcessFields(perField.fields, perField.fieldCount);