Esempio n. 1
0
        public virtual FieldInfos CreateAndWriteFieldInfos(Directory dir, string filename)
        {
            //Positive test of FieldInfos
            Assert.IsTrue(TestDoc != null);
            FieldInfos.Builder builder = new FieldInfos.Builder();
            foreach (IIndexableField field in TestDoc)
            {
                builder.AddOrUpdate(field.Name, field.IndexableFieldType);
            }
            FieldInfos fieldInfos = builder.Finish();

            //Since the complement is stored as well in the fields map
            Assert.IsTrue(fieldInfos.Count == DocHelper.All.Count); //this is all b/c we are using the no-arg constructor

            IndexOutput output = dir.CreateOutput(filename, NewIOContext(Random));

            Assert.IsTrue(output != null);
            //Use a RAMOutputStream

            FieldInfosWriter writer = Codec.Default.FieldInfosFormat.FieldInfosWriter;

            writer.Write(dir, filename, "", fieldInfos, IOContext.DEFAULT);
            output.Dispose();
            return(fieldInfos);
        }
Esempio n. 2
0
            public FieldData(TestCodecs outerInstance, string name, FieldInfos.Builder fieldInfos, TermData[] terms, bool omitTF, bool storePayloads)
            {
                this.outerInstance = outerInstance;
                this.omitTF        = omitTF;
                this.storePayloads = storePayloads;
                // TODO: change this test to use all three
                fieldInfo = fieldInfos.AddOrUpdate(name, new IndexableFieldTypeAnonymousInnerClassHelper(this, omitTF));
                if (storePayloads)
                {
                    fieldInfo.SetStorePayloads();
                }
                this.terms = terms;
                for (int i = 0; i < terms.Length; i++)
                {
                    terms[i].field = this;
                }

                Array.Sort(terms);
            }
Esempio n. 3
0
        public void BeforeClass()
        {
            TestDoc    = new Document();
            FieldInfos = new FieldInfos.Builder();
            DocHelper.SetupDoc(TestDoc);
            foreach (IndexableField field in TestDoc)
            {
                FieldInfos.AddOrUpdate(field.Name, field.FieldType);
            }
            Dir = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy());

            conf.MergePolicy.NoCFSRatio = 0.0;
            IndexWriter writer = new IndexWriter(Dir, conf);

            writer.AddDocument(TestDoc);
            writer.Dispose();
            FaultyIndexInput.DoFail = false;
        }
Esempio n. 4
0
        public virtual void WriteFieldUpdates(Directory dir, DocValuesFieldUpdates.Container dvUpdates)
        {
            UninterruptableMonitor.Enter(this);
            try
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(UninterruptableMonitor.IsEntered(writer));
                }
                //System.out.println("rld.writeFieldUpdates: seg=" + info + " numericFieldUpdates=" + numericFieldUpdates);

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(dvUpdates.Any());
                }

                // Do this so we can delete any created files on
                // exception; this saves all codecs from having to do
                // it:
                TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);

                FieldInfos fieldInfos = null;
                bool       success    = false;
                try
                {
                    Codec codec = Info.Info.Codec;

                    // reader could be null e.g. for a just merged segment (from
                    // IndexWriter.commitMergedDeletes).
                    SegmentReader reader = this.reader ?? new SegmentReader(Info, writer.Config.ReaderTermsIndexDivisor, IOContext.READ_ONCE);
                    try
                    {
                        // clone FieldInfos so that we can update their dvGen separately from
                        // the reader's infos and write them to a new fieldInfos_gen file
                        FieldInfos.Builder builder = new FieldInfos.Builder(writer.globalFieldNumberMap);
                        // cannot use builder.add(reader.getFieldInfos()) because it does not
                        // clone FI.attributes as well FI.dvGen
                        foreach (FieldInfo fi in reader.FieldInfos)
                        {
                            FieldInfo clone = builder.Add(fi);
                            // copy the stuff FieldInfos.Builder doesn't copy
                            if (fi.Attributes != null)
                            {
                                foreach (KeyValuePair <string, string> e in fi.Attributes)
                                {
                                    clone.PutAttribute(e.Key, e.Value);
                                }
                            }
                            clone.DocValuesGen = fi.DocValuesGen;
                        }
                        // create new fields or update existing ones to have NumericDV type
                        foreach (string f in dvUpdates.numericDVUpdates.Keys)
                        {
                            builder.AddOrUpdate(f, NumericDocValuesField.TYPE);
                        }
                        // create new fields or update existing ones to have BinaryDV type
                        foreach (string f in dvUpdates.binaryDVUpdates.Keys)
                        {
                            builder.AddOrUpdate(f, BinaryDocValuesField.TYPE);
                        }

                        fieldInfos = builder.Finish();
                        long nextFieldInfosGen = Info.NextFieldInfosGen;
                        // LUCENENET specific: We created the segments names wrong in 4.8.0-beta00001 - 4.8.0-beta00015,
                        // so we added a switch to be able to read these indexes in later versions. This logic as well as an
                        // optimization on the first 100 segment values is implmeneted in SegmentInfos.SegmentNumberToString().
                        string            segmentSuffix   = SegmentInfos.SegmentNumberToString(nextFieldInfosGen);
                        SegmentWriteState state           = new SegmentWriteState(null, trackingDir, Info.Info, fieldInfos, writer.Config.TermIndexInterval, null, IOContext.DEFAULT, segmentSuffix);
                        DocValuesFormat   docValuesFormat = codec.DocValuesFormat;
                        DocValuesConsumer fieldsConsumer  = docValuesFormat.FieldsConsumer(state);
                        bool fieldsConsumerSuccess        = false;
                        try
                        {
                            //          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeFieldUpdates: applying numeric updates; seg=" + info + " updates=" + numericFieldUpdates);
                            foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.numericDVUpdates)
                            {
                                string field = e.Key;
                                NumericDocValuesFieldUpdates fieldUpdates = e.Value;
                                FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                                if (Debugging.AssertsEnabled)
                                {
                                    Debugging.Assert(fieldInfo != null);
                                }

                                fieldInfo.DocValuesGen = nextFieldInfosGen;
                                // write the numeric updates to a new gen'd docvalues file
                                fieldsConsumer.AddNumericField(fieldInfo, GetInt64Enumerable(reader, field, fieldUpdates));
                            }

                            //        System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " updates=" + dvUpdates.binaryDVUpdates);
                            foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.binaryDVUpdates)
                            {
                                string field = e.Key;
                                BinaryDocValuesFieldUpdates dvFieldUpdates = e.Value;
                                FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                                if (Debugging.AssertsEnabled)
                                {
                                    Debugging.Assert(fieldInfo != null);
                                }

                                //          System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " f=" + dvFieldUpdates + ", updates=" + dvFieldUpdates);

                                fieldInfo.DocValuesGen = nextFieldInfosGen;
                                // write the numeric updates to a new gen'd docvalues file
                                fieldsConsumer.AddBinaryField(fieldInfo, GetBytesRefEnumerable(reader, field, dvFieldUpdates));
                            }

                            codec.FieldInfosFormat.FieldInfosWriter.Write(trackingDir, Info.Info.Name, segmentSuffix, fieldInfos, IOContext.DEFAULT);
                            fieldsConsumerSuccess = true;
                        }
                        finally
                        {
                            if (fieldsConsumerSuccess)
                            {
                                fieldsConsumer.Dispose();
                            }
                            else
                            {
                                IOUtils.DisposeWhileHandlingException(fieldsConsumer);
                            }
                        }
                    }
                    finally
                    {
                        if (reader != this.reader)
                        {
                            //          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: closeReader " + reader);
                            reader.Dispose();
                        }
                    }

                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        // Advance only the nextWriteDocValuesGen so that a 2nd
                        // attempt to write will write to a new file
                        Info.AdvanceNextWriteFieldInfosGen();

                        // Delete any partially created file(s):
                        foreach (string fileName in trackingDir.CreatedFiles)
                        {
                            try
                            {
                                dir.DeleteFile(fileName);
                            }
                            catch (Exception t) when(t.IsThrowable())
                            {
                                // Ignore so we throw only the first exc
                            }
                        }
                    }
                }

                Info.AdvanceFieldInfosGen();
                // copy all the updates to mergingUpdates, so they can later be applied to the merged segment
                if (isMerging)
                {
                    foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.numericDVUpdates)
                    {
                        if (!mergingDVUpdates.TryGetValue(e.Key, out DocValuesFieldUpdates updates))
                        {
                            mergingDVUpdates[e.Key] = e.Value;
                        }
                        else
                        {
                            updates.Merge(e.Value);
                        }
                    }
                    foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.binaryDVUpdates)
                    {
                        if (!mergingDVUpdates.TryGetValue(e.Key, out DocValuesFieldUpdates updates))
                        {
                            mergingDVUpdates[e.Key] = e.Value;
                        }
                        else
                        {
                            updates.Merge(e.Value);
                        }
                    }
                }

                // create a new map, keeping only the gens that are in use
                IDictionary <long, ISet <string> > genUpdatesFiles    = Info.UpdatesFiles;
                IDictionary <long, ISet <string> > newGenUpdatesFiles = new Dictionary <long, ISet <string> >();
                long fieldInfosGen = Info.FieldInfosGen;
                foreach (FieldInfo fi in fieldInfos)
                {
                    long dvGen = fi.DocValuesGen;
                    if (dvGen != -1 && !newGenUpdatesFiles.ContainsKey(dvGen))
                    {
                        if (dvGen == fieldInfosGen)
                        {
                            newGenUpdatesFiles[fieldInfosGen] = trackingDir.CreatedFiles;
                        }
                        else
                        {
                            newGenUpdatesFiles[dvGen] = genUpdatesFiles[dvGen];
                        }
                    }
                }

                Info.SetGenUpdatesFiles(newGenUpdatesFiles);

                // wrote new files, should checkpoint()
                writer.Checkpoint();

                // if there is a reader open, reopen it to reflect the updates
                if (reader != null)
                {
                    SegmentReader newReader = new SegmentReader(Info, reader, liveDocs, Info.Info.DocCount - Info.DelCount - pendingDeleteCount);
                    bool          reopened  = false;
                    try
                    {
                        reader.DecRef();
                        reader   = newReader;
                        reopened = true;
                    }
                    finally
                    {
                        if (!reopened)
                        {
                            newReader.DecRef();
                        }
                    }
                }
            }
            finally
            {
                UninterruptableMonitor.Exit(this);
            }
        }
Esempio n. 5
0
        public override void ProcessDocument(FieldInfos.Builder fieldInfos)
        {
            consumer.StartDocument();
            storedConsumer.StartDocument();

            fieldCount = 0;

            int thisFieldGen = fieldGen++;

            // Absorb any new fields first seen in this document.
            // Also absorb any changes to fields we had already
            // seen before (eg suddenly turning on norms or
            // vectors, etc.):

            foreach (IIndexableField field in docState.doc)
            {
                string fieldName = field.Name;

                // Make sure we have a PerField allocated
                int hashPos = fieldName.GetHashCode() & hashMask;
                DocFieldProcessorPerField fp = fieldHash[hashPos];
                while (fp != null && !fp.fieldInfo.Name.Equals(fieldName, StringComparison.Ordinal))
                {
                    fp = fp.next;
                }

                if (fp == null)
                {
                    // TODO FI: we need to genericize the "flags" that a
                    // field holds, and, how these flags are merged; it
                    // needs to be more "pluggable" such that if I want
                    // to have a new "thing" my Fields can do, I can
                    // easily add it
                    FieldInfo fi = fieldInfos.AddOrUpdate(fieldName, field.IndexableFieldType);

                    fp                 = new DocFieldProcessorPerField(this, fi);
                    fp.next            = fieldHash[hashPos];
                    fieldHash[hashPos] = fp;
                    totalFieldCount++;

                    if (totalFieldCount >= fieldHash.Length / 2)
                    {
                        Rehash();
                    }
                }
                else
                {
                    // need to addOrUpdate so that FieldInfos can update globalFieldNumbers
                    // with the correct DocValue type (LUCENE-5192)
                    FieldInfo fi = fieldInfos.AddOrUpdate(fieldName, field.IndexableFieldType);
                    Debug.Assert(fi == fp.fieldInfo, "should only have updated an existing FieldInfo instance");
                }

                if (thisFieldGen != fp.lastGen)
                {
                    // First time we're seeing this field for this doc
                    fp.fieldCount = 0;

                    if (fieldCount == fields.Length)
                    {
                        int newSize = fields.Length * 2;
                        DocFieldProcessorPerField[] newArray = new DocFieldProcessorPerField[newSize];
                        Array.Copy(fields, 0, newArray, 0, fieldCount);
                        fields = newArray;
                    }

                    fields[fieldCount++] = fp;
                    fp.lastGen           = thisFieldGen;
                }

                fp.AddField(field);
                storedConsumer.AddField(docState.docID, field, fp.fieldInfo);
            }

            // If we are writing vectors then we must visit
            // fields in sorted order so they are written in
            // sorted order.  TODO: we actually only need to
            // sort the subset of fields that have vectors
            // enabled; we could save [small amount of] CPU
            // here.
            ArrayUtil.IntroSort(fields, 0, fieldCount, fieldsComp);
            for (int i = 0; i < fieldCount; i++)
            {
                DocFieldProcessorPerField perField = fields[i];
                perField.consumer.ProcessFields(perField.fields, perField.fieldCount);
            }
        }
Esempio n. 6
0
        // Writes field updates (new _X_N updates files) to the directory
        public virtual void WriteFieldUpdates(Directory dir, DocValuesFieldUpdates.Container dvUpdates)
        {
            lock (this)
            {
                //Debug.Assert(Thread.holdsLock(Writer));
                //System.out.println("rld.writeFieldUpdates: seg=" + info + " numericFieldUpdates=" + numericFieldUpdates);

                Debug.Assert(dvUpdates.Any());

                // Do this so we can delete any created files on
                // exception; this saves all codecs from having to do
                // it:
                TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);

                FieldInfos fieldInfos = null;
                bool       success    = false;
                try
                {
                    Codec codec = Info.Info.Codec;

                    // reader could be null e.g. for a just merged segment (from
                    // IndexWriter.commitMergedDeletes).
                    SegmentReader reader = this.Reader == null ? new SegmentReader(Info, Writer.Config.ReaderTermsIndexDivisor, IOContext.READONCE) : this.Reader;
                    try
                    {
                        // clone FieldInfos so that we can update their dvGen separately from
                        // the reader's infos and write them to a new fieldInfos_gen file
                        FieldInfos.Builder builder = new FieldInfos.Builder(Writer.GlobalFieldNumberMap);
                        // cannot use builder.add(reader.getFieldInfos()) because it does not
                        // clone FI.attributes as well FI.dvGen
                        foreach (FieldInfo fi in reader.FieldInfos)
                        {
                            FieldInfo clone = builder.Add(fi);
                            // copy the stuff FieldInfos.Builder doesn't copy
                            if (fi.Attributes() != null)
                            {
                                foreach (KeyValuePair <string, string> e in fi.Attributes())
                                {
                                    clone.PutAttribute(e.Key, e.Value);
                                }
                            }
                            clone.DocValuesGen = fi.DocValuesGen;
                        }
                        // create new fields or update existing ones to have NumericDV type
                        foreach (string f in dvUpdates.NumericDVUpdates.Keys)
                        {
                            builder.AddOrUpdate(f, NumericDocValuesField.TYPE);
                        }
                        // create new fields or update existing ones to have BinaryDV type
                        foreach (string f in dvUpdates.BinaryDVUpdates.Keys)
                        {
                            builder.AddOrUpdate(f, BinaryDocValuesField.fType);
                        }

                        fieldInfos = builder.Finish();
                        long              nextFieldInfosGen     = Info.NextFieldInfosGen;
                        string            segmentSuffix         = nextFieldInfosGen.ToString(CultureInfo.InvariantCulture);//Convert.ToString(nextFieldInfosGen, Character.MAX_RADIX));
                        SegmentWriteState state                 = new SegmentWriteState(null, trackingDir, Info.Info, fieldInfos, Writer.Config.TermIndexInterval, null, IOContext.DEFAULT, segmentSuffix);
                        DocValuesFormat   docValuesFormat       = codec.DocValuesFormat();
                        DocValuesConsumer fieldsConsumer        = docValuesFormat.FieldsConsumer(state);
                        bool              fieldsConsumerSuccess = false;
                        try
                        {
                            //          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeFieldUpdates: applying numeric updates; seg=" + info + " updates=" + numericFieldUpdates);
                            foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.NumericDVUpdates)
                            {
                                string field = e.Key;
                                NumericDocValuesFieldUpdates fieldUpdates = e.Value;
                                FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                                Debug.Assert(fieldInfo != null);

                                fieldInfo.DocValuesGen = nextFieldInfosGen;
                                // write the numeric updates to a new gen'd docvalues file
                                fieldsConsumer.AddNumericField(fieldInfo, GetLongEnumerable(reader, field, fieldUpdates));
                            }

                            //        System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " updates=" + dvUpdates.binaryDVUpdates);
                            foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.BinaryDVUpdates)
                            {
                                string field = e.Key;
                                BinaryDocValuesFieldUpdates dvFieldUpdates = e.Value;
                                FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                                Debug.Assert(fieldInfo != null);

                                //          System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " f=" + dvFieldUpdates + ", updates=" + dvFieldUpdates);

                                fieldInfo.DocValuesGen = nextFieldInfosGen;
                                // write the numeric updates to a new gen'd docvalues file
                                fieldsConsumer.AddBinaryField(fieldInfo, GetBytesRefEnumerable(reader, field, dvFieldUpdates));
                            }

                            codec.FieldInfosFormat().FieldInfosWriter.Write(trackingDir, Info.Info.Name, segmentSuffix, fieldInfos, IOContext.DEFAULT);
                            fieldsConsumerSuccess = true;
                        }
                        finally
                        {
                            if (fieldsConsumerSuccess)
                            {
                                fieldsConsumer.Dispose();
                            }
                            else
                            {
                                IOUtils.CloseWhileHandlingException(fieldsConsumer);
                            }
                        }
                    }
                    finally
                    {
                        if (reader != this.Reader)
                        {
                            //          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: closeReader " + reader);
                            reader.Dispose();
                        }
                    }

                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        // Advance only the nextWriteDocValuesGen so that a 2nd
                        // attempt to write will write to a new file
                        Info.AdvanceNextWriteFieldInfosGen();

                        // Delete any partially created file(s):
                        foreach (string fileName in trackingDir.CreatedFiles)
                        {
                            try
                            {
                                dir.DeleteFile(fileName);
                            }
                            catch (Exception)
                            {
                                // Ignore so we throw only the first exc
                            }
                        }
                    }
                }

                Info.AdvanceFieldInfosGen();
                // copy all the updates to mergingUpdates, so they can later be applied to the merged segment
                if (IsMerging)
                {
                    foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.NumericDVUpdates)
                    {
                        DocValuesFieldUpdates updates;
                        if (!MergingDVUpdates.TryGetValue(e.Key, out updates))
                        {
                            MergingDVUpdates[e.Key] = e.Value;
                        }
                        else
                        {
                            updates.Merge(e.Value);
                        }
                    }
                    foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.BinaryDVUpdates)
                    {
                        DocValuesFieldUpdates updates;
                        if (!MergingDVUpdates.TryGetValue(e.Key, out updates))
                        {
                            MergingDVUpdates[e.Key] = e.Value;
                        }
                        else
                        {
                            updates.Merge(e.Value);
                        }
                    }
                }

                // create a new map, keeping only the gens that are in use
                IDictionary <long, ISet <string> > genUpdatesFiles    = Info.UpdatesFiles;
                IDictionary <long, ISet <string> > newGenUpdatesFiles = new Dictionary <long, ISet <string> >();
                long fieldInfosGen = Info.FieldInfosGen;
                foreach (FieldInfo fi in fieldInfos)
                {
                    long dvGen = fi.DocValuesGen;
                    if (dvGen != -1 && !newGenUpdatesFiles.ContainsKey(dvGen))
                    {
                        if (dvGen == fieldInfosGen)
                        {
                            newGenUpdatesFiles[fieldInfosGen] = trackingDir.CreatedFiles;
                        }
                        else
                        {
                            newGenUpdatesFiles[dvGen] = genUpdatesFiles[dvGen];
                        }
                    }
                }

                Info.GenUpdatesFiles = newGenUpdatesFiles;

                // wrote new files, should checkpoint()
                Writer.Checkpoint();

                // if there is a reader open, reopen it to reflect the updates
                if (Reader != null)
                {
                    SegmentReader newReader = new SegmentReader(Info, Reader, LiveDocs_Renamed, Info.Info.DocCount - Info.DelCount - PendingDeleteCount_Renamed);
                    bool          reopened  = false;
                    try
                    {
                        Reader.DecRef();
                        Reader   = newReader;
                        reopened = true;
                    }
                    finally
                    {
                        if (!reopened)
                        {
                            newReader.DecRef();
                        }
                    }
                }
            }
        }