Esempio n. 1
0
        /// <summary>
        /// Builds a <see cref="FieldInfos"/> from every field of <c>TestDoc</c>, writes it through the
        /// default codec's field-infos writer and returns it.
        /// </summary>
        /// <param name="dir">Directory in which the output is created and to which the field infos are written.</param>
        /// <param name="filename">Name of the created output; also passed to the writer as its second argument.</param>
        /// <returns>The <see cref="FieldInfos"/> instance that was written.</returns>
        public virtual FieldInfos CreateAndWriteFieldInfos(Directory dir, string filename)
        {
            //Positive test of FieldInfos
            Assert.IsNotNull(TestDoc);
            FieldInfos.Builder builder = new FieldInfos.Builder();
            foreach (IIndexableField field in TestDoc)
            {
                builder.AddOrUpdate(field.Name, field.IndexableFieldType);
            }
            FieldInfos fieldInfos = builder.Finish();

            //Since the complement is stored as well in the fields map
            Assert.AreEqual(DocHelper.All.Count, fieldInfos.Count); //this is all b/c we are using the no-arg constructor

            // The using block disposes the output even when the assertion or the write below throws.
            using (IndexOutput output = dir.CreateOutput(filename, NewIOContext(Random)))
            {
                Assert.IsNotNull(output);
                //Use a RAMOutputStream

                FieldInfosWriter writer = Codec.Default.FieldInfosFormat.FieldInfosWriter;

                writer.Write(dir, filename, "", fieldInfos, IOContext.DEFAULT);
            }

            return fieldInfos;
        }
Esempio n. 2
0
        /// <summary>
        /// Builds one merged <see cref="FieldInfos"/> for a composite reader by adding the
        /// <see cref="FieldInfos"/> of each of its leaves to a single builder.
        /// <para/>
        /// NOTE: the returned field numbers will likely not
        /// correspond to the actual field numbers in the underlying
        /// readers, and codec metadata (<see cref="FieldInfo.GetAttribute(string)"/>)
        /// will be unavailable.
        /// </summary>
        /// <param name="reader">Reader whose leaves are visited.</param>
        /// <returns>The merged <see cref="FieldInfos"/>.</returns>
        public static FieldInfos GetMergedFieldInfos(IndexReader reader)
        {
            FieldInfos.Builder merged = new FieldInfos.Builder();
            foreach (AtomicReaderContext leaf in reader.Leaves)
            {
                FieldInfos leafInfos = leaf.AtomicReader.FieldInfos;
                merged.Add(leafInfos);
            }

            return merged.Finish();
        }
Esempio n. 3
0
        /// <summary>
        /// Writes randomly generated postings for <c>NUM_FIELDS</c> fields, opens them through the default
        /// codec's postings format and checks them with <c>NUM_TEST_THREADS</c> verifiers: all but one run on
        /// background threads, the last one runs on the calling thread.
        /// </summary>
        public virtual void TestRandomPostings()
        {
            FieldInfos.Builder infosBuilder = new FieldInfos.Builder();

            // Field i omits term frequencies when i % 3 == 0 and stores payloads when i % 3 == 1.
            FieldData[] fieldData = new FieldData[NUM_FIELDS];
            for (int f = 0; f < NUM_FIELDS; f++)
            {
                int bucket = f % 3;
                bool omitTF = bucket == 0;
                bool storePayloads = bucket == 1;
                fieldData[f] = new FieldData(this, fieldNames[f], infosBuilder, this.MakeRandomTerms(omitTF, storePayloads), omitTF, storePayloads);
            }

            // LUCENENET specific: the directory lives in a using block so it is disposed even if something below throws
            using (Directory directory = NewDirectory())
            {
                FieldInfos infos = infosBuilder.Finish();

                if (Verbose)
                {
                    Console.WriteLine("TEST: now write postings");
                }

                this.Write(infos, directory, fieldData, false);
                Codec defaultCodec = Codec.Default;
                SegmentInfo segment = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, defaultCodec, null);

                if (Verbose)
                {
                    Console.WriteLine("TEST: now read postings");
                }

                SegmentReadState readState = new SegmentReadState(directory, segment, infos, NewIOContext(Random), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR);

                // LUCENENET specific: the producer lives in a using block so it is disposed even if verification throws
                using (FieldsProducer producer = defaultCodec.PostingsFormat.FieldsProducer(readState))
                {
                    int workerCount = NUM_TEST_THREADS - 1;
                    Verify[] workers = new Verify[workerCount];
                    for (int w = 0; w < workerCount; w++)
                    {
                        workers[w] = new Verify(this, segment, fieldData, producer);
                        workers[w].IsBackground = true;
                        workers[w].Start();
                    }

                    // The calling thread acts as the last verifier.
                    Verify inline = new Verify(this, segment, fieldData, producer);
                    inline.Run();

                    foreach (Verify worker in workers)
                    {
                        worker.Join();
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(!worker.failed);
                        }
                    }
                }
            }
        }
Esempio n. 4
0
 /// <summary>
 /// Adds every <see cref="FieldInfo"/> of every reader in <c>mergeState.Readers</c> to
 /// <c>fieldInfosBuilder</c> and stores the finished result in <c>mergeState.FieldInfos</c>.
 /// </summary>
 public void MergeFieldInfos()
 {
     foreach (AtomicReader segmentReader in mergeState.Readers)
     {
         foreach (FieldInfo info in segmentReader.FieldInfos)
         {
             fieldInfosBuilder.Add(info);
         }
     }

     var merged = fieldInfosBuilder.Finish();
     mergeState.FieldInfos = merged;
 }
Esempio n. 5
0
        /// <summary>
        /// Writes randomly generated postings for <c>NUM_FIELDS</c> fields, opens them through the default
        /// codec's postings format and checks them with <c>NUM_TEST_THREADS</c> verifiers: all but one run on
        /// daemon threads, the last one runs on the calling thread.
        /// </summary>
        public virtual void TestRandomPostings()
        {
            FieldInfos.Builder builder = new FieldInfos.Builder();

            // Field i omits term frequencies when i % 3 == 0 and stores payloads when i % 3 == 1.
            FieldData[] fields = new FieldData[NUM_FIELDS];
            for (int i = 0; i < NUM_FIELDS; i++)
            {
                bool omitTF        = 0 == (i % 3);
                bool storePayloads = 1 == (i % 3);
                fields[i] = new FieldData(this, FieldNames[i], builder, this.MakeRandomTerms(omitTF, storePayloads), omitTF, storePayloads);
            }

            // Both the directory and the producer are held in using blocks so that they are
            // disposed (producer first, then directory) even when writing, reading or
            // verification throws; previously a failure leaked both.
            using (Directory dir = NewDirectory())
            {
                FieldInfos fieldInfos = builder.Finish();

                if (VERBOSE)
                {
                    Console.WriteLine("TEST: now write postings");
                }

                this.Write(fieldInfos, dir, fields, false);
                Codec       codec = Codec.Default;
                SegmentInfo si    = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);

                if (VERBOSE)
                {
                    Console.WriteLine("TEST: now read postings");
                }

                using (FieldsProducer terms = codec.PostingsFormat().FieldsProducer(new SegmentReadState(dir, si, fieldInfos, NewIOContext(Random()), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR)))
                {
                    Verify[] threads = new Verify[NUM_TEST_THREADS - 1];
                    for (int i = 0; i < NUM_TEST_THREADS - 1; i++)
                    {
                        threads[i] = new Verify(this, si, fields, terms);
                        threads[i].SetDaemon(true);
                        threads[i].Start();
                    }

                    // The calling thread acts as the last verifier.
                    (new Verify(this, si, fields, terms)).Run();

                    for (int i = 0; i < NUM_TEST_THREADS - 1; i++)
                    {
                        threads[i].Join();
                        Debug.Assert(!threads[i].Failed);
                    }
                }
            }
        }
        /// <summary>
        /// Flushes the documents currently buffered in RAM (<c>numDocsInRAM</c>) through <c>consumer</c>
        /// and wraps the result in a sealed <see cref="FlushedSegment"/>.
        /// </summary>
        /// <returns>
        /// The flushed segment, or <c>null</c> when <c>aborting</c> is set and the flush is skipped.
        /// </returns>
        /// <remarks>
        /// If anything inside the flush fails, <c>Abort(filesToDelete)</c> is invoked from the
        /// <c>finally</c> block before the exception propagates.
        /// </remarks>
        internal virtual FlushedSegment Flush()
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(numDocsInRAM > 0);
                Debugging.Assert(deleteSlice.IsEmpty, "all deletes must be applied in prepareFlush");
            }
            segmentInfo.DocCount = numDocsInRAM;
            SegmentWriteState flushState  = new SegmentWriteState(infoStream, directory, segmentInfo, fieldInfos.Finish(), indexWriterConfig.TermIndexInterval, pendingUpdates, new IOContext(new FlushInfo(numDocsInRAM, BytesUsed)));
            // Captured before flushing so the log line below can report the RAM used by the buffered docs.
            double            startMBUsed = BytesUsed / 1024.0 / 1024.0;

            // Apply delete-by-docID now (delete-byDocID only
            // happens when an exception is hit processing that
            // doc, eg if analyzer has some problem w/ the text):
            if (pendingUpdates.docIDs.Count > 0)
            {
                flushState.LiveDocs = codec.LiveDocsFormat.NewLiveDocs(numDocsInRAM);
                foreach (int delDocID in pendingUpdates.docIDs)
                {
                    flushState.LiveDocs.Clear(delDocID);
                }
                flushState.DelCountOnFlush = pendingUpdates.docIDs.Count;
                pendingUpdates.bytesUsed.AddAndGet(-pendingUpdates.docIDs.Count * BufferedUpdates.BYTES_PER_DEL_DOCID);
                pendingUpdates.docIDs.Clear();
            }

            if (aborting)
            {
                if (infoStream.IsEnabled("DWPT"))
                {
                    infoStream.Message("DWPT", "flush: skip because aborting is set");
                }
                return null;
            }

            if (infoStream.IsEnabled("DWPT"))
            {
                infoStream.Message("DWPT", "flush postings as segment " + flushState.SegmentInfo.Name + " numDocs=" + numDocsInRAM);
            }

            bool success = false;

            try
            {
                consumer.Flush(flushState);
                pendingUpdates.terms.Clear();
                segmentInfo.SetFiles(new JCG.HashSet <string>(directory.CreatedFiles));

                SegmentCommitInfo segmentInfoPerCommit = new SegmentCommitInfo(segmentInfo, 0, -1L, -1L);
                if (infoStream.IsEnabled("DWPT"))
                {
                    // Report the number of deleted docs. The previous expression
                    // (DocCount - DelCountOnFlush) is the number of live docs, which
                    // contradicted the "deleted docs" label.
                    infoStream.Message("DWPT", "new segment has " + (flushState.LiveDocs == null ? 0 : flushState.DelCountOnFlush) + " deleted docs");
                    infoStream.Message("DWPT", "new segment has " + (flushState.FieldInfos.HasVectors ? "vectors" : "no vectors") + "; " + (flushState.FieldInfos.HasNorms ? "norms" : "no norms") + "; " + (flushState.FieldInfos.HasDocValues ? "docValues" : "no docValues") + "; " + (flushState.FieldInfos.HasProx ? "prox" : "no prox") + "; " + (flushState.FieldInfos.HasFreq ? "freqs" : "no freqs"));
                    infoStream.Message("DWPT", "flushedFiles=" + string.Format(J2N.Text.StringFormatter.InvariantCulture, "{0}", segmentInfoPerCommit.GetFiles()));
                    infoStream.Message("DWPT", "flushed codec=" + codec);
                }

                // Hand the pending updates on with the segment only when query deletes or
                // doc-values updates remain; otherwise clear them and pass null.
                BufferedUpdates segmentDeletes;
                if (pendingUpdates.queries.Count == 0 && pendingUpdates.numericUpdates.Count == 0 && pendingUpdates.binaryUpdates.Count == 0)
                {
                    pendingUpdates.Clear();
                    segmentDeletes = null;
                }
                else
                {
                    segmentDeletes = pendingUpdates;
                }

                if (infoStream.IsEnabled("DWPT"))
                {
                    double newSegmentSize = segmentInfoPerCommit.GetSizeInBytes() / 1024.0 / 1024.0;
                    infoStream.Message("DWPT", "flushed: segment=" + segmentInfo.Name + " ramUsed=" + startMBUsed.ToString(nf) + " MB" + " newFlushedSize(includes docstores)=" + newSegmentSize.ToString(nf) + " MB" + " docs/MB=" + (flushState.SegmentInfo.DocCount / newSegmentSize).ToString(nf));
                }

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(segmentInfo != null);
                }

                FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.FieldInfos, segmentDeletes, flushState.LiveDocs, flushState.DelCountOnFlush);
                SealFlushedSegment(fs);
                success = true;

                return fs;
            }
            finally
            {
                // success stays false when any statement above threw.
                if (!success)
                {
                    Abort(filesToDelete);
                }
            }
        }
Esempio n. 7
0
        /// <summary>
        /// Writes a single field named "field" holding <c>NUM_TERMS</c> terms, where term <c>i</c> has the
        /// text <c>Convert.ToString(i)</c> and occurs only in document <c>i</c>, then reads it back through
        /// the default codec's postings format and checks field enumeration, term order, docs and seeking.
        /// </summary>
        public virtual void TestFixedPostings()
        {
            const int NUM_TERMS = 100;

            TermData[] terms = new TermData[NUM_TERMS];
            for (int i = 0; i < NUM_TERMS; i++)
            {
                int[]  docs = new int[] { i };
                string text = Convert.ToString(i);
                terms[i] = new TermData(this, text, docs, null);
            }

            FieldInfos.Builder builder = new FieldInfos.Builder();

            FieldData field = new FieldData(this, "field", builder, terms, true, false);

            FieldData[] fields     = new FieldData[] { field };
            FieldInfos  fieldInfos = builder.Finish();

            // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
            using (Directory dir = NewDirectory())
            {
                this.Write(fieldInfos, dir, fields, true);
                Codec       codec = Codec.Default;
                SegmentInfo si    = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);

                // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
                using (FieldsProducer reader = codec.PostingsFormat.FieldsProducer(new SegmentReadState(dir, si, fieldInfos, NewIOContext(Random), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR)))
                // The enumerator is IDisposable as well; it is released before the producer.
                using (IEnumerator<string> fieldsEnum = reader.GetEnumerator())
                {
                    // Fail with a clear message if no field was written, instead of reading
                    // Current from an enumerator that never advanced.
                    Assert.IsTrue(fieldsEnum.MoveNext());
                    string fieldName = fieldsEnum.Current;
                    Assert.IsNotNull(fieldName);
                    Terms terms2 = reader.GetTerms(fieldName);
                    Assert.IsNotNull(terms2);

                    TermsEnum termsEnum = terms2.GetIterator(null);

                    DocsEnum docsEnum = null;
                    for (int i = 0; i < NUM_TERMS; i++)
                    {
                        BytesRef term = termsEnum.Next();
                        Assert.IsNotNull(term);
                        Assert.AreEqual(terms[i].text2, term.Utf8ToString());

                        // do this twice to stress test the codec's reuse, ie,
                        // make sure it properly fully resets (rewinds) its
                        // internal state:
                        for (int iter = 0; iter < 2; iter++)
                        {
                            docsEnum = TestUtil.Docs(Random, termsEnum, null, docsEnum, DocsFlags.NONE);
                            Assert.AreEqual(terms[i].docs[0], docsEnum.NextDoc());
                            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc());
                        }
                    }
                    Assert.IsNull(termsEnum.Next());

                    for (int i = 0; i < NUM_TERMS; i++)
                    {
                        // Expected value first, so a failure message labels the two values correctly.
                        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef(terms[i].text2)));
                    }

                    // Only one field was written.
                    Assert.IsFalse(fieldsEnum.MoveNext());
                }
            }
        }
Esempio n. 8
0
        /// <summary>
        /// Expert: create a <see cref="ParallelAtomicReader"/> based on the provided
        /// <paramref name="readers"/> and <paramref name="storedFieldsReaders"/>; when a document is
        /// loaded, only <paramref name="storedFieldsReaders"/> will be used.
        /// </summary>
        /// <param name="closeSubReaders">
        /// When <c>false</c>, the reference count of every sub-reader is incremented at the end of construction.
        /// </param>
        /// <param name="readers">Main readers; for a field present in several of them, the first reader wins.</param>
        /// <param name="storedFieldsReaders">Readers used for stored fields.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="readers"/> is empty while <paramref name="storedFieldsReaders"/> is not, or the
        /// readers do not all report the same <c>MaxDoc</c>.
        /// </exception>
        public ParallelAtomicReader(bool closeSubReaders, AtomicReader[] readers, AtomicReader[] storedFieldsReaders)
        {
            this.closeSubReaders = closeSubReaders;

            bool noMainReaders = readers.Length == 0;
            if (noMainReaders && storedFieldsReaders.Length > 0)
            {
                throw new ArgumentException("There must be at least one main reader if storedFieldsReaders are used.");
            }

            // Work on private copies of the caller's arrays.
            this.parallelReaders = (AtomicReader[])readers.Clone();
            this.storedFieldsReaders = (AtomicReader[])storedFieldsReaders.Clone();

            // Document statistics are taken from the first main reader, if there is one.
            if (parallelReaders.Length == 0)
            {
                this.numDocs = 0;
                this.maxDoc = 0;
                this.hasDeletions = false;
            }
            else
            {
                AtomicReader primary = parallelReaders[0];
                this.maxDoc = primary.MaxDoc;
                this.numDocs = primary.NumDocs;
                this.hasDeletions = primary.HasDeletions;
            }

            completeReaderSet.UnionWith(this.parallelReaders);
            completeReaderSet.UnionWith(this.storedFieldsReaders);

            // check compatibility:
            foreach (AtomicReader candidate in completeReaderSet)
            {
                if (candidate.MaxDoc == maxDoc)
                {
                    continue;
                }

                throw new ArgumentException("All readers must have same MaxDoc: " + maxDoc + "!=" + candidate.MaxDoc);
            }

            // TODO: make this read-only in a cleaner way?
            FieldInfos.Builder infosBuilder = new FieldInfos.Builder();

            // build FieldInfos and fieldToReader map:
            foreach (AtomicReader owner in this.parallelReaders)
            {
                foreach (FieldInfo info in owner.FieldInfos)
                {
                    // NOTE: first reader having a given field "wins":
                    if (fieldToReader.ContainsKey(info.Name))
                    {
                        continue;
                    }

                    infosBuilder.Add(info);
                    fieldToReader[info.Name] = owner;
                    if (info.HasVectors)
                    {
                        tvFieldToReader[info.Name] = owner;
                    }
                }
            }
            fieldInfos = infosBuilder.Finish();

            // build Fields instance
            foreach (AtomicReader owner in this.parallelReaders)
            {
                Fields ownerFields = owner.Fields;
                if (ownerFields == null)
                {
                    continue;
                }

                foreach (string fieldName in ownerFields)
                {
                    // only add if the reader responsible for that field name is the current:
                    if (fieldToReader[fieldName].Equals(owner))
                    {
                        this.fields.AddField(fieldName, ownerFields.GetTerms(fieldName));
                    }
                }
            }

            // do this finally so any Exceptions occurred before don't affect refcounts:
            foreach (AtomicReader sub in completeReaderSet)
            {
                if (!closeSubReaders)
                {
                    sub.IncRef();
                }
                sub.RegisterParentReader(this);
            }
        }
Esempio n. 9
0
        /// <summary>
        /// Writes the numeric and binary doc-values updates in <paramref name="dvUpdates"/> for this
        /// segment: builds a copy of the segment's field infos, writes each updated field through the
        /// codec's doc-values consumer, writes the new field infos, records which files belong to which
        /// doc-values generation, checkpoints the writer and, if a reader is open, replaces it.
        /// </summary>
        /// <param name="dir">Directory that receives the new files; wrapped so created files can be tracked.</param>
        /// <param name="dvUpdates">The updates to write; asserted to be non-empty when asserts are enabled.</param>
        public virtual void WriteFieldUpdates(Directory dir, DocValuesFieldUpdates.Container dvUpdates)
        {
            UninterruptableMonitor.Enter(this);
            try
            {
                // The caller is expected to already hold the monitor on the writer.
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(UninterruptableMonitor.IsEntered(writer));
                }
                //System.out.println("rld.writeFieldUpdates: seg=" + info + " numericFieldUpdates=" + numericFieldUpdates);

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(dvUpdates.Any());
                }

                // Do this so we can delete any created files on
                // exception; this saves all codecs from having to do
                // it:
                TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);

                FieldInfos fieldInfos = null;
                bool       success    = false;
                try
                {
                    Codec codec = Info.Info.Codec;

                    // reader could be null e.g. for a just merged segment (from
                    // IndexWriter.commitMergedDeletes).
                    SegmentReader reader = this.reader ?? new SegmentReader(Info, writer.Config.ReaderTermsIndexDivisor, IOContext.READ_ONCE);
                    try
                    {
                        // clone FieldInfos so that we can update their dvGen separately from
                        // the reader's infos and write them to a new fieldInfos_gen file
                        FieldInfos.Builder builder = new FieldInfos.Builder(writer.globalFieldNumberMap);
                        // cannot use builder.add(reader.getFieldInfos()) because it does not
                        // clone FI.attributes as well FI.dvGen
                        foreach (FieldInfo fi in reader.FieldInfos)
                        {
                            FieldInfo clone = builder.Add(fi);
                            // copy the stuff FieldInfos.Builder doesn't copy
                            if (fi.Attributes != null)
                            {
                                foreach (KeyValuePair <string, string> e in fi.Attributes)
                                {
                                    clone.PutAttribute(e.Key, e.Value);
                                }
                            }
                            clone.DocValuesGen = fi.DocValuesGen;
                        }
                        // create new fields or update existing ones to have NumericDV type
                        foreach (string f in dvUpdates.numericDVUpdates.Keys)
                        {
                            builder.AddOrUpdate(f, NumericDocValuesField.TYPE);
                        }
                        // create new fields or update existing ones to have BinaryDV type
                        foreach (string f in dvUpdates.binaryDVUpdates.Keys)
                        {
                            builder.AddOrUpdate(f, BinaryDocValuesField.TYPE);
                        }

                        fieldInfos = builder.Finish();
                        long nextFieldInfosGen = Info.NextFieldInfosGen;
                        // LUCENENET specific: We created the segments names wrong in 4.8.0-beta00001 - 4.8.0-beta00015,
                        // so we added a switch to be able to read these indexes in later versions. This logic as well as an
                        // optimization on the first 100 segment values is implemented in SegmentInfos.SegmentNumberToString().
                        string            segmentSuffix   = SegmentInfos.SegmentNumberToString(nextFieldInfosGen);
                        SegmentWriteState state           = new SegmentWriteState(null, trackingDir, Info.Info, fieldInfos, writer.Config.TermIndexInterval, null, IOContext.DEFAULT, segmentSuffix);
                        DocValuesFormat   docValuesFormat = codec.DocValuesFormat;
                        DocValuesConsumer fieldsConsumer  = docValuesFormat.FieldsConsumer(state);
                        bool fieldsConsumerSuccess        = false;
                        try
                        {
                            //          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeFieldUpdates: applying numeric updates; seg=" + info + " updates=" + numericFieldUpdates);
                            foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.numericDVUpdates)
                            {
                                string field = e.Key;
                                NumericDocValuesFieldUpdates fieldUpdates = e.Value;
                                FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                                if (Debugging.AssertsEnabled)
                                {
                                    Debugging.Assert(fieldInfo != null);
                                }

                                fieldInfo.DocValuesGen = nextFieldInfosGen;
                                // write the numeric updates to a new gen'd docvalues file
                                fieldsConsumer.AddNumericField(fieldInfo, GetInt64Enumerable(reader, field, fieldUpdates));
                            }

                            //        System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " updates=" + dvUpdates.binaryDVUpdates);
                            foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.binaryDVUpdates)
                            {
                                string field = e.Key;
                                BinaryDocValuesFieldUpdates dvFieldUpdates = e.Value;
                                FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                                if (Debugging.AssertsEnabled)
                                {
                                    Debugging.Assert(fieldInfo != null);
                                }

                                //          System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " f=" + dvFieldUpdates + ", updates=" + dvFieldUpdates);

                                fieldInfo.DocValuesGen = nextFieldInfosGen;
                                // write the binary updates to a new gen'd docvalues file
                                fieldsConsumer.AddBinaryField(fieldInfo, GetBytesRefEnumerable(reader, field, dvFieldUpdates));
                            }

                            // Write the updated field infos under the same generation suffix as the doc-values files.
                            codec.FieldInfosFormat.FieldInfosWriter.Write(trackingDir, Info.Info.Name, segmentSuffix, fieldInfos, IOContext.DEFAULT);
                            fieldsConsumerSuccess = true;
                        }
                        finally
                        {
                            // On failure, dispose through IOUtils.DisposeWhileHandlingException instead of a plain Dispose().
                            if (fieldsConsumerSuccess)
                            {
                                fieldsConsumer.Dispose();
                            }
                            else
                            {
                                IOUtils.DisposeWhileHandlingException(fieldsConsumer);
                            }
                        }
                    }
                    finally
                    {
                        // Only dispose the reader if it was created above rather than taken from the field.
                        if (reader != this.reader)
                        {
                            //          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: closeReader " + reader);
                            reader.Dispose();
                        }
                    }

                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        // Advance only the next-write field infos gen so that a 2nd
                        // attempt to write will write to a new file
                        Info.AdvanceNextWriteFieldInfosGen();

                        // Delete any partially created file(s):
                        foreach (string fileName in trackingDir.CreatedFiles)
                        {
                            try
                            {
                                dir.DeleteFile(fileName);
                            }
                            catch (Exception t) when(t.IsThrowable())
                            {
                                // Ignore so we throw only the first exc
                            }
                        }
                    }
                }

                Info.AdvanceFieldInfosGen();
                // copy all the updates to mergingUpdates, so they can later be applied to the merged segment
                if (isMerging)
                {
                    foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.numericDVUpdates)
                    {
                        if (!mergingDVUpdates.TryGetValue(e.Key, out DocValuesFieldUpdates updates))
                        {
                            mergingDVUpdates[e.Key] = e.Value;
                        }
                        else
                        {
                            updates.Merge(e.Value);
                        }
                    }
                    foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.binaryDVUpdates)
                    {
                        if (!mergingDVUpdates.TryGetValue(e.Key, out DocValuesFieldUpdates updates))
                        {
                            mergingDVUpdates[e.Key] = e.Value;
                        }
                        else
                        {
                            updates.Merge(e.Value);
                        }
                    }
                }

                // create a new map, keeping only the gens that are in use
                IDictionary <long, ISet <string> > genUpdatesFiles    = Info.UpdatesFiles;
                IDictionary <long, ISet <string> > newGenUpdatesFiles = new Dictionary <long, ISet <string> >();
                long fieldInfosGen = Info.FieldInfosGen;
                foreach (FieldInfo fi in fieldInfos)
                {
                    long dvGen = fi.DocValuesGen;
                    // -1 is skipped; each remaining gen is entered into the new map only once.
                    if (dvGen != -1 && !newGenUpdatesFiles.ContainsKey(dvGen))
                    {
                        if (dvGen == fieldInfosGen)
                        {
                            // The gen just written maps to the files created through the tracking wrapper.
                            newGenUpdatesFiles[fieldInfosGen] = trackingDir.CreatedFiles;
                        }
                        else
                        {
                            // Older gens keep the file set they already had.
                            newGenUpdatesFiles[dvGen] = genUpdatesFiles[dvGen];
                        }
                    }
                }

                Info.SetGenUpdatesFiles(newGenUpdatesFiles);

                // wrote new files, should checkpoint()
                writer.Checkpoint();

                // if there is a reader open, reopen it to reflect the updates
                // (the local named "reader" above is out of scope here, so this is the field)
                if (reader != null)
                {
                    SegmentReader newReader = new SegmentReader(Info, reader, liveDocs, Info.Info.DocCount - Info.DelCount - pendingDeleteCount);
                    bool          reopened  = false;
                    try
                    {
                        reader.DecRef();
                        reader   = newReader;
                        reopened = true;
                    }
                    finally
                    {
                        // If releasing the old reader threw, release the new one so it is not leaked.
                        if (!reopened)
                        {
                            newReader.DecRef();
                        }
                    }
                }
            }
            finally
            {
                UninterruptableMonitor.Exit(this);
            }
        }
Esempio n. 10
0
        // Writes field updates (new _X_N updates files) to the directory
        /// <summary>
        /// Writes the doc-values updates held in <paramref name="dvUpdates"/> for this segment
        /// as a new generation of doc-values files plus a matching field-infos file, then
        /// advances the segment's field-infos generation, rebuilds the generation-to-files map,
        /// checkpoints the writer and, when a reader is currently held, swaps it for a new
        /// <c>SegmentReader</c> built on top of the old one.
        /// <para/>
        /// The whole method runs while holding the monitor on <c>this</c>.
        /// <para/>
        /// If anything throws while the files are being written, the next-write generation is
        /// advanced, every file created through the tracking directory is deleted on a
        /// best-effort basis (delete failures are ignored), and the original exception propagates.
        /// </summary>
        /// <param name="dir">Directory that receives the new files; it is wrapped in a
        /// <c>TrackingDirectoryWrapper</c> so the created file names are known.</param>
        /// <param name="dvUpdates">Container exposing <c>NumericDVUpdates</c> and
        /// <c>BinaryDVUpdates</c>, both keyed by field name. The method asserts
        /// <c>dvUpdates.Any()</c> (the assert is only active where <c>Debug.Assert</c> is compiled in).</param>
        public virtual void WriteFieldUpdates(Directory dir, DocValuesFieldUpdates.Container dvUpdates)
        {
            lock (this)
            {
                //Debug.Assert(Thread.holdsLock(Writer));
                //System.out.println("rld.writeFieldUpdates: seg=" + info + " numericFieldUpdates=" + numericFieldUpdates);

                Debug.Assert(dvUpdates.Any());

                // Do this so we can delete any created files on
                // exception; this saves all codecs from having to do
                // it:
                TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);

                // Assigned inside the try block; read again after it to rebuild the gen -> files map.
                FieldInfos fieldInfos = null;
                bool       success    = false;
                try
                {
                    Codec codec = Info.Info.Codec;

                    // reader could be null e.g. for a just merged segment (from
                    // IndexWriter.commitMergedDeletes).
                    // In that case a temporary reader is opened here and disposed in the finally below.
                    SegmentReader reader = this.Reader == null ? new SegmentReader(Info, Writer.Config.ReaderTermsIndexDivisor, IOContext.READONCE) : this.Reader;
                    try
                    {
                        // clone FieldInfos so that we can update their dvGen separately from
                        // the reader's infos and write them to a new fieldInfos_gen file
                        FieldInfos.Builder builder = new FieldInfos.Builder(Writer.GlobalFieldNumberMap);
                        // cannot use builder.add(reader.getFieldInfos()) because it does not
                        // clone FI.attributes as well FI.dvGen
                        foreach (FieldInfo fi in reader.FieldInfos)
                        {
                            FieldInfo clone = builder.Add(fi);
                            // copy the stuff FieldInfos.Builder doesn't copy
                            if (fi.Attributes() != null)
                            {
                                foreach (KeyValuePair <string, string> e in fi.Attributes())
                                {
                                    clone.PutAttribute(e.Key, e.Value);
                                }
                            }
                            clone.DocValuesGen = fi.DocValuesGen;
                        }
                        // create new fields or update existing ones to have NumericDV type
                        foreach (string f in dvUpdates.NumericDVUpdates.Keys)
                        {
                            builder.AddOrUpdate(f, NumericDocValuesField.TYPE);
                        }
                        // create new fields or update existing ones to have BinaryDV type
                        foreach (string f in dvUpdates.BinaryDVUpdates.Keys)
                        {
                            builder.AddOrUpdate(f, BinaryDocValuesField.fType);
                        }

                        fieldInfos = builder.Finish();
                        // The generation being written doubles as the segment suffix of the new files.
                        // NOTE(review): the suffix is formatted in decimal here, while the commented-out
                        // alternative on the next line used Character.MAX_RADIX - confirm that whatever
                        // reads these files builds the suffix the same way.
                        long              nextFieldInfosGen     = Info.NextFieldInfosGen;
                        string            segmentSuffix         = nextFieldInfosGen.ToString(CultureInfo.InvariantCulture);//Convert.ToString(nextFieldInfosGen, Character.MAX_RADIX));
                        SegmentWriteState state                 = new SegmentWriteState(null, trackingDir, Info.Info, fieldInfos, Writer.Config.TermIndexInterval, null, IOContext.DEFAULT, segmentSuffix);
                        DocValuesFormat   docValuesFormat       = codec.DocValuesFormat();
                        DocValuesConsumer fieldsConsumer        = docValuesFormat.FieldsConsumer(state);
                        bool              fieldsConsumerSuccess = false;
                        try
                        {
                            //          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeFieldUpdates: applying numeric updates; seg=" + info + " updates=" + numericFieldUpdates);
                            foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.NumericDVUpdates)
                            {
                                string field = e.Key;
                                NumericDocValuesFieldUpdates fieldUpdates = e.Value;
                                FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                                Debug.Assert(fieldInfo != null);

                                // stamp the field with the generation being written before handing it to the consumer
                                fieldInfo.DocValuesGen = nextFieldInfosGen;
                                // write the numeric updates to a new gen'd docvalues file
                                fieldsConsumer.AddNumericField(fieldInfo, GetLongEnumerable(reader, field, fieldUpdates));
                            }

                            //        System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " updates=" + dvUpdates.binaryDVUpdates);
                            foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.BinaryDVUpdates)
                            {
                                string field = e.Key;
                                BinaryDocValuesFieldUpdates dvFieldUpdates = e.Value;
                                FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                                Debug.Assert(fieldInfo != null);

                                //          System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " f=" + dvFieldUpdates + ", updates=" + dvFieldUpdates);

                                // stamp the field with the generation being written before handing it to the consumer
                                fieldInfo.DocValuesGen = nextFieldInfosGen;
                                // write the binary updates to a new gen'd docvalues file
                                fieldsConsumer.AddBinaryField(fieldInfo, GetBytesRefEnumerable(reader, field, dvFieldUpdates));
                            }

                            // write the cloned, re-stamped field infos under the same segment suffix
                            codec.FieldInfosFormat().FieldInfosWriter.Write(trackingDir, Info.Info.Name, segmentSuffix, fieldInfos, IOContext.DEFAULT);
                            fieldsConsumerSuccess = true;
                        }
                        finally
                        {
                            // On success dispose normally. On failure close via IOUtils, which presumably
                            // suppresses secondary exceptions so the original one surfaces - confirm.
                            if (fieldsConsumerSuccess)
                            {
                                fieldsConsumer.Dispose();
                            }
                            else
                            {
                                IOUtils.CloseWhileHandlingException(fieldsConsumer);
                            }
                        }
                    }
                    finally
                    {
                        // only dispose the reader if it is the temporary one opened above
                        if (reader != this.Reader)
                        {
                            //          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: closeReader " + reader);
                            reader.Dispose();
                        }
                    }

                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        // Advance only the next-write field-infos gen so that a 2nd
                        // attempt to write will write to a new file
                        Info.AdvanceNextWriteFieldInfosGen();

                        // Delete any partially created file(s):
                        foreach (string fileName in trackingDir.CreatedFiles)
                        {
                            try
                            {
                                dir.DeleteFile(fileName);
                            }
                            catch (Exception)
                            {
                                // Ignore so we throw only the first exc
                            }
                        }
                    }
                }

                // Only reached when the write succeeded (a failure propagates out of the finally above).
                Info.AdvanceFieldInfosGen();
                // copy all the updates to mergingUpdates, so they can later be applied to the merged segment
                if (IsMerging)
                {
                    // first update seen for a field is stored as-is; later ones are merged into it
                    foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.NumericDVUpdates)
                    {
                        DocValuesFieldUpdates updates;
                        if (!MergingDVUpdates.TryGetValue(e.Key, out updates))
                        {
                            MergingDVUpdates[e.Key] = e.Value;
                        }
                        else
                        {
                            updates.Merge(e.Value);
                        }
                    }
                    foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.BinaryDVUpdates)
                    {
                        DocValuesFieldUpdates updates;
                        if (!MergingDVUpdates.TryGetValue(e.Key, out updates))
                        {
                            MergingDVUpdates[e.Key] = e.Value;
                        }
                        else
                        {
                            updates.Merge(e.Value);
                        }
                    }
                }

                // create a new map, keeping only the gens that are in use
                IDictionary <long, ISet <string> > genUpdatesFiles    = Info.UpdatesFiles;
                IDictionary <long, ISet <string> > newGenUpdatesFiles = new Dictionary <long, ISet <string> >();
                // presumably equal to the generation just written, now that AdvanceFieldInfosGen() ran - confirm
                long fieldInfosGen = Info.FieldInfosGen;
                foreach (FieldInfo fi in fieldInfos)
                {
                    long dvGen = fi.DocValuesGen;
                    // -1 is skipped (presumably "no doc-values generation" - confirm); each gen is recorded once
                    if (dvGen != -1 && !newGenUpdatesFiles.ContainsKey(dvGen))
                    {
                        if (dvGen == fieldInfosGen)
                        {
                            // current gen: its files are the ones created through the tracking directory
                            newGenUpdatesFiles[fieldInfosGen] = trackingDir.CreatedFiles;
                        }
                        else
                        {
                            // older gen still referenced by a field: carry its file set over.
                            // NOTE(review): the indexer throws if the old map has no entry for dvGen.
                            newGenUpdatesFiles[dvGen] = genUpdatesFiles[dvGen];
                        }
                    }
                }

                Info.GenUpdatesFiles = newGenUpdatesFiles;

                // wrote new files, should checkpoint()
                Writer.Checkpoint();

                // if there is a reader open, reopen it to reflect the updates
                if (Reader != null)
                {
                    SegmentReader newReader = new SegmentReader(Info, Reader, LiveDocs_Renamed, Info.Info.DocCount - Info.DelCount - PendingDeleteCount_Renamed);
                    bool          reopened  = false;
                    try
                    {
                        // release the old reader first; if that throws, the new reader is released below
                        Reader.DecRef();
                        Reader   = newReader;
                        reopened = true;
                    }
                    finally
                    {
                        if (!reopened)
                        {
                            newReader.DecRef();
                        }
                    }
                }
            }
        }
Esempio n. 11
0
        /// <summary>
        /// Flush all pending docs to a new segment.
        /// <para/>
        /// Pending delete-by-docID entries are applied to a fresh live-docs bitset before the
        /// consumer is flushed. If the flush fails, <c>Abort(FilesToDelete)</c> is invoked and the
        /// exception propagates.
        /// </summary>
        /// <returns>
        /// The sealed <c>FlushedSegment</c>, or <c>null</c> when the flush is skipped because
        /// <c>Aborting</c> is set.
        /// </returns>
        internal virtual FlushedSegment Flush()
        {
            Debug.Assert(numDocsInRAM > 0);
            Debug.Assert(DeleteSlice.Empty, "all deletes must be applied in prepareFlush");
            SegmentInfo_Renamed.DocCount = numDocsInRAM;
            SegmentWriteState flushState = new SegmentWriteState(InfoStream, Directory, SegmentInfo_Renamed, FieldInfos.Finish(), IndexWriterConfig.TermIndexInterval, PendingUpdates, new IOContext(new FlushInfo(numDocsInRAM, BytesUsed())));
            // Captured before the flush so the log line below can report RAM used by the buffered docs.
            double startMBUsed = BytesUsed() / 1024.0 / 1024.0;

            // Apply delete-by-docID now (delete-byDocID only
            // happens when an exception is hit processing that
            // doc, eg if analyzer has some problem w/ the text):
            if (PendingUpdates.DocIDs.Count > 0)
            {
                flushState.LiveDocs = Codec.LiveDocsFormat().NewLiveDocs(numDocsInRAM);
                foreach (int delDocID in PendingUpdates.DocIDs)
                {
                    flushState.LiveDocs.Clear(delDocID);
                }
                flushState.DelCountOnFlush = PendingUpdates.DocIDs.Count;
                // Give back the RAM accounted for the doc IDs before clearing them.
                PendingUpdates.BytesUsed.AddAndGet(-PendingUpdates.DocIDs.Count * BufferedUpdates.BYTES_PER_DEL_DOCID);
                PendingUpdates.DocIDs.Clear();
            }

            if (Aborting)
            {
                if (InfoStream.IsEnabled("DWPT"))
                {
                    InfoStream.Message("DWPT", "flush: skip because aborting is set");
                }
                return null;
            }

            if (InfoStream.IsEnabled("DWPT"))
            {
                InfoStream.Message("DWPT", "flush postings as segment " + flushState.SegmentInfo.Name + " numDocs=" + numDocsInRAM);
            }

            bool success = false;

            try
            {
                Consumer.Flush(flushState);
                PendingUpdates.Terms.Clear();
                SegmentInfo_Renamed.Files = new HashSet<string>(Directory.CreatedFiles);

                SegmentCommitInfo segmentInfoPerCommit = new SegmentCommitInfo(SegmentInfo_Renamed, 0, -1L, -1L);
                if (InfoStream.IsEnabled("DWPT"))
                {
                    // Report the number of docs deleted during this flush. The previous expression
                    // (DocCount - DelCountOnFlush) was the number of live docs, not deleted ones.
                    int deletedDocCount = flushState.LiveDocs == null ? 0 : flushState.DelCountOnFlush;
                    InfoStream.Message("DWPT", "new segment has " + deletedDocCount + " deleted docs");
                    InfoStream.Message("DWPT", "new segment has " + (flushState.FieldInfos.HasVectors() ? "vectors" : "no vectors") + "; " + (flushState.FieldInfos.HasNorms() ? "norms" : "no norms") + "; " + (flushState.FieldInfos.HasDocValues() ? "docValues" : "no docValues") + "; " + (flushState.FieldInfos.HasProx() ? "prox" : "no prox") + "; " + (flushState.FieldInfos.HasFreq() ? "freqs" : "no freqs"));
                    InfoStream.Message("DWPT", "flushedFiles=" + segmentInfoPerCommit.Files());
                    InfoStream.Message("DWPT", "flushed codec=" + Codec);
                }

                // Only hand the buffered updates to the flushed segment when there is something
                // left to apply (queries or doc-values updates); otherwise drop them here.
                BufferedUpdates segmentDeletes;
                if (PendingUpdates.Queries.Count == 0 && PendingUpdates.NumericUpdates.Count == 0 && PendingUpdates.BinaryUpdates.Count == 0)
                {
                    PendingUpdates.Clear();
                    segmentDeletes = null;
                }
                else
                {
                    segmentDeletes = PendingUpdates;
                }

                if (InfoStream.IsEnabled("DWPT"))
                {
                    double newSegmentSize = segmentInfoPerCommit.SizeInBytes() / 1024.0 / 1024.0;
                    InfoStream.Message("DWPT", "flushed: segment=" + SegmentInfo_Renamed.Name + " ramUsed=" + startMBUsed.ToString(Nf) + " MB" + " newFlushedSize(includes docstores)=" + newSegmentSize.ToString(Nf) + " MB" + " docs/MB=" + (flushState.SegmentInfo.DocCount / newSegmentSize).ToString(Nf));
                }

                Debug.Assert(SegmentInfo_Renamed != null);

                FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.FieldInfos, segmentDeletes, flushState.LiveDocs, flushState.DelCountOnFlush);
                SealFlushedSegment(fs);
                success = true;

                return fs;
            }
            finally
            {
                // Any exception above leaves success == false: abort and let the exception propagate.
                if (!success)
                {
                    Abort(FilesToDelete);
                }
            }
        }