Example #1
        public virtual FieldInfos CreateAndWriteFieldInfos(Directory dir, string filename)
        {
            //Positive test of FieldInfos
            Assert.IsTrue(TestDoc != null);
            FieldInfos.Builder builder = new FieldInfos.Builder();
            foreach (IIndexableField field in TestDoc)
            {
                builder.AddOrUpdate(field.Name, field.IndexableFieldType);
            }
            FieldInfos fieldInfos = builder.Finish();

            //Since the complement is stored as well in the fields map
            Assert.IsTrue(fieldInfos.Count == DocHelper.All.Count); //this is all b/c we are using the no-arg constructor

            IndexOutput output = dir.CreateOutput(filename, NewIOContext(Random));

            Assert.IsTrue(output != null);
            //Use a RAMOutputStream

            FieldInfosWriter writer = Codec.Default.FieldInfosFormat.FieldInfosWriter;

            writer.Write(dir, filename, "", fieldInfos, IOContext.DEFAULT);
            output.Dispose();
            return fieldInfos;
        }
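
The write above can be round-tripped through the codec's FieldInfosReader. Below is a minimal sketch, not from the source: it assumes the same dir and filename that CreateAndWriteFieldInfos used, and the ReadFieldInfos helper name is ours.

        // Assumes the Lucene.Net.Codecs, Lucene.Net.Index, and Lucene.Net.Store namespaces.
        public virtual FieldInfos ReadFieldInfos(Directory dir, string filename)
        {
            // The codec that wrote the field infos also supplies the reader that
            // decodes them; "" is the empty segment suffix used in the write above.
            FieldInfosReader reader = Codec.Default.FieldInfosFormat.FieldInfosReader;
            return reader.Read(dir, filename, "", IOContext.READ_ONCE);
        }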
Example #2
 public static void AfterClass()
 {
     Dir.Dispose();
     Dir        = null;
     FieldInfos = null;
     TestDoc    = null;
 }
Example #3
        public DocumentsWriterPerThread(string segmentName, Directory directory, LiveIndexWriterConfig indexWriterConfig, InfoStream infoStream, DocumentsWriterDeleteQueue deleteQueue, FieldInfos.Builder fieldInfos)
        {
            this.directoryOrig       = directory;
            this.directory           = new TrackingDirectoryWrapper(directory);
            this.fieldInfos          = fieldInfos;
            this.indexWriterConfig   = indexWriterConfig;
            this.infoStream          = infoStream;
            this.codec               = indexWriterConfig.Codec;
            this.docState            = new DocState(this, infoStream);
            this.docState.similarity = indexWriterConfig.Similarity;
            bytesUsed          = Counter.NewCounter();
            byteBlockAllocator = new DirectTrackingAllocator(bytesUsed);
            pendingUpdates     = new BufferedUpdates();
            intBlockAllocator  = new Int32BlockAllocator(bytesUsed);
            this.deleteQueue   = deleteQueue;
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(numDocsInRAM == 0, "num docs {0}", numDocsInRAM);
            }
            pendingUpdates.Clear();
            deleteSlice = deleteQueue.NewSlice();

            segmentInfo = new SegmentInfo(directoryOrig, Constants.LUCENE_MAIN_VERSION, segmentName, -1, false, codec, null);
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(numDocsInRAM == 0);
            }
            if (INFO_VERBOSE && infoStream.IsEnabled("DWPT"))
            {
                infoStream.Message("DWPT", Thread.CurrentThread.Name + " init seg=" + segmentName + " delQueue=" + deleteQueue);
            }
            // this should be the last call in the ctor
            // it really sucks that we need to pull this within the ctor and pass this ref to the chain!
            consumer = indexWriterConfig.IndexingChain.GetChain(this);
        }
Example #4
 private void EnsureInitialized(ThreadState state)
 {
     if (state.Active && state.Dwpt == null)
     {
         FieldInfos.Builder infos = new FieldInfos.Builder(Writer.GlobalFieldNumberMap);
         state.Dwpt = new DocumentsWriterPerThread(Writer.NewSegmentName(), Directory, LIWConfig, InfoStream, DeleteQueue, infos);
     }
 }
Example #5
 private void EnsureInitialized(ThreadState state)
 {
     if (state.IsActive && state.dwpt == null)
     {
         FieldInfos.Builder infos = new FieldInfos.Builder(writer.globalFieldNumberMap);
         state.dwpt = new DocumentsWriterPerThread(writer.NewSegmentName(), directory, config, infoStream, deleteQueue, infos);
     }
 }
Example #6
 public override void AfterClass()
 {
     Dir.Dispose();
     Dir        = null;
     FieldInfos = null;
     TestDoc    = null;
     base.AfterClass();
 }
Example #7
 public override void AfterClass()
 {
     dir.Dispose();
     dir        = null;
     fieldInfos = null;
     testDoc    = null;
     base.AfterClass();
 }
Example #8
        /// <summary>
        /// Call this to get the (merged) <see cref="FieldInfos"/> for a
        /// composite reader.
        /// <para/>
        /// NOTE: the returned field numbers will likely not
        /// correspond to the actual field numbers in the underlying
        /// readers, and codec metadata (<see cref="FieldInfo.GetAttribute(string)"/>)
        /// will be unavailable.
        /// </summary>
        public static FieldInfos GetMergedFieldInfos(IndexReader reader)
        {
            var builder = new FieldInfos.Builder();

            foreach (AtomicReaderContext ctx in reader.Leaves)
            {
                builder.Add(ctx.AtomicReader.FieldInfos);
            }
            return builder.Finish();
        }
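
For context, a minimal usage sketch of the merged-infos helper above (in Lucene.NET it is exposed as MultiFields.GetMergedFieldInfos; the index path below is hypothetical):

        using System;
        using Lucene.Net.Index;
        using Lucene.Net.Store;

        using (Directory dir = FSDirectory.Open("/path/to/index"))
        using (DirectoryReader reader = DirectoryReader.Open(dir))
        {
            // Merge the FieldInfos of every leaf into a single view.
            FieldInfos merged = MultiFields.GetMergedFieldInfos(reader);
            foreach (FieldInfo fi in merged)
            {
                Console.WriteLine(fi.Number + ": " + fi.Name);
            }
        }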
Example #9
        public virtual void TestRandomPostings()
        {
            FieldInfos.Builder builder = new FieldInfos.Builder();

            FieldData[] fields = new FieldData[NUM_FIELDS];
            for (int i = 0; i < NUM_FIELDS; i++)
            {
                bool omitTF        = 0 == (i % 3);
                bool storePayloads = 1 == (i % 3);
                fields[i] = new FieldData(this, fieldNames[i], builder, this.MakeRandomTerms(omitTF, storePayloads), omitTF, storePayloads);
            }

            // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
            using (Directory dir = NewDirectory())
            {
                FieldInfos fieldInfos = builder.Finish();

                if (Verbose)
                {
                    Console.WriteLine("TEST: now write postings");
                }

                this.Write(fieldInfos, dir, fields, false);
                Codec       codec = Codec.Default;
                SegmentInfo si    = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);

                if (Verbose)
                {
                    Console.WriteLine("TEST: now read postings");
                }

                // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
                using (FieldsProducer terms = codec.PostingsFormat.FieldsProducer(new SegmentReadState(dir, si, fieldInfos, NewIOContext(Random), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR)))
                {
                    Verify[] threads = new Verify[NUM_TEST_THREADS - 1];
                    for (int i = 0; i < NUM_TEST_THREADS - 1; i++)
                    {
                        threads[i] = new Verify(this, si, fields, terms);
                        threads[i].IsBackground = true;
                        threads[i].Start();
                    }

                    (new Verify(this, si, fields, terms)).Run();

                    for (int i = 0; i < NUM_TEST_THREADS - 1; i++)
                    {
                        threads[i].Join();
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(!threads[i].failed);
                        }
                    }
                }
            }
        }
Example #10
        public virtual void TestRandomPostings()
        {
            FieldInfos.Builder builder = new FieldInfos.Builder();

            FieldData[] fields = new FieldData[NUM_FIELDS];
            for (int i = 0; i < NUM_FIELDS; i++)
            {
                bool omitTF        = 0 == (i % 3);
                bool storePayloads = 1 == (i % 3);
                fields[i] = new FieldData(this, FieldNames[i], builder, this.MakeRandomTerms(omitTF, storePayloads), omitTF, storePayloads);
            }

            Directory  dir        = NewDirectory();
            FieldInfos fieldInfos = builder.Finish();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now write postings");
            }

            this.Write(fieldInfos, dir, fields, false);
            Codec       codec = Codec.Default;
            SegmentInfo si    = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: now read postings");
            }
            FieldsProducer terms = codec.PostingsFormat().FieldsProducer(new SegmentReadState(dir, si, fieldInfos, NewIOContext(Random()), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR));

            Verify[] threads = new Verify[NUM_TEST_THREADS - 1];
            for (int i = 0; i < NUM_TEST_THREADS - 1; i++)
            {
                threads[i] = new Verify(this, si, fields, terms);
                threads[i].SetDaemon(true);
                threads[i].Start();
            }

            (new Verify(this, si, fields, terms)).Run();

            for (int i = 0; i < NUM_TEST_THREADS - 1; i++)
            {
                threads[i].Join();
                Debug.Assert(!threads[i].Failed);
            }

            terms.Dispose();
            dir.Dispose();
        }
Example #11
 // note, just like in codec apis Directory 'dir' is NOT the same as segmentInfo.dir!!
 internal SegmentMerger(IList<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir, int termIndexInterval, CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context, bool validate)
 {
     // validate incoming readers
     if (validate)
     {
         foreach (AtomicReader reader in readers)
         {
             reader.CheckIntegrity();
         }
     }
     mergeState                      = new MergeState(readers, segmentInfo, infoStream, checkAbort);
     directory                       = dir;
     this.termIndexInterval          = termIndexInterval;
     this.codec                      = segmentInfo.Codec;
     this.context                    = context;
     this.fieldInfosBuilder          = new FieldInfos.Builder(fieldNumbers);
     mergeState.SegmentInfo.DocCount = SetDocMaps();
 }
Example #12
            public FieldData(TestCodecs outerInstance, string name, FieldInfos.Builder fieldInfos, TermData[] terms, bool omitTF, bool storePayloads)
            {
                this.outerInstance = outerInstance;
                this.omitTF        = omitTF;
                this.storePayloads = storePayloads;
                // TODO: change this test to use all three
                fieldInfo = fieldInfos.AddOrUpdate(name, new IndexableFieldTypeAnonymousInnerClassHelper(this, omitTF));
                if (storePayloads)
                {
                    fieldInfo.SetStorePayloads();
                }
                this.terms = terms;
                for (int i = 0; i < terms.Length; i++)
                {
                    terms[i].field = this;
                }

                Array.Sort(terms);
            }
Example #13
        public void BeforeClass()
        {
            TestDoc    = new Document();
            FieldInfos = new FieldInfos.Builder();
            DocHelper.SetupDoc(TestDoc);
            foreach (IndexableField field in TestDoc)
            {
                FieldInfos.AddOrUpdate(field.Name, field.FieldType);
            }
            Dir = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy());

            conf.MergePolicy.NoCFSRatio = 0.0;
            IndexWriter writer = new IndexWriter(Dir, conf);

            writer.AddDocument(TestDoc);
            writer.Dispose();
            FaultyIndexInput.DoFail = false;
        }
Example #14
        public virtual void TestFixedPostings()
        {
            const int NUM_TERMS = 100;

            TermData[] terms = new TermData[NUM_TERMS];
            for (int i = 0; i < NUM_TERMS; i++)
            {
                int[]  docs = new int[] { i };
                string text = Convert.ToString(i);
                terms[i] = new TermData(this, text, docs, null);
            }

            FieldInfos.Builder builder = new FieldInfos.Builder();

            FieldData field = new FieldData(this, "field", builder, terms, true, false);

            FieldData[] fields     = new FieldData[] { field };
            FieldInfos  fieldInfos = builder.Finish();

            // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
            using (Directory dir = NewDirectory())
            {
                this.Write(fieldInfos, dir, fields, true);
                Codec       codec = Codec.Default;
                SegmentInfo si    = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);

                // LUCENENET specific - BUG: we must wrap this in a using block in case anything in the below loop throws
                using (FieldsProducer reader = codec.PostingsFormat.FieldsProducer(new SegmentReadState(dir, si, fieldInfos, NewIOContext(Random), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR)))
                {
                    IEnumerator<string> fieldsEnum = reader.GetEnumerator();
                    fieldsEnum.MoveNext();
                    string fieldName = fieldsEnum.Current;
                    Assert.IsNotNull(fieldName);
                    Terms terms2 = reader.GetTerms(fieldName);
                    Assert.IsNotNull(terms2);

                    TermsEnum termsEnum = terms2.GetIterator(null);

                    DocsEnum docsEnum = null;
                    for (int i = 0; i < NUM_TERMS; i++)
                    {
                        BytesRef term = termsEnum.Next();
                        Assert.IsNotNull(term);
                        Assert.AreEqual(terms[i].text2, term.Utf8ToString());

                        // do this twice to stress test the codec's reuse, ie,
                        // make sure it properly fully resets (rewinds) its
                        // internal state:
                        for (int iter = 0; iter < 2; iter++)
                        {
                            docsEnum = TestUtil.Docs(Random, termsEnum, null, docsEnum, DocsFlags.NONE);
                            Assert.AreEqual(terms[i].docs[0], docsEnum.NextDoc());
                            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc());
                        }
                    }
                    Assert.IsNull(termsEnum.Next());

                    for (int i = 0; i < NUM_TERMS; i++)
                    {
                        Assert.AreEqual(termsEnum.SeekCeil(new BytesRef(terms[i].text2)), TermsEnum.SeekStatus.FOUND);
                    }

                    Assert.IsFalse(fieldsEnum.MoveNext());
                }
            }
        }
Example #15
        /// <summary>
        /// Expert: create a <see cref="ParallelAtomicReader"/> based on the provided
        /// <paramref name="readers"/> and <paramref name="storedFieldsReaders"/>; when a document is
        /// loaded, only <paramref name="storedFieldsReaders"/> will be used.
        /// </summary>
        public ParallelAtomicReader(bool closeSubReaders, AtomicReader[] readers, AtomicReader[] storedFieldsReaders)
        {
            this.closeSubReaders = closeSubReaders;
            if (readers.Length == 0 && storedFieldsReaders.Length > 0)
            {
                throw new ArgumentException("There must be at least one main reader if storedFieldsReaders are used.");
            }
            this.parallelReaders     = (AtomicReader[])readers.Clone();
            this.storedFieldsReaders = (AtomicReader[])storedFieldsReaders.Clone();
            if (parallelReaders.Length > 0)
            {
                AtomicReader first = parallelReaders[0];
                this.maxDoc       = first.MaxDoc;
                this.numDocs      = first.NumDocs;
                this.hasDeletions = first.HasDeletions;
            }
            else
            {
                this.maxDoc       = this.numDocs = 0;
                this.hasDeletions = false;
            }
            completeReaderSet.UnionWith(this.parallelReaders);
            completeReaderSet.UnionWith(this.storedFieldsReaders);

            // check compatibility:
            foreach (AtomicReader reader in completeReaderSet)
            {
                if (reader.MaxDoc != maxDoc)
                {
                    throw new ArgumentException("All readers must have same MaxDoc: " + maxDoc + "!=" + reader.MaxDoc);
                }
            }

            // TODO: make this read-only in a cleaner way?
            FieldInfos.Builder builder = new FieldInfos.Builder();
            // build FieldInfos and fieldToReader map:
            foreach (AtomicReader reader in this.parallelReaders)
            {
                FieldInfos readerFieldInfos = reader.FieldInfos;
                foreach (FieldInfo fieldInfo in readerFieldInfos)
                {
                    // NOTE: first reader having a given field "wins":
                    if (!fieldToReader.ContainsKey(fieldInfo.Name))
                    {
                        builder.Add(fieldInfo);
                        fieldToReader[fieldInfo.Name] = reader;
                        if (fieldInfo.HasVectors)
                        {
                            tvFieldToReader[fieldInfo.Name] = reader;
                        }
                    }
                }
            }
            fieldInfos = builder.Finish();

            // build Fields instance
            foreach (AtomicReader reader in this.parallelReaders)
            {
                Fields readerFields = reader.Fields;
                if (readerFields != null)
                {
                    foreach (string field in readerFields)
                    {
                        // only add if the reader responsible for that field name is the current:
                        if (fieldToReader[field].Equals(reader))
                        {
                            this.fields.AddField(field, readerFields.GetTerms(field));
                        }
                    }
                }
            }

            // do this last so any exceptions that occurred earlier don't affect refcounts:
            foreach (AtomicReader reader in completeReaderSet)
            {
                if (!closeSubReaders)
                {
                    reader.IncRef();
                }
                reader.RegisterParentReader(this);
            }
        }
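
A brief usage sketch for composing parallel readers (paths are hypothetical; both indexes must contain the same documents in the same order, which is the contract this constructor checks via MaxDoc):

        using Lucene.Net.Index;
        using Lucene.Net.Store;

        using (Directory d1 = FSDirectory.Open("/path/to/main"))
        using (Directory d2 = FSDirectory.Open("/path/to/extra"))
        using (AtomicReader r1 = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(d1)))
        using (AtomicReader r2 = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(d2)))
        using (ParallelAtomicReader parallel = new ParallelAtomicReader(r1, r2))
        {
            // The first reader listing a field "wins" for that field,
            // matching the NOTE in the constructor above.
            System.Console.WriteLine(parallel.FieldInfos.Count + " merged fields");
        }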
Example #16
        public virtual void WriteFieldUpdates(Directory dir, DocValuesFieldUpdates.Container dvUpdates)
        {
            UninterruptableMonitor.Enter(this);
            try
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(UninterruptableMonitor.IsEntered(writer));
                }
                //System.out.println("rld.writeFieldUpdates: seg=" + info + " numericFieldUpdates=" + numericFieldUpdates);

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(dvUpdates.Any());
                }

                // Do this so we can delete any created files on
                // exception; this saves all codecs from having to do
                // it:
                TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);

                FieldInfos fieldInfos = null;
                bool       success    = false;
                try
                {
                    Codec codec = Info.Info.Codec;

                    // reader could be null e.g. for a just merged segment (from
                    // IndexWriter.commitMergedDeletes).
                    SegmentReader reader = this.reader ?? new SegmentReader(Info, writer.Config.ReaderTermsIndexDivisor, IOContext.READ_ONCE);
                    try
                    {
                        // clone FieldInfos so that we can update their dvGen separately from
                        // the reader's infos and write them to a new fieldInfos_gen file
                        FieldInfos.Builder builder = new FieldInfos.Builder(writer.globalFieldNumberMap);
                        // cannot use builder.add(reader.getFieldInfos()) because it does not
                        // clone FI.attributes as well as FI.dvGen
                        foreach (FieldInfo fi in reader.FieldInfos)
                        {
                            FieldInfo clone = builder.Add(fi);
                            // copy the stuff FieldInfos.Builder doesn't copy
                            if (fi.Attributes != null)
                            {
                                foreach (KeyValuePair<string, string> e in fi.Attributes)
                                {
                                    clone.PutAttribute(e.Key, e.Value);
                                }
                            }
                            clone.DocValuesGen = fi.DocValuesGen;
                        }
                        // create new fields or update existing ones to have NumericDV type
                        foreach (string f in dvUpdates.numericDVUpdates.Keys)
                        {
                            builder.AddOrUpdate(f, NumericDocValuesField.TYPE);
                        }
                        // create new fields or update existing ones to have BinaryDV type
                        foreach (string f in dvUpdates.binaryDVUpdates.Keys)
                        {
                            builder.AddOrUpdate(f, BinaryDocValuesField.TYPE);
                        }

                        fieldInfos = builder.Finish();
                        long nextFieldInfosGen = Info.NextFieldInfosGen;
                        // LUCENENET specific: We created the segments names wrong in 4.8.0-beta00001 - 4.8.0-beta00015,
                        // so we added a switch to be able to read these indexes in later versions. This logic as well as an
                        // optimization on the first 100 segment values is implemented in SegmentInfos.SegmentNumberToString().
                        string            segmentSuffix   = SegmentInfos.SegmentNumberToString(nextFieldInfosGen);
                        SegmentWriteState state           = new SegmentWriteState(null, trackingDir, Info.Info, fieldInfos, writer.Config.TermIndexInterval, null, IOContext.DEFAULT, segmentSuffix);
                        DocValuesFormat   docValuesFormat = codec.DocValuesFormat;
                        DocValuesConsumer fieldsConsumer  = docValuesFormat.FieldsConsumer(state);
                        bool fieldsConsumerSuccess        = false;
                        try
                        {
                            //          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeFieldUpdates: applying numeric updates; seg=" + info + " updates=" + numericFieldUpdates);
                            foreach (KeyValuePair<string, NumericDocValuesFieldUpdates> e in dvUpdates.numericDVUpdates)
                            {
                                string field = e.Key;
                                NumericDocValuesFieldUpdates fieldUpdates = e.Value;
                                FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                                if (Debugging.AssertsEnabled)
                                {
                                    Debugging.Assert(fieldInfo != null);
                                }

                                fieldInfo.DocValuesGen = nextFieldInfosGen;
                                // write the numeric updates to a new gen'd docvalues file
                                fieldsConsumer.AddNumericField(fieldInfo, GetInt64Enumerable(reader, field, fieldUpdates));
                            }

                            //        System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " updates=" + dvUpdates.binaryDVUpdates);
                            foreach (KeyValuePair<string, BinaryDocValuesFieldUpdates> e in dvUpdates.binaryDVUpdates)
                            {
                                string field = e.Key;
                                BinaryDocValuesFieldUpdates dvFieldUpdates = e.Value;
                                FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                                if (Debugging.AssertsEnabled)
                                {
                                    Debugging.Assert(fieldInfo != null);
                                }

                                //          System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " f=" + dvFieldUpdates + ", updates=" + dvFieldUpdates);

                                fieldInfo.DocValuesGen = nextFieldInfosGen;
                                // write the binary updates to a new gen'd docvalues file
                                fieldsConsumer.AddBinaryField(fieldInfo, GetBytesRefEnumerable(reader, field, dvFieldUpdates));
                            }

                            codec.FieldInfosFormat.FieldInfosWriter.Write(trackingDir, Info.Info.Name, segmentSuffix, fieldInfos, IOContext.DEFAULT);
                            fieldsConsumerSuccess = true;
                        }
                        finally
                        {
                            if (fieldsConsumerSuccess)
                            {
                                fieldsConsumer.Dispose();
                            }
                            else
                            {
                                IOUtils.DisposeWhileHandlingException(fieldsConsumer);
                            }
                        }
                    }
                    finally
                    {
                        if (reader != this.reader)
                        {
                            //          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: closeReader " + reader);
                            reader.Dispose();
                        }
                    }

                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        // Advance only the nextWriteFieldInfosGen so that a 2nd
                        // attempt to write will write to a new file
                        Info.AdvanceNextWriteFieldInfosGen();

                        // Delete any partially created file(s):
                        foreach (string fileName in trackingDir.CreatedFiles)
                        {
                            try
                            {
                                dir.DeleteFile(fileName);
                            }
                            catch (Exception t) when(t.IsThrowable())
                            {
                                // Ignore so we throw only the first exc
                            }
                        }
                    }
                }

                Info.AdvanceFieldInfosGen();
                // copy all the updates to mergingUpdates, so they can later be applied to the merged segment
                if (isMerging)
                {
                    foreach (KeyValuePair<string, NumericDocValuesFieldUpdates> e in dvUpdates.numericDVUpdates)
                    {
                        if (!mergingDVUpdates.TryGetValue(e.Key, out DocValuesFieldUpdates updates))
                        {
                            mergingDVUpdates[e.Key] = e.Value;
                        }
                        else
                        {
                            updates.Merge(e.Value);
                        }
                    }
                    foreach (KeyValuePair<string, BinaryDocValuesFieldUpdates> e in dvUpdates.binaryDVUpdates)
                    {
                        if (!mergingDVUpdates.TryGetValue(e.Key, out DocValuesFieldUpdates updates))
                        {
                            mergingDVUpdates[e.Key] = e.Value;
                        }
                        else
                        {
                            updates.Merge(e.Value);
                        }
                    }
                }

                // create a new map, keeping only the gens that are in use
                IDictionary<long, ISet<string>> genUpdatesFiles    = Info.UpdatesFiles;
                IDictionary<long, ISet<string>> newGenUpdatesFiles = new Dictionary<long, ISet<string>>();
                long fieldInfosGen = Info.FieldInfosGen;
                foreach (FieldInfo fi in fieldInfos)
                {
                    long dvGen = fi.DocValuesGen;
                    if (dvGen != -1 && !newGenUpdatesFiles.ContainsKey(dvGen))
                    {
                        if (dvGen == fieldInfosGen)
                        {
                            newGenUpdatesFiles[fieldInfosGen] = trackingDir.CreatedFiles;
                        }
                        else
                        {
                            newGenUpdatesFiles[dvGen] = genUpdatesFiles[dvGen];
                        }
                    }
                }

                Info.SetGenUpdatesFiles(newGenUpdatesFiles);

                // wrote new files, should checkpoint()
                writer.Checkpoint();

                // if there is a reader open, reopen it to reflect the updates
                if (reader != null)
                {
                    SegmentReader newReader = new SegmentReader(Info, reader, liveDocs, Info.Info.DocCount - Info.DelCount - pendingDeleteCount);
                    bool          reopened  = false;
                    try
                    {
                        reader.DecRef();
                        reader   = newReader;
                        reopened = true;
                    }
                    finally
                    {
                        if (!reopened)
                        {
                            newReader.DecRef();
                        }
                    }
                }
            }
            finally
            {
                UninterruptableMonitor.Exit(this);
            }
        }
Example #17
 public abstract void ProcessDocument(FieldInfos.Builder fieldInfos);
Example #18
        public override void ProcessDocument(FieldInfos.Builder fieldInfos)
        {
            consumer.StartDocument();
            storedConsumer.StartDocument();

            fieldCount = 0;

            int thisFieldGen = fieldGen++;

            // Absorb any new fields first seen in this document.
            // Also absorb any changes to fields we had already
            // seen before (eg suddenly turning on norms or
            // vectors, etc.):

            foreach (IIndexableField field in docState.doc)
            {
                string fieldName = field.Name;

                // Make sure we have a PerField allocated
                int hashPos = fieldName.GetHashCode() & hashMask;
                DocFieldProcessorPerField fp = fieldHash[hashPos];
                while (fp != null && !fp.fieldInfo.Name.Equals(fieldName, StringComparison.Ordinal))
                {
                    fp = fp.next;
                }

                if (fp == null)
                {
                    // TODO FI: we need to genericize the "flags" that a
                    // field holds, and, how these flags are merged; it
                    // needs to be more "pluggable" such that if I want
                    // to have a new "thing" my Fields can do, I can
                    // easily add it
                    FieldInfo fi = fieldInfos.AddOrUpdate(fieldName, field.IndexableFieldType);

                    fp                 = new DocFieldProcessorPerField(this, fi);
                    fp.next            = fieldHash[hashPos];
                    fieldHash[hashPos] = fp;
                    totalFieldCount++;

                    if (totalFieldCount >= fieldHash.Length / 2)
                    {
                        Rehash();
                    }
                }
                else
                {
                    // need to addOrUpdate so that FieldInfos can update globalFieldNumbers
                    // with the correct DocValue type (LUCENE-5192)
                    FieldInfo fi = fieldInfos.AddOrUpdate(fieldName, field.IndexableFieldType);
                    Debug.Assert(fi == fp.fieldInfo, "should only have updated an existing FieldInfo instance");
                }

                if (thisFieldGen != fp.lastGen)
                {
                    // First time we're seeing this field for this doc
                    fp.fieldCount = 0;

                    if (fieldCount == fields.Length)
                    {
                        int newSize = fields.Length * 2;
                        DocFieldProcessorPerField[] newArray = new DocFieldProcessorPerField[newSize];
                        Array.Copy(fields, 0, newArray, 0, fieldCount);
                        fields = newArray;
                    }

                    fields[fieldCount++] = fp;
                    fp.lastGen           = thisFieldGen;
                }

                fp.AddField(field);
                storedConsumer.AddField(docState.docID, field, fp.fieldInfo);
            }

            // If we are writing vectors then we must visit
            // fields in sorted order so they are written in
            // sorted order.  TODO: we actually only need to
            // sort the subset of fields that have vectors
            // enabled; we could save [small amount of] CPU
            // here.
            ArrayUtil.IntroSort(fields, 0, fieldCount, fieldsComp);
            for (int i = 0; i < fieldCount; i++)
            {
                DocFieldProcessorPerField perField = fields[i];
                perField.consumer.ProcessFields(perField.fields, perField.fieldCount);
            }
        }
Example #19
        // Writes field updates (new _X_N updates files) to the directory
        public virtual void WriteFieldUpdates(Directory dir, DocValuesFieldUpdates.Container dvUpdates)
        {
            lock (this)
            {
                //Debug.Assert(Thread.holdsLock(Writer));
                //System.out.println("rld.writeFieldUpdates: seg=" + info + " numericFieldUpdates=" + numericFieldUpdates);

                Debug.Assert(dvUpdates.Any());

                // Do this so we can delete any created files on
                // exception; this saves all codecs from having to do
                // it:
                TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);

                FieldInfos fieldInfos = null;
                bool       success    = false;
                try
                {
                    Codec codec = Info.Info.Codec;

                    // reader could be null e.g. for a just merged segment (from
                    // IndexWriter.commitMergedDeletes).
                    SegmentReader reader = this.Reader == null ? new SegmentReader(Info, Writer.Config.ReaderTermsIndexDivisor, IOContext.READONCE) : this.Reader;
                    try
                    {
                        // clone FieldInfos so that we can update their dvGen separately from
                        // the reader's infos and write them to a new fieldInfos_gen file
                        FieldInfos.Builder builder = new FieldInfos.Builder(Writer.GlobalFieldNumberMap);
                        // cannot use builder.add(reader.getFieldInfos()) because it does not
                        // clone FI.attributes as well as FI.dvGen
                        foreach (FieldInfo fi in reader.FieldInfos)
                        {
                            FieldInfo clone = builder.Add(fi);
                            // copy the stuff FieldInfos.Builder doesn't copy
                            if (fi.Attributes() != null)
                            {
                                foreach (KeyValuePair<string, string> e in fi.Attributes())
                                {
                                    clone.PutAttribute(e.Key, e.Value);
                                }
                            }
                            clone.DocValuesGen = fi.DocValuesGen;
                        }
                        // create new fields or update existing ones to have NumericDV type
                        foreach (string f in dvUpdates.NumericDVUpdates.Keys)
                        {
                            builder.AddOrUpdate(f, NumericDocValuesField.TYPE);
                        }
                        // create new fields or update existing ones to have BinaryDV type
                        foreach (string f in dvUpdates.BinaryDVUpdates.Keys)
                        {
                            builder.AddOrUpdate(f, BinaryDocValuesField.fType);
                        }

                        fieldInfos = builder.Finish();
                        long              nextFieldInfosGen     = Info.NextFieldInfosGen;
                        string            segmentSuffix         = nextFieldInfosGen.ToString(CultureInfo.InvariantCulture);//Convert.ToString(nextFieldInfosGen, Character.MAX_RADIX));
                        SegmentWriteState state                 = new SegmentWriteState(null, trackingDir, Info.Info, fieldInfos, Writer.Config.TermIndexInterval, null, IOContext.DEFAULT, segmentSuffix);
                        DocValuesFormat   docValuesFormat       = codec.DocValuesFormat();
                        DocValuesConsumer fieldsConsumer        = docValuesFormat.FieldsConsumer(state);
                        bool              fieldsConsumerSuccess = false;
                        try
                        {
                            //          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeFieldUpdates: applying numeric updates; seg=" + info + " updates=" + numericFieldUpdates);
                            foreach (KeyValuePair<string, NumericDocValuesFieldUpdates> e in dvUpdates.NumericDVUpdates)
                            {
                                string field = e.Key;
                                NumericDocValuesFieldUpdates fieldUpdates = e.Value;
                                FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                                Debug.Assert(fieldInfo != null);

                                fieldInfo.DocValuesGen = nextFieldInfosGen;
                                // write the numeric updates to a new gen'd docvalues file
                                fieldsConsumer.AddNumericField(fieldInfo, GetLongEnumerable(reader, field, fieldUpdates));
                            }

                            //        System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " updates=" + dvUpdates.binaryDVUpdates);
                            foreach (KeyValuePair<string, BinaryDocValuesFieldUpdates> e in dvUpdates.BinaryDVUpdates)
                            {
                                string field = e.Key;
                                BinaryDocValuesFieldUpdates dvFieldUpdates = e.Value;
                                FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                                Debug.Assert(fieldInfo != null);

                                //          System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " f=" + dvFieldUpdates + ", updates=" + dvFieldUpdates);

                                fieldInfo.DocValuesGen = nextFieldInfosGen;
                                // write the binary updates to a new gen'd docvalues file
                                fieldsConsumer.AddBinaryField(fieldInfo, GetBytesRefEnumerable(reader, field, dvFieldUpdates));
                            }

                            codec.FieldInfosFormat().FieldInfosWriter.Write(trackingDir, Info.Info.Name, segmentSuffix, fieldInfos, IOContext.DEFAULT);
                            fieldsConsumerSuccess = true;
                        }
                        finally
                        {
                            if (fieldsConsumerSuccess)
                            {
                                fieldsConsumer.Dispose();
                            }
                            else
                            {
                                IOUtils.CloseWhileHandlingException(fieldsConsumer);
                            }
                        }
                    }
                    finally
                    {
                        if (reader != this.Reader)
                        {
                            //          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: closeReader " + reader);
                            reader.Dispose();
                        }
                    }

                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        // Advance only the nextWriteFieldInfosGen so that a 2nd
                        // attempt to write will write to a new file
                        Info.AdvanceNextWriteFieldInfosGen();

                        // Delete any partially created file(s):
                        foreach (string fileName in trackingDir.CreatedFiles)
                        {
                            try
                            {
                                dir.DeleteFile(fileName);
                            }
                            catch (Exception)
                            {
                                // Ignore so we throw only the first exc
                            }
                        }
                    }
                }

                Info.AdvanceFieldInfosGen();
                // copy all the updates to mergingUpdates, so they can later be applied to the merged segment
                if (IsMerging)
                {
                    foreach (KeyValuePair<string, NumericDocValuesFieldUpdates> e in dvUpdates.NumericDVUpdates)
                    {
                        DocValuesFieldUpdates updates;
                        if (!MergingDVUpdates.TryGetValue(e.Key, out updates))
                        {
                            MergingDVUpdates[e.Key] = e.Value;
                        }
                        else
                        {
                            updates.Merge(e.Value);
                        }
                    }
                    foreach (KeyValuePair<string, BinaryDocValuesFieldUpdates> e in dvUpdates.BinaryDVUpdates)
                    {
                        DocValuesFieldUpdates updates;
                        if (!MergingDVUpdates.TryGetValue(e.Key, out updates))
                        {
                            MergingDVUpdates[e.Key] = e.Value;
                        }
                        else
                        {
                            updates.Merge(e.Value);
                        }
                    }
                }

                // create a new map, keeping only the gens that are in use
                IDictionary<long, ISet<string>> genUpdatesFiles    = Info.UpdatesFiles;
                IDictionary<long, ISet<string>> newGenUpdatesFiles = new Dictionary<long, ISet<string>>();
                long fieldInfosGen = Info.FieldInfosGen;
                foreach (FieldInfo fi in fieldInfos)
                {
                    long dvGen = fi.DocValuesGen;
                    if (dvGen != -1 && !newGenUpdatesFiles.ContainsKey(dvGen))
                    {
                        if (dvGen == fieldInfosGen)
                        {
                            newGenUpdatesFiles[fieldInfosGen] = trackingDir.CreatedFiles;
                        }
                        else
                        {
                            newGenUpdatesFiles[dvGen] = genUpdatesFiles[dvGen];
                        }
                    }
                }

                Info.GenUpdatesFiles = newGenUpdatesFiles;

                // wrote new files, should checkpoint()
                Writer.Checkpoint();

                // if there is a reader open, reopen it to reflect the updates
                if (Reader != null)
                {
                    SegmentReader newReader = new SegmentReader(Info, Reader, LiveDocs_Renamed, Info.Info.DocCount - Info.DelCount - PendingDeleteCount_Renamed);
                    bool          reopened  = false;
                    try
                    {
                        Reader.DecRef();
                        Reader   = newReader;
                        reopened = true;
                    }
                    finally
                    {
                        if (!reopened)
                        {
                            newReader.DecRef();
                        }
                    }
                }
            }
        }