public override void Flush(IDictionary<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state)
		{
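            // Split the per-thread/per-field map into separate views for the inverting
            // consumer chain and the end-consumer chain, then flush both downstream.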

            var childThreadsAndFields = new HashMap<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>>();
            var endChildThreadsAndFields = new HashMap<InvertedDocEndConsumerPerThread, ICollection<InvertedDocEndConsumerPerField>>();

            foreach (var entry in threadsAndFields)
			{
				var perThread = (DocInverterPerThread) entry.Key;

				ICollection<InvertedDocConsumerPerField> childFields = new HashSet<InvertedDocConsumerPerField>();
				ICollection<InvertedDocEndConsumerPerField> endChildFields = new HashSet<InvertedDocEndConsumerPerField>();
				foreach(DocFieldConsumerPerField field in entry.Value)
				{
                    var perField = (DocInverterPerField)field;
					childFields.Add(perField.consumer);
					endChildFields.Add(perField.endConsumer);
				}
				
				childThreadsAndFields[perThread.consumer] = childFields;
				endChildThreadsAndFields[perThread.endConsumer] = endChildFields;
			}
			
			consumer.Flush(childThreadsAndFields, state);
			endConsumer.Flush(endChildThreadsAndFields, state);
		}
		public override void  Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
		{
			lock (this)
			{
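				// tvx is null if no term vectors have been written for this segment yet;
				// otherwise fill in any skipped docs and flush the .tvx/.tvd/.tvf streams,
				// then reset the per-field and per-thread term hashes below.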
				
				if (tvx != null)
				{
					
					// In case there are some final documents that we
					// didn't see (because they hit a non-aborting exception):
					if (state.numDocsInStore > 0)
						Fill(state.numDocsInStore - docWriter.GetDocStoreOffset());
					
					tvx.Flush();
					tvd.Flush();
					tvf.Flush();
				}

                System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator();
				while (it.MoveNext())
				{
					System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current;
					System.Collections.IEnumerator it2 = ((System.Collections.ICollection) entry.Value).GetEnumerator();
					while (it2.MoveNext())
					{
						TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) ((System.Collections.DictionaryEntry) it2.Current).Key;
						perField.termsHashPerField.Reset();
						perField.ShrinkHash();
					}
					
					TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.Key;
					perThread.termsHashPerThread.Reset(true);
				}
			}
		}
 public override void Flush(SegmentWriteState state)
 {
     int numDocs = state.SegmentInfo.DocCount;
     if (numDocs > 0)
     {
         // It's possible that all documents seen in this segment
         // hit non-aborting exceptions, in which case we will
         // not have yet init'd the FieldsWriter:
         InitFieldsWriter(state.Context);
         Fill(numDocs);
     }
     if (FieldsWriter != null)
     {
         bool success = false;
         try
         {
             FieldsWriter.Finish(state.FieldInfos, numDocs);
             success = true;
         }
         finally
         {
             if (success)
             {
                 IOUtils.Close(FieldsWriter);
             }
             else
             {
                 IOUtils.CloseWhileHandlingException(FieldsWriter);
             }
         }
     }
 }
        public override void Flush(Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state)
		{
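            // Split each thread's fields between the two downstream consumers ("one" and
            // "two"), then flush each consumer with its own view of threads and fields.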

            Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> oneThreadsAndFields = new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>();
            Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> twoThreadsAndFields = new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>();

            foreach (KeyValuePair<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> entry in new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>(threadsAndFields))
            {
            	DocFieldConsumersPerThread perThread = (DocFieldConsumersPerThread) entry.Key;
				
				IList<DocFieldConsumerPerField> fields = entry.Value;

                //IEnumerator<DocFieldConsumerPerField> fieldsIt = fields.GetEnumerator();
                IList<DocFieldConsumerPerField> oneFields = new List<DocFieldConsumerPerField>();
                IList<DocFieldConsumerPerField> twoFields = new List<DocFieldConsumerPerField>();
                foreach (DocFieldConsumersPerField perField in fields)
                {
                    oneFields.Add(perField.one);
                    twoFields.Add(perField.two);
                }
                				
				oneThreadsAndFields[perThread.one] = oneFields;
				twoThreadsAndFields[perThread.two] = twoFields;
			}
						
			one.Flush(oneThreadsAndFields, state);
			two.Flush(twoThreadsAndFields, state);
		}
        public void CloseDocStore(SegmentWriteState state)
        {
            lock (this)
            {
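                // Fill the stored-fields files for any trailing docs this writer never saw,
                // then close the writer and record the .fdt/.fdx doc-store files as flushed.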
                int inc = state.numDocsInStore - lastDocID;
                if (inc > 0)
                {
                    InitFieldsWriter();
                    Fill(state.numDocsInStore - docWriter.GetDocStoreOffset());
                }

                if (fieldsWriter != null)
                {
                    fieldsWriter.Close();
                    fieldsWriter = null;
                    lastDocID = 0;
                    System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);
                    SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION);
                    SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);

                    state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION);
                    state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);

                    System.String fileName = state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;

                    if (4 + ((long) state.numDocsInStore) * 8 != state.directory.FileLength(fileName))
                        throw new System.SystemException("after flush: fdx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fileName) + " length in bytes of " + fileName + " file exists?=" + state.directory.FileExists(fileName));
                }
            }
        }
		public override void  Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
		{
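			// Non-generic variant of the same gather-and-delegate pattern: collect each
			// thread's field consumers/endConsumers into Hashtables, then flush both chains.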
			
			System.Collections.IDictionary childThreadsAndFields = new System.Collections.Hashtable();
			System.Collections.IDictionary endChildThreadsAndFields = new System.Collections.Hashtable();
			
			System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator();
			while (it.MoveNext())
			{
				
				System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current;
				
				DocInverterPerThread perThread = (DocInverterPerThread) entry.Key;
				
				System.Collections.ICollection fields = (System.Collections.ICollection) entry.Value;
				
				System.Collections.IEnumerator fieldsIt = fields.GetEnumerator();
				System.Collections.Hashtable childFields = new System.Collections.Hashtable();
				System.Collections.Hashtable endChildFields = new System.Collections.Hashtable();
				while (fieldsIt.MoveNext())
				{
					DocInverterPerField perField = (DocInverterPerField) ((System.Collections.DictionaryEntry) fieldsIt.Current).Key;
					childFields[perField.consumer] = perField.consumer;
					endChildFields[perField.endConsumer] = perField.endConsumer;
				}
				
				childThreadsAndFields[perThread.consumer] = childFields;
				endChildThreadsAndFields[perThread.endConsumer] = endChildFields;
			}
			
			consumer.Flush(childThreadsAndFields, state);
			endConsumer.Flush(endChildThreadsAndFields, state);
		}
		public override void  Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
		{
			
			System.Collections.IDictionary oneThreadsAndFields = new System.Collections.Hashtable();
			System.Collections.IDictionary twoThreadsAndFields = new System.Collections.Hashtable();
			
			System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator();
			while (it.MoveNext())
			{
				
				System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current;
				
				DocFieldConsumersPerThread perThread = (DocFieldConsumersPerThread) entry.Key;
				
				System.Collections.ICollection fields = (System.Collections.ICollection) entry.Value;
				
				System.Collections.IEnumerator fieldsIt = fields.GetEnumerator();
				System.Collections.Hashtable oneFields = new System.Collections.Hashtable();
				System.Collections.Hashtable twoFields = new System.Collections.Hashtable();
				while (fieldsIt.MoveNext())
				{
					DocFieldConsumersPerField perField = (DocFieldConsumersPerField) fieldsIt.Current;
					SupportClass.CollectionsHelper.AddIfNotContains(oneFields, perField.one);
					SupportClass.CollectionsHelper.AddIfNotContains(twoFields, perField.two);
				}
				
				oneThreadsAndFields[perThread.one] = oneFields;
				twoThreadsAndFields[perThread.two] = twoFields;
			}
			
			
			one.Flush(oneThreadsAndFields, state);
			two.Flush(twoThreadsAndFields, state);
		}
        public override FieldsConsumer FieldsConsumer(SegmentWriteState state)
        {
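            // Nested pulsing setup: the two PulsingPostingsWriter layers inline postings of
            // very low-frequency terms directly into the terms dictionary and delegate the
            // rest to the Lucene41 writer; on failure everything opened so far is closed.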
            PostingsWriterBase docsWriter = null;
            PostingsWriterBase pulsingWriterInner = null;
            PostingsWriterBase pulsingWriter = null;

            // Terms dict
            bool success = false;
            try
            {
                docsWriter = new Lucene41PostingsWriter(state);

                pulsingWriterInner = new PulsingPostingsWriter(state, 2, docsWriter);
                pulsingWriter = new PulsingPostingsWriter(state, 1, pulsingWriterInner);
                FieldsConsumer ret = new BlockTreeTermsWriter(state, pulsingWriter,
                    BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
                success = true;
                return ret;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(docsWriter, pulsingWriterInner, pulsingWriter);
                }
            }
        }
 internal FormatPostingsTermsWriter(SegmentWriteState state, FormatPostingsFieldsWriter parent)
     : base()
 {
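     // Reuse the parent's terms dictionary output and chain a per-term docs writer under it.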
     this.parent = parent;
     termsOut = parent.termsOut;
     docsWriter = new FormatPostingsDocsWriter(state, this);
 }
        public override void Flush(IDictionary<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state)
		{

            var oneThreadsAndFields = new HashMap<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>>();
			var twoThreadsAndFields = new HashMap<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>>();
			
			foreach(var entry in threadsAndFields)
			{
				DocFieldConsumersPerThread perThread = (DocFieldConsumersPerThread) entry.Key;
                ICollection<DocFieldConsumerPerField> fields = entry.Value;

                IEnumerator<DocFieldConsumerPerField> fieldsIt = fields.GetEnumerator();
                ICollection<DocFieldConsumerPerField> oneFields = new HashSet<DocFieldConsumerPerField>();
                ICollection<DocFieldConsumerPerField> twoFields = new HashSet<DocFieldConsumerPerField>();
				while (fieldsIt.MoveNext())
				{
					DocFieldConsumersPerField perField = (DocFieldConsumersPerField) fieldsIt.Current;
					oneFields.Add(perField.one);
					twoFields.Add(perField.two);
				}
				
				oneThreadsAndFields[perThread.one] = oneFields;
				twoThreadsAndFields[perThread.two] = twoFields;
			}
			
			
			one.Flush(oneThreadsAndFields, state);
			two.Flush(twoThreadsAndFields, state);
		}
        public override void Flush(Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state)
		{

            Support.Dictionary<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>> childThreadsAndFields = new Support.Dictionary<InvertedDocConsumerPerThread, IList<InvertedDocConsumerPerField>>();
            Support.Dictionary<InvertedDocEndConsumerPerThread, IList<InvertedDocEndConsumerPerField>> endChildThreadsAndFields = new Support.Dictionary<InvertedDocEndConsumerPerThread, IList<InvertedDocEndConsumerPerField>>();

            foreach (KeyValuePair<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> entry in new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>(threadsAndFields))
            {
                DocInverterPerThread perThread = (DocInverterPerThread)entry.Key;
                                
                List<InvertedDocConsumerPerField> childFields = new List<InvertedDocConsumerPerField>();
                List<InvertedDocEndConsumerPerField> endChildFields = new List<InvertedDocEndConsumerPerField>();
                foreach (DocFieldConsumerPerField field in entry.Value)
                {
                    DocInverterPerField perField = (DocInverterPerField)field;
                    childFields.Add(perField.consumer);
                    endChildFields.Add(perField.endConsumer);
                }

                childThreadsAndFields[perThread.consumer] = childFields;
                endChildThreadsAndFields[perThread.endConsumer] = endChildFields;
            }
			
			consumer.Flush(childThreadsAndFields, state);
			endConsumer.Flush(endChildThreadsAndFields, state);
		}
	    public override void  CloseDocStore(SegmentWriteState state)
		{
			try
			{
				one.CloseDocStore(state);
			}
			finally
			{
				two.CloseDocStore(state);
			}
		}
        public override void Flush(IDictionary<string, DocFieldConsumerPerField> fieldsToFlush, SegmentWriteState state)
        {
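            // Per-field variant: map each field name to its inverting consumer and end
            // consumer, then flush both downstream chains.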
            IDictionary<string, InvertedDocConsumerPerField> childFieldsToFlush = new Dictionary<string, InvertedDocConsumerPerField>();
            IDictionary<string, InvertedDocEndConsumerPerField> endChildFieldsToFlush = new Dictionary<string, InvertedDocEndConsumerPerField>();

            foreach (KeyValuePair<string, DocFieldConsumerPerField> fieldToFlush in fieldsToFlush)
            {
                DocInverterPerField perField = (DocInverterPerField)fieldToFlush.Value;
                childFieldsToFlush[fieldToFlush.Key] = perField.Consumer;
                endChildFieldsToFlush[fieldToFlush.Key] = perField.EndConsumer;
            }

            Consumer.Flush(childFieldsToFlush, state);
            EndConsumer.Flush(endChildFieldsToFlush, state);
        }
		internal FormatPostingsDocsWriter(SegmentWriteState state, FormatPostingsTermsWriter parent):base()
		{
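			// Open the .frq output for this segment, record it as a flushed file, and point
			// the shared skip-list writer at it; positions go to the child positions writer.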
			this.parent = parent;
			System.String fileName = IndexFileNames.SegmentFileName(parent.parent.segment, IndexFileNames.FREQ_EXTENSION);
			SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, fileName);
			out_Renamed = parent.parent.dir.CreateOutput(fileName);
			totalNumDocs = parent.parent.totalNumDocs;
			
			// TODO: abstraction violation
			skipInterval = parent.parent.termsOut.skipInterval;
			skipListWriter = parent.parent.skipListWriter;
			skipListWriter.SetFreqOutput(out_Renamed);
			
			posWriter = new FormatPostingsPositionsWriter(state, this);
		}
        public override FieldsConsumer FieldsConsumer(SegmentWriteState state)
        {
            PostingsWriterBase docs = new Lucene41PostingsWriter(state);

            // TODO: should we make the terms index more easily
            // pluggable?  Ie so that this codec would record which
            // index impl was used, and switch on loading?
            // Or... you must make a new Codec for this?
            TermsIndexWriterBase indexWriter;
            bool success = false;
            try
            {
                indexWriter = new FixedGapTermsIndexWriter(state);
                success = true;
            }
            finally
            {
                if (!success)
                {
                    docs.Dispose();
                }
            }

            success = false;
            try
            {
                // Must use BlockTermsWriter (not BlockTree) because
                // BlockTree doesn't support ords (yet)...
                FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, docs);
                success = true;
                return ret;
            }
            finally
            {
                if (!success)
                {
                    try
                    {
                        docs.Dispose();
                    }
                    finally
                    {
                        indexWriter.Dispose();
                    }
                }
            }
        }
		internal FormatPostingsPositionsWriter(SegmentWriteState state, FormatPostingsDocsWriter parent)
		{
			this.parent = parent;
			omitTermFreqAndPositions = parent.omitTermFreqAndPositions;
			if (parent.parent.parent.fieldInfos.HasProx())
			{
				// At least one field does not omit TF, so create the
				// prox file
				System.String fileName = IndexFileNames.SegmentFileName(parent.parent.parent.segment, IndexFileNames.PROX_EXTENSION);
				state.flushedFiles.Add(fileName);
				out_Renamed = parent.parent.parent.dir.CreateOutput(fileName);
				parent.skipListWriter.SetProxOutput(out_Renamed);
			}
			// Every field omits TF so we will write no prox file
			else
				out_Renamed = null;
		}
        internal override void Flush(IDictionary<string, InvertedDocEndConsumerPerField> fieldsToFlush, SegmentWriteState state)
        {
            bool success = false;
            DocValuesConsumer normsConsumer = null;
            try
            {
                if (state.FieldInfos.HasNorms())
                {
                    NormsFormat normsFormat = state.SegmentInfo.Codec.NormsFormat();
                    Debug.Assert(normsFormat != null);
                    normsConsumer = normsFormat.NormsConsumer(state);

                    foreach (FieldInfo fi in state.FieldInfos)
                    {
                        NormsConsumerPerField toWrite = (NormsConsumerPerField)fieldsToFlush[fi.Name];
                        // we must check the final value of omitNorms for the fieldinfo, it could have
                        // changed for this field since the first time we added it.
                        if (!fi.OmitsNorms())
                        {
                            if (toWrite != null && !toWrite.Empty)
                            {
                                toWrite.Flush(state, normsConsumer);
                                Debug.Assert(fi.NormType == DocValuesType.NUMERIC);
                            }
                            else if (fi.Indexed)
                            {
                                Debug.Assert(fi.NormType == null, "got " + fi.NormType + "; field=" + fi.Name);
                            }
                        }
                    }
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(normsConsumer);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(normsConsumer);
                }
            }
        }
        public override void Flush(IDictionary<DocConsumerPerThread, DocConsumerPerThread> threads, SegmentWriteState state)
		{
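            // Gather each thread's field consumers and trim per-thread field state, then
            // flush stored fields and the consumer chain before writing FieldInfos.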

            Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> childThreadsAndFields = new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>();
            foreach (DocFieldProcessorPerThread perThread in threads.Keys)
            {
                childThreadsAndFields[perThread.consumer] = perThread.Fields();
                perThread.TrimFields(state);
            }
			fieldsWriter.Flush(state);
			consumer.Flush(childThreadsAndFields, state);
			
			// Important to save after asking consumer to flush so
			// consumer can alter the FieldInfo* if necessary.  EG,
			// FreqProxTermsWriter does this with
			// FieldInfo.storePayload.
			System.String fileName = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
			fieldInfos.Write(state.directory, fileName);
            state.flushedFiles.Add(fileName);
		}
		public FormatPostingsFieldsWriter(SegmentWriteState state, FieldInfos fieldInfos):base()
		{
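			// Top of the legacy postings chain: open the terms dictionary writer (.tis/.tii),
			// create the shared skip-list writer, and register the terms files as flushed.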
			
			dir = state.directory;
			segment = state.segmentName;
			totalNumDocs = state.numDocs;
			this.fieldInfos = fieldInfos;
			termsOut = new TermInfosWriter(dir, segment, fieldInfos, state.termIndexInterval);
			
			// TODO: this is a nasty abstraction violation (that we
			// peek down to find freqOut/proxOut) -- we need a
			// better abstraction here whereby these child consumers
			// can provide skip data or not
			skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval, termsOut.maxSkipLevels, totalNumDocs, null, null);
			
			SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.SegmentFileName(IndexFileNames.TERMS_EXTENSION));
			SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION));
			
			termsWriter = new FormatPostingsTermsWriter(state, this);
		}
        public override FieldsConsumer FieldsConsumer(SegmentWriteState state)
        {
            PostingsWriterBase postingsWriter = new SepPostingsWriter(state, new MockIntFactory(baseBlockSize));

            bool success = false;
            TermsIndexWriterBase indexWriter;
            try
            {
                indexWriter = new FixedGapTermsIndexWriter(state);
                success = true;
            }
            finally
            {
                if (!success)
                {
                    postingsWriter.Dispose();
                }
            }

            success = false;
            try
            {
                FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter);
                success = true;
                return ret;
            }
            finally
            {
                if (!success)
                {
                    try
                    {
                        postingsWriter.Dispose();
                    }
                    finally
                    {
                        indexWriter.Dispose();
                    }
                }
            }
        }
		public override void  Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
		{
			lock (this)
			{
                // NOTE: it's possible that all documents seen in this segment
                // hit non-aborting exceptions, in which case we will
                // not have yet init'd the TermVectorsWriter.  This is
                // actually OK (unlike in the stored fields case)
                // because, although FieldInfos.hasVectors() will return
                // true, the TermVectorsReader gracefully handles
                // non-existence of the term vectors files.
				if (tvx != null)
				{
					
					// In case there are some final documents that we
					// didn't see (because they hit a non-aborting exception):
					if (state.numDocsInStore > 0)
						Fill(state.numDocsInStore - docWriter.GetDocStoreOffset());
					
					tvx.Flush();
					tvd.Flush();
					tvf.Flush();
				}

                System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator();
				while (it.MoveNext())
				{
					System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current;
					System.Collections.IEnumerator it2 = ((System.Collections.ICollection) entry.Value).GetEnumerator();
					while (it2.MoveNext())
					{
						TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) ((System.Collections.DictionaryEntry) it2.Current).Key;
						perField.termsHashPerField.Reset();
						perField.ShrinkHash();
					}
					
					TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.Key;
					perThread.termsHashPerThread.Reset(true);
				}
			}
		}
        public override void Flush(System.Collections.ICollection threads, SegmentWriteState state)
        {
            System.Collections.IDictionary childThreadsAndFields = new System.Collections.Hashtable();
            System.Collections.IEnumerator it = threads.GetEnumerator();
            while (it.MoveNext())
            {
                DocFieldProcessorPerThread perThread = (DocFieldProcessorPerThread) ((System.Collections.DictionaryEntry) it.Current).Key;
                childThreadsAndFields[perThread.consumer] = perThread.Fields();
                perThread.TrimFields(state);
            }
            fieldsWriter.Flush(state);
            consumer.Flush(childThreadsAndFields, state);

            // Important to save after asking consumer to flush so
            // consumer can alter the FieldInfo* if necessary.  EG,
            // FreqProxTermsWriter does this with
            // FieldInfo.storePayload.
            System.String fileName = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
            fieldInfos.Write(state.directory, fileName);
            SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, fileName);
        }
		public void  Flush(SegmentWriteState state)
		{
			lock (this)
			{
				
				if (state.numDocsInStore > 0)
				{
					// It's possible that all documents seen in this segment
					// hit non-aborting exceptions, in which case we will
					// not have yet init'd the FieldsWriter:
					InitFieldsWriter();
					
					// Fill fdx file to include any final docs that we
					// skipped because they hit non-aborting exceptions
					Fill(state.numDocsInStore - docWriter.DocStoreOffset);
				}
				
				if (fieldsWriter != null)
					fieldsWriter.Flush();
			}
		}
        public override void Flush(Support.Dictionary<TermsHashConsumerPerThread, IList<TermsHashConsumerPerField>> threadsAndFields, SegmentWriteState state)
		{
			lock (this)
			{
                // NOTE: it's possible that all documents seen in this segment
                // hit non-aborting exceptions, in which case we will
                // not have yet init'd the TermVectorsWriter.  This is
                // actually OK (unlike in the stored fields case)
                // because, although FieldInfos.hasVectors() will return
                // true, the TermVectorsReader gracefully handles
                // non-existence of the term vectors files.
				if (tvx != null)
				{
					
					// In case there are some final documents that we
					// didn't see (because they hit a non-aborting exception):
					if (state.numDocsInStore > 0)
						Fill(state.numDocsInStore - docWriter.GetDocStoreOffset());
					
					tvx.Flush();
					tvd.Flush();
					tvf.Flush();
				}

                foreach(KeyValuePair<TermsHashConsumerPerThread,IList<TermsHashConsumerPerField>> entry in threadsAndFields) {
                    foreach (TermsHashConsumerPerField field in entry.Value ) 
                    {
                        TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field;
                        perField.termsHashPerField.Reset();
                        perField.ShrinkHash();
                    }
					
					
					TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.Key;
					perThread.termsHashPerThread.Reset(true);
				}
			}
		}
        // TODO: would be nice to factor out more of this, eg the
        // FreqProxFieldMergeState, and code to visit all Fields
        // under the same FieldInfo together, up into TermsHash*.
        // Other writers would presumably share a lot of this...

        public override void  Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
        {
            // Gather all FieldData's that have postings, across all
            // ThreadStates
            System.Collections.ArrayList allFields = new System.Collections.ArrayList();

            System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator();
            while (it.MoveNext())
            {
                System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current;

                System.Collections.ICollection fields = (System.Collections.ICollection)entry.Value;

                System.Collections.IEnumerator fieldsIt = fields.GetEnumerator();

                while (fieldsIt.MoveNext())
                {
                    FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)((System.Collections.DictionaryEntry)fieldsIt.Current).Key;
                    if (perField.termsHashPerField.numPostings > 0)
                    {
                        allFields.Add(perField);
                    }
                }
            }

            // Sort by field name
            allFields.Sort();
            int numAllFields = allFields.Count;

            // TODO: allow Lucene user to customize this consumer:
            FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);

            /*
             * Current writer chain:
             *   FormatPostingsFieldsConsumer
             *   -> IMPL: FormatPostingsFieldsWriter
             *      -> FormatPostingsTermsConsumer
             *         -> IMPL: FormatPostingsTermsWriter
             *            -> FormatPostingsDocConsumer
             *               -> IMPL: FormatPostingsDocWriter
             *                  -> FormatPostingsPositionsConsumer
             *                     -> IMPL: FormatPostingsPositionsWriter
             */

            int start = 0;

            while (start < numAllFields)
            {
                FieldInfo     fieldInfo = ((FreqProxTermsWriterPerField)allFields[start]).fieldInfo;
                System.String fieldName = fieldInfo.name;

                int end = start + 1;
                while (end < numAllFields && ((FreqProxTermsWriterPerField)allFields[end]).fieldInfo.name.Equals(fieldName))
                {
                    end++;
                }

                FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start];
                for (int i = start; i < end; i++)
                {
                    fields[i - start] = (FreqProxTermsWriterPerField)allFields[i];

                    // Aggregate the storePayload as seen by the same
                    // field across multiple threads
                    fieldInfo.storePayloads |= fields[i - start].hasPayloads;
                }

                // If this field has postings then add them to the
                // segment
                AppendPostings(fields, consumer);

                for (int i = 0; i < fields.Length; i++)
                {
                    TermsHashPerField perField = fields[i].termsHashPerField;
                    int numPostings            = perField.numPostings;
                    perField.Reset();
                    perField.ShrinkHash(numPostings);
                    fields[i].Reset();
                }

                start = end;
            }

            it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator();
            while (it.MoveNext())
            {
                System.Collections.DictionaryEntry entry     = (System.Collections.DictionaryEntry)it.Current;
                FreqProxTermsWriterPerThread       perThread = (FreqProxTermsWriterPerThread)entry.Key;
                perThread.termsHashPerThread.Reset(true);
            }

            consumer.Finish();
        }
        internal MergeState Merge()
        {
            if (!ShouldMerge)
            {
                throw new InvalidOperationException("Merge would result in 0 document segment");
            }
            // NOTE: it's important to add calls to
            // checkAbort.work(...) if you make any changes to this
            // method that will spend a lot of time.  The frequency
            // of this check impacts how long
            // IndexWriter.close(false) takes to actually stop the
            // threads.
            MergeFieldInfos();
            SetMatchingSegmentReaders();
            long t0 = 0;

            if (mergeState.InfoStream.IsEnabled("SM"))
            {
                t0 = Time.NanoTime();
            }
            int numMerged = MergeFields();

            if (mergeState.InfoStream.IsEnabled("SM"))
            {
                long t1 = Time.NanoTime();
                mergeState.InfoStream.Message("SM", ((t1 - t0) / 1000000) + " msec to merge stored fields [" + numMerged + " docs]");
            }
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(numMerged == mergeState.SegmentInfo.DocCount);
            }

            SegmentWriteState segmentWriteState = new SegmentWriteState(mergeState.InfoStream, directory, mergeState.SegmentInfo, mergeState.FieldInfos, termIndexInterval, null, context);

            if (mergeState.InfoStream.IsEnabled("SM"))
            {
                t0 = Time.NanoTime();
            }
            MergeTerms(segmentWriteState);
            if (mergeState.InfoStream.IsEnabled("SM"))
            {
                long t1 = Time.NanoTime();
                mergeState.InfoStream.Message("SM", ((t1 - t0) / 1000000) + " msec to merge postings [" + numMerged + " docs]");
            }

            if (mergeState.InfoStream.IsEnabled("SM"))
            {
                t0 = Time.NanoTime();
            }
            if (mergeState.FieldInfos.HasDocValues)
            {
                MergeDocValues(segmentWriteState);
            }
            if (mergeState.InfoStream.IsEnabled("SM"))
            {
                long t1 = Time.NanoTime();
                mergeState.InfoStream.Message("SM", ((t1 - t0) / 1000000) + " msec to merge doc values [" + numMerged + " docs]");
            }

            if (mergeState.FieldInfos.HasNorms)
            {
                if (mergeState.InfoStream.IsEnabled("SM"))
                {
                    t0 = Time.NanoTime();
                }
                MergeNorms(segmentWriteState);
                if (mergeState.InfoStream.IsEnabled("SM"))
                {
                    long t1 = Time.NanoTime();
                    mergeState.InfoStream.Message("SM", ((t1 - t0) / 1000000) + " msec to merge norms [" + numMerged + " docs]");
                }
            }

            if (mergeState.FieldInfos.HasVectors)
            {
                if (mergeState.InfoStream.IsEnabled("SM"))
                {
                    t0 = Time.NanoTime();
                }
                numMerged = MergeVectors();
                if (mergeState.InfoStream.IsEnabled("SM"))
                {
                    long t1 = Time.NanoTime();
                    mergeState.InfoStream.Message("SM", ((t1 - t0) / 1000000) + " msec to merge vectors [" + numMerged + " docs]");
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(numMerged == mergeState.SegmentInfo.DocCount);
                }
            }

            // write the merged infos
            FieldInfosWriter fieldInfosWriter = codec.FieldInfosFormat.FieldInfosWriter;

            fieldInfosWriter.Write(directory, mergeState.SegmentInfo.Name, "", mergeState.FieldInfos, context);

            return(mergeState);
        }
        internal virtual FlushedSegment Flush()
        {
            Debug.Assert(numDocsInRAM > 0);
            Debug.Assert(deleteSlice.IsEmpty, "all deletes must be applied in prepareFlush");
            segmentInfo.DocCount = numDocsInRAM;
            SegmentWriteState flushState  = new SegmentWriteState(infoStream, directory, segmentInfo, fieldInfos.Finish(), indexWriterConfig.TermIndexInterval, pendingUpdates, new IOContext(new FlushInfo(numDocsInRAM, BytesUsed)));
            double            startMBUsed = BytesUsed / 1024.0 / 1024.0;

            // Apply delete-by-docID now (delete-by-docID only
            // happens when an exception is hit processing that
            // doc, eg if analyzer has some problem w/ the text):
            if (pendingUpdates.docIDs.Count > 0)
            {
                flushState.LiveDocs = codec.LiveDocsFormat.NewLiveDocs(numDocsInRAM);
                foreach (int delDocID in pendingUpdates.docIDs)
                {
                    flushState.LiveDocs.Clear(delDocID);
                }
                flushState.DelCountOnFlush = pendingUpdates.docIDs.Count;
                pendingUpdates.bytesUsed.AddAndGet(-pendingUpdates.docIDs.Count * BufferedUpdates.BYTES_PER_DEL_DOCID);
                pendingUpdates.docIDs.Clear();
            }

            if (aborting)
            {
                if (infoStream.IsEnabled("DWPT"))
                {
                    infoStream.Message("DWPT", "flush: skip because aborting is set");
                }
                return(null);
            }

            if (infoStream.IsEnabled("DWPT"))
            {
                infoStream.Message("DWPT", "flush postings as segment " + flushState.SegmentInfo.Name + " numDocs=" + numDocsInRAM);
            }

            bool success = false;

            try
            {
                consumer.Flush(flushState);
                pendingUpdates.terms.Clear();
                segmentInfo.SetFiles(new HashSet <string>(directory.CreatedFiles));

                SegmentCommitInfo segmentInfoPerCommit = new SegmentCommitInfo(segmentInfo, 0, -1L, -1L);
                if (infoStream.IsEnabled("DWPT"))
                {
                    infoStream.Message("DWPT", "new segment has " + (flushState.LiveDocs == null ? 0 : (flushState.SegmentInfo.DocCount - flushState.DelCountOnFlush)) + " deleted docs");
                    infoStream.Message("DWPT", "new segment has " + (flushState.FieldInfos.HasVectors ? "vectors" : "no vectors") + "; " + (flushState.FieldInfos.HasNorms ? "norms" : "no norms") + "; " + (flushState.FieldInfos.HasDocValues ? "docValues" : "no docValues") + "; " + (flushState.FieldInfos.HasProx ? "prox" : "no prox") + "; " + (flushState.FieldInfos.HasFreq ? "freqs" : "no freqs"));
                    infoStream.Message("DWPT", "flushedFiles=" + Arrays.ToString(segmentInfoPerCommit.GetFiles()));
                    infoStream.Message("DWPT", "flushed codec=" + codec);
                }

                BufferedUpdates segmentDeletes;
                if (pendingUpdates.queries.Count == 0 && pendingUpdates.numericUpdates.Count == 0 && pendingUpdates.binaryUpdates.Count == 0)
                {
                    pendingUpdates.Clear();
                    segmentDeletes = null;
                }
                else
                {
                    segmentDeletes = pendingUpdates;
                }

                if (infoStream.IsEnabled("DWPT"))
                {
                    double newSegmentSize = segmentInfoPerCommit.GetSizeInBytes() / 1024.0 / 1024.0;
                    infoStream.Message("DWPT", "flushed: segment=" + segmentInfo.Name + " ramUsed=" + startMBUsed.ToString(nf) + " MB" + " newFlushedSize(includes docstores)=" + newSegmentSize.ToString(nf) + " MB" + " docs/MB=" + (flushState.SegmentInfo.DocCount / newSegmentSize).ToString(nf));
                }

                Debug.Assert(segmentInfo != null);

                FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.FieldInfos, segmentDeletes, flushState.LiveDocs, flushState.DelCountOnFlush);
                SealFlushedSegment(fs);
                success = true;

                return(fs);
            }
            finally
            {
                if (!success)
                {
                    Abort(filesToDelete);
                }
            }
        }
        /// <summary>Produce _X.nrm if any document had a field with norms
        /// not disabled
        /// </summary>
        public override void  Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
        {
            System.Collections.IDictionary byField = new System.Collections.Hashtable();

            // Typically, each thread will have encountered the same
            // field.  So first we collate by field, ie, all
            // per-thread field instances that correspond to the
            // same FieldInfo
            System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator();
            while (it.MoveNext())
            {
                System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current;

                System.Collections.ICollection fields         = (System.Collections.ICollection)entry.Value;
                System.Collections.IEnumerator fieldsIt       = fields.GetEnumerator();
                System.Collections.ArrayList   fieldsToRemove = new System.Collections.ArrayList();

                while (fieldsIt.MoveNext())
                {
                    NormsWriterPerField perField = (NormsWriterPerField)((System.Collections.DictionaryEntry)fieldsIt.Current).Key;

                    if (perField.upto > 0)
                    {
                        // It has some norms
                        System.Collections.IList l = (System.Collections.IList)byField[perField.fieldInfo];
                        if (l == null)
                        {
                            l = new System.Collections.ArrayList();
                            byField[perField.fieldInfo] = l;
                        }
                        l.Add(perField);
                    }
                    // Remove this field since we haven't seen it
                    // since the previous flush
                    else
                    {
                        fieldsToRemove.Add(perField);
                    }
                }

                System.Collections.Hashtable fieldsHT = (System.Collections.Hashtable)fields;
                for (int i = 0; i < fieldsToRemove.Count; i++)
                {
                    fieldsHT.Remove(fieldsToRemove[i]);
                }
            }

            System.String normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION;
            state.flushedFiles[normsFileName] = normsFileName;
            IndexOutput normsOut = state.directory.CreateOutput(normsFileName);

            try
            {
                normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length);

                int numField = fieldInfos.Size();

                int normCount = 0;

                for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++)
                {
                    FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber);

                    System.Collections.IList toMerge = (System.Collections.IList)byField[fieldInfo];
                    int upto = 0;
                    if (toMerge != null)
                    {
                        int numFields = toMerge.Count;

                        normCount++;

                        NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
                        int[] uptos = new int[numFields];

                        for (int j = 0; j < numFields; j++)
                        {
                            fields[j] = (NormsWriterPerField)toMerge[j];
                        }

                        int numLeft = numFields;

                        while (numLeft > 0)
                        {
                            System.Diagnostics.Debug.Assert(uptos [0] < fields [0].docIDs.Length, " uptos[0]=" + uptos [0] + " len=" + (fields [0].docIDs.Length));

                            int minLoc   = 0;
                            int minDocID = fields[0].docIDs[uptos[0]];

                            for (int j = 1; j < numLeft; j++)
                            {
                                int docID = fields[j].docIDs[uptos[j]];
                                if (docID < minDocID)
                                {
                                    minDocID = docID;
                                    minLoc   = j;
                                }
                            }

                            System.Diagnostics.Debug.Assert(minDocID < state.numDocs);

                            // Fill hole
                            for (; upto < minDocID; upto++)
                            {
                                normsOut.WriteByte(defaultNorm);
                            }

                            normsOut.WriteByte(fields[minLoc].norms[uptos[minLoc]]);
                            (uptos[minLoc])++;
                            upto++;

                            if (uptos[minLoc] == fields[minLoc].upto)
                            {
                                fields[minLoc].Reset();
                                if (minLoc != numLeft - 1)
                                {
                                    fields[minLoc] = fields[numLeft - 1];
                                    uptos[minLoc]  = uptos[numLeft - 1];
                                }
                                numLeft--;
                            }
                        }

                        // Fill final hole with defaultNorm
                        for (; upto < state.numDocs; upto++)
                        {
                            normsOut.WriteByte(defaultNorm);
                        }
                    }
                    else if (fieldInfo.isIndexed && !fieldInfo.omitNorms)
                    {
                        normCount++;
                        // Fill entire field with default norm:
                        for (; upto < state.numDocs; upto++)
                        {
                            normsOut.WriteByte(defaultNorm);
                        }
                    }

                    System.Diagnostics.Debug.Assert(4 + normCount * state.numDocs == normsOut.GetFilePointer(), ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocs) + " actual=" + normsOut.GetFilePointer());
                }
            }
            finally
            {
                normsOut.Close();
            }
        }
        /// <summary>
        /// Walk through all unique text tokens (Posting
        /// instances) found in this field and serialize them
        /// into a single RAM segment.
        /// </summary>
        internal void Flush(string fieldName, FieldsConsumer consumer, SegmentWriteState state)
        {
            if (!fieldInfo.IsIndexed)
            {
                return; // nothing to flush, don't bother the codec with the unindexed field
            }

            TermsConsumer        termsConsumer = consumer.AddField(fieldInfo);
            IComparer <BytesRef> termComp      = termsConsumer.Comparer;

            // CONFUSING: this.indexOptions holds the index options
            // that were current when we first saw this field.  But
            // it's possible this has changed, eg when other
            // documents are indexed that cause a "downgrade" of the
            // IndexOptions.  So we must decode the in-RAM buffer
            // according to this.indexOptions, but then write the
            // new segment to the directory according to
            // currentFieldIndexOptions:
            IndexOptions currentFieldIndexOptions = fieldInfo.IndexOptions;

            Debug.Assert(currentFieldIndexOptions != IndexOptions.NONE);

            bool writeTermFreq  = currentFieldIndexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
            bool writePositions = currentFieldIndexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
            bool writeOffsets   = currentFieldIndexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;

            bool readTermFreq  = this.hasFreq;
            bool readPositions = this.hasProx;
            bool readOffsets   = this.hasOffsets;

            //System.out.println("flush readTF=" + readTermFreq + " readPos=" + readPositions + " readOffs=" + readOffsets);

            // Make sure FieldInfo.update is working correctly!:
            Debug.Assert(!writeTermFreq || readTermFreq);
            Debug.Assert(!writePositions || readPositions);
            Debug.Assert(!writeOffsets || readOffsets);

            Debug.Assert(!writeOffsets || writePositions);

            IDictionary <Term, int?> segDeletes;

            if (state.SegUpdates != null && state.SegUpdates.terms.Count > 0)
            {
                segDeletes = state.SegUpdates.terms;
            }
            else
            {
                segDeletes = null;
            }

            int[]    termIDs  = termsHashPerField.SortPostings(termComp);
            int      numTerms = termsHashPerField.bytesHash.Count;
            BytesRef text     = new BytesRef();
            FreqProxPostingsArray postings = (FreqProxPostingsArray)termsHashPerField.postingsArray;
            ByteSliceReader       freq     = new ByteSliceReader();
            ByteSliceReader       prox     = new ByteSliceReader();

            FixedBitSet visitedDocs      = new FixedBitSet(state.SegmentInfo.DocCount);
            long        sumTotalTermFreq = 0;
            long        sumDocFreq       = 0;

            Term protoTerm = new Term(fieldName);

            for (int i = 0; i < numTerms; i++)
            {
                int termID = termIDs[i];
                // Get BytesRef
                int textStart = postings.textStarts[termID];
                termsHashPerField.bytePool.SetBytesRef(text, textStart);

                termsHashPerField.InitReader(freq, termID, 0);
                if (readPositions || readOffsets)
                {
                    termsHashPerField.InitReader(prox, termID, 1);
                }

                // TODO: really TermsHashPerField should take over most
                // of this loop, including merge sort of terms from
                // multiple threads and interacting with the
                // TermsConsumer, only calling out to us (passing us the
                // DocsConsumer) to handle delivery of docs/positions

                PostingsConsumer postingsConsumer = termsConsumer.StartTerm(text);

                int? delDocLimit;
                if (segDeletes != null)
                {
                    protoTerm.Bytes = text;
                    int? docIDUpto;
                    segDeletes.TryGetValue(protoTerm, out docIDUpto);
                    if (docIDUpto != null)
                    {
                        delDocLimit = docIDUpto;
                    }
                    else
                    {
                        delDocLimit = 0;
                    }
                }
                else
                {
                    delDocLimit = 0;
                }

                // Now termStates has numToMerge FieldMergeStates
                // which all share the same term.  Now we must
                // interleave the docID streams.
                int  docFreq       = 0;
                long totalTermFreq = 0;
                int  docID         = 0;

                while (true)
                {
                    //System.out.println("  cycle");
                    int termFreq;
                    if (freq.Eof())
                    {
                        if (postings.lastDocCodes[termID] != -1)
                        {
                            // Return last doc
                            docID = postings.lastDocIDs[termID];
                            if (readTermFreq)
                            {
                                termFreq = postings.termFreqs[termID];
                            }
                            else
                            {
                                termFreq = -1;
                            }
                            postings.lastDocCodes[termID] = -1;
                        }
                        else
                        {
                            // EOF
                            break;
                        }
                    }
                    else
                    {
                        int code = freq.ReadVInt32();
                        if (!readTermFreq)
                        {
                            docID   += code;
                            termFreq = -1;
                        }
                        else
                        {
                            docID += (int)((uint)code >> 1);
                            if ((code & 1) != 0)
                            {
                                termFreq = 1;
                            }
                            else
                            {
                                termFreq = freq.ReadVInt32();
                            }
                        }

                        Debug.Assert(docID != postings.lastDocIDs[termID]);
                    }

                    docFreq++;
                    Debug.Assert(docID < state.SegmentInfo.DocCount, "doc=" + docID + " maxDoc=" + state.SegmentInfo.DocCount);

                    // NOTE: we could check here if the docID was
                    // deleted, and skip it.  However, this is somewhat
                    // dangerous because it can yield non-deterministic
                    // behavior since we may see the docID before we see
                    // the term that caused it to be deleted.  this
                    // would mean some (but not all) of its postings may
                    // make it into the index, which'd alter the docFreq
                    // for those terms.  We could fix this by doing two
                    // passes, ie first sweep marks all del docs, and
                    // 2nd sweep does the real flush, but I suspect
                    // that'd add too much time to flush.
                    visitedDocs.Set(docID);
                    postingsConsumer.StartDoc(docID, writeTermFreq ? termFreq : -1);
                    if (docID < delDocLimit)
                    {
                        // Mark it deleted.  TODO: we could also skip
                        // writing its postings; this would be
                        // deterministic (just for this Term's docs).

                        // TODO: can we do this reach-around in a cleaner way????
                        if (state.LiveDocs == null)
                        {
                            state.LiveDocs = docState.docWriter.codec.LiveDocsFormat.NewLiveDocs(state.SegmentInfo.DocCount);
                        }
                        if (state.LiveDocs.Get(docID))
                        {
                            state.DelCountOnFlush++;
                            state.LiveDocs.Clear(docID);
                        }
                    }

                    totalTermFreq += termFreq;

                    // Carefully copy over the prox + payload info,
                    // changing the format to match Lucene's segment
                    // format.

                    if (readPositions || readOffsets)
                    {
                        // we did record positions (& maybe payload) and/or offsets
                        int position = 0;
                        int offset   = 0;
                        for (int j = 0; j < termFreq; j++)
                        {
                            BytesRef thisPayload;

                            if (readPositions)
                            {
                                int code = prox.ReadVInt32();
                                position += (int)((uint)code >> 1);

                                if ((code & 1) != 0)
                                {
                                    // this position has a payload
                                    int payloadLength = prox.ReadVInt32();

                                    if (payload == null)
                                    {
                                        payload       = new BytesRef();
                                        payload.Bytes = new byte[payloadLength];
                                    }
                                    else if (payload.Bytes.Length < payloadLength)
                                    {
                                        payload.Grow(payloadLength);
                                    }

                                    prox.ReadBytes(payload.Bytes, 0, payloadLength);
                                    payload.Length = payloadLength;
                                    thisPayload    = payload;
                                }
                                else
                                {
                                    thisPayload = null;
                                }

                                if (readOffsets)
                                {
                                    int startOffset = offset + prox.ReadVInt32();
                                    int endOffset   = startOffset + prox.ReadVInt32();
                                    if (writePositions)
                                    {
                                        if (writeOffsets)
                                        {
                                            Debug.Assert(startOffset >= 0 && endOffset >= startOffset, "startOffset=" + startOffset + ",endOffset=" + endOffset + ",offset=" + offset);
                                            postingsConsumer.AddPosition(position, thisPayload, startOffset, endOffset);
                                        }
                                        else
                                        {
                                            postingsConsumer.AddPosition(position, thisPayload, -1, -1);
                                        }
                                    }
                                    offset = startOffset;
                                }
                                else if (writePositions)
                                {
                                    postingsConsumer.AddPosition(position, thisPayload, -1, -1);
                                }
                            }
                        }
                    }
                    postingsConsumer.FinishDoc();
                }
                termsConsumer.FinishTerm(text, new TermStats(docFreq, writeTermFreq ? totalTermFreq : -1));
                sumTotalTermFreq += totalTermFreq;
                sumDocFreq       += docFreq;
            }

            termsConsumer.Finish(writeTermFreq ? sumTotalTermFreq : -1, sumDocFreq, visitedDocs.Cardinality());
        }
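
The (code & 1) tests above rely on a simple convention in the in-memory freq/prox byte streams: each entry is a VInt whose low bit is a flag and whose remaining bits carry a delta. The following stand-alone sketch is illustrative only (the buffer type and helpers are stand-ins, not the actual Lucene.NET classes); it shows how the position/payload entries that the loop decodes would be produced:

using System.Collections.Generic;

internal static class ProxEncodingSketch
{
    // Little-endian VInt: 7 bits per byte, high bit = "more bytes follow".
    internal static void WriteVInt32(List<byte> buffer, int value)
    {
        uint v = (uint)value;
        while (v >= 0x80)
        {
            buffer.Add((byte)(v | 0x80));
            v >>= 7;
        }
        buffer.Add((byte)v);
    }

    // One position entry: VInt of (positionDelta << 1 | hasPayloadBit),
    // optionally followed by a VInt payload length and the payload bytes.
    internal static void WritePosition(List<byte> prox, int positionDelta, byte[] payload)
    {
        if (payload != null && payload.Length > 0)
        {
            WriteVInt32(prox, (positionDelta << 1) | 1); // low bit set: payload follows
            WriteVInt32(prox, payload.Length);
            prox.AddRange(payload);
        }
        else
        {
            WriteVInt32(prox, positionDelta << 1);       // low bit clear: no payload
        }
    }
}
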
Example #30
 public abstract void Flush(SegmentWriteState state);
        public override void Flush(IDictionary <DocFieldConsumerPerThread, ICollection <DocFieldConsumerPerField> > threadsAndFields, SegmentWriteState state)
        {
            var childThreadsAndFields    = new HashMap <InvertedDocConsumerPerThread, ICollection <InvertedDocConsumerPerField> >();
            var endChildThreadsAndFields = new HashMap <InvertedDocEndConsumerPerThread, ICollection <InvertedDocEndConsumerPerField> >();

            foreach (var entry in threadsAndFields)
            {
                var perThread = (DocInverterPerThread)entry.Key;

                ICollection <InvertedDocConsumerPerField>    childFields    = new HashSet <InvertedDocConsumerPerField>();
                ICollection <InvertedDocEndConsumerPerField> endChildFields = new HashSet <InvertedDocEndConsumerPerField>();
                foreach (DocFieldConsumerPerField field in entry.Value)
                {
                    var perField = (DocInverterPerField)field;
                    childFields.Add(perField.consumer);
                    endChildFields.Add(perField.endConsumer);
                }

                childThreadsAndFields[perThread.consumer]       = childFields;
                endChildThreadsAndFields[perThread.endConsumer] = endChildFields;
            }

            consumer.Flush(childThreadsAndFields, state);
            endConsumer.Flush(endChildThreadsAndFields, state);
        }
Example #32
        public virtual void WriteFieldUpdates(Directory dir, DocValuesFieldUpdates.Container dvUpdates)
        {
            lock (this)
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(Monitor.IsEntered(writer));
                }
                //System.out.println("rld.writeFieldUpdates: seg=" + info + " numericFieldUpdates=" + numericFieldUpdates);

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(dvUpdates.Any());
                }

                // Do this so we can delete any created files on
                // exception; this saves all codecs from having to do
                // it:
                TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);

                FieldInfos fieldInfos = null;
                bool       success    = false;
                try
                {
                    Codec codec = Info.Info.Codec;

                    // reader could be null e.g. for a just merged segment (from
                    // IndexWriter.commitMergedDeletes).
                    SegmentReader reader = this.reader ?? new SegmentReader(Info, writer.Config.ReaderTermsIndexDivisor, IOContext.READ_ONCE);
                    try
                    {
                        // clone FieldInfos so that we can update their dvGen separately from
                        // the reader's infos and write them to a new fieldInfos_gen file
                        FieldInfos.Builder builder = new FieldInfos.Builder(writer.globalFieldNumberMap);
                        // cannot use builder.add(reader.getFieldInfos()) because it does not
                        // clone FI.attributes as well as FI.dvGen
                        foreach (FieldInfo fi in reader.FieldInfos)
                        {
                            FieldInfo clone = builder.Add(fi);
                            // copy the stuff FieldInfos.Builder doesn't copy
                            if (fi.Attributes != null)
                            {
                                foreach (KeyValuePair <string, string> e in fi.Attributes)
                                {
                                    clone.PutAttribute(e.Key, e.Value);
                                }
                            }
                            clone.DocValuesGen = fi.DocValuesGen;
                        }
                        // create new fields or update existing ones to have NumericDV type
                        foreach (string f in dvUpdates.numericDVUpdates.Keys)
                        {
                            builder.AddOrUpdate(f, NumericDocValuesField.TYPE);
                        }
                        // create new fields or update existing ones to have BinaryDV type
                        foreach (string f in dvUpdates.binaryDVUpdates.Keys)
                        {
                            builder.AddOrUpdate(f, BinaryDocValuesField.TYPE);
                        }

                        fieldInfos = builder.Finish();
                        long              nextFieldInfosGen     = Info.NextFieldInfosGen;
                        string            segmentSuffix         = nextFieldInfosGen.ToString(CultureInfo.InvariantCulture);//Convert.ToString(nextFieldInfosGen, Character.MAX_RADIX));
                        SegmentWriteState state                 = new SegmentWriteState(null, trackingDir, Info.Info, fieldInfos, writer.Config.TermIndexInterval, null, IOContext.DEFAULT, segmentSuffix);
                        DocValuesFormat   docValuesFormat       = codec.DocValuesFormat;
                        DocValuesConsumer fieldsConsumer        = docValuesFormat.FieldsConsumer(state);
                        bool              fieldsConsumerSuccess = false;
                        try
                        {
                            //          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeFieldUpdates: applying numeric updates; seg=" + info + " updates=" + numericFieldUpdates);
                            foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.numericDVUpdates)
                            {
                                string field = e.Key;
                                NumericDocValuesFieldUpdates fieldUpdates = e.Value;
                                FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                                if (Debugging.AssertsEnabled)
                                {
                                    Debugging.Assert(fieldInfo != null);
                                }

                                fieldInfo.DocValuesGen = nextFieldInfosGen;
                                // write the numeric updates to a new gen'd docvalues file
                                fieldsConsumer.AddNumericField(fieldInfo, GetInt64Enumerable(reader, field, fieldUpdates));
                            }

                            //        System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " updates=" + dvUpdates.binaryDVUpdates);
                            foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.binaryDVUpdates)
                            {
                                string field = e.Key;
                                BinaryDocValuesFieldUpdates dvFieldUpdates = e.Value;
                                FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                                if (Debugging.AssertsEnabled)
                                {
                                    Debugging.Assert(fieldInfo != null);
                                }

                                //          System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " f=" + dvFieldUpdates + ", updates=" + dvFieldUpdates);

                                fieldInfo.DocValuesGen = nextFieldInfosGen;
                                // write the binary updates to a new gen'd docvalues file
                                fieldsConsumer.AddBinaryField(fieldInfo, GetBytesRefEnumerable(reader, field, dvFieldUpdates));
                            }

                            codec.FieldInfosFormat.FieldInfosWriter.Write(trackingDir, Info.Info.Name, segmentSuffix, fieldInfos, IOContext.DEFAULT);
                            fieldsConsumerSuccess = true;
                        }
                        finally
                        {
                            if (fieldsConsumerSuccess)
                            {
                                fieldsConsumer.Dispose();
                            }
                            else
                            {
                                IOUtils.DisposeWhileHandlingException(fieldsConsumer);
                            }
                        }
                    }
                    finally
                    {
                        if (reader != this.reader)
                        {
                            //          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: closeReader " + reader);
                            reader.Dispose();
                        }
                    }

                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        // Advance only the nextWriteFieldInfosGen so that a 2nd
                        // attempt to write will write to a new file
                        Info.AdvanceNextWriteFieldInfosGen();

                        // Delete any partially created file(s):
                        foreach (string fileName in trackingDir.CreatedFiles)
                        {
                            try
                            {
                                dir.DeleteFile(fileName);
                            }
                            catch (Exception t) when (t.IsThrowable())
                            {
                                // Ignore so we throw only the first exc
                            }
                        }
                    }
                }

                Info.AdvanceFieldInfosGen();
                // copy all the updates to mergingUpdates, so they can later be applied to the merged segment
                if (isMerging)
                {
                    foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.numericDVUpdates)
                    {
                        if (!mergingDVUpdates.TryGetValue(e.Key, out DocValuesFieldUpdates updates))
                        {
                            mergingDVUpdates[e.Key] = e.Value;
                        }
                        else
                        {
                            updates.Merge(e.Value);
                        }
                    }
                    foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.binaryDVUpdates)
                    {
                        if (!mergingDVUpdates.TryGetValue(e.Key, out DocValuesFieldUpdates updates))
                        {
                            mergingDVUpdates[e.Key] = e.Value;
                        }
                        else
                        {
                            updates.Merge(e.Value);
                        }
                    }
                }

                // create a new map, keeping only the gens that are in use
                IDictionary <long, ISet <string> > genUpdatesFiles    = Info.UpdatesFiles;
                IDictionary <long, ISet <string> > newGenUpdatesFiles = new Dictionary <long, ISet <string> >();
                long fieldInfosGen = Info.FieldInfosGen;
                foreach (FieldInfo fi in fieldInfos)
                {
                    long dvGen = fi.DocValuesGen;
                    if (dvGen != -1 && !newGenUpdatesFiles.ContainsKey(dvGen))
                    {
                        if (dvGen == fieldInfosGen)
                        {
                            newGenUpdatesFiles[fieldInfosGen] = trackingDir.CreatedFiles;
                        }
                        else
                        {
                            newGenUpdatesFiles[dvGen] = genUpdatesFiles[dvGen];
                        }
                    }
                }

                Info.SetGenUpdatesFiles(newGenUpdatesFiles);

                // wrote new files, should checkpoint()
                writer.Checkpoint();

                // if there is a reader open, reopen it to reflect the updates
                if (reader != null)
                {
                    SegmentReader newReader = new SegmentReader(Info, reader, liveDocs, Info.Info.DocCount - Info.DelCount - pendingDeleteCount);
                    bool          reopened  = false;
                    try
                    {
                        reader.DecRef();
                        reader   = newReader;
                        reopened = true;
                    }
                    finally
                    {
                        if (!reopened)
                        {
                            newReader.DecRef();
                        }
                    }
                }
            }
        }
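
For context, WriteFieldUpdates runs after doc-values updates have been buffered against an open IndexWriter. A minimal usage sketch follows; it assumes the UpdateNumericDocValue and UpdateBinaryDocValue overloads shipped with Lucene.NET 4.8 and is not taken from this file:

using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;

internal static class DocValuesUpdateSketch
{
    // Buffers doc-values updates; they are written out as new gen'd files
    // (the work done by WriteFieldUpdates above) on flush/commit.
    internal static void BumpViewCount(Directory dir, IndexWriterConfig config)
    {
        using (var writer = new IndexWriter(dir, config))
        {
            // Re-point the "views" NumericDocValues of every doc whose "id" term is "42".
            writer.UpdateNumericDocValue(new Term("id", "42"), "views", 1000L);

            // Binary doc values can be updated the same way.
            writer.UpdateBinaryDocValue(new Term("id", "42"), "thumb", new BytesRef(new byte[] { 1, 2, 3 }));

            writer.Commit();
        }
    }
}
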
 public abstract void  Flush(IDictionary <TermsHashConsumerPerThread, ICollection <TermsHashConsumerPerField> > threadsAndFields, SegmentWriteState state);
		internal override void  CloseDocStore(SegmentWriteState state)
		{
		}
Example #35
 public abstract void Flush(SegmentWriteState state, DocValuesConsumer consumer);
Example #36
        /// <summary>
        /// Merges the readers into the directory passed to the constructor </summary>
        /// <returns> The number of documents that were merged </returns>
        /// <exception cref="CorruptIndexException"> if the index is corrupt </exception>
        /// <exception cref="IOException"> if there is a low-level IO error </exception>
        public MergeState Merge()
        {
            if (!ShouldMerge())
            {
                throw new InvalidOperationException("Merge would result in 0 document segment");
            }
            // NOTE: it's important to add calls to
            // checkAbort.work(...) if you make any changes to this
            // method that will spend a lot of time.  The frequency
            // of this check impacts how long
            // IndexWriter.close(false) takes to actually stop the
            // threads.
            MergeFieldInfos();
            SetMatchingSegmentReaders();
            long t0 = 0;
            if (MergeState.InfoStream.IsEnabled("SM"))
            {
                t0 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond;
            }
            int numMerged = MergeFields();
            if (MergeState.InfoStream.IsEnabled("SM"))
            {
                long t1 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond;
                MergeState.InfoStream.Message("SM", ((t1 - t0) / 1000000) + " msec to merge stored fields [" + numMerged + " docs]");
            }
            Debug.Assert(numMerged == MergeState.SegmentInfo.DocCount);

            SegmentWriteState segmentWriteState = new SegmentWriteState(MergeState.InfoStream, Directory, MergeState.SegmentInfo, MergeState.FieldInfos, TermIndexInterval, null, Context);
            if (MergeState.InfoStream.IsEnabled("SM"))
            {
                t0 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond;
            }
            MergeTerms(segmentWriteState);
            if (MergeState.InfoStream.IsEnabled("SM"))
            {
                long t1 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond;
                MergeState.InfoStream.Message("SM", ((t1 - t0) / 1000000) + " msec to merge postings [" + numMerged + " docs]");
            }

            if (MergeState.InfoStream.IsEnabled("SM"))
            {
                t0 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond;
            }
            if (MergeState.FieldInfos.HasDocValues())
            {
                MergeDocValues(segmentWriteState);
            }
            if (MergeState.InfoStream.IsEnabled("SM"))
            {
                long t1 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond;
                MergeState.InfoStream.Message("SM", ((t1 - t0) / 1000000) + " msec to merge doc values [" + numMerged + " docs]");
            }

            if (MergeState.FieldInfos.HasNorms())
            {
                if (MergeState.InfoStream.IsEnabled("SM"))
                {
                    t0 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond;
                }
                MergeNorms(segmentWriteState);
                if (MergeState.InfoStream.IsEnabled("SM"))
                {
                    long t1 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond;
                    MergeState.InfoStream.Message("SM", ((t1 - t0) / 1000000) + " msec to merge norms [" + numMerged + " docs]");
                }
            }

            if (MergeState.FieldInfos.HasVectors())
            {
                if (MergeState.InfoStream.IsEnabled("SM"))
                {
                    t0 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond;
                }
                numMerged = MergeVectors();
                if (MergeState.InfoStream.IsEnabled("SM"))
                {
                    long t1 = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond;
                    MergeState.InfoStream.Message("SM", ((t1 - t0) / 1000000) + " msec to merge vectors [" + numMerged + " docs]");
                }
                Debug.Assert(numMerged == MergeState.SegmentInfo.DocCount);
            }

            // write the merged infos
            FieldInfosWriter fieldInfosWriter = Codec.FieldInfosFormat().FieldInfosWriter;
            fieldInfosWriter.Write(Directory, MergeState.SegmentInfo.Name, "", MergeState.FieldInfos, Context);

            return MergeState;
        }
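
Merge() itself is internal plumbing; from application code a merge like the one above is normally triggered by the merge policy or explicitly via ForceMerge. A hedged sketch (Lucene.NET 4.8 API names assumed, not part of this source):

using Lucene.Net.Analysis.Standard;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;

internal static class ForceMergeSketch
{
    internal static void MergeDownToOneSegment(string indexPath)
    {
        using (var dir = FSDirectory.Open(indexPath))
        using (var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48))
        {
            var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
            using (var writer = new IndexWriter(dir, config))
            {
                writer.ForceMerge(1); // drives SegmentMerger for the surviving segment
                writer.Commit();
            }
        }
    }
}
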
Example #37
        internal void  ShrinkFreePostings(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
        {
            System.Diagnostics.Debug.Assert(postingsFreeCount == postingsAllocCount, System.Threading.Thread.CurrentThread.Name + ": postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer);

            int newSize = ArrayUtil.GetShrinkSize(postingsFreeList.Length, postingsAllocCount);

            if (newSize != postingsFreeList.Length)
            {
                RawPostingList[] newArray = new RawPostingList[newSize];
                Array.Copy(postingsFreeList, 0, newArray, 0, postingsFreeCount);
                postingsFreeList = newArray;
            }
        }
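
ArrayUtil.GetShrinkSize keeps the free-postings array from oscillating between sizes: it only shrinks when the grown size for the target count is substantially below the current allocation. The heuristic below is an illustrative approximation, not the exact Lucene.NET implementation:

internal static class ShrinkSizeSketch
{
    // Sketch of a GetShrinkSize-style rule.  The growth formula here
    // (target + target/8 + 3) is an assumption standing in for ArrayUtil's
    // real oversizing; the "shrink only if under half" guard is the point.
    internal static int GetShrinkSize(int currentSize, int targetSize)
    {
        int grown = targetSize + (targetSize >> 3) + 3;
        return grown < currentSize / 2 ? grown : currentSize;
    }
}
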
Example #38
        internal override void  Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
        {
            lock (this)
            {
                System.Collections.IDictionary childThreadsAndFields = new System.Collections.Hashtable();
                System.Collections.IDictionary nextThreadsAndFields;

                if (nextTermsHash != null)
                {
                    nextThreadsAndFields = new System.Collections.Hashtable();
                }
                else
                {
                    nextThreadsAndFields = null;
                }

                System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator();
                while (it.MoveNext())
                {
                    System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current;

                    TermsHashPerThread perThread = (TermsHashPerThread)entry.Key;

                    System.Collections.ICollection fields = (System.Collections.ICollection)entry.Value;

                    System.Collections.IEnumerator fieldsIt    = fields.GetEnumerator();
                    System.Collections.Hashtable   childFields = new System.Collections.Hashtable();
                    System.Collections.Hashtable   nextChildFields;

                    if (nextTermsHash != null)
                    {
                        nextChildFields = new System.Collections.Hashtable();
                    }
                    else
                    {
                        nextChildFields = null;
                    }

                    while (fieldsIt.MoveNext())
                    {
                        TermsHashPerField perField = (TermsHashPerField)((System.Collections.DictionaryEntry)fieldsIt.Current).Key;
                        childFields[perField.consumer] = perField.consumer;
                        if (nextTermsHash != null)
                        {
                            nextChildFields[perField.nextPerField] = perField.nextPerField;
                        }
                    }

                    childThreadsAndFields[perThread.consumer] = childFields;
                    if (nextTermsHash != null)
                    {
                        nextThreadsAndFields[perThread.nextPerThread] = nextChildFields;
                    }
                }

                consumer.Flush(childThreadsAndFields, state);

                ShrinkFreePostings(threadsAndFields, state);

                if (nextTermsHash != null)
                {
                    nextTermsHash.Flush(nextThreadsAndFields, state);
                }
            }
        }
Example #39
 public abstract void  CloseDocStore(SegmentWriteState state);
 internal abstract void  CloseDocStore(SegmentWriteState state);
Example #41
        // LUCENENET specific - original was internal, but FreqProxTermsWriter requires public (little point, since both are internal classes)
        public override void Flush(IDictionary <string, TermsHashConsumerPerField> fieldsToFlush, SegmentWriteState state)
        {
            if (writer != null)
            {
                int numDocs = state.SegmentInfo.DocCount;
                Debug.Assert(numDocs > 0);
                // At least one doc in this run had term vectors enabled
                try
                {
                    Fill(numDocs);
                    Debug.Assert(state.SegmentInfo != null);
                    writer.Finish(state.FieldInfos, numDocs);
                }
                finally
                {
                    IOUtils.Dispose(writer);
                    writer     = null;
                    lastDocID  = 0;
                    hasVectors = false;
                }
            }

            foreach (TermsHashConsumerPerField field in fieldsToFlush.Values)
            {
                TermVectorsConsumerPerField perField = (TermVectorsConsumerPerField)field;
                perField.termsHashPerField.Reset();
                perField.ShrinkHash();
            }
        }
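
The writer above only exists when some document in the flushed batch actually stored term vectors. Vectors are opted into per field; a hedged sketch of such a field definition (standard Lucene.NET 4.8 document API assumed):

using Lucene.Net.Documents;

internal static class TermVectorFieldSketch
{
    internal static Field MakeVectorizedField(string name, string value)
    {
        var type = new FieldType(TextField.TYPE_NOT_STORED)
        {
            StoreTermVectors = true,
            StoreTermVectorPositions = true,
            StoreTermVectorOffsets = true
        };
        return new Field(name, value, type);
    }
}
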
Example #42
        public override void Flush(IDictionary <string, TermsHashConsumerPerField> fieldsToFlush, SegmentWriteState state)
        {
            // Gather all FieldData's that have postings, across all
            // ThreadStates
            IList <FreqProxTermsWriterPerField> allFields = new List <FreqProxTermsWriterPerField>();

            foreach (TermsHashConsumerPerField f in fieldsToFlush.Values)
            {
                FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)f;
                if (perField.termsHashPerField.bytesHash.Count > 0)
                {
                    allFields.Add(perField);
                }
            }

            int numAllFields = allFields.Count;

            // Sort by field name
            CollectionUtil.IntroSort(allFields);

            FieldsConsumer consumer = state.SegmentInfo.Codec.PostingsFormat.FieldsConsumer(state);

            bool success = false;

            try
            {
                TermsHash termsHash = null;

                /*
                 * Current writer chain:
                 * FieldsConsumer
                 * -> IMPL: FormatPostingsTermsDictWriter
                 *  -> TermsConsumer
                 *    -> IMPL: FormatPostingsTermsDictWriter.TermsWriter
                 *      -> DocsConsumer
                 *        -> IMPL: FormatPostingsDocsWriter
                 *          -> PositionsConsumer
                 *            -> IMPL: FormatPostingsPositionsWriter
                 */

                for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++)
                {
                    FieldInfo fieldInfo = allFields[fieldNumber].fieldInfo;

                    FreqProxTermsWriterPerField fieldWriter = allFields[fieldNumber];

                    // If this field has postings then add them to the
                    // segment
                    fieldWriter.Flush(fieldInfo.Name, consumer, state);

                    TermsHashPerField perField = fieldWriter.termsHashPerField;
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(termsHash == null || termsHash == perField.termsHash);
                    }
                    termsHash = perField.termsHash;
                    int numPostings = perField.bytesHash.Count;
                    perField.Reset();
                    perField.ShrinkHash(/* numPostings // LUCENENET: Not used */);
                    fieldWriter.Reset();
                }

                if (termsHash != null)
                {
                    termsHash.Reset();
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Dispose(consumer);
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(consumer);
                }
            }
        }
Example #43
        internal override void Flush(IDictionary <string, InvertedDocConsumerPerField> fieldsToFlush, SegmentWriteState state)
        {
            IDictionary <string, TermsHashConsumerPerField>   childFields = new Dictionary <string, TermsHashConsumerPerField>();
            IDictionary <string, InvertedDocConsumerPerField> nextChildFields;

            if (nextTermsHash != null)
            {
                nextChildFields = new Dictionary <string, InvertedDocConsumerPerField>();
            }
            else
            {
                nextChildFields = null;
            }

            foreach (KeyValuePair <string, InvertedDocConsumerPerField> entry in fieldsToFlush)
            {
                TermsHashPerField perField = (TermsHashPerField)entry.Value;
                childFields[entry.Key] = perField.consumer;
                if (nextTermsHash != null)
                {
                    nextChildFields[entry.Key] = perField.nextPerField;
                }
            }

            consumer.Flush(childFields, state);

            if (nextTermsHash != null)
            {
                nextTermsHash.Flush(nextChildFields, state);
            }
        }
Example #44
 internal override void  CloseDocStore(SegmentWriteState state)
 {
 }
Example #45
 internal abstract void Flush(IDictionary <string, InvertedDocEndConsumerPerField> fieldsToFlush, SegmentWriteState state);
Example #46
        // TODO: would be nice to factor out more of this, e.g. the
        // FreqProxFieldMergeState, and code to visit all Fields
        // under the same FieldInfo together, up into TermsHash*.
        // Other writers would presumably share a lot of this...

        public override void Flush(Support.Dictionary <TermsHashConsumerPerThread, IList <TermsHashConsumerPerField> > threadsAndFields, SegmentWriteState state)
        {
            // Gather all FieldData's that have postings, across all
            // ThreadStates
            List <FreqProxTermsWriterPerField> allFields = new List <FreqProxTermsWriterPerField>();

            foreach (KeyValuePair <TermsHashConsumerPerThread, IList <TermsHashConsumerPerField> > entry in threadsAndFields)
            {
                IList <TermsHashConsumerPerField> fields = entry.Value;
                foreach (TermsHashConsumerPerField i in fields)
                {
                    FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)i;
                    if (perField.termsHashPerField.numPostings > 0)
                    {
                        allFields.Add(perField);
                    }
                }
            }

            // Sort by field name
            allFields.Sort();
            int numAllFields = allFields.Count;

            // TODO: allow Lucene user to customize this consumer:
            FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);

            /*
             * Current writer chain:
             * FormatPostingsFieldsConsumer
             * -> IMPL: FormatPostingsFieldsWriter
             * -> FormatPostingsTermsConsumer
             * -> IMPL: FormatPostingsTermsWriter
             * -> FormatPostingsDocConsumer
             * -> IMPL: FormatPostingsDocWriter
             * -> FormatPostingsPositionsConsumer
             * -> IMPL: FormatPostingsPositionsWriter
             */

            int start = 0;

            while (start < numAllFields)
            {
                FieldInfo     fieldInfo = ((FreqProxTermsWriterPerField)allFields[start]).fieldInfo;
                System.String fieldName = fieldInfo.name;

                int end = start + 1;
                while (end < numAllFields && ((FreqProxTermsWriterPerField)allFields[end]).fieldInfo.name.Equals(fieldName))
                {
                    end++;
                }

                FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start];
                for (int i = start; i < end; i++)
                {
                    fields[i - start] = (FreqProxTermsWriterPerField)allFields[i];

                    // Aggregate the storePayload as seen by the same
                    // field across multiple threads
                    fieldInfo.storePayloads |= fields[i - start].hasPayloads;
                }

                // If this field has postings then add them to the
                // segment
                AppendPostings(fields, consumer);

                for (int i = 0; i < fields.Length; i++)
                {
                    TermsHashPerField perField = fields[i].termsHashPerField;
                    int numPostings            = perField.numPostings;
                    perField.Reset();
                    perField.ShrinkHash(numPostings);
                    fields[i].Reset();
                }

                start = end;
            }

            foreach (KeyValuePair <TermsHashConsumerPerThread, IList <TermsHashConsumerPerField> > entry in threadsAndFields)
            {
                FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread)entry.Key;
                perThread.termsHashPerThread.Reset(true);
            }

            consumer.Finish();
        }
 internal FormatPostingsTermsWriter(SegmentWriteState state, FormatPostingsFieldsWriter parent) : base()
 {
     this.parent = parent;
     termsOut    = parent.termsOut;
     docsWriter  = new FormatPostingsDocsWriter(state, this);
 }
Example #48
        private void MergeDocValues(SegmentWriteState segmentWriteState)
        {
            DocValuesConsumer consumer = codec.DocValuesFormat.FieldsConsumer(segmentWriteState);
            bool success = false;

            try
            {
                foreach (FieldInfo field in mergeState.FieldInfos)
                {
                    DocValuesType type = field.DocValuesType;
                    if (type != DocValuesType.NONE)
                    {
                        if (type == DocValuesType.NUMERIC)
                        {
                            IList <NumericDocValues> toMerge       = new List <NumericDocValues>();
                            IList <IBits>            docsWithField = new List <IBits>();
                            foreach (AtomicReader reader in mergeState.Readers)
                            {
                                NumericDocValues values = reader.GetNumericDocValues(field.Name);
                                IBits            bits   = reader.GetDocsWithField(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_NUMERIC;
                                    bits   = new Lucene.Net.Util.Bits.MatchNoBits(reader.MaxDoc);
                                }
                                toMerge.Add(values);
                                docsWithField.Add(bits);
                            }
                            consumer.MergeNumericField(field, mergeState, toMerge, docsWithField);
                        }
                        else if (type == DocValuesType.BINARY)
                        {
                            IList <BinaryDocValues> toMerge       = new List <BinaryDocValues>();
                            IList <IBits>           docsWithField = new List <IBits>();
                            foreach (AtomicReader reader in mergeState.Readers)
                            {
                                BinaryDocValues values = reader.GetBinaryDocValues(field.Name);
                                IBits           bits   = reader.GetDocsWithField(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_BINARY;
                                    bits   = new Lucene.Net.Util.Bits.MatchNoBits(reader.MaxDoc);
                                }
                                toMerge.Add(values);
                                docsWithField.Add(bits);
                            }
                            consumer.MergeBinaryField(field, mergeState, toMerge, docsWithField);
                        }
                        else if (type == DocValuesType.SORTED)
                        {
                            IList <SortedDocValues> toMerge = new List <SortedDocValues>();
                            foreach (AtomicReader reader in mergeState.Readers)
                            {
                                SortedDocValues values = reader.GetSortedDocValues(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_SORTED;
                                }
                                toMerge.Add(values);
                            }
                            consumer.MergeSortedField(field, mergeState, toMerge);
                        }
                        else if (type == DocValuesType.SORTED_SET)
                        {
                            IList <SortedSetDocValues> toMerge = new List <SortedSetDocValues>();
                            foreach (AtomicReader reader in mergeState.Readers)
                            {
                                SortedSetDocValues values = reader.GetSortedSetDocValues(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_SORTED_SET;
                                }
                                toMerge.Add(values);
                            }
                            consumer.MergeSortedSetField(field, mergeState, toMerge);
                        }
                        else
                        {
                            throw new InvalidOperationException("type=" + type);
                        }
                    }
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Dispose(consumer);
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(consumer);
                }
            }
        }
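
MergeDocValues only has work to do for fields that were indexed with a doc-values type in the first place. A hedged sketch of how such fields typically enter a segment (stock Lucene.NET 4.8 document API assumed):

using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Util;

internal static class DocValuesFieldSketch
{
    internal static void AddDocWithDocValues(IndexWriter writer)
    {
        var doc = new Document();
        doc.Add(new StringField("id", "42", Field.Store.YES));
        doc.Add(new NumericDocValuesField("views", 1000L));                         // handled by MergeNumericField
        doc.Add(new BinaryDocValuesField("thumb", new BytesRef(new byte[] { 1 }))); // handled by MergeBinaryField
        doc.Add(new SortedDocValuesField("category", new BytesRef("books")));       // handled by MergeSortedField
        writer.AddDocument(doc);
    }
}
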
 public abstract void Flush(IDictionary <string, TermsHashConsumerPerField> fieldsToFlush, SegmentWriteState state);
Example #50
 private void MergeNorms(SegmentWriteState segmentWriteState)
 {
     DocValuesConsumer consumer = Codec.NormsFormat().NormsConsumer(segmentWriteState);
     bool success = false;
     try
     {
         foreach (FieldInfo field in MergeState.FieldInfos)
         {
             if (field.HasNorms())
             {
                 IList<NumericDocValues> toMerge = new List<NumericDocValues>();
                 //IList<Bits> docsWithField = new List<Bits>();
                 foreach (AtomicReader reader in MergeState.Readers)
                 {
                     NumericDocValues norms = reader.GetNormValues(field.Name);
                     if (norms == null)
                     {
                         norms = DocValues.EMPTY_NUMERIC;
                     }
                     toMerge.Add(norms);
                     //docsWithField.Add(new Lucene.Net.Util.Bits_MatchAllBits(reader.MaxDoc));
                 }
                 consumer.MergeNumericField(field, MergeState, toMerge/*, docsWithField*/);
             }
         }
         success = true;
     }
     finally
     {
         if (success)
         {
             IOUtils.Close(consumer);
         }
         else
         {
             IOUtils.CloseWhileHandlingException(consumer);
         }
     }
 }
Example #51
 public override void  CloseDocStore(SegmentWriteState state)
 {
     consumer.CloseDocStore(state);
     fieldsWriter.CloseDocStore(state);
 }
Example #52
        internal override void Flush(IDictionary <string, InvertedDocEndConsumerPerField> fieldsToFlush, SegmentWriteState state)
        {
            bool success = false;
            DocValuesConsumer normsConsumer = null;

            try
            {
                if (state.FieldInfos.HasNorms)
                {
                    NormsFormat normsFormat = state.SegmentInfo.Codec.NormsFormat;
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(normsFormat != null);
                    }
                    normsConsumer = normsFormat.NormsConsumer(state);

                    foreach (FieldInfo fi in state.FieldInfos)
                    {
                        NormsConsumerPerField toWrite = (NormsConsumerPerField)fieldsToFlush[fi.Name];
                        // we must check the final value of omitNorms for the fieldinfo, it could have
                        // changed for this field since the first time we added it.
                        if (!fi.OmitsNorms)
                        {
                            if (toWrite != null && !toWrite.IsEmpty)
                            {
                                toWrite.Flush(state, normsConsumer);
                                if (Debugging.AssertsEnabled)
                                {
                                    Debugging.Assert(fi.NormType == DocValuesType.NUMERIC);
                                }
                            }
                            else if (fi.IsIndexed)
                            {
                                if (Debugging.AssertsEnabled)
                                {
                                    Debugging.Assert(fi.NormType == DocValuesType.NONE, "got {0}; field={1}", fi.NormType, fi.Name);
                                }
                            }
                        }
                    }
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Dispose(normsConsumer);
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(normsConsumer);
                }
            }
        }
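
The OmitsNorms check above mirrors how the field was declared at index time: fields indexed with omitNorms enabled never get a norms entry flushed for them. A hedged sketch of such a declaration (Lucene.NET 4.8 API assumed):

using Lucene.Net.Documents;

internal static class OmitNormsFieldSketch
{
    internal static Field MakeUnnormedField(string name, string value)
    {
        var type = new FieldType(TextField.TYPE_NOT_STORED)
        {
            OmitNorms = true // no length-normalization factor is written for this field
        };
        return new Field(name, value, type);
    }
}
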
Example #53
        public override void Flush(IDictionary <DocConsumerPerThread, DocConsumerPerThread> threads, SegmentWriteState state)
        {
            Support.Dictionary <DocFieldConsumerPerThread, IList <DocFieldConsumerPerField> > childThreadsAndFields = new Support.Dictionary <DocFieldConsumerPerThread, IList <DocFieldConsumerPerField> >();
            foreach (DocFieldProcessorPerThread perThread in threads.Keys)
            {
                childThreadsAndFields[perThread.consumer] = perThread.Fields();
                perThread.TrimFields(state);
            }
            fieldsWriter.Flush(state);
            consumer.Flush(childThreadsAndFields, state);

            // Important to save after asking consumer to flush so
            // consumer can alter the FieldInfo* if necessary.  EG,
            // FreqProxTermsWriter does this with
            // FieldInfo.storePayload.
            System.String fileName = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
            fieldInfos.Write(state.directory, fileName);
            state.flushedFiles.Add(fileName);
        }
Example #54
 public abstract void  Flush(System.Collections.ICollection threads, SegmentWriteState state);
Example #55
        private void MergeTerms(SegmentWriteState segmentWriteState)
        {
            IList<Fields> fields = new List<Fields>();
            IList<ReaderSlice> slices = new List<ReaderSlice>();

            int docBase = 0;

            for (int readerIndex = 0; readerIndex < MergeState.Readers.Count; readerIndex++)
            {
                AtomicReader reader = MergeState.Readers[readerIndex];
                Fields f = reader.Fields;
                int maxDoc = reader.MaxDoc;
                if (f != null)
                {
                    slices.Add(new ReaderSlice(docBase, maxDoc, readerIndex));
                    fields.Add(f);
                }
                docBase += maxDoc;
            }

            FieldsConsumer consumer = Codec.PostingsFormat().FieldsConsumer(segmentWriteState);
            bool success = false;
            try
            {
                consumer.Merge(MergeState, new MultiFields(fields.ToArray(/*Fields.EMPTY_ARRAY*/), slices.ToArray(/*ReaderSlice.EMPTY_ARRAY*/)));
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(consumer);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(consumer);
                }
            }
        }
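
After the merged postings are written, they are visible through the normal reader-side API. A hedged sketch of inspecting them (Lucene.NET 4.8 names assumed, not part of this source):

using Lucene.Net.Index;
using Lucene.Net.Store;

internal static class MergedTermsSketch
{
    internal static int CountDocsWithField(Directory dir, string field)
    {
        using (DirectoryReader reader = DirectoryReader.Open(dir))
        {
            Terms terms = MultiFields.GetTerms(reader, field);
            return terms == null ? 0 : terms.DocCount; // docs with at least one term in this field
        }
    }
}
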
Example #56
        internal void  ShrinkFreePostings(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
        {
            System.Diagnostics.Debug.Assert(postingsFreeCount == postingsAllocCount, System.Threading.Thread.CurrentThread.Name + ": postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer);

            int newSize = 1;

            if (newSize != postingsFreeList.Length)
            {
                if (postingsFreeCount > newSize)
                {
                    if (trackAllocations)
                    {
                        docWriter.BytesAllocated(-(postingsFreeCount - newSize) * bytesPerPosting);
                    }
                    postingsFreeCount  = newSize;
                    postingsAllocCount = newSize;
                }

                RawPostingList[] newArray = new RawPostingList[newSize];
                Array.Copy(postingsFreeList, 0, newArray, 0, postingsFreeCount);
                postingsFreeList = newArray;
            }
        }
		/// <summary>If there are fields we've seen but did not see again
		/// in the last run, then free them up. 
		/// </summary>
		
		internal void  TrimFields(SegmentWriteState state)
		{
			
			for (int i = 0; i < fieldHash.Length; i++)
			{
				DocFieldProcessorPerField perField = fieldHash[i];
				DocFieldProcessorPerField lastPerField = null;
				
				while (perField != null)
				{
					
					if (perField.lastGen == - 1)
					{
						
						// This field was not seen since the previous
						// flush, so, free up its resources now
						
						// Unhash
						if (lastPerField == null)
							fieldHash[i] = perField.next;
						else
							lastPerField.next = perField.next;
						
						if (state.docWriter.infoStream != null)
							state.docWriter.infoStream.WriteLine("  purge field=" + perField.fieldInfo.name);
						
						totalFieldCount--;
					}
					else
					{
						// Reset
						perField.lastGen = - 1;
						lastPerField = perField;
					}
					
					perField = perField.next;
				}
			}
		}
Example #58
 public abstract void  Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state);
		// TODO: would be nice to factor out more of this, e.g. the
		// FreqProxFieldMergeState, and code to visit all Fields
		// under the same FieldInfo together, up into TermsHash*.
		// Other writers would presumably share a lot of this...
        public override void Flush(IDictionary<TermsHashConsumerPerThread, ICollection<TermsHashConsumerPerField>> threadsAndFields, SegmentWriteState state)
		{
			
			// Gather all FieldData's that have postings, across all
			// ThreadStates
			var allFields = new List<FreqProxTermsWriterPerField>();

            foreach(var entry in threadsAndFields)
			{
				var fields = entry.Value;
				
				foreach(var i in fields)
				{
					FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)i;
					if (perField.termsHashPerField.numPostings > 0)
						allFields.Add(perField);
				}
			}
			
			// Sort by field name
            allFields.Sort();
			int numAllFields = allFields.Count;
			
			// TODO: allow Lucene user to customize this consumer:
			FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);
			/*
			Current writer chain:
			FormatPostingsFieldsConsumer
			-> IMPL: FormatPostingsFieldsWriter
			-> FormatPostingsTermsConsumer
			-> IMPL: FormatPostingsTermsWriter
			-> FormatPostingsDocConsumer
			-> IMPL: FormatPostingsDocWriter
			-> FormatPostingsPositionsConsumer
			-> IMPL: FormatPostingsPositionsWriter
			*/
			
			int start = 0;
			while (start < numAllFields)
			{
				FieldInfo fieldInfo = allFields[start].fieldInfo;
				System.String fieldName = fieldInfo.name;
				
				int end = start + 1;
				while (end < numAllFields && allFields[end].fieldInfo.name.Equals(fieldName))
					end++;
				
				FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start];
				for (int i = start; i < end; i++)
				{
					fields[i - start] = allFields[i];
					
					// Aggregate the storePayload as seen by the same
					// field across multiple threads
					fieldInfo.storePayloads |= fields[i - start].hasPayloads;
				}
				
				// If this field has postings then add them to the
				// segment
				AppendPostings(fields, consumer);
				
				for (int i = 0; i < fields.Length; i++)
				{
					TermsHashPerField perField = fields[i].termsHashPerField;
					int numPostings = perField.numPostings;
					perField.Reset();
					perField.ShrinkHash(numPostings);
					fields[i].Reset();
				}
				
				start = end;
			}

            foreach(var entry in threadsAndFields)
			{
				FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread) entry.Key;
				perThread.termsHashPerThread.Reset(true);
			}
			
			consumer.Finish();
		}
Example #60
        internal override void Flush(IDictionary <string, DocFieldConsumerPerField> fieldsToFlush, SegmentWriteState state)
        {
            IDictionary <string, InvertedDocConsumerPerField>    childFieldsToFlush    = new Dictionary <string, InvertedDocConsumerPerField>();
            IDictionary <string, InvertedDocEndConsumerPerField> endChildFieldsToFlush = new Dictionary <string, InvertedDocEndConsumerPerField>();

            foreach (KeyValuePair <string, DocFieldConsumerPerField> fieldToFlush in fieldsToFlush)
            {
                DocInverterPerField perField = (DocInverterPerField)fieldToFlush.Value;
                childFieldsToFlush[fieldToFlush.Key]    = perField.consumer;
                endChildFieldsToFlush[fieldToFlush.Key] = perField.endConsumer;
            }

            consumer.Flush(childFieldsToFlush, state);
            endConsumer.Flush(endChildFieldsToFlush, state);
        }