internal DefaultSkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount, IndexOutput freqOutput, IndexOutput proxOutput):base(skipInterval, numberOfSkipLevels, docCount)
		{
			// Per-level memory of the most recently written skip entry. Each array
			// has one slot per skip level and starts out zero-filled.
			lastSkipDoc = new int[numberOfSkipLevels];
			lastSkipFreqPointer = new long[numberOfSkipLevels];
			lastSkipProxPointer = new long[numberOfSkipLevels];
			lastSkipPayloadLength = new int[numberOfSkipLevels];
			
			// Outputs kept on this writer; they can be swapped later through
			// SetFreqOutput / SetProxOutput.
			this.freqOutput = freqOutput;
			this.proxOutput = proxOutput;
		}
		protected internal override void  WriteSkipData(int level, IndexOutput skipBuffer)
		{
			// Layout of one skip entry (every value is written as a VInt):
			//
			//   Field without payloads:
			//       DocSkip, FreqSkip, ProxSkip
			//     DocSkip is the document delta to the previous skip entry on this level.
			//
			//   Field with payloads:
			//       DocSkip, PayloadLength?, FreqSkip, ProxSkip
			//     DocSkip is (document delta * 2), with the lowest bit used as a flag:
			//       even -> payload length is unchanged since the previous skip entry
			//               on this level, so no PayloadLength follows;
			//       odd  -> a PayloadLength VInt follows.
			//
			// Payload lengths are only written when they change, but a reader that
			// jumps to a skip point still has to know the length in effect there,
			// which is why the skip data carries it using this change-flag scheme.
			int docDelta = curDoc - lastSkipDoc[level];
			
			if (!curStorePayloads)
			{
				// No payloads: the plain delta is enough.
				skipBuffer.WriteVInt(docDelta);
			}
			else if (curPayloadLength != lastSkipPayloadLength[level])
			{
				// Length changed: flag it in the low bit, then write and remember the new length.
				skipBuffer.WriteVInt(docDelta * 2 + 1);
				skipBuffer.WriteVInt(curPayloadLength);
				lastSkipPayloadLength[level] = curPayloadLength;
			}
			else
			{
				// Same length as at the previous skip entry: low bit stays clear.
				skipBuffer.WriteVInt(docDelta * 2);
			}
			
			// File pointers are stored as deltas as well, narrowed to int for WriteVInt.
			long freqDelta = curFreqPointer - lastSkipFreqPointer[level];
			long proxDelta = curProxPointer - lastSkipProxPointer[level];
			skipBuffer.WriteVInt((int) freqDelta);
			skipBuffer.WriteVInt((int) proxDelta);
			
			// The entry just written becomes the reference point for the next one on this level.
			lastSkipDoc[level] = curDoc;
			lastSkipFreqPointer[level] = curFreqPointer;
			lastSkipProxPointer[level] = curProxPointer;
		}
예제 #3
0
		public TermVectorsWriter(Directory directory, System.String segment, FieldInfos fieldInfos)
		{
			this.fieldInfos = fieldInfos;
			
			// Create the three term vector files for this segment (index, documents,
			// fields, in that order) and stamp each with the current format marker
			// right after it is created.
			System.String prefix = segment + ".";
			
			tvx = directory.CreateOutput(prefix + IndexFileNames.VECTORS_INDEX_EXTENSION);
			tvx.WriteInt(TermVectorsReader.FORMAT_CURRENT);
			
			tvd = directory.CreateOutput(prefix + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
			tvd.WriteInt(TermVectorsReader.FORMAT_CURRENT);
			
			tvf = directory.CreateOutput(prefix + IndexFileNames.VECTORS_FIELDS_EXTENSION);
			tvf.WriteInt(TermVectorsReader.FORMAT_CURRENT);
		}
		internal FormatPostingsDocsWriter(SegmentWriteState state, FormatPostingsTermsWriter parent):base()
		{
			this.parent = parent;
			
			// Everything below is pulled from the owner of the terms writer.
			var owner = parent.parent;
			
			// Create the freq file for the segment and record it as flushed.
			System.String freqFileName = IndexFileNames.SegmentFileName(owner.segment, IndexFileNames.FREQ_EXTENSION);
			state.flushedFiles.Add(freqFileName);
			out_Renamed = owner.dir.CreateOutput(freqFileName);
			totalNumDocs = owner.totalNumDocs;
			
			// TODO: abstraction violation
			skipInterval = owner.termsOut.skipInterval;
			
			// Share the owner's skip list writer and point it at the freq output just created.
			skipListWriter = owner.skipListWriter;
			skipListWriter.SetFreqOutput(out_Renamed);
			
			// The positions writer is created last; it is handed this partially built docs writer.
			posWriter = new FormatPostingsPositionsWriter(state, this);
		}
		internal FormatPostingsPositionsWriter(SegmentWriteState state, FormatPostingsDocsWriter parent)
		{
			this.parent = parent;
			omitTermFreqAndPositions = parent.omitTermFreqAndPositions;
			
			var owner = parent.parent.parent;
			if (!owner.fieldInfos.HasProx())
			{
				// Every field omits TF so we will write no prox file
				out_Renamed = null;
				return;
			}
			
			// At least one field does not omit TF: create the prox file, record it
			// as flushed and hand the output to the shared skip list writer.
			System.String proxFileName = IndexFileNames.SegmentFileName(owner.segment, IndexFileNames.PROX_EXTENSION);
			state.flushedFiles.Add(proxFileName);
			out_Renamed = owner.dir.CreateOutput(proxFileName);
			parent.skipListWriter.SetProxOutput(out_Renamed);
		}
예제 #6
0
		/// <summary>Flushes this stream, then copies everything it holds to
		/// <paramref name="out_Renamed"/>, one internal buffer at a time.
		/// </summary>
		/// <param name="out_Renamed">the output that receives the bytes</param>
		public virtual void  WriteTo(IndexOutput out_Renamed)
		{
			Flush();
			long total = file.length;
			int bufferIndex = 0;
			for (long offset = 0; offset < total; offset += BUFFER_SIZE)
			{
				// Every buffer is copied in full except possibly the last one,
				// which only holds the bytes that remain.
				long remaining = total - offset;
				int chunk = remaining < BUFFER_SIZE ? (int) remaining : BUFFER_SIZE;
				out_Renamed.WriteBytes(file.GetBuffer(bufferIndex), chunk);
				bufferIndex++;
			}
		}
예제 #7
0
		/// <summary>Writes the unread bytes of the current slice and of every following
		/// slice to <paramref name="out"/>, stopping after the slice that ends at
		/// <c>endIndex</c>.
		/// </summary>
		/// <param name="out">the output that receives the bytes</param>
		/// <returns>the total number of bytes written</returns>
		public long WriteTo(IndexOutput @out)
		{
			long written = 0;
			bool finalSlice;
			do
			{
				// The slice whose limit lines up with endIndex is the last one.
				finalSlice = (limit + bufferOffset == endIndex);
				if (finalSlice)
				{
					System.Diagnostics.Debug.Assert(endIndex - bufferOffset >= upto);
				}
				
				@out.WriteBytes(buffer, upto, limit - upto);
				written += limit - upto;
				
				if (!finalSlice)
				{
					NextSlice();
				}
			}
			while (!finalSlice);
			
			return written;
		}
		/// <summary>Replaces the output stored in this writer's <c>freqOutput</c> field.</summary>
		/// <param name="freqOutput">the output to store</param>
		internal virtual void  SetFreqOutput(IndexOutput freqOutput)
		{
			this.freqOutput = freqOutput;
		}
		/// <summary> Copies the buffered skip data of every level to the given output.
		/// Levels are written from the highest down to level 0. Each level above 0
		/// that holds data is prefixed with its byte length as a VLong, and levels
		/// above 0 without data are skipped; level 0 is written last with no prefix.
		/// 
		/// </summary>
		/// <param name="output">the IndexOutput the skip lists shall be written to 
		/// </param>
		/// <returns> the file pointer of <paramref name="output"/> before anything was
		/// written, i.e. where the skip list starts
		/// </returns>
		internal virtual long WriteSkip(IndexOutput output)
		{
			long skipPointer = output.FilePointer;
			
			bool haveBuffers = skipBuffer != null && skipBuffer.Length != 0;
			if (haveBuffers)
			{
				int level = numberOfSkipLevels - 1;
				while (level > 0)
				{
					var levelBuffer = skipBuffer[level];
					long length = levelBuffer.FilePointer;
					if (length > 0)
					{
						output.WriteVLong(length);
						levelBuffer.WriteTo(output);
					}
					level--;
				}
				skipBuffer[0].WriteTo(output);
			}
			
			return skipPointer;
		}
		/// <summary> Subclasses must implement the actual skip data encoding in this method.
		/// 
		/// </summary>
		/// <param name="level">the level the skip data shall be written for
		/// </param>
		/// <param name="skipBuffer">the skip buffer to write to
		/// </param>
		protected internal abstract void  WriteSkipData(int level, IndexOutput skipBuffer);
예제 #11
0
		/// <summary>Stores the settings, creates the output file for the segment
		/// (".tii" when <paramref name="isi"/> is true, ".tis" otherwise) and writes
		/// the file header.
		/// </summary>
		private void  Initialize(Directory directory, System.String segment, FieldInfos fis, int interval, bool isi)
		{
			isIndex = isi;
			fieldInfos = fis;
			indexInterval = interval;
			
			System.String extension = isIndex ? ".tii" : ".tis";
			output = directory.CreateOutput(segment + extension);
			
			// Header: format, size placeholder, then the three interval/level settings.
			output.WriteInt(FORMAT_CURRENT); // write format
			output.WriteLong(0); // leave space for size
			output.WriteInt(indexInterval); // write indexInterval
			output.WriteInt(skipInterval); // write skipInterval
			output.WriteInt(maxSkipLevels); // write maxSkipLevels
			
			// NOTE: InitUTF16Results() is only invoked when Debug.Assert calls are compiled in.
			System.Diagnostics.Debug.Assert(InitUTF16Results());
		}
		/// <summary>Closes the term vector files if they are open, checks the size of
		/// the index file, and moves the three file names from the writer's open-file
		/// bookkeeping to the flushed files of <paramref name="state"/>.
		/// </summary>
		internal override void  CloseDocStore(SegmentWriteState state)
		{
			lock (this)
			{
				if (tvx == null)
				{
					// Nothing is open, so there is nothing to close.
					return;
				}
				
				// At least one doc in this run had term vectors enabled
				Fill(state.numDocsInStore - docWriter.DocStoreOffset);
				tvx.Close();
				tvf.Close();
				tvd.Close();
				tvx = null;
				System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);
				
				System.String prefix = state.docStoreSegmentName + ".";
				System.String fileName = prefix + IndexFileNames.VECTORS_INDEX_EXTENSION;
				System.String fieldsFile = prefix + IndexFileNames.VECTORS_FIELDS_EXTENSION;
				System.String documentsFile = prefix + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION;
				
				// The index file must be a 4-byte header plus 16 bytes per document.
				long expectedLength = 4 + ((long) state.numDocsInStore) * 16;
				if (expectedLength != state.directory.FileLength(fileName))
				{
					throw new System.SystemException("after flush: tvx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fileName) + " length in bytes of " + fileName + " file exists?=" + state.directory.FileExists(fileName));
				}
				
				state.flushedFiles.Add(fileName);
				state.flushedFiles.Add(fieldsFile);
				state.flushedFiles.Add(documentsFile);
				
				docWriter.RemoveOpenFile(fileName);
				docWriter.RemoveOpenFile(fieldsFile);
				docWriter.RemoveOpenFile(documentsFile);
				
				lastDocID = 0;
			}
		}
예제 #13
0
		/// <summary>Streams the file named by <paramref name="source"/> into
		/// <paramref name="os"/>, using <paramref name="buffer"/> as the transfer
		/// buffer so that no extra memory is allocated per copy.
		/// </summary>
		/// <exception cref="System.IO.IOException">if bytes remain uncopied after the loop, or the
		/// number of bytes added to <paramref name="os"/> differs from the source length</exception>
		private void  CopyFile(FileEntry source, IndexOutput os, byte[] buffer)
		{
			IndexInput input = null;
			try
			{
				long startPtr = os.FilePointer;
				
				input = directory.OpenInput(source.file);
				long length = input.Length();
				int chunk = buffer.Length;
				
				long remainder;
				for (remainder = length; remainder > 0; )
				{
					// Move at most one buffer's worth per iteration.
					int len = remainder < chunk ? (int) remainder : chunk;
					input.ReadBytes(buffer, 0, len, false);
					os.WriteBytes(buffer, len);
					remainder -= len;
					
					// Report progress after each chunk; presumably this is what lets
					// a long-running copy be aborted (TODO confirm against checkAbort).
					if (checkAbort != null)
					{
						checkAbort.Work(80);
					}
				}
				
				// Sanity check: the loop must have consumed exactly the whole file.
				if (remainder != 0)
				{
					throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");
				}
				
				// Sanity check: the output must have grown by exactly the source length.
				long diff = os.FilePointer - startPtr;
				if (diff != length)
				{
					throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
				}
			}
			finally
			{
				if (input != null)
				{
					input.Close();
				}
			}
		}
		/// <summary>Creates the three term vector files for the current doc store
		/// segment if they are not open yet. Does nothing when they are already open
		/// or when the doc writer reports no doc store segment.
		/// </summary>
		internal void  InitTermVectorsWriter()
		{
			lock (this)
			{
				if (tvx != null)
				{
					// Already initialised.
					return;
				}
				
				System.String docStoreSegment = docWriter.DocStoreSegment;
				if (docStoreSegment == null)
				{
					return;
				}
				
				System.String indexFile = docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
				System.String documentsFile = docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION;
				System.String fieldsFile = docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION;
				
				// If we hit an exception while init'ing the term
				// vector output files, we must abort this segment
				// because those files will be in an unknown
				// state:
				tvx = docWriter.directory.CreateOutput(indexFile);
				tvd = docWriter.directory.CreateOutput(documentsFile);
				tvf = docWriter.directory.CreateOutput(fieldsFile);
				
				// Each file starts with the current format marker.
				tvx.WriteInt(TermVectorsReader.FORMAT_CURRENT);
				tvd.WriteInt(TermVectorsReader.FORMAT_CURRENT);
				tvf.WriteInt(TermVectorsReader.FORMAT_CURRENT);
				
				docWriter.AddOpenFile(indexFile);
				docWriter.AddOpenFile(fieldsFile);
				docWriter.AddOpenFile(documentsFile);
				
				lastDocID = 0;
			}
		}
예제 #15
0
		/// <summary>Writes the vector in d-gaps form: a -1 marker, the size, the count,
		/// and then for every non-zero byte the distance from the previous non-zero
		/// byte's index (as a VInt) followed by the byte itself.
		/// </summary>
		private void  WriteDgaps(IndexOutput output)
		{
			output.WriteInt(- 1); // mark using d-gaps
			output.WriteInt(Size()); // write size
			output.WriteInt(Count()); // write count
			
			int remaining = Count(); // set bits still to be written
			int previous = 0; // index of the last non-zero byte written
			int index = 0;
			while (index < bits.Length && remaining > 0)
			{
				var current = bits[index];
				if (current != 0)
				{
					output.WriteVInt(index - previous);
					output.WriteByte(current);
					previous = index;
					// BYTE_COUNTS is indexed by the byte value; subtract its entry
					// so the loop can stop once every set bit has been written.
					remaining -= BYTE_COUNTS[current & 0xFF];
				}
				index++;
			}
		}
예제 #16
0
		/// <summary>Closes the fields stream and the index stream, but only when
		/// <c>doClose</c> is set; otherwise this method does nothing. Closing the
		/// index stream is attempted even when closing the fields stream fails, and
		/// each stream field is set to null once its close has been attempted.
		/// </summary>
		/// <exception cref="System.IO.IOException">if closing a stream fails; when both closes
		/// fail with an IOException, the one from the fields stream is the one that propagates</exception>
		public void Dispose()
		{
            // Move to protected method if class becomes unsealed
			if (doClose)
			{
				try
				{
					if (fieldsStream != null)
					{
						try
						{
							fieldsStream.Close();
						}
						finally
						{
							// Drop the reference whether or not Close() succeeded
							fieldsStream = null;
						}
					}
				}
				catch (System.IO.IOException)
				{
					// Closing the fields stream failed: still try to close the index
					// stream here, so that a second IOException can be swallowed and
					// the first one rethrown below.
					try
					{
						if (indexStream != null)
						{
							try
							{
								indexStream.Close();
							}
							finally
							{
								indexStream = null;
							}
						}
					}
					catch (System.IO.IOException)
					{
						// Ignore so we throw only first IOException hit
					}
					throw;
				}
				finally
				{
					// Normal path (and non-IOException failures above): close the index
					// stream. After the catch block has run, indexStream is already null
					// and this is skipped.
					if (indexStream != null)
					{
						try
						{
							indexStream.Close();
						}
						finally
						{
							indexStream = null;
						}
					}
				}
			}
		}
예제 #17
0
		/// <summary>Replaces the output stored in the <c>fieldsStream</c> field.</summary>
		/// <param name="stream">the output to store</param>
		internal void  SetFieldsStream(IndexOutput stream)
		{
			this.fieldsStream = stream;
		}
예제 #18
0
		/// <summary>Creates a writer on top of two outputs supplied by the caller.</summary>
		/// <param name="fdx">stored as the index stream</param>
		/// <param name="fdt">stored as the fields stream</param>
		/// <param name="fn">stored as the field infos</param>
		internal FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn)
		{
			// doClose stays false for this constructor, so Dispose() will not close
			// the streams handed in here.
			doClose = false;
			indexStream = fdx;
			fieldsStream = fdt;
			fieldInfos = fn;
		}
예제 #19
0
		/// <summary>Writes the format, the number of fields, and then for each field
		/// its name followed by one byte of option flags.
		/// </summary>
		/// <param name="output">the output to write to</param>
		public void  Write(IndexOutput output)
		{
			output.WriteVInt(CURRENT_FORMAT);
			output.WriteVInt(Size());
			
			int index = 0;
			while (index < Size())
			{
				FieldInfo info = FieldInfo(index);
				
				// Pack the per-field options into a single flag byte.
				byte flags = 0;
				if (info.isIndexed)
				{
					flags |= IS_INDEXED;
				}
				if (info.storeTermVector)
				{
					flags |= STORE_TERMVECTOR;
				}
				if (info.storePositionWithTermVector)
				{
					flags |= STORE_POSITIONS_WITH_TERMVECTOR;
				}
				if (info.storeOffsetWithTermVector)
				{
					flags |= STORE_OFFSET_WITH_TERMVECTOR;
				}
				if (info.omitNorms)
				{
					flags |= OMIT_NORMS;
				}
				if (info.storePayloads)
				{
					flags |= STORE_PAYLOADS;
				}
				if (info.omitTermFreqAndPositions)
				{
					flags |= OMIT_TERM_FREQ_AND_POSITIONS;
				}
				
				output.WriteString(info.name);
				output.WriteByte(flags);
				index++;
			}
		}
예제 #20
0
		/// <summary>Replaces the output stored in this writer's <c>proxOutput</c> field.</summary>
		/// <param name="proxOutput">the output to store</param>
		internal virtual void  SetProxOutput(IndexOutput proxOutput)
		{
			this.proxOutput = proxOutput;
		}
		/// <summary>Best-effort shutdown: closes whichever of the three term vector
		/// outputs are open, ignoring any exception a close raises, clears the
		/// references and resets <c>lastDocID</c> to 0.
		/// </summary>
		public override void  Abort()
		{
			if (tvx != null)
			{
				try
				{
					tvx.Close();
				}
				catch (System.Exception)
				{
					// Deliberately ignored: abort must not fail.
				}
				finally
				{
					tvx = null;
				}
			}
			
			if (tvd != null)
			{
				try
				{
					tvd.Close();
				}
				catch (System.Exception)
				{
					// Deliberately ignored: abort must not fail.
				}
				finally
				{
					tvd = null;
				}
			}
			
			if (tvf != null)
			{
				try
				{
					tvf.Close();
				}
				catch (System.Exception)
				{
					// Deliberately ignored: abort must not fail.
				}
				finally
				{
					tvf = null;
				}
			}
			
			lastDocID = 0;
		}
예제 #22
0
		/// <summary>Creates the fields file and the fields index file for
		/// <paramref name="segment"/> in <paramref name="d"/> and writes the current
		/// format marker to each. If creating or initialising either file fails, the
		/// streams opened so far are closed, the files created so far are deleted on
		/// a best-effort basis, and the original exception propagates to the caller.
		/// </summary>
		/// <param name="d">directory the two files are created in</param>
		/// <param name="segment">segment name used as the file name prefix</param>
		/// <param name="fn">field infos stored on the writer</param>
		internal FieldsWriter(Directory d, System.String segment, FieldInfos fn)
		{
			fieldInfos = fn;
			
			bool success = false;
			String fieldsName = segment + "." + IndexFileNames.FIELDS_EXTENSION;
			try
			{
				fieldsStream = d.CreateOutput(fieldsName);
				fieldsStream.WriteInt(FORMAT_CURRENT);
				success = true;
			}
			finally
			{
				if (!success)
				{
					// Dispose() does nothing while doClose is false, and doClose is
					// otherwise only set at the very end of this constructor. Enable
					// it now so the stream opened above is actually closed before
					// its file is deleted.
					doClose = true;
					try
					{
						Dispose();
					}
					catch (System.Exception)
					{
						// Suppress so we keep throwing the original exception
					}
					try
					{
						d.DeleteFile(fieldsName);
					}
					catch (System.Exception)
					{
						// Suppress so we keep throwing the original exception
					}
				}
			}
			
			success = false;
			String indexName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
			try
			{
				indexStream = d.CreateOutput(indexName);
				indexStream.WriteInt(FORMAT_CURRENT);
				success = true;
			}
			finally
			{
				if (!success)
				{
					// Same as above: make Dispose() effective so that both streams
					// are closed before their files are deleted.
					doClose = true;
					try
					{
						Dispose();
					}
					catch (System.Exception)
					{
						// Suppress so we keep throwing the original exception
						// (any exception type, matching the first cleanup path)
					}
					try
					{
						d.DeleteFile(fieldsName);
					}
					catch (System.Exception)
					{
						// Suppress so we keep throwing the original exception
					}
					try
					{
						d.DeleteFile(indexName);
					}
					catch (System.Exception)
					{
						// Suppress so we keep throwing the original exception
					}
				}
			}
			
			// Both files were created by this writer, so Dispose() must close them.
			doClose = true;
		}
예제 #23
0
		/// <summary>Wraps <paramref name="main"/> and creates a fresh CRC32 digest.</summary>
		/// <param name="main">the output stored in the <c>main</c> field</param>
		public ChecksumIndexOutput(IndexOutput main)
		{
			digest = new CRC32();
			this.main = main;
		}
예제 #24
0
		/// <summary> Serialises this segment's info to <paramref name="output"/>.
		/// The doc store segment name and its compound-file flag are only written
		/// when <c>docStoreOffset</c> is not -1; the norm generations are written as
		/// a count followed by the values, or as the single int <c>NO</c> when there
		/// are none.
		/// </summary>
		internal void  Write(IndexOutput output)
		{
			output.WriteString(name);
			output.WriteInt(docCount);
			output.WriteLong(delGen);
			output.WriteInt(docStoreOffset);
			
			bool hasDocStore = docStoreOffset != - 1;
			if (hasDocStore)
			{
				output.WriteString(docStoreSegment);
				output.WriteByte((byte) (docStoreIsCompoundFile?1:0));
			}
			
			output.WriteByte((byte) (hasSingleNormFile?1:0));
			
			if (normGen != null)
			{
				output.WriteInt(normGen.Length);
				foreach (var generation in normGen)
				{
					output.WriteLong(generation);
				}
			}
			else
			{
				output.WriteInt(NO);
			}
			
			output.WriteByte((byte) isCompoundFile);
			output.WriteInt(delCount);
			output.WriteByte((byte) (hasProx?1:0));
			output.WriteStringStringMap(diagnostics);
		}
예제 #25
0
		/// <summary>Writes the vector in plain form: the size, the count, and then
		/// the whole <c>bits</c> array.
		/// </summary>
		private void  WriteBits(IndexOutput output)
		{
			int size = Size();
			output.WriteInt(size); // write size
			
			int count = Count();
			output.WriteInt(count); // write count
			
			int byteLength = bits.Length;
			output.WriteBytes(bits, byteLength);
		}