/// <summary>
/// Writes a chunk header to the fields stream: the doc base, the number of buffered
/// documents, and then the two per-document arrays via <c>SaveInt32s</c>.
/// </summary>
/// <param name="docBase"> Value written first, as a VInt. </param>
/// <param name="numBufferedDocs"> Value written second, as a VInt; also passed as the count to <c>SaveInt32s</c>. </param>
/// <param name="numStoredFields"> Per-document stored field counts. </param>
/// <param name="lengths"> Per-document lengths. </param>
private void WriteHeader(int docBase, int numBufferedDocs, int[] numStoredFields, int[] lengths)
{
    var chunkOut = fieldsStream;

    // docBase followed by numBufferedDocs
    chunkOut.WriteVInt32(docBase);
    chunkOut.WriteVInt32(numBufferedDocs);

    // numStoredFields first, lengths second
    SaveInt32s(numStoredFields, numBufferedDocs, chunkOut);
    SaveInt32s(lengths, numBufferedDocs, chunkOut);
}
/// <summary>
/// Sole constructor. Creates the vectors index and vectors data outputs for the segment,
/// writes both codec headers, and allocates the working buffers.
/// </summary>
/// <remarks>
/// If anything inside the guarded section throws, the local index output reference is handed
/// to <c>IOUtils.DisposeWhileHandlingException</c> and <c>Abort()</c> is called.
/// </remarks>
public CompressingTermVectorsWriter(Directory directory, SegmentInfo si, string segmentSuffix, IOContext context, string formatName, CompressionMode compressionMode, int chunkSize)
{
    Debug.Assert(directory != null);

    this.directory = directory;
    this.segment = si.Name;
    this.segmentSuffix = segmentSuffix;
    this.compressionMode = compressionMode;
    this.compressor = compressionMode.NewCompressor();
    this.chunkSize = chunkSize;

    numDocs = 0;
    pendingDocs = new LinkedList<DocData>();
    termSuffixes = new GrowableByteArrayDataOutput(ArrayUtil.Oversize(chunkSize, 1));
    payloadBytes = new GrowableByteArrayDataOutput(ArrayUtil.Oversize(1, 1));
    lastTerm = new BytesRef(ArrayUtil.Oversize(30, 1));

    bool success = false;
    string indexFileName = IndexFileNames.SegmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION);
    IndexOutput indexStream = directory.CreateOutput(indexFileName, context);
    try
    {
        string dataFileName = IndexFileNames.SegmentFileName(segment, segmentSuffix, VECTORS_EXTENSION);
        vectorsStream = directory.CreateOutput(dataFileName, context);

        string codecNameIdx = formatName + CODEC_SFX_IDX;
        string codecNameDat = formatName + CODEC_SFX_DAT;
        CodecUtil.WriteHeader(indexStream, codecNameIdx, VERSION_CURRENT);
        CodecUtil.WriteHeader(vectorsStream, codecNameDat, VERSION_CURRENT);
        Debug.Assert(CodecUtil.HeaderLength(codecNameDat) == vectorsStream.GetFilePointer());
        Debug.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.GetFilePointer());

        // The index writer receives the stream; the local reference is cleared so the
        // failure path below does not pass it to DisposeWhileHandlingException.
        indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
        indexStream = null;

        vectorsStream.WriteVInt32(PackedInt32s.VERSION_CURRENT);
        vectorsStream.WriteVInt32(chunkSize);
        writer = new BlockPackedWriter(vectorsStream, BLOCK_SIZE);

        const int initialBufferLength = 1024;
        positionsBuf = new int[initialBufferLength];
        startOffsetsBuf = new int[initialBufferLength];
        lengthsBuf = new int[initialBufferLength];
        payloadLengthsBuf = new int[initialBufferLength];

        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.DisposeWhileHandlingException(indexStream);
            Abort();
        }
    }
}
// docBase + numBufferedDocs == current doc ID
private int numBufferedDocs;

/// <summary>
/// Sole constructor. Creates the stored fields index and data outputs for the segment,
/// writes both codec headers, and then writes the chunk size and packed-ints version
/// to the data output.
/// </summary>
/// <remarks>
/// If anything inside the guarded section throws, the local index output reference is handed
/// to <c>IOUtils.DisposeWhileHandlingException</c> and <c>Abort()</c> is called.
/// </remarks>
public CompressingStoredFieldsWriter(Directory directory, SegmentInfo si, string segmentSuffix, IOContext context, string formatName, CompressionMode compressionMode, int chunkSize)
{
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(directory != null);
    }

    this.directory = directory;
    this.segment = si.Name;
    this.segmentSuffix = segmentSuffix;
    this.compressionMode = compressionMode;
    this.compressor = compressionMode.NewCompressor();
    this.chunkSize = chunkSize;
    this.docBase = 0;
    this.bufferedDocs = new GrowableByteArrayDataOutput(chunkSize);
    this.numStoredFields = new int[16];
    this.endOffsets = new int[16];
    this.numBufferedDocs = 0;

    bool success = false;
    string indexFileName = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
    IndexOutput indexStream = directory.CreateOutput(indexFileName, context);
    try
    {
        string dataFileName = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_EXTENSION);
        fieldsStream = directory.CreateOutput(dataFileName, context);

        string codecNameIdx = formatName + CODEC_SFX_IDX;
        string codecNameDat = formatName + CODEC_SFX_DAT;
        CodecUtil.WriteHeader(indexStream, codecNameIdx, VERSION_CURRENT);
        CodecUtil.WriteHeader(fieldsStream, codecNameDat, VERSION_CURRENT);
        if (Debugging.AssertsEnabled)
        {
            // LUCENENET specific: Position was renamed from getFilePointer() to match FileStream
            Debugging.Assert(CodecUtil.HeaderLength(codecNameDat) == fieldsStream.Position);
            Debugging.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.Position);
        }

        // The index writer receives the stream; the local reference is cleared so the
        // failure path below does not pass it to DisposeWhileHandlingException.
        indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
        indexStream = null;

        fieldsStream.WriteVInt32(chunkSize);
        fieldsStream.WriteVInt32(PackedInt32s.VERSION_CURRENT);

        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.DisposeWhileHandlingException(indexStream);
            Abort();
        }
    }
}
/// <summary>
/// Stores the output and block size, writes the block size to the output as a VInt,
/// then allocates a buffer of <c>blockSize</c> ints.
/// </summary>
/// <param name="output"> Destination output; the block size is written to it immediately. </param>
/// <param name="fixedBlockSize"> Number of ints per block. </param>
protected FixedInt32BlockIndexOutput(IndexOutput output, int fixedBlockSize)
{
    this.m_output = output;
    blockSize = fixedBlockSize;

    // The block size goes out before the buffer is allocated.
    output.WriteVInt32(blockSize);
    m_buffer = new int[blockSize];
}
/// <summary>
/// Remembers the terms output, writes this writer's codec header followed by the length
/// of the pending array, and then initializes the wrapped postings writer on the same output.
/// </summary>
/// <param name="termsOut"> The terms dictionary output. </param>
public override void Init(IndexOutput termsOut)
{
    _termsOut = termsOut;
    CodecUtil.WriteHeader(termsOut, CODEC, VERSION_CURRENT);

    // encode maxPositions in header
    int maxPositions = _pending.Length;
    termsOut.WriteVInt32(maxPositions);

    _wrappedPostingsWriter.Init(termsOut);
}
/// <summary>
/// Write a block of data (<c>For</c> format).
/// <para/>
/// When <c>IsAllEqual</c> reports true for the block, only a marker byte and the first value
/// are written. Otherwise the bit width is written as a single byte, followed by the encoded bytes.
/// </summary>
/// <param name="data"> The data to write. </param>
/// <param name="encoded"> A buffer to use to encode data. </param>
/// <param name="out"> The destination output. </param>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
internal void WriteBlock(int[] data, byte[] encoded, IndexOutput @out)
{
    if (!IsAllEqual(data))
    {
        int bitsPerValue = BitsRequired(data);
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(bitsPerValue > 0 && bitsPerValue <= 32, bitsPerValue.ToString);
        }

        PackedInt32s.IEncoder blockEncoder = encoders[bitsPerValue];
        int iterationCount = iterations[bitsPerValue];
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(iterationCount * blockEncoder.ByteValueCount >= Lucene41PostingsFormat.BLOCK_SIZE);
        }

        int byteCount = encodedSizes[bitsPerValue];
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(iterationCount * blockEncoder.ByteBlockCount >= byteCount);
        }

        @out.WriteByte((byte)bitsPerValue);
        blockEncoder.Encode(data, 0, encoded, 0, iterationCount);
        @out.WriteBytes(encoded, byteCount);
        return;
    }

    // All values equal: marker byte plus the first value.
    @out.WriteByte((byte)(sbyte)ALL_VALUES_EQUAL);
    @out.WriteVInt32(data[0]);
}
/// <summary>
/// Finishes the current field. When at least one term was added, writes the term count,
/// field number, statistics and the finished FST to the output; otherwise writes nothing.
/// </summary>
/// <param name="sumTotalTermFreq"> Written only when the field's index options are not <c>DOCS_ONLY</c>. </param>
/// <param name="sumDocFreq"> Written as a VLong. </param>
/// <param name="docCount"> Written as a VInt. </param>
public override void Finish(long sumTotalTermFreq, long sumDocFreq, int docCount)
{
    if (termCount <= 0)
    {
        return;
    }

    @out.WriteVInt32(termCount);
    @out.WriteVInt32(field.Number);

    bool hasFreqs = field.IndexOptions != IndexOptions.DOCS_ONLY;
    if (hasFreqs)
    {
        @out.WriteVInt64(sumTotalTermFreq);
    }

    @out.WriteVInt64(sumDocFreq);
    @out.WriteVInt32(docCount);

    FST<BytesRef> finishedFst = builder.Finish();
    finishedFst.Save(@out);
}
/// <summary>
/// Add a new position and payload.
/// <para/>
/// Buffers the position delta (and, when enabled for the field, payload and offset data).
/// Once <see cref="Lucene41PostingsFormat.BLOCK_SIZE"/> positions are buffered, the buffers
/// are written out as blocks and the buffer cursor is reset.
/// </summary>
public override void AddPosition(int position, BytesRef payload, int startOffset, int endOffset)
{
    int slot = posBufferUpto;
    posDeltaBuffer[slot] = position - lastPosition;

    if (fieldHasPayloads)
    {
        bool hasPayloadBytes = payload != null && payload.Length != 0;
        if (hasPayloadBytes)
        {
            payloadLengthBuffer[slot] = payload.Length;

            // Grow the payload byte buffer when the new payload does not fit.
            int requiredLength = payloadByteUpto + payload.Length;
            if (requiredLength > payloadBytes.Length)
            {
                payloadBytes = ArrayUtil.Grow(payloadBytes, requiredLength);
            }

            Array.Copy(payload.Bytes, payload.Offset, payloadBytes, payloadByteUpto, payload.Length);
            payloadByteUpto += payload.Length;
        }
        else
        {
            // no payload
            payloadLengthBuffer[slot] = 0;
        }
    }

    if (fieldHasOffsets)
    {
        Debug.Assert(startOffset >= lastStartOffset);
        Debug.Assert(endOffset >= startOffset);
        offsetStartDeltaBuffer[slot] = startOffset - lastStartOffset;
        offsetLengthBuffer[slot] = endOffset - startOffset;
        lastStartOffset = startOffset;
    }

    posBufferUpto++;
    lastPosition = position;

    if (posBufferUpto != Lucene41PostingsFormat.BLOCK_SIZE)
    {
        return;
    }

    // A full block is buffered: write it out.
    forUtil.WriteBlock(posDeltaBuffer, encoded, posOut);

    if (fieldHasPayloads)
    {
        forUtil.WriteBlock(payloadLengthBuffer, encoded, payOut);
        payOut.WriteVInt32(payloadByteUpto);
        payOut.WriteBytes(payloadBytes, 0, payloadByteUpto);
        payloadByteUpto = 0;
    }

    if (fieldHasOffsets)
    {
        forUtil.WriteBlock(offsetStartDeltaBuffer, encoded, payOut);
        forUtil.WriteBlock(offsetLengthBuffer, encoded, payOut);
    }

    posBufferUpto = 0;
}
/// <summary>
/// Stores the index output, resets the block state, allocates the per-block delta arrays
/// (<c>BLOCK_SIZE</c> entries each) and writes the packed-ints version to the output.
/// </summary>
/// <param name="indexOutput"> The output this index writer writes to. </param>
internal CompressingStoredFieldsIndexWriter(IndexOutput indexOutput)
{
    fieldsIndexOut = indexOutput;

    Reset();
    totalDocs = 0;

    docBaseDeltas = new int[BLOCK_SIZE];
    startPointerDeltas = new long[BLOCK_SIZE];

    int packedVersion = PackedInt32s.VERSION_CURRENT;
    fieldsIndexOut.WriteVInt32(packedVersion);
}
/// <summary>
/// Add a new position and payload.
/// <para/>
/// Writes the position delta to the prox output. When payloads are stored, the delta is
/// shifted left by one and the low bit flags a payload length change (the new length follows).
/// Offsets, when stored, use the same low-bit scheme for the offset length. Payload bytes,
/// if any, are written last.
/// </summary>
public override void AddPosition(int position, BytesRef payload, int startOffset, int endOffset)
{
    Debug.Assert(IndexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0, "invalid indexOptions: " + IndexOptions);
    Debug.Assert(ProxOut != null);

    int positionDelta = position - LastPosition;

    // not quite right (if pos=0 is repeated twice we don't catch it)
    Debug.Assert(positionDelta >= 0, "position=" + position + " lastPosition=" + LastPosition);

    LastPosition = position;

    int payloadLength = 0;

    if (!StorePayloads)
    {
        ProxOut.WriteVInt32(positionDelta);
    }
    else
    {
        payloadLength = payload != null ? payload.Length : 0;

        if (payloadLength == LastPayloadLength)
        {
            ProxOut.WriteVInt32(positionDelta << 1);
        }
        else
        {
            LastPayloadLength = payloadLength;
            ProxOut.WriteVInt32((positionDelta << 1) | 1);
            ProxOut.WriteVInt32(payloadLength);
        }
    }

    if (StoreOffsets)
    {
        // don't use startOffset - lastEndOffset, because this creates lots of negative vints for synonyms,
        // and the numbers aren't that much smaller anyways.
        int offsetDelta = startOffset - LastOffset;
        int offsetLength = endOffset - startOffset;
        Debug.Assert(offsetDelta >= 0 && offsetLength >= 0, "startOffset=" + startOffset + ",lastOffset=" + LastOffset + ",endOffset=" + endOffset);

        if (offsetLength == LastOffsetLength)
        {
            ProxOut.WriteVInt32(offsetDelta << 1);
        }
        else
        {
            ProxOut.WriteVInt32((offsetDelta << 1) | 1);
            ProxOut.WriteVInt32(offsetLength);
        }

        LastOffset = startOffset;
        LastOffsetLength = offsetLength;
    }

    if (payloadLength > 0)
    {
        ProxOut.WriteBytes(payload.Bytes, payload.Offset, payloadLength);
    }
}
/// <summary>
/// On a disposing call with a live output: writes the field directory (one entry per field
/// whose FST is non-null), the trailer and the codec footer, then disposes the output and
/// clears the reference. The output is disposed even if writing fails.
/// </summary>
protected override void Dispose(bool disposing)
{
    if (!disposing || m_output == null)
    {
        return;
    }

    try
    {
        // LUCENENET specific: Position was renamed from getFilePointer() to match FileStream
        long dirStart = m_output.Position;

        // First pass: count the fields that actually have an FST.
        int fieldCount = fields.Count;
        int nonNullFieldCount = 0;
        for (int i = 0; i < fieldCount; i++)
        {
            if (fields[i].fst != null)
            {
                nonNullFieldCount++;
            }
        }

        m_output.WriteVInt32(nonNullFieldCount);

        // Second pass: write field number and index start for each of those fields.
        for (int i = 0; i < fieldCount; i++)
        {
            FSTFieldWriter current = fields[i];
            if (current.fst == null)
            {
                continue;
            }

            m_output.WriteVInt32(current.fieldInfo.Number);
            m_output.WriteVInt64(current.indexStart);
        }

        WriteTrailer(dirStart);
        CodecUtil.WriteFooter(m_output);
    }
    finally
    {
        m_output.Dispose();
        m_output = null;
    }
}
/// <summary>
/// Adds a new &lt;&lt;<paramref name="fieldNumber"/>, termBytes&gt;, <see cref="TermInfo"/>&gt; pair to the set.
/// Term must be lexicographically greater than all previous Terms added.
/// <see cref="TermInfo"/> pointers must be positive and greater than all previous.
/// </summary>
/// <param name="fieldNumber"> Field number of the term being added. </param>
/// <param name="term"> Term bytes. </param>
/// <param name="ti"> Term info whose doc freq, pointers and (conditionally) skip offset are written. </param>
public void Add(int fieldNumber, BytesRef term, TermInfo ti)
{
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(CompareToLastTerm(fieldNumber, term) < 0 || (isIndex && term.Length == 0 && lastTerm.Length == 0), () => "Terms are out of order: field=" + FieldName(fieldInfos, fieldNumber) + " (number " + fieldNumber + ")" + " lastField=" + FieldName(fieldInfos, lastFieldNumber) + " (number " + lastFieldNumber + ")" + " text=" + term.Utf8ToString() + " lastText=" + lastTerm.Utf8ToString());
    }

    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(ti.FreqPointer >= lastTi.FreqPointer, () => "freqPointer out of order (" + ti.FreqPointer + " < " + lastTi.FreqPointer + ")");
    }

    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(ti.ProxPointer >= lastTi.ProxPointer, () => "proxPointer out of order (" + ti.ProxPointer + " < " + lastTi.ProxPointer + ")");
    }

    // Every indexInterval-th term of the main writer is also added to the index writer.
    bool addIndexTerm = !isIndex && size % indexInterval == 0;
    if (addIndexTerm)
    {
        other.Add(lastFieldNumber, lastTerm, lastTi);
    }

    // Term, then doc freq, then both pointers as deltas against the previous term info.
    WriteTerm(fieldNumber, term);
    output.WriteVInt32(ti.DocFreq);
    output.WriteVInt64(ti.FreqPointer - lastTi.FreqPointer);
    output.WriteVInt64(ti.ProxPointer - lastTi.ProxPointer);

    if (ti.DocFreq >= skipInterval)
    {
        output.WriteVInt32(ti.SkipOffset);
    }

    if (isIndex)
    {
        // Delta against the previous index pointer, then remember the new one.
        output.WriteVInt64(other.output.GetFilePointer() - lastIndexPointer);
        lastIndexPointer = other.output.GetFilePointer();
    }

    lastFieldNumber = fieldNumber;
    lastTi.Set(ti);
    size++;
}
/// <summary>
/// Starts a new document for the current term: validates doc ordering, buffers skip data
/// every <c>skipInterval</c> documents, and writes the doc delta (with the term frequency
/// folded into the low bit when it equals 1, unless the field is <c>DOCS_ONLY</c>).
/// </summary>
/// <param name="docID"> Document ID; must be non-negative and greater than the previous one for this term. </param>
/// <param name="termDocFreq"> Term frequency within the document. </param>
/// <exception cref="CorruptIndexException"> If documents are added out of order. </exception>
public override void StartDoc(int docID, int termDocFreq)
{
    int docDelta = docID - lastDocID;

    bool outOfOrder = docID < 0 || (df > 0 && docDelta <= 0);
    if (outOfOrder)
    {
        throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " ) (freqOut: " + freqOut + ")");
    }

    ++df;
    if ((df % skipInterval) == 0)
    {
        skipListWriter.SetSkipData(lastDocID, storePayloads, lastPayloadLength, storeOffsets, lastOffsetLength);
        skipListWriter.BufferSkip(df);
    }

    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(docID < totalNumDocs, "docID={0} totalNumDocs={1}", docID, totalNumDocs);
    }

    lastDocID = docID;

    if (indexOptions != IndexOptions.DOCS_ONLY)
    {
        if (termDocFreq == 1)
        {
            // Low bit set: frequency is 1 and not written separately.
            freqOut.WriteVInt32((docDelta << 1) | 1);
        }
        else
        {
            freqOut.WriteVInt32(docDelta << 1);
            freqOut.WriteVInt32(termDocFreq);
        }
    }
    else
    {
        freqOut.WriteVInt32(docDelta);
    }

    lastPosition = 0;
    lastOffset = 0;
}
/// <summary>
/// Writes one skip entry for <paramref name="level"/>: the doc delta and doc file pointer
/// delta, and, when the field has positions, the position pointer delta, position buffer
/// cursor, payload byte cursor (payloads only) and pay pointer delta (offsets or payloads).
/// Each "last" value for the level is updated right after its delta is written.
/// </summary>
/// <param name="level"> Skip level whose "last" values are used and updated. </param>
/// <param name="skipBuffer"> Output receiving the skip entry. </param>
protected override void WriteSkipData(int level, IndexOutput skipBuffer)
{
    int docDelta = curDoc - lastSkipDoc[level];
    skipBuffer.WriteVInt32(docDelta);
    lastSkipDoc[level] = curDoc;

    int docPointerDelta = (int)(curDocPointer - lastSkipDocPointer[level]);
    skipBuffer.WriteVInt32(docPointerDelta);
    lastSkipDocPointer[level] = curDocPointer;

    if (!fieldHasPositions)
    {
        return;
    }

    int posPointerDelta = (int)(curPosPointer - lastSkipPosPointer[level]);
    skipBuffer.WriteVInt32(posPointerDelta);
    lastSkipPosPointer[level] = curPosPointer;
    skipBuffer.WriteVInt32(curPosBufferUpto);

    if (fieldHasPayloads)
    {
        skipBuffer.WriteVInt32(curPayloadByteUpto);
    }

    if (fieldHasOffsets || fieldHasPayloads)
    {
        int payPointerDelta = (int)(curPayPointer - lastSkipPayPointer[level]);
        skipBuffer.WriteVInt32(payPointerDelta);
        lastSkipPayPointer[level] = curPayPointer;
    }
}
/// <summary>
/// Adds a new &lt;&lt;<paramref name="fieldNumber"/>, termBytes&gt;, <see cref="TermInfo"/>&gt; pair to the set.
/// Term must be lexicographically greater than all previous Terms added.
/// <see cref="TermInfo"/> pointers must be positive and greater than all previous.
/// </summary>
/// <param name="fieldNumber"> Field number of the term being added. </param>
/// <param name="term"> Term bytes. </param>
/// <param name="ti"> Term info whose doc freq, pointers and (conditionally) skip offset are written. </param>
public void Add(int fieldNumber, BytesRef term, TermInfo ti)
{
    if (Debugging.AssertsEnabled)
    {
        bool inOrder = CompareToLastTerm(fieldNumber, term) < 0 || (isIndex && term.Length == 0 && lastTerm.Length == 0);
        Debugging.Assert(inOrder,
            "Terms are out of order: field={0} (number {1}) lastField={2} (number {3}) text={4} lastText={5}",
            FieldName(fieldInfos, fieldNumber), fieldNumber, FieldName(fieldInfos, lastFieldNumber), lastFieldNumber,
            // LUCENENET specific - use wrapper BytesRefFormatter struct to defer building the string unless string.Format() is called
            new BytesRefFormatter(term, BytesRefFormat.UTF8), new BytesRefFormatter(lastTerm, BytesRefFormat.UTF8));
        Debugging.Assert(ti.FreqPointer >= lastTi.FreqPointer, "freqPointer out of order ({0} < {1})", ti.FreqPointer, lastTi.FreqPointer);
        Debugging.Assert(ti.ProxPointer >= lastTi.ProxPointer, "proxPointer out of order ({0} < {1})", ti.ProxPointer, lastTi.ProxPointer);
    }

    // Every indexInterval-th term of the main writer is also added to the index writer.
    bool addIndexTerm = !isIndex && size % indexInterval == 0;
    if (addIndexTerm)
    {
        other.Add(lastFieldNumber, lastTerm, lastTi);
    }

    // Term, then doc freq, then both pointers as deltas against the previous term info.
    WriteTerm(fieldNumber, term);
    output.WriteVInt32(ti.DocFreq);
    output.WriteVInt64(ti.FreqPointer - lastTi.FreqPointer);
    output.WriteVInt64(ti.ProxPointer - lastTi.ProxPointer);

    if (ti.DocFreq >= skipInterval)
    {
        output.WriteVInt32(ti.SkipOffset);
    }

    if (isIndex)
    {
        // LUCENENET specific: Position was renamed from getFilePointer() to match FileStream
        output.WriteVInt64(other.output.Position - lastIndexPointer);
        lastIndexPointer = other.output.Position;
    }

    lastFieldNumber = fieldNumber;
    lastTi.Set(ti);
    size++;
}
/// <summary>
/// Starts a new document for the current term: validates doc ordering, buffers skip data
/// every <c>SkipInterval</c> documents, and writes the doc delta (with the term frequency
/// folded into the low bit when it equals 1, unless the field is <c>DOCS_ONLY</c>).
/// </summary>
/// <param name="docID"> Document ID; must be non-negative and greater than the previous one for this term. </param>
/// <param name="termDocFreq"> Term frequency within the document. </param>
/// <exception cref="CorruptIndexException"> If documents are added out of order. </exception>
public override void StartDoc(int docID, int termDocFreq)
{
    int docDelta = docID - LastDocID;

    bool outOfOrder = docID < 0 || (Df > 0 && docDelta <= 0);
    if (outOfOrder)
    {
        throw new CorruptIndexException("docs out of order (" + docID + " <= " + LastDocID + " ) (freqOut: " + FreqOut + ")");
    }

    ++Df;
    if ((Df % SkipInterval) == 0)
    {
        SkipListWriter.SetSkipData(LastDocID, StorePayloads, LastPayloadLength, StoreOffsets, LastOffsetLength);
        SkipListWriter.BufferSkip(Df);
    }

    Debug.Assert(docID < TotalNumDocs, "docID=" + docID + " totalNumDocs=" + TotalNumDocs);

    LastDocID = docID;

    if (IndexOptions != IndexOptions.DOCS_ONLY)
    {
        if (termDocFreq == 1)
        {
            // Low bit set: frequency is 1 and not written separately.
            FreqOut.WriteVInt32((docDelta << 1) | 1);
        }
        else
        {
            FreqOut.WriteVInt32(docDelta << 1);
            FreqOut.WriteVInt32(termDocFreq);
        }
    }
    else
    {
        FreqOut.WriteVInt32(docDelta);
    }

    LastPosition = 0;
    LastOffset = 0;
}
/// <summary>
/// Flushes the pending documents as one chunk: records the chunk in the index, writes the
/// chunk's doc base and doc count, then (when the chunk has any fields) writes the field,
/// term and position metadata and compresses the term suffixes to the vectors stream.
/// Finally clears the pending state.
/// </summary>
private void Flush()
{
    int chunkDocs = pendingDocs.Count;
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(chunkDocs > 0, "{0}", chunkDocs);
    }

    // write the index file
    // LUCENENET specific: Position was renamed from getFilePointer() to match FileStream
    long chunkStart = vectorsStream.Position;
    indexWriter.WriteIndex(chunkDocs, chunkStart);

    int chunkDocBase = numDocs - chunkDocs;
    vectorsStream.WriteVInt32(chunkDocBase);
    vectorsStream.WriteVInt32(chunkDocs);

    // total number of fields of the chunk
    int totalFields = FlushNumFields(chunkDocs);

    if (totalFields > 0)
    {
        int[] fieldNums = FlushFieldNums();          // unique field numbers (sorted)
        FlushFields(totalFields, fieldNums);         // offsets in the array of unique field numbers
        FlushFlags(totalFields, fieldNums);          // flags (does the field have positions, offsets, payloads?)
        FlushNumTerms(totalFields);                  // number of terms of each field
        FlushTermLengths();                          // prefix and suffix lengths for each field
        FlushTermFreqs();                            // term freqs - 1 (because termFreq is always >=1) for each term
        FlushPositions();                            // positions for all terms, when enabled
        FlushOffsets(fieldNums);                     // offsets for all terms, when enabled
        FlushPayloadLengths();                       // payload lengths for all terms, when enabled

        // compress terms and payloads and write them to the output
        compressor.Compress(termSuffixes.Bytes, 0, termSuffixes.Length, vectorsStream);
    }

    // reset
    pendingDocs.Clear();
    curDoc = null;
    curField = null;
    termSuffixes.Length = 0;
}
private bool disposed = false; // LUCENENET specific

/// <summary>
/// Disposes the wrapped postings writer and the buffer on the first disposing call. Unless the
/// wrapped writer is itself a <c>PulsingPostingsWriter</c> or the current version predates
/// <c>VERSION_META_ARRAY</c>, a summary file is then written with one
/// (field number, longs size) entry per field.
/// </summary>
protected override void Dispose(bool disposing)
{
    if (!disposing || disposed)
    {
        return;
    }

    disposed = true;
    _wrappedPostingsWriter.Dispose();
    _buffer.Dispose(); // LUCENENET specific

    if (_wrappedPostingsWriter is PulsingPostingsWriter || VERSION_CURRENT < VERSION_META_ARRAY)
    {
        return;
    }

    string summaryFileName = IndexFileNames.SegmentFileName(_segmentState.SegmentInfo.Name, _segmentState.SegmentSuffix, SUMMARY_EXTENSION);
    IndexOutput summaryOut = null;
    try
    {
        summaryOut = _segmentState.Directory.CreateOutput(summaryFileName, _segmentState.Context);
        CodecUtil.WriteHeader(summaryOut, CODEC, VERSION_CURRENT);
        summaryOut.WriteVInt32(_fields.Count);
        foreach (var fieldMeta in _fields)
        {
            summaryOut.WriteVInt32(fieldMeta.FieldNumber);
            summaryOut.WriteVInt32(fieldMeta.Int64sSize);
        }

        summaryOut.Dispose();
    }
    finally
    {
        // Runs on both the success and the failure path, as in the original.
        IOUtils.DisposeWhileHandlingException(summaryOut);
    }
}
/// <summary>
/// On a disposing call, writes a zero VInt as the EOF marker followed by the codec footer,
/// and disposes the output whether or not those writes succeed.
/// </summary>
protected override void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    try
    {
        // EOF marker:
        @out.WriteVInt32(0);
        CodecUtil.WriteFooter(@out);
    }
    finally
    {
        @out.Dispose();
    }
}
/// <summary>
/// Writes one skip entry for <paramref name="level"/>.
/// </summary>
/// <remarks>
/// To efficiently store payloads in the posting lists the length of every payload is not
/// stored; it is omitted when the previous payload had the same length. To support skipping,
/// the payload length at every skip point must be known, so the skip data uses the same encoding:
/// <para/>
/// Case 1: the current field does not store payloads.
/// SkipDatum --> DocSkip, FreqSkip, ProxSkip (all VInt). DocSkip is the difference from the
/// previous value in the sequence.
/// <para/>
/// Case 2: the current field stores payloads.
/// SkipDatum --> DocSkip, PayloadLength?, FreqSkip, ProxSkip. DocSkip/2 is the difference from
/// the previous value. If DocSkip is odd, a PayloadLength VInt follows; if it is even, the
/// payload length equals the length at the previous skip point.
/// </remarks>
/// <param name="level"> Skip level whose "last" values are used and updated. </param>
/// <param name="skipBuffer"> Output receiving the skip entry. </param>
protected override void WriteSkipData(int level, IndexOutput skipBuffer)
{
    if (!curStorePayloads)
    {
        // current field does not store payloads
        skipBuffer.WriteVInt32(curDoc - lastSkipDoc[level]);
    }
    else
    {
        int docDelta = curDoc - lastSkipDoc[level];
        bool sameLength = curPayloadLength == lastSkipPayloadLength[level];
        if (sameLength)
        {
            // the current payload length equals the length at the previous skip point,
            // so the length is not stored again
            skipBuffer.WriteVInt32(docDelta * 2);
        }
        else
        {
            // the payload length differs from the previous one: shift the DocSkip,
            // set the lowest bit and store the current payload length as VInt
            skipBuffer.WriteVInt32(docDelta * 2 + 1);
            skipBuffer.WriteVInt32(curPayloadLength);
            lastSkipPayloadLength[level] = curPayloadLength;
        }
    }

    int freqPointerDelta = (int)(curFreqPointer - lastSkipFreqPointer[level]);
    skipBuffer.WriteVInt32(freqPointerDelta);
    int proxPointerDelta = (int)(curProxPointer - lastSkipProxPointer[level]);
    skipBuffer.WriteVInt32(proxPointerDelta);

    lastSkipDoc[level] = curDoc;
    lastSkipFreqPointer[level] = curFreqPointer;
    lastSkipProxPointer[level] = curProxPointer;
}
/// <summary>
/// Allocates a new ID, writes it (after a codec header) to the segment's ID file, registers a
/// fresh <c>RAMPostings</c> under that ID in <c>state</c>, and returns a consumer that
/// is constructed over those postings.
/// </summary>
/// <param name="writeState"> Supplies the directory, segment name/suffix and IO context for the ID file. </param>
public override FieldsConsumer FieldsConsumer(SegmentWriteState writeState)
{
    int id = (int)nextID.GetAndIncrement();

    // TODO -- ok to do this up front instead of
    // on close....? should be ok?
    // Write our ID:
    string idFileName = IndexFileNames.SegmentFileName(writeState.SegmentInfo.Name, writeState.SegmentSuffix, ID_EXTENSION);
    IndexOutput idOut = writeState.Directory.CreateOutput(idFileName, writeState.Context);
    bool success = false;
    try
    {
        CodecUtil.WriteHeader(idOut, RAM_ONLY_NAME, VERSION_LATEST);
        idOut.WriteVInt32(id);
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Dispose(idOut);
        }
        else
        {
            IOUtils.DisposeWhileHandlingException(idOut);
        }
    }

    RAMPostings postings = new RAMPostings();
    RAMFieldsConsumer consumer = new RAMFieldsConsumer(postings);

    // Register the postings under the new ID while holding the monitor on state.
    UninterruptableMonitor.Enter(state);
    try
    {
        state[id] = postings;
    }
    finally
    {
        UninterruptableMonitor.Exit(state);
    }

    return consumer;
}
/// <summary>
/// Write as a d-gaps list.
/// <para/>
/// Writes a -1 marker, the length and the count, then for every byte that is not 0xff the gap
/// to the previously written byte index followed by the byte itself. Stops once all cleared
/// bits are accounted for.
/// </summary>
/// <param name="output"> Destination output. </param>
private void WriteClearedDgaps(IndexOutput output)
{
    output.WriteInt32(-1);      // mark using d-gaps
    output.WriteInt32(Length);  // write size
    output.WriteInt32(Count()); // write count

    int previousIndex = 0;
    int numCleared = Length - Count();

    for (int i = 0; i < bits.Length && numCleared > 0; i++)
    {
        if (bits[i] == 0xff)
        {
            // No cleared bit in this byte; nothing to record.
            continue;
        }

        output.WriteVInt32(i - previousIndex);
        output.WriteByte(bits[i]);
        previousIndex = i;
        numCleared -= (8 - BitUtil.BitCount(bits[i]));
        Debug.Assert(numCleared >= 0 || (i == (bits.Length - 1) && numCleared == -(8 - (size & 7))));
    }
}
/// <summary>
/// Writes the field infos file for the segment: a format marker, the number of fields, and
/// for each field its name, number, flag bits and (for indexed fields that do not omit norms)
/// a byte telling whether norms are present.
/// </summary>
/// <remarks>
/// The file name is built with an empty suffix; <paramref name="segmentSuffix"/> is not used.
/// </remarks>
public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
{
    string fileName = IndexFileNames.SegmentFileName(segmentName, "", FIELD_INFOS_EXTENSION);
    IndexOutput output = directory.CreateOutput(fileName, context);
    bool success = false;
    try
    {
        output.WriteVInt32(FORMAT_PREFLEX_RW);
        output.WriteVInt32(infos.Count);

        foreach (FieldInfo fi in infos)
        {
            sbyte bits = 0x0;
            if (fi.HasVectors)
            {
                bits |= STORE_TERMVECTOR;
            }
            if (fi.OmitsNorms)
            {
                bits |= OMIT_NORMS;
            }
            if (fi.HasPayloads)
            {
                bits |= STORE_PAYLOADS;
            }
            if (fi.IsIndexed)
            {
                bits |= IS_INDEXED;
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(fi.IndexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.HasPayloads);
                }

                if (fi.IndexOptions == IndexOptions.DOCS_ONLY)
                {
                    bits |= OMIT_TERM_FREQ_AND_POSITIONS;
                }
                else if (fi.IndexOptions == IndexOptions.DOCS_AND_FREQS)
                {
                    bits |= OMIT_POSITIONS;
                }
            }

            output.WriteString(fi.Name);

            // The field number is written explicitly because IW tries to stabilize field
            // numbers across segments, so the FI ordinal is not necessarily equal to the
            // field number.
            output.WriteInt32(fi.Number);
            output.WriteByte((byte)bits);

            if (fi.IsIndexed && !fi.OmitsNorms)
            {
                // to allow null norm types we need to indicate if norms are written
                // only in RW case
                bool hasNorms = fi.NormType != Index.DocValuesType.NONE;
                output.WriteByte((byte)(hasNorms ? 1 : 0));
            }

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(fi.Attributes is null); // not used or supported
            }
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.DisposeWhileHandlingException(output);
        }
        else
        {
            output.Dispose();
        }
    }
}
/// <summary>
/// Starts a new document: appends the current fields stream position to the index stream
/// and writes the stored field count to the fields stream.
/// </summary>
/// <param name="numStoredFields"> Number of stored fields, written as a VInt. </param>
public override void StartDocument(int numStoredFields)
{
    // The index entry is the position at which this document begins in the fields stream.
    long documentStart = fieldsStream.GetFilePointer();
    indexStream.WriteInt64(documentStart);

    fieldsStream.WriteVInt32(numStoredFields);
}
/// <summary>
/// Writes the current block of chunk index entries: the chunk count, then the doc bases and
/// the start pointers, each stored as packed deltas from an average.
/// </summary>
/// <remarks>
/// Only the difference from the average start pointer or doc base is stored, which saves
/// bits per value. To prevent a few chunks far from the average from raising the number of
/// bits per value for all of them, entries are encoded one block at a time.
/// See LUCENE-4512.
/// </remarks>
private void WriteBlock()
{
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(blockChunks > 0);
    }

    fieldsIndexOut.WriteVInt32(blockChunks);

    // ---- doc bases ----
    int avgChunkDocs = blockChunks == 1
        ? 0
        : (int)Math.Round((float)(blockDocs - docBaseDeltas[blockChunks - 1]) / (blockChunks - 1));

    fieldsIndexOut.WriteVInt32(totalDocs - blockDocs); // docBase
    fieldsIndexOut.WriteVInt32(avgChunkDocs);

    // First pass: OR all encoded deltas together to find the bit width needed.
    int runningDocBase = 0;
    long deltaBits = 0;
    for (int i = 0; i < blockChunks; ++i)
    {
        int delta = runningDocBase - avgChunkDocs * i;
        deltaBits |= MoveSignToLowOrderBit(delta);
        runningDocBase += docBaseDeltas[i];
    }

    int bitsPerDocBase = PackedInt32s.BitsRequired(deltaBits);
    fieldsIndexOut.WriteVInt32(bitsPerDocBase);

    // Second pass: write the encoded deltas.
    PackedInt32s.Writer packedWriter = PackedInt32s.GetWriterNoHeader(fieldsIndexOut, PackedInt32s.Format.PACKED, blockChunks, bitsPerDocBase, 1);
    runningDocBase = 0;
    for (int i = 0; i < blockChunks; ++i)
    {
        long delta = runningDocBase - avgChunkDocs * i;
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(PackedInt32s.BitsRequired(MoveSignToLowOrderBit(delta)) <= packedWriter.BitsPerValue);
        }

        packedWriter.Add(MoveSignToLowOrderBit(delta));
        runningDocBase += docBaseDeltas[i];
    }

    packedWriter.Finish();

    // ---- start pointers ----
    fieldsIndexOut.WriteVInt64(firstStartPointer);

    long avgChunkSize = blockChunks == 1
        ? 0
        : (maxStartPointer - firstStartPointer) / (blockChunks - 1);

    fieldsIndexOut.WriteVInt64(avgChunkSize);

    // First pass: find the bit width needed.
    long runningStartPointer = 0;
    deltaBits = 0;
    for (int i = 0; i < blockChunks; ++i)
    {
        runningStartPointer += startPointerDeltas[i];
        long delta = runningStartPointer - avgChunkSize * i;
        deltaBits |= MoveSignToLowOrderBit(delta);
    }

    int bitsPerStartPointer = PackedInt32s.BitsRequired(deltaBits);
    fieldsIndexOut.WriteVInt32(bitsPerStartPointer);

    // Second pass: write the encoded deltas.
    packedWriter = PackedInt32s.GetWriterNoHeader(fieldsIndexOut, PackedInt32s.Format.PACKED, blockChunks, bitsPerStartPointer, 1);
    runningStartPointer = 0;
    for (int i = 0; i < blockChunks; ++i)
    {
        runningStartPointer += startPointerDeltas[i];
        long delta = runningStartPointer - avgChunkSize * i;
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(PackedInt32s.BitsRequired(MoveSignToLowOrderBit(delta)) <= packedWriter.BitsPerValue);
        }

        packedWriter.Add(MoveSignToLowOrderBit(delta));
    }

    packedWriter.Finish();
}
/// <summary>
/// Called when we are done adding docs to this term.
/// <para/>
/// Writes whatever is still buffered (doc deltas/freqs and positions/payloads/offsets) in
/// VInt form, writes skip data when the term has more than one block of docs, records the
/// resulting file pointers in <paramref name="state"/> and resets the per-term counters.
/// </summary>
/// <param name="state"> Term state; cast to <c>Int32BlockTermState</c> and filled in. </param>
public override void FinishTerm(BlockTermState state)
{
    Int32BlockTermState termState = (Int32BlockTermState)state;
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(termState.DocFreq > 0);
    }

    // TODO: wasteful we are counting this (counting # docs
    // for this term) in two places?
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(termState.DocFreq == docCount, "{0} vs {1}", termState.DocFreq, docCount);
    }

    // docFreq == 1, don't write the single docid/freq to a separate file along with a pointer to it.
    int singletonDocID = -1;
    if (termState.DocFreq == 1)
    {
        // pulse the singleton docid into the term dictionary, freq is implicitly totalTermFreq
        singletonDocID = docDeltaBuffer[0];
    }
    else
    {
        // vInt encode the remaining doc deltas and freqs:
        for (int i = 0; i < docBufferUpto; i++)
        {
            int docDelta = docDeltaBuffer[i];
            int freq = freqBuffer[i];
            if (!fieldHasFreqs)
            {
                docOut.WriteVInt32(docDelta);
            }
            else if (freq == 1)
            {
                docOut.WriteVInt32((docDelta << 1) | 1);
            }
            else
            {
                docOut.WriteVInt32(docDelta << 1);
                docOut.WriteVInt32(freq);
            }
        }
    }

    long lastPosBlockOffset = -1;
    if (fieldHasPositions)
    {
        // totalTermFreq is just total number of positions (or payloads, or offsets)
        // associated with current term.
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(termState.TotalTermFreq != -1);
        }

        if (termState.TotalTermFreq > Lucene41PostingsFormat.BLOCK_SIZE)
        {
            // record file offset for last pos in last block
            lastPosBlockOffset = posOut.GetFilePointer() - posStartFP;
        }

        if (posBufferUpto > 0)
        {
            // TODO: should we send offsets/payloads to
            // .pay...?  seems wasteful (have to store extra
            // vLong for low (< BLOCK_SIZE) DF terms = vast vast
            // majority)

            // vInt encode the remaining positions/payloads/offsets:
            int previousPayloadLength = -1; // force first payload length to be written
            int previousOffsetLength = -1;  // force first offset length to be written
            int payloadBytesReadUpto = 0;

            for (int i = 0; i < posBufferUpto; i++)
            {
                int posDelta = posDeltaBuffer[i];

                if (!fieldHasPayloads)
                {
                    posOut.WriteVInt32(posDelta);
                }
                else
                {
                    int payloadLength = payloadLengthBuffer[i];
                    if (payloadLength == previousPayloadLength)
                    {
                        posOut.WriteVInt32(posDelta << 1);
                    }
                    else
                    {
                        previousPayloadLength = payloadLength;
                        posOut.WriteVInt32((posDelta << 1) | 1);
                        posOut.WriteVInt32(payloadLength);
                    }

                    if (payloadLength != 0)
                    {
                        posOut.WriteBytes(payloadBytes, payloadBytesReadUpto, payloadLength);
                        payloadBytesReadUpto += payloadLength;
                    }
                }

                if (fieldHasOffsets)
                {
                    int offsetDelta = offsetStartDeltaBuffer[i];
                    int offsetLength = offsetLengthBuffer[i];
                    if (offsetLength != previousOffsetLength)
                    {
                        posOut.WriteVInt32((offsetDelta << 1) | 1);
                        posOut.WriteVInt32(offsetLength);
                        previousOffsetLength = offsetLength;
                    }
                    else
                    {
                        posOut.WriteVInt32(offsetDelta << 1);
                    }
                }
            }

            if (fieldHasPayloads)
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(payloadBytesReadUpto == payloadByteUpto);
                }

                payloadByteUpto = 0;
            }
        }
    }

    long skipOffset = -1;
    if (docCount > Lucene41PostingsFormat.BLOCK_SIZE)
    {
        skipOffset = skipWriter.WriteSkip(docOut) - docStartFP;
    }

    termState.docStartFP = docStartFP;
    termState.posStartFP = posStartFP;
    termState.payStartFP = payStartFP;
    termState.singletonDocID = singletonDocID;
    termState.skipOffset = skipOffset;
    termState.lastPosBlockOffset = lastPosBlockOffset;

    docBufferUpto = 0;
    posBufferUpto = 0;
    lastDocID = 0;
    docCount = 0;
}
/// <summary>
/// Expert: writes the value dictionary (the set of terms) for a sorted/sortedset field.
/// <para/>
/// When every value has the same length the call is forwarded to <c>AddBinaryField</c>;
/// otherwise the terms are written prefix-compressed to <c>data</c>, followed by a packed
/// table of the start addresses of every <c>ADDRESS_INTERVAL</c>-th term, and the
/// describing entry is written to <c>meta</c>.
/// </summary>
/// <param name="field">Field whose number is recorded in the meta entry.</param>
/// <param name="values">Terms to write. NOTE: this sequence is enumerated twice
/// (once to measure lengths, once to write), so it must be re-enumerable.</param>
protected virtual void AddTermsDict(FieldInfo field, IEnumerable<BytesRef> values)
{
    // Pass 1: find the shortest and longest term to detect a "fixed-length" dictionary.
    int shortest = int.MaxValue;
    int longest = int.MinValue;
    foreach (BytesRef term in values)
    {
        if (term.Length < shortest)
        {
            shortest = term.Length;
        }
        if (term.Length > longest)
        {
            longest = term.Length;
        }
    }

    if (shortest == longest)
    {
        // All terms share one length: no address index needed, terms can be
        // addressed directly by multiplication.
        AddBinaryField(field, values);
        return;
    }

    // Meta header for a prefix-compressed binary entry.
    meta.WriteVInt32(field.Number);
    meta.WriteByte((byte)Lucene45DocValuesFormat.BINARY);
    meta.WriteVInt32(BINARY_PREFIX_COMPRESSED);
    // NOTE(review): -1L is presumably the "no missing-values data" marker; confirm against the reader.
    meta.WriteInt64(-1L);

    // Pass 2: write the term bytes, sharing prefixes inside each block.
    long termsStart = data.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream

    // The address of every ADDRESS_INTERVAL-th term is buffered in RAM and appended
    // to the data file once all terms are written.
    RAMOutputStream addressBytes = new RAMOutputStream();
    MonotonicBlockPackedWriter addressWriter = new MonotonicBlockPackedWriter(addressBytes, BLOCK_SIZE);
    BytesRef previous = new BytesRef();
    long termCount = 0;
    foreach (BytesRef term in values)
    {
        if (termCount % ADDRESS_INTERVAL == 0)
        {
            // Record where this block starts, relative to the first term.
            addressWriter.Add(data.Position - termsStart); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
            // Emptying the previous term forces the first term of a block to be written in full.
            previous.Length = 0;
        }

        // Prefix-code the term against the previous one: shared length, suffix length, suffix bytes.
        int sharedPrefix = StringHelper.BytesDifference(previous, term);
        int suffixLength = term.Length - sharedPrefix;
        data.WriteVInt32(sharedPrefix);
        data.WriteVInt32(suffixLength);
        data.WriteBytes(term.Bytes, term.Offset + sharedPrefix, suffixLength);

        previous.CopyBytes(term);
        termCount++;
    }

    // Append the addresses of the indexed terms right after the term bytes.
    long addressesStart = data.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
    addressWriter.Finish();
    addressBytes.WriteTo(data);

    // Meta trailer: everything a reader needs to locate and decode the dictionary.
    meta.WriteVInt32(shortest);
    meta.WriteVInt32(longest);
    meta.WriteVInt64(termCount);
    meta.WriteInt64(termsStart);
    meta.WriteVInt32(ADDRESS_INTERVAL);
    meta.WriteInt64(addressesStart);
    meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
    meta.WriteVInt32(BLOCK_SIZE);
}
/// <summary>
/// Writes <paramref name="infos"/> to the segment's field-infos file in
/// <paramref name="directory"/>: a codec header, the number of fields, then one
/// record per field (name, number, flag byte, packed doc-values/norm type byte,
/// attribute map).
/// </summary>
/// <param name="directory">Directory in which the output file is created.</param>
/// <param name="segmentName">Segment name used to build the file name.</param>
/// <param name="segmentSuffix">Not used by this method; the file name is always built with an empty suffix.</param>
/// <param name="infos">Field infos to serialize.</param>
/// <param name="context">I/O context passed to <c>CreateOutput</c>.</param>
public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
{
    string fileName = IndexFileNames.SegmentFileName(segmentName, "", Lucene42FieldInfosFormat.EXTENSION);
    IndexOutput output = directory.CreateOutput(fileName, context);
    bool completed = false;
    try
    {
        CodecUtil.WriteHeader(output, Lucene42FieldInfosFormat.CODEC_NAME, Lucene42FieldInfosFormat.FORMAT_CURRENT);
        output.WriteVInt32(infos.Count);

        foreach (FieldInfo fieldInfo in infos)
        {
            IndexOptions indexOptions = fieldInfo.IndexOptions;

            // Collect the per-field boolean properties into a single flag byte.
            sbyte flags = 0x0;
            if (fieldInfo.HasVectors)
            {
                flags |= Lucene42FieldInfosFormat.STORE_TERMVECTOR;
            }
            if (fieldInfo.OmitsNorms)
            {
                flags |= Lucene42FieldInfosFormat.OMIT_NORMS;
            }
            if (fieldInfo.HasPayloads)
            {
                flags |= Lucene42FieldInfosFormat.STORE_PAYLOADS;
            }
            if (fieldInfo.IsIndexed)
            {
                flags |= Lucene42FieldInfosFormat.IS_INDEXED;

                // Payloads are only valid when at least positions are indexed.
                Debug.Assert(indexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fieldInfo.HasPayloads);

                switch (indexOptions)
                {
                    case IndexOptions.DOCS_ONLY:
                        flags |= Lucene42FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
                        break;
                    case IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS:
                        flags |= Lucene42FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
                        break;
                    case IndexOptions.DOCS_AND_FREQS:
                        flags |= Lucene42FieldInfosFormat.OMIT_POSITIONS;
                        break;
                    default:
                        // Any other option sets no additional flag.
                        break;
                }
            }

            output.WriteString(fieldInfo.Name);
            output.WriteVInt32(fieldInfo.Number);
            output.WriteByte((byte)flags);

            // Pack both DV types into one byte: norm type in the high nibble,
            // doc-values type in the low nibble.
            var docValuesBits = DocValuesByte(fieldInfo.DocValuesType);
            var normBits = DocValuesByte(fieldInfo.NormType);
            Debug.Assert((docValuesBits & (~0xF)) == 0 && (normBits & (~0x0F)) == 0);
            var packedTypes = (byte)(0xff & ((normBits << 4) | (byte)docValuesBits));
            output.WriteByte(packedTypes);

            output.WriteStringStringMap(fieldInfo.Attributes);
        }

        completed = true;
    }
    finally
    {
        if (!completed)
        {
            // An exception is already propagating: dispose without letting a
            // secondary failure replace it.
            IOUtils.DisposeWhileHandlingException(output);
        }
        else
        {
            output.Dispose();
        }
    }
}
/// <summary>
/// Writes the postings header to <paramref name="termsOut"/>: the codec header
/// (<c>TERMS_CODEC</c> / <c>VERSION_CURRENT</c>) followed by the postings block size.
/// </summary>
/// <param name="termsOut">Output that receives the header.</param>
public override void Init(IndexOutput termsOut)
{
    CodecUtil.WriteHeader(termsOut, TERMS_CODEC, VERSION_CURRENT);

    // Record the block size used when writing.
    int blockSize = Lucene41PostingsFormat.BLOCK_SIZE;
    termsOut.WriteVInt32(blockSize);
}
/// <summary>
/// Writes the current reference counts (<c>m_refCounts</c>) to a new generation file
/// named <c>SNAPSHOTS_PREFIX + nextWriteGen</c>, syncs it, deletes the file of the
/// previous generation (if any), and then advances <c>nextWriteGen</c>.
/// <para/>
/// The whole operation runs while holding the monitor on this instance. If writing
/// fails, the partially written file is disposed and deleted before the original
/// exception continues to propagate.
/// </summary>
internal void Persist()
{
    UninterruptableMonitor.Enter(this);
    try
    {
        string snapshotFile = SNAPSHOTS_PREFIX + nextWriteGen;
        IndexOutput output = dir.CreateOutput(snapshotFile, IOContext.DEFAULT);
        bool written = false;
        try
        {
            // Layout: codec header, entry count, then (key, value) pairs.
            CodecUtil.WriteHeader(output, CODEC_NAME, VERSION_CURRENT);
            output.WriteVInt32(m_refCounts.Count);
            foreach (KeyValuePair<long, int> entry in m_refCounts)
            {
                output.WriteVInt64(entry.Key);
                output.WriteVInt32(entry.Value);
            }
            written = true;
        }
        finally
        {
            if (written)
            {
                IOUtils.Dispose(output);
            }
            else
            {
                // Clean up the partial file without masking the exception in flight.
                IOUtils.DisposeWhileHandlingException(output);
                try
                {
                    dir.DeleteFile(snapshotFile);
                }
                catch (Exception e) when (e.IsException())
                {
                    // Suppress so we keep throwing original exception
                }
            }
        }

        dir.Sync(new[] { snapshotFile });

        // The new generation has been synced; the previous one is no longer needed.
        if (nextWriteGen > 0)
        {
            string previousFile = SNAPSHOTS_PREFIX + (nextWriteGen - 1);
            try
            {
                dir.DeleteFile(previousFile);
            }
            catch (Exception ioe) when (ioe.IsIOException())
            {
                // OK: likely it didn't exist
            }
        }

        nextWriteGen++;
    }
    finally
    {
        UninterruptableMonitor.Exit(this);
    }
}