private void Flush()
{
    indexWriter.WriteIndex(numBufferedDocs, fieldsStream.GetFilePointer());

    // transform end offsets into lengths
    int[] lengths = endOffsets;
    for (int i = numBufferedDocs - 1; i > 0; --i)
    {
        lengths[i] = endOffsets[i] - endOffsets[i - 1];
        if (Debugging.AssertsEnabled) Debugging.Assert(lengths[i] >= 0);
    }
    WriteHeader(docBase, numBufferedDocs, numStoredFields, lengths);

    // compress stored fields to fieldsStream
    if (bufferedDocs.Length >= 2 * chunkSize)
    {
        // big chunk: slice it into chunkSize-sized pieces
        for (int compressed = 0; compressed < bufferedDocs.Length; compressed += chunkSize)
        {
            compressor.Compress(bufferedDocs.Bytes, compressed, Math.Min(chunkSize, bufferedDocs.Length - compressed), fieldsStream);
        }
    }
    else
    {
        compressor.Compress(bufferedDocs.Bytes, 0, bufferedDocs.Length, fieldsStream);
    }

    // reset
    docBase += numBufferedDocs;
    numBufferedDocs = 0;
    bufferedDocs.Length = 0;
}
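A standalone sketch (hypothetical arrays, not the writer's actual state) of the end-offsets-to-lengths transform used in Flush() above; element 0 is already a length because the first buffered document starts at offset 0:

int[] endOffsets = { 3, 10, 14 }; // cumulative end offsets of three buffered docs
int[] lengths = endOffsets;       // the transform runs in place over the same array
for (int i = lengths.Length - 1; i > 0; --i)
{
    lengths[i] = endOffsets[i] - endOffsets[i - 1];
}
// lengths is now { 3, 7, 4 }: the per-document byte lengths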
/// <summary>
/// Sets the values for the current skip data.
/// </summary>
// Called at every index interval (every 128th doc, by default).
internal void SetSkipData(int doc, bool storePayloads, int payloadLength)
{
    this.curDoc = doc;
    this.curStorePayloads = storePayloads;
    this.curPayloadLength = payloadLength;
    if (payloadOutput != null)
    {
        this.curPayloadPointer = payloadOutput.GetFilePointer();
    }
}
/// <summary>
/// Sets the values for the current skip data.
/// </summary>
public virtual void SetSkipData(int doc, bool storePayloads, int payloadLength)
{
    this.curDoc = doc;
    this.curStorePayloads = storePayloads;
    this.curPayloadLength = payloadLength;
    this.curFreqPointer = freqOutput.GetFilePointer();
    if (proxOutput != null)
    {
        this.curProxPointer = proxOutput.GetFilePointer();
    }
}
private void AddVarSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd)
{
    field.PutAttribute(legacyKey, LegacyDocValuesType.BYTES_VAR_SORTED.ToString());

    CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
    CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

    /* values */
    long startPos = data.GetFilePointer();
    int valueCount = 0;
    foreach (BytesRef v in values)
    {
        data.WriteBytes(v.Bytes, v.Offset, v.Length);
        valueCount++;
    }

    /* addresses */
    long maxAddress = data.GetFilePointer() - startPos;
    index.WriteInt64(maxAddress);
    Debug.Assert(valueCount != int.MaxValue); // unsupported by the 4.0 impl

    PackedInt32s.Writer w = PackedInt32s.GetWriter(index, valueCount + 1, PackedInt32s.BitsRequired(maxAddress), PackedInt32s.DEFAULT);
    long currentPosition = 0;
    foreach (BytesRef v in values)
    {
        w.Add(currentPosition);
        currentPosition += v.Length;
    }
    // write sentinel
    Debug.Assert(currentPosition == maxAddress);
    w.Add(currentPosition);
    w.Finish();

    /* ordinals */
    int maxDoc = state.SegmentInfo.DocCount;
    Debug.Assert(valueCount > 0);
    PackedInt32s.Writer ords = PackedInt32s.GetWriter(index, maxDoc, PackedInt32s.BitsRequired(valueCount - 1), PackedInt32s.DEFAULT);
    foreach (long n in docToOrd)
    {
        ords.Add(n);
    }
    ords.Finish();
}
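A worked trace (hypothetical values, not Lucene output) of the address stream the method above writes for the sorted values { "a", "bc", "def" }:

// value bytes written: "abcdef"          -> maxAddress = 6
// addresses written:   0 ("a"), 1 ("bc"), 3 ("def")
// sentinel:            6 (== maxAddress)
// The sentinel is what lets a reader recover the length of ordinal i
// as addresses[i + 1] - addresses[i], including for the last value.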
private void Flush()
{
    int chunkDocs = pendingDocs.Count;
    if (Debugging.AssertsEnabled) Debugging.Assert(chunkDocs > 0, "{0}", chunkDocs);

    // write the index file
    indexWriter.WriteIndex(chunkDocs, vectorsStream.GetFilePointer());

    int docBase = numDocs - chunkDocs;
    vectorsStream.WriteVInt32(docBase);
    vectorsStream.WriteVInt32(chunkDocs);

    // total number of fields of the chunk
    int totalFields = FlushNumFields(chunkDocs);

    if (totalFields > 0)
    {
        // unique field numbers (sorted)
        int[] fieldNums = FlushFieldNums();
        // offsets in the array of unique field numbers
        FlushFields(totalFields, fieldNums);
        // flags (does the field have positions, offsets, payloads?)
        FlushFlags(totalFields, fieldNums);
        // number of terms of each field
        FlushNumTerms(totalFields);
        // prefix and suffix lengths for each field
        FlushTermLengths();
        // term freqs - 1 (because termFreq is always >= 1) for each term
        FlushTermFreqs();
        // positions for all terms, when enabled
        FlushPositions();
        // offsets for all terms, when enabled
        FlushOffsets(fieldNums);
        // payload lengths for all terms, when enabled
        FlushPayloadLengths();

        // compress terms and payloads and write them to the output
        compressor.Compress(termSuffixes.Bytes, 0, termSuffixes.Length, vectorsStream);
    }

    // reset
    pendingDocs.Clear();
    curDoc = null;
    curField = null;
    termSuffixes.Length = 0;
}
// NOTE: 4.0 file format docs are crazy/wrong here...
private void AddVarStraightBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values)
{
    field.PutAttribute(legacyKey, LegacyDocValuesType.BYTES_VAR_STRAIGHT.ToString());

    CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);
    CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);

    /* values */
    long startPos = data.GetFilePointer();
    foreach (BytesRef v in values)
    {
        if (v != null)
        {
            data.WriteBytes(v.Bytes, v.Offset, v.Length);
        }
    }

    /* addresses */
    long maxAddress = data.GetFilePointer() - startPos;
    index.WriteVInt64(maxAddress);

    int maxDoc = state.SegmentInfo.DocCount;
    if (Debugging.AssertsEnabled) Debugging.Assert(maxDoc != int.MaxValue); // unsupported by the 4.0 impl

    PackedInt32s.Writer w = PackedInt32s.GetWriter(index, maxDoc + 1, PackedInt32s.BitsRequired(maxAddress), PackedInt32s.DEFAULT);
    long currentPosition = 0;
    foreach (BytesRef v in values)
    {
        w.Add(currentPosition);
        if (v != null)
        {
            currentPosition += v.Length;
        }
    }
    // write sentinel
    if (Debugging.AssertsEnabled) Debugging.Assert(currentPosition == maxAddress);
    w.Add(currentPosition);
    w.Finish();
}
/// <summary>
/// Sets the values for the current skip data.
/// </summary>
public virtual void SetSkipData(int doc, bool storePayloads, int payloadLength, bool storeOffsets, int offsetLength)
{
    Debug.Assert(storePayloads || payloadLength == -1);
    Debug.Assert(storeOffsets || offsetLength == -1);
    this.CurDoc = doc;
    this.CurStorePayloads = storePayloads;
    this.CurPayloadLength = payloadLength;
    this.CurStoreOffsets = storeOffsets;
    this.CurOffsetLength = offsetLength;
    this.CurFreqPointer = FreqOutput.GetFilePointer();
    if (ProxOutput != null)
    {
        this.CurProxPointer = ProxOutput.GetFilePointer();
    }
}
/// <summary>
/// Sole constructor. </summary>
public Lucene40StoredFieldsWriter(Directory directory, string segment, IOContext context)
{
    Debug.Assert(directory != null);
    this.directory = directory;
    this.segment = segment;

    bool success = false;
    try
    {
        fieldsStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", FIELDS_EXTENSION), context);
        indexStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", FIELDS_INDEX_EXTENSION), context);

        CodecUtil.WriteHeader(fieldsStream, CODEC_NAME_DAT, VERSION_CURRENT);
        CodecUtil.WriteHeader(indexStream, CODEC_NAME_IDX, VERSION_CURRENT);
        Debug.Assert(HEADER_LENGTH_DAT == fieldsStream.GetFilePointer());
        Debug.Assert(HEADER_LENGTH_IDX == indexStream.GetFilePointer());
        success = true;
    }
    finally
    {
        if (!success)
        {
            Abort();
        }
    }
}
/// <summary>
/// Sole constructor. </summary>
public Lucene40TermVectorsWriter(Directory directory, string segment, IOContext context)
{
    this.directory = directory;
    this.segment = segment;

    bool success = false;
    try
    {
        // Open files for TermVector storage
        tvx = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_INDEX_EXTENSION), context);
        CodecUtil.WriteHeader(tvx, Lucene40TermVectorsReader.CODEC_NAME_INDEX, Lucene40TermVectorsReader.VERSION_CURRENT);
        tvd = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_DOCUMENTS_EXTENSION), context);
        CodecUtil.WriteHeader(tvd, Lucene40TermVectorsReader.CODEC_NAME_DOCS, Lucene40TermVectorsReader.VERSION_CURRENT);
        tvf = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_FIELDS_EXTENSION), context);
        CodecUtil.WriteHeader(tvf, Lucene40TermVectorsReader.CODEC_NAME_FIELDS, Lucene40TermVectorsReader.VERSION_CURRENT);

        Debug.Assert(Lucene40TermVectorsReader.HEADER_LENGTH_INDEX == tvx.GetFilePointer());
        Debug.Assert(Lucene40TermVectorsReader.HEADER_LENGTH_DOCS == tvd.GetFilePointer());
        Debug.Assert(Lucene40TermVectorsReader.HEADER_LENGTH_FIELDS == tvf.GetFilePointer());
        success = true;
    }
    finally
    {
        if (!success)
        {
            Abort();
        }
    }
}
/// <summary>
/// Sole constructor. </summary>
public CompressingTermVectorsWriter(Directory directory, SegmentInfo si, string segmentSuffix, IOContext context, string formatName, CompressionMode compressionMode, int chunkSize)
{
    if (Debugging.AssertsEnabled) Debugging.Assert(directory != null);
    this.directory = directory;
    this.segment = si.Name;
    this.segmentSuffix = segmentSuffix;
    this.compressionMode = compressionMode;
    this.compressor = compressionMode.NewCompressor();
    this.chunkSize = chunkSize;

    numDocs = 0;
    pendingDocs = new LinkedList<DocData>();
    termSuffixes = new GrowableByteArrayDataOutput(ArrayUtil.Oversize(chunkSize, 1));
    payloadBytes = new GrowableByteArrayDataOutput(ArrayUtil.Oversize(1, 1));
    lastTerm = new BytesRef(ArrayUtil.Oversize(30, 1));

    bool success = false;
    IndexOutput indexStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION), context);
    try
    {
        vectorsStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, segmentSuffix, VECTORS_EXTENSION), context);

        string codecNameIdx = formatName + CODEC_SFX_IDX;
        string codecNameDat = formatName + CODEC_SFX_DAT;
        CodecUtil.WriteHeader(indexStream, codecNameIdx, VERSION_CURRENT);
        CodecUtil.WriteHeader(vectorsStream, codecNameDat, VERSION_CURRENT);
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(CodecUtil.HeaderLength(codecNameDat) == vectorsStream.GetFilePointer());
            Debugging.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.GetFilePointer());
        }

        indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
        indexStream = null;

        vectorsStream.WriteVInt32(PackedInt32s.VERSION_CURRENT);
        vectorsStream.WriteVInt32(chunkSize);
        writer = new BlockPackedWriter(vectorsStream, BLOCK_SIZE);

        positionsBuf = new int[1024];
        startOffsetsBuf = new int[1024];
        lengthsBuf = new int[1024];
        payloadLengthsBuf = new int[1024];

        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.DisposeWhileHandlingException(indexStream);
            Abort();
        }
    }
}
public override void ResetSkip()
{
    base.ResetSkip();
    Arrays.Fill(lastSkipDoc, 0);
    Arrays.Fill(lastSkipDocPointer, docOut.GetFilePointer());
    if (fieldHasPositions)
    {
        Arrays.Fill(lastSkipPosPointer, posOut.GetFilePointer());
        if (fieldHasPayloads)
        {
            Arrays.Fill(lastPayloadByteUpto, 0);
        }
        if (fieldHasOffsets || fieldHasPayloads)
        {
            Arrays.Fill(lastSkipPayPointer, payOut.GetFilePointer());
        }
    }
}
public override void StartTerm()
{
    docStartFP = docOut.GetFilePointer();
    if (fieldHasPositions)
    {
        posStartFP = posOut.GetFilePointer();
        if (fieldHasPayloads || fieldHasOffsets)
        {
            payStartFP = payOut.GetFilePointer();
        }
    }
    lastDocID = 0;
    lastBlockDocID = -1;
    // if (DEBUG) {
    //   System.out.println("FPW.startTerm startFP=" + docStartFP);
    // }
    skipWriter.ResetSkip();
}
private void AddVarDerefBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values)
{
    field.PutAttribute(legacyKey, LegacyDocValuesType.BYTES_VAR_DEREF.ToString());

    CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);
    CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);

    // deduplicate
    SortedSet<BytesRef> dictionary = new SortedSet<BytesRef>();
    foreach (BytesRef v in values)
    {
        dictionary.Add(v == null ? new BytesRef() : BytesRef.DeepCopyOf(v));
    }

    /* values */
    long startPosition = data.GetFilePointer();
    long currentAddress = 0;
    Dictionary<BytesRef, long> valueToAddress = new Dictionary<BytesRef, long>();
    foreach (BytesRef v in dictionary)
    {
        currentAddress = data.GetFilePointer() - startPosition;
        valueToAddress[v] = currentAddress;
        WriteVShort(data, v.Length);
        data.WriteBytes(v.Bytes, v.Offset, v.Length);
    }

    /* ordinals */
    long totalBytes = data.GetFilePointer() - startPosition;
    index.WriteInt64(totalBytes);
    int maxDoc = state.SegmentInfo.DocCount;
    PackedInt32s.Writer w = PackedInt32s.GetWriter(index, maxDoc, PackedInt32s.BitsRequired(currentAddress), PackedInt32s.DEFAULT);

    foreach (BytesRef v in values)
    {
        w.Add(valueToAddress[v == null ? new BytesRef() : v]);
    }
    w.Finish();
}
protected override void Dispose(bool disposing)
{
    if (disposing)
    {
        if (m_output != null)
        {
            bool success = false;
            try
            {
                long dirStart = m_output.GetFilePointer();
                int fieldCount = fields.Count;

                int nonNullFieldCount = 0;
                for (int i = 0; i < fieldCount; i++)
                {
                    SimpleFieldWriter field = fields[i];
                    if (field.numIndexTerms > 0)
                    {
                        nonNullFieldCount++;
                    }
                }

                m_output.WriteVInt32(nonNullFieldCount);
                for (int i = 0; i < fieldCount; i++)
                {
                    SimpleFieldWriter field = fields[i];
                    if (field.numIndexTerms > 0)
                    {
                        m_output.WriteVInt32(field.fieldInfo.Number);
                        m_output.WriteVInt32(field.numIndexTerms);
                        m_output.WriteVInt64(field.termsStart);
                        m_output.WriteVInt64(field.indexStart);
                        m_output.WriteVInt64(field.packedIndexStart);
                        m_output.WriteVInt64(field.packedOffsetsStart);
                    }
                }
                WriteTrailer(dirStart);
                CodecUtil.WriteFooter(m_output);
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Dispose(m_output);
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(m_output);
                }
                m_output = null;
            }
        }
    }
}
public override void Finish(FieldInfos fis, int numDocs)
{
    // this is most likely a bug in Sun JRE 1.6.0_04/_05;
    // we detect that the bug has struck, here, and
    // throw an exception to prevent the corruption from
    // entering the index. See LUCENE-1282 for details.
    if (4 + ((long)numDocs) * 16 != tvx.GetFilePointer())
    {
        throw new Exception("tvx size mismatch: mergedDocs is " + numDocs + " but tvx size is " + tvx.GetFilePointer() + " file=" + tvx.ToString() + "; now aborting this merge to prevent index corruption");
    }
}
public override void Finish(FieldInfos fis, int numDocs)
{
    // this is most likely a bug in Sun JRE 1.6.0_04/_05;
    // we detect that the bug has struck, here, and
    // throw an exception to prevent the corruption from
    // entering the index. See LUCENE-1282 for details.
    if (HEADER_LENGTH_IDX + ((long)numDocs) * 8 != indexStream.GetFilePointer())
    {
        throw new Exception("fdx size mismatch: docCount is " + numDocs + " but fdx file size is " + indexStream.GetFilePointer() + " file=" + indexStream.ToString() + "; now aborting this merge to prevent index corruption");
    }
}
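For intuition, here is the arithmetic behind the two size checks above, assuming (as the formulas imply) that the term vectors index stores two 8-byte file pointers per document after a 4-byte header, and the fields index one 8-byte pointer per document after its header; the doc count is only illustrative:

long numDocs = 1000;
long expectedTvxSize = 4 + numDocs * 16;                // header + (tvd pointer + tvf pointer) per doc = 16,004 bytes
long expectedFdxSize = HEADER_LENGTH_IDX + numDocs * 8; // header + one fields pointer per doc
// Any mismatch with GetFilePointer() means an entry was dropped or duplicated,
// which is exactly the corruption the LUCENE-1282 checks guard against.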
public virtual void Test()
{
    BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("test2BPagedBytes"));
    if (dir is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER;
    }
    PagedBytes pb = new PagedBytes(15);
    IndexOutput dataOutput = dir.CreateOutput("foo", IOContext.DEFAULT);
    long netBytes = 0;
    long seed = Random.NextInt64();
    long lastFP = 0;
    Random r2 = new Random((int)seed);
    while (netBytes < 1.1 * int.MaxValue)
    {
        int numBytes = TestUtil.NextInt32(r2, 1, 32768);
        byte[] bytes = new byte[numBytes];
        r2.NextBytes(bytes);
        dataOutput.WriteBytes(bytes, bytes.Length);
        long fp = dataOutput.GetFilePointer();
        if (Debugging.AssertsEnabled) Debugging.Assert(fp == lastFP + numBytes);
        lastFP = fp;
        netBytes += numBytes;
    }
    dataOutput.Dispose();

    IndexInput input = dir.OpenInput("foo", IOContext.DEFAULT);
    pb.Copy(input, input.Length);
    input.Dispose();
    PagedBytes.Reader reader = pb.Freeze(true);

    r2 = new Random((int)seed);
    netBytes = 0;
    while (netBytes < 1.1 * int.MaxValue)
    {
        int numBytes = TestUtil.NextInt32(r2, 1, 32768);
        var bytes = new byte[numBytes];
        r2.NextBytes(bytes);
        BytesRef expected = new BytesRef(bytes);

        BytesRef actual = new BytesRef();
        reader.FillSlice(actual, netBytes, numBytes);
        Assert.AreEqual(expected, actual);

        netBytes += numBytes;
    }
    dir.Dispose();
}
private int numBufferedDocs; // docBase + numBufferedDocs == current doc ID

/// <summary>
/// Sole constructor. </summary>
public CompressingStoredFieldsWriter(Directory directory, SegmentInfo si, string segmentSuffix, IOContext context, string formatName, CompressionMode compressionMode, int chunkSize)
{
    if (Debugging.AssertsEnabled) Debugging.Assert(directory != null);
    this.directory = directory;
    this.segment = si.Name;
    this.segmentSuffix = segmentSuffix;
    this.compressionMode = compressionMode;
    this.compressor = compressionMode.NewCompressor();
    this.chunkSize = chunkSize;
    this.docBase = 0;
    this.bufferedDocs = new GrowableByteArrayDataOutput(chunkSize);
    this.numStoredFields = new int[16];
    this.endOffsets = new int[16];
    this.numBufferedDocs = 0;

    bool success = false;
    IndexOutput indexStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION), context);
    try
    {
        fieldsStream = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_EXTENSION), context);

        string codecNameIdx = formatName + CODEC_SFX_IDX;
        string codecNameDat = formatName + CODEC_SFX_DAT;
        CodecUtil.WriteHeader(indexStream, codecNameIdx, VERSION_CURRENT);
        CodecUtil.WriteHeader(fieldsStream, codecNameDat, VERSION_CURRENT);
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(CodecUtil.HeaderLength(codecNameDat) == fieldsStream.GetFilePointer());
            Debugging.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.GetFilePointer());
        }

        indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
        indexStream = null;

        fieldsStream.WriteVInt32(chunkSize);
        fieldsStream.WriteVInt32(PackedInt32s.VERSION_CURRENT);

        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.DisposeWhileHandlingException(indexStream);
            Abort();
        }
    }
}
public override void StartTerm()
{
    freqStart = freqOut.GetFilePointer();
    //if (DEBUG) System.out.println("SPW: startTerm freqOut.fp=" + freqStart);
    if (proxOut != null)
    {
        proxStart = proxOut.GetFilePointer();
    }
    // force first payload to write its length
    lastPayloadLength = -1;
    // force first offset to write its length
    lastOffsetLength = -1;
    skipListWriter.ResetSkip();
}
/// <summary>Called when we are done adding docs to this term. </summary>
public override void FinishTerm(BlockTermState state)
{
    SepTermState state_ = (SepTermState)state;
    // TODO: -- wasteful we are counting this in two places?
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(state_.DocFreq > 0);
        Debugging.Assert(state_.DocFreq == df);
    }

    state_.DocIndex = docOut.GetIndex();
    state_.DocIndex.CopyFrom(docIndex, false);
    if (indexOptions != IndexOptions.DOCS_ONLY)
    {
        state_.FreqIndex = freqOut.GetIndex();
        state_.FreqIndex.CopyFrom(freqIndex, false);
        if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
        {
            state_.PosIndex = posOut.GetIndex();
            state_.PosIndex.CopyFrom(posIndex, false);
        }
        else
        {
            state_.PosIndex = null;
        }
    }
    else
    {
        state_.FreqIndex = null;
        state_.PosIndex = null;
    }

    if (df >= skipMinimum)
    {
        state_.SkipFP = skipOut.GetFilePointer();
        //System.out.println("  skipFP=" + skipFP);
        skipListWriter.WriteSkip(skipOut);
        //System.out.println("  numBytes=" + (skipOut.getFilePointer()-skipFP));
    }
    else
    {
        state_.SkipFP = -1;
    }

    state_.PayloadFP = payloadStart;

    lastDocID = 0;
    df = 0;
}
public virtual void TestLargeWrites()
{
    IndexOutput os = Dir.CreateOutput("testBufferStart.txt", NewIOContext(Random));

    var largeBuf = new byte[2048];
    // reuse one seeded Random; constructing `new Random(1)` inside the loop
    // would return the same NextDouble() every iteration and fill the buffer
    // with a single repeated value
    var random = new Random(1);
    for (int i = 0; i < largeBuf.Length; i++)
    {
        largeBuf[i] = (byte)unchecked((sbyte)(random.NextDouble() * 256));
    }

    long currentPos = os.GetFilePointer();
    os.WriteBytes(largeBuf, largeBuf.Length);

    try
    {
        Assert.AreEqual(currentPos + largeBuf.Length, os.GetFilePointer());
    }
    finally
    {
        os.Dispose();
    }
}
public override void StartTerm()
{
    docIndex.Mark();

    if (indexOptions != IndexOptions.DOCS_ONLY)
    {
        freqIndex.Mark();
    }

    if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
    {
        posIndex.Mark();
        payloadStart = payloadOut.GetFilePointer();
        lastPayloadLength = -1;
    }

    skipListWriter.ResetSkip(docIndex, freqIndex, posIndex);
}
protected override void Dispose(bool disposing)
{
    if (disposing)
    {
        if (m_output != null)
        {
            try
            {
                long dirStart = m_output.GetFilePointer();
                int fieldCount = fields.Count;

                int nonNullFieldCount = 0;
                for (int i = 0; i < fieldCount; i++)
                {
                    FSTFieldWriter field = fields[i];
                    if (field.fst != null)
                    {
                        nonNullFieldCount++;
                    }
                }

                m_output.WriteVInt32(nonNullFieldCount);
                for (int i = 0; i < fieldCount; i++)
                {
                    FSTFieldWriter field = fields[i];
                    if (field.fst != null)
                    {
                        m_output.WriteVInt32(field.fieldInfo.Number);
                        m_output.WriteVInt64(field.indexStart);
                    }
                }
                WriteTrailer(dirStart);
                CodecUtil.WriteFooter(m_output);
            }
            finally
            {
                m_output.Dispose();
                m_output = null;
            }
        }
    }
}
/// <summary>
/// Writes the buffered skip lists to the given output.
/// </summary>
/// <param name="output"> The <see cref="IndexOutput"/> the skip lists shall be written to. </param>
/// <returns> The pointer at which the skip list starts. </returns>
public virtual long WriteSkip(IndexOutput output)
{
    long skipPointer = output.GetFilePointer();
    //System.out.println("skipper.writeSkip fp=" + skipPointer);
    if (skipBuffer == null || skipBuffer.Length == 0)
    {
        return skipPointer;
    }

    for (int level = m_numberOfSkipLevels - 1; level > 0; level--)
    {
        long length = skipBuffer[level].GetFilePointer();
        if (length > 0)
        {
            output.WriteVInt64(length);
            skipBuffer[level].WriteTo(output);
        }
    }
    skipBuffer[0].WriteTo(output);

    return skipPointer;
}
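Reading WriteSkip() above, the on-disk layout it produces can be summarized as follows (a descriptive sketch inferred from the loop, not normative format documentation):

// [VInt64 len(level N-1)] [level N-1 bytes]
// ...
// [VInt64 len(level 1)]   [level 1 bytes]
//                         [level 0 bytes]   <- written last, no length prefix
// Upper levels carry a length prefix so a reader can seek past them to the
// level below; level 0 comes last, so its extent needs no explicit length.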
// Writes the contents of buffer into the fields stream
// and adds a new entry for this document into the index
// stream. This assumes the buffer was already written
// in the correct fields format.
public override void StartDocument(int numStoredFields)
{
    indexStream.WriteInt64(fieldsStream.GetFilePointer());
    fieldsStream.WriteVInt32(numStoredFields);
}
public override long GetFilePointer()
{
    return io.GetFilePointer();
}

public override long GetFilePointer()
{
    return _cacheDirIndexOutput.GetFilePointer();
}

public override long GetFilePointer()
{
    return @delegate.GetFilePointer();
}

public override long GetFilePointer()
{
    return _indexOutput.GetFilePointer();
}
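The four overrides above (each from a different wrapper class) simply delegate to a wrapped output. A minimal self-contained sketch, assuming only public Lucene.Net 4.8 store APIs (RAMDirectory, IOContext, IndexOutput), of the invariant every snippet in this collection relies on: GetFilePointer() always equals the number of bytes written so far, so recording it before and after a write yields a block's length.

using Lucene.Net.Store;

public static class FilePointerDemo
{
    public static void Main()
    {
        using Directory dir = new RAMDirectory();
        using IndexOutput output = dir.CreateOutput("demo.bin", IOContext.DEFAULT);

        long start = output.GetFilePointer();  // 0 for a fresh file
        output.WriteVInt32(12345);             // variable-length: 2 bytes for this value
        output.WriteInt64(42L);                // fixed-length: 8 bytes
        long written = output.GetFilePointer() - start;

        System.Console.WriteLine($"wrote {written} bytes"); // prints: wrote 10 bytes
    }
}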
internal virtual void AddNumericField(FieldInfo field, IEnumerable<long?> values, bool optimizeStorage)
{
    long count = 0;
    long minValue = long.MaxValue;
    long maxValue = long.MinValue;
    long gcd = 0;
    bool missing = false;
    // TODO: more efficient?
    JCG.HashSet<long> uniqueValues = null;
    if (optimizeStorage)
    {
        uniqueValues = new JCG.HashSet<long>();

        foreach (long? nv in values)
        {
            long v;
            if (nv == null)
            {
                v = 0;
                missing = true;
            }
            else
            {
                v = nv.Value;
            }

            if (gcd != 1)
            {
                if (v < long.MinValue / 2 || v > long.MaxValue / 2)
                {
                    // in that case v - minValue might overflow and make the GCD computation return
                    // wrong results. Since these extreme values are unlikely, we just discard
                    // GCD computation for them
                    gcd = 1;
                }
                else if (count != 0) // minValue needs to be set first
                {
                    gcd = MathUtil.Gcd(gcd, v - minValue);
                }
            }

            minValue = Math.Min(minValue, v);
            maxValue = Math.Max(maxValue, v);

            if (uniqueValues != null)
            {
                if (uniqueValues.Add(v))
                {
                    if (uniqueValues.Count > 256)
                    {
                        uniqueValues = null;
                    }
                }
            }

            ++count;
        }
    }
    else
    {
        foreach (var nv in values)
        {
            ++count;
        }
    }

    long delta = maxValue - minValue;

    int format;
    if (uniqueValues != null && (delta < 0L || PackedInt32s.BitsRequired(uniqueValues.Count - 1) < PackedInt32s.BitsRequired(delta)) && count <= int.MaxValue)
    {
        format = TABLE_COMPRESSED;
    }
    else if (gcd != 0 && gcd != 1)
    {
        format = GCD_COMPRESSED;
    }
    else
    {
        format = DELTA_COMPRESSED;
    }

    meta.WriteVInt32(field.Number);
    meta.WriteByte((byte)Lucene45DocValuesFormat.NUMERIC);
    meta.WriteVInt32(format);
    if (missing)
    {
        meta.WriteInt64(data.GetFilePointer());
        WriteMissingBitset(values);
    }
    else
    {
        meta.WriteInt64(-1L);
    }
    meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
    meta.WriteInt64(data.GetFilePointer());
    meta.WriteVInt64(count);
    meta.WriteVInt32(BLOCK_SIZE);

    switch (format)
    {
        case GCD_COMPRESSED:
            meta.WriteInt64(minValue);
            meta.WriteInt64(gcd);
            BlockPackedWriter quotientWriter = new BlockPackedWriter(data, BLOCK_SIZE);
            foreach (long? nv in values)
            {
                quotientWriter.Add((nv.GetValueOrDefault() - minValue) / gcd);
            }
            quotientWriter.Finish();
            break;

        case DELTA_COMPRESSED:
            BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
            foreach (long? nv in values)
            {
                writer.Add(nv.GetValueOrDefault());
            }
            writer.Finish();
            break;

        case TABLE_COMPRESSED:
            // LUCENENET NOTE: dimensioning an array and then using .CopyTo() for better efficiency than LINQ .ToArray()
            long[] decode = new long[uniqueValues.Count];
            uniqueValues.CopyTo(decode, 0);
            Dictionary<long, int> encode = new Dictionary<long, int>();
            meta.WriteVInt32(decode.Length);
            for (int i = 0; i < decode.Length; i++)
            {
                meta.WriteInt64(decode[i]);
                encode[decode[i]] = i;
            }
            int bitsRequired = PackedInt32s.BitsRequired(uniqueValues.Count - 1);
            PackedInt32s.Writer ordsWriter = PackedInt32s.GetWriterNoHeader(data, PackedInt32s.Format.PACKED, (int)count, bitsRequired, PackedInt32s.DEFAULT_BUFFER_SIZE);
            foreach (long? nv in values)
            {
                ordsWriter.Add(encode[nv.GetValueOrDefault()]);
            }
            ordsWriter.Finish();
            break;

        default:
            throw new InvalidOperationException();
    }
}
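A worked trace (hypothetical doc values, not part of the writer) of the format selection above for the values { 100, 200, 300 }:

// minValue = 100, maxValue = 300, delta = 200 -> PackedInt32s.BitsRequired(200) = 8
// uniqueValues.Count = 3                      -> PackedInt32s.BitsRequired(3 - 1) = 2
// gcd of (v - minValue) deltas                = 100
// TABLE_COMPRESSED is tested first and 2 < 8, so the table format wins here,
// even though GCD_COMPRESSED (minValue = 100, gcd = 100) would also have applied;
// DELTA_COMPRESSED is the fallback when neither optimization pays off.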