private void AddVarSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd)
{
    field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_VAR_SORTED.Name);

    CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
    CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

    /* values */
    long startPos = data.FilePointer;
    int valueCount = 0;
    foreach (BytesRef v in values)
    {
        data.WriteBytes(v.Bytes, v.Offset, v.Length);
        valueCount++;
    }

    /* addresses */
    long maxAddress = data.FilePointer - startPos;
    index.WriteLong(maxAddress);
    Debug.Assert(valueCount != int.MaxValue); // unsupported by the 4.0 impl

    PackedInts.Writer w = PackedInts.GetWriter(index, valueCount + 1, PackedInts.BitsRequired(maxAddress), PackedInts.DEFAULT);
    long currentPosition = 0;
    foreach (BytesRef v in values)
    {
        w.Add(currentPosition);
        currentPosition += v.Length;
    }
    // write sentinel
    Debug.Assert(currentPosition == maxAddress);
    w.Add(currentPosition);
    w.Finish();

    /* ordinals */
    int maxDoc = State.SegmentInfo.DocCount;
    Debug.Assert(valueCount > 0);
    PackedInts.Writer ords = PackedInts.GetWriter(index, maxDoc, PackedInts.BitsRequired(valueCount - 1), PackedInts.DEFAULT);
    foreach (long n in docToOrd)
    {
        ords.Add(n);
    }
    ords.Finish();
}
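Every writer in this section follows the same packed-ints idiom: size the writer for the largest value via PackedInts.BitsRequired, Add exactly the promised number of values, then Finish. A minimal sketch of that pattern, using only the PackedInts calls seen above; the output variable and sample addresses are hypothetical stand-ins:

// Hypothetical stand-ins: 'output' is any IndexOutput, 'addresses' any
// collected longs (e.g. byte offsets, as in the method above).
long[] addresses = { 0, 17, 42, 100 };
long max = addresses[addresses.Length - 1];
// Size the writer so every value fits in BitsRequired(max) bits.
PackedInts.Writer w = PackedInts.GetWriter(output, addresses.Length, PackedInts.BitsRequired(max), PackedInts.DEFAULT);
foreach (long a in addresses)
{
    w.Add(a);
}
w.Finish(); // must follow exactly the declared number of Adds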
private void FlushNumTerms(int totalFields)
{
    int maxNumTerms = 0;
    foreach (DocData dd in PendingDocs)
    {
        foreach (FieldData fd in dd.Fields)
        {
            // bitwise OR yields the same bit width as the true maximum, without a branch
            maxNumTerms |= fd.NumTerms;
        }
    }
    int bitsRequired = PackedInts.BitsRequired(maxNumTerms);
    VectorsStream.WriteVInt(bitsRequired);
    PackedInts.Writer writer = PackedInts.GetWriterNoHeader(VectorsStream, PackedInts.Format.PACKED, totalFields, bitsRequired, 1);
    foreach (DocData dd in PendingDocs)
    {
        foreach (FieldData fd in dd.Fields)
        {
            writer.Add(fd.NumTerms);
        }
    }
    Debug.Assert(writer.Ord() == totalFields - 1);
    writer.Finish();
}
private void AddFixedSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd, int length)
{
    field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_FIXED_SORTED.Name);

    CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);
    CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);

    /* values */
    data.WriteInt(length);
    int valueCount = 0;
    foreach (BytesRef v in values)
    {
        data.WriteBytes(v.Bytes, v.Offset, v.Length);
        valueCount++;
    }

    /* ordinals */
    index.WriteInt(valueCount);
    int maxDoc = State.SegmentInfo.DocCount;
    Debug.Assert(valueCount > 0);
    PackedInts.Writer w = PackedInts.GetWriter(index, maxDoc, PackedInts.BitsRequired(valueCount - 1), PackedInts.DEFAULT);
    foreach (long n in docToOrd)
    {
        w.Add(n);
    }
    w.Finish();
}
private void AddVarIntsField(FieldInfo field, IndexOutput output, IEnumerable<long?> values, long minValue, long maxValue)
{
    field.PutAttribute(LegacyKey, LegacyDocValuesType.VAR_INTS.Name);

    CodecUtil.WriteHeader(output, Lucene40DocValuesFormat.VAR_INTS_CODEC_NAME, Lucene40DocValuesFormat.VAR_INTS_VERSION_CURRENT);

    long delta = maxValue - minValue;
    if (delta < 0)
    {
        // writes longs
        output.WriteByte((byte)Lucene40DocValuesFormat.VAR_INTS_FIXED_64);
        foreach (long? n in values)
        {
            output.WriteLong(n == null ? 0 : n.Value);
        }
    }
    else
    {
        // writes packed ints
        output.WriteByte((byte)Lucene40DocValuesFormat.VAR_INTS_PACKED);
        output.WriteLong(minValue);
        output.WriteLong(0 - minValue); // default value (representation of 0)
        PackedInts.Writer writer = PackedInts.GetWriter(output, State.SegmentInfo.DocCount, PackedInts.BitsRequired(delta), PackedInts.DEFAULT);
        foreach (long? n in values)
        {
            long v = n == null ? 0 : n.Value;
            writer.Add(v - minValue);
        }
        writer.Finish();
    }
}
// NOTE: 4.0 file format docs are crazy/wrong here...
private void AddVarStraightBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values)
{
    field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_VAR_STRAIGHT.Name);

    CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);
    CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);

    /* values */
    long startPos = data.FilePointer;
    foreach (BytesRef v in values)
    {
        if (v != null)
        {
            data.WriteBytes(v.Bytes, v.Offset, v.Length);
        }
    }

    /* addresses */
    long maxAddress = data.FilePointer - startPos;
    index.WriteVLong(maxAddress);

    int maxDoc = State.SegmentInfo.DocCount;
    Debug.Assert(maxDoc != int.MaxValue); // unsupported by the 4.0 impl

    PackedInts.Writer w = PackedInts.GetWriter(index, maxDoc + 1, PackedInts.BitsRequired(maxAddress), PackedInts.DEFAULT);
    long currentPosition = 0;
    foreach (BytesRef v in values)
    {
        w.Add(currentPosition);
        if (v != null)
        {
            currentPosition += v.Length;
        }
    }
    // write sentinel
    Debug.Assert(currentPosition == maxAddress);
    w.Add(currentPosition);
    w.Finish();
}
public override void Finish(long termsFilePointer)
{
    // write primary terms dict offsets
    PackedIndexStart = _fgtiw.Output.FilePointer;

    PackedInts.Writer w = PackedInts.GetWriter(_fgtiw.Output, NumIndexTerms, PackedInts.BitsRequired(termsFilePointer), PackedInts.DEFAULT);

    // relative to our indexStart
    long upto = 0;
    for (int i = 0; i < NumIndexTerms; i++)
    {
        upto += _termsPointerDeltas[i];
        w.Add(upto);
    }
    w.Finish();

    PackedOffsetsStart = _fgtiw.Output.FilePointer;

    // write offsets into the byte[] terms
    w = PackedInts.GetWriter(_fgtiw.Output, 1 + NumIndexTerms, PackedInts.BitsRequired(_totTermLength), PackedInts.DEFAULT);
    upto = 0;
    for (int i = 0; i < NumIndexTerms; i++)
    {
        w.Add(upto);
        upto += _termLengths[i];
    }
    w.Add(upto);
    w.Finish();

    // our referrer holds onto us, while other fields are
    // being written, so don't tie up this RAM:
    _termLengths = null;
    _termsPointerDeltas = null;
}
private void FlushFields(int totalFields, int[] fieldNums)
{
    PackedInts.Writer writer = PackedInts.GetWriterNoHeader(VectorsStream, PackedInts.Format.PACKED, totalFields, PackedInts.BitsRequired(fieldNums.Length - 1), 1);
    foreach (DocData dd in PendingDocs)
    {
        foreach (FieldData fd in dd.Fields)
        {
            int fieldNumIndex = Array.BinarySearch(fieldNums, fd.FieldNum);
            Debug.Assert(fieldNumIndex >= 0);
            writer.Add(fieldNumIndex);
        }
    }
    writer.Finish();
}
private void AddFixedDerefBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, int length)
{
    field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_FIXED_DEREF.Name);

    CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_CURRENT);
    CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_CURRENT);

    // deduplicate
    SortedSet<BytesRef> dictionary = new SortedSet<BytesRef>();
    foreach (BytesRef v in values)
    {
        dictionary.Add(v == null ? new BytesRef() : BytesRef.DeepCopyOf(v));
    }

    /* values */
    data.WriteInt(length);
    foreach (BytesRef v in dictionary)
    {
        data.WriteBytes(v.Bytes, v.Offset, v.Length);
    }

    /* ordinals */
    int valueCount = dictionary.Count;
    Debug.Assert(valueCount > 0);
    index.WriteInt(valueCount);
    int maxDoc = State.SegmentInfo.DocCount;
    PackedInts.Writer w = PackedInts.GetWriter(index, maxDoc, PackedInts.BitsRequired(valueCount - 1), PackedInts.DEFAULT);
    foreach (BytesRef v in values)
    {
        BytesRef brefDummy = v ?? new BytesRef();
        //int ord = dictionary.HeadSet(brefDummy).Size();
        // replaces Java's headSet().size(); note this linear scan costs O(valueCount) per document
        int ord = dictionary.Count(@ref => @ref.CompareTo(brefDummy) < 0);
        w.Add(ord);
    }
    w.Finish();
}
private void AddVarDerefBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values)
{
    field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_VAR_DEREF.Name);

    CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);
    CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);

    // deduplicate
    SortedSet<BytesRef> dictionary = new SortedSet<BytesRef>();
    foreach (BytesRef v in values)
    {
        dictionary.Add(v == null ? new BytesRef() : BytesRef.DeepCopyOf(v));
    }

    /* values */
    long startPosition = data.FilePointer;
    long currentAddress = 0;
    Dictionary<BytesRef, long> valueToAddress = new Dictionary<BytesRef, long>();
    foreach (BytesRef v in dictionary)
    {
        currentAddress = data.FilePointer - startPosition;
        valueToAddress[v] = currentAddress;
        WriteVShort(data, v.Length);
        data.WriteBytes(v.Bytes, v.Offset, v.Length);
    }

    /* ordinals */
    long totalBytes = data.FilePointer - startPosition;
    index.WriteLong(totalBytes);
    int maxDoc = State.SegmentInfo.DocCount;
    PackedInts.Writer w = PackedInts.GetWriter(index, maxDoc, PackedInts.BitsRequired(currentAddress), PackedInts.DEFAULT);
    foreach (BytesRef v in values)
    {
        w.Add(valueToAddress[v == null ? new BytesRef() : v]);
    }
    w.Finish();
}
private static void SaveInts(int[] values, int length, DataOutput @out)
{
    Debug.Assert(length > 0);
    if (length == 1)
    {
        @out.WriteVInt(values[0]);
    }
    else
    {
        bool allEqual = true;
        for (int i = 1; i < length; ++i)
        {
            if (values[i] != values[0])
            {
                allEqual = false;
                break;
            }
        }
        if (allEqual)
        {
            @out.WriteVInt(0);
            @out.WriteVInt(values[0]);
        }
        else
        {
            long max = 0;
            for (int i = 0; i < length; ++i)
            {
                // values are expected to be non-negative; the uint cast avoids sign
                // extension, and OR-ing gives the same bit width as the true maximum
                max |= (uint)values[i];
            }
            int bitsRequired = PackedInts.BitsRequired(max);
            @out.WriteVInt(bitsRequired);
            PackedInts.Writer w = PackedInts.GetWriterNoHeader(@out, PackedInts.Format.PACKED, length, bitsRequired, 1);
            for (int i = 0; i < length; ++i)
            {
                w.Add(values[i]);
            }
            w.Finish();
        }
    }
}
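SaveInts thus picks among three encodings: a single vint for length 1, a 0 marker plus the shared value when all entries are equal, and otherwise a bit width followed by a header-less packed block. A hypothetical illustration of what each branch emits; the input arrays are made up:

// Made-up inputs illustrating SaveInts' three branches:
SaveInts(new[] { 7 }, 1, @out);       // length 1:  just WriteVInt(7)
SaveInts(new[] { 5, 5, 5 }, 3, @out); // all equal: WriteVInt(0), then WriteVInt(5)
SaveInts(new[] { 1, 9, 4 }, 3, @out); // general:   WriteVInt(4) since 9 needs 4 bits,
                                      // then three 4-bit values in a packed block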
/// <summary>
/// Returns a sorted array containing unique field numbers. </summary>
private int[] FlushFieldNums()
{
    SortedSet<int> fieldNums = new SortedSet<int>();
    foreach (DocData dd in PendingDocs)
    {
        foreach (FieldData fd in dd.Fields)
        {
            fieldNums.Add(fd.FieldNum);
        }
    }

    int numDistinctFields = fieldNums.Count;
    Debug.Assert(numDistinctFields > 0);
    int bitsRequired = PackedInts.BitsRequired(fieldNums.Last());
    int token = (Math.Min(numDistinctFields - 1, 0x07) << 5) | bitsRequired;
    VectorsStream.WriteByte((byte)(sbyte)token);
    if (numDistinctFields - 1 >= 0x07)
    {
        VectorsStream.WriteVInt(numDistinctFields - 1 - 0x07);
    }
    PackedInts.Writer writer = PackedInts.GetWriterNoHeader(VectorsStream, PackedInts.Format.PACKED, fieldNums.Count, bitsRequired, 1);
    foreach (int fieldNum in fieldNums)
    {
        writer.Add(fieldNum);
    }
    writer.Finish();

    int[] fns = new int[fieldNums.Count];
    int i = 0;
    foreach (int key in fieldNums)
    {
        fns[i++] = key;
    }
    return fns;
}
internal virtual void AddNumericField(FieldInfo field, IEnumerable<long?> values, bool optimizeStorage)
{
    long count = 0;
    long minValue = long.MaxValue;
    long maxValue = long.MinValue;
    long gcd = 0;
    bool missing = false;
    // TODO: more efficient?
    HashSet<long> uniqueValues = null;
    if (optimizeStorage)
    {
        uniqueValues = new HashSet<long>();

        foreach (long? nv in values)
        {
            long v;
            if (nv == null)
            {
                v = 0;
                missing = true;
            }
            else
            {
                v = nv.Value;
            }

            if (gcd != 1)
            {
                if (v < long.MinValue / 2 || v > long.MaxValue / 2)
                {
                    // in that case v - minValue might overflow and make the GCD computation return
                    // wrong results. Since these extreme values are unlikely, we just discard
                    // GCD computation for them
                    gcd = 1;
                }
                else if (count != 0) // minValue needs to be set first
                {
                    gcd = MathUtil.Gcd(gcd, v - minValue);
                }
            }

            minValue = Math.Min(minValue, v);
            maxValue = Math.Max(maxValue, v);

            if (uniqueValues != null)
            {
                if (uniqueValues.Add(v))
                {
                    if (uniqueValues.Count > 256)
                    {
                        uniqueValues = null;
                    }
                }
            }

            ++count;
        }
    }
    else
    {
        foreach (var nv in values)
        {
            ++count;
        }
    }

    long delta = maxValue - minValue;

    int format;
    if (uniqueValues != null && (delta < 0L || PackedInts.BitsRequired(uniqueValues.Count - 1) < PackedInts.BitsRequired(delta)) && count <= int.MaxValue)
    {
        format = TABLE_COMPRESSED;
    }
    else if (gcd != 0 && gcd != 1)
    {
        format = GCD_COMPRESSED;
    }
    else
    {
        format = DELTA_COMPRESSED;
    }
    Meta.WriteVInt(field.Number);
    Meta.WriteByte((byte)Lucene45DocValuesFormat.NUMERIC);
    Meta.WriteVInt(format);
    if (missing)
    {
        Meta.WriteLong(Data.FilePointer);
        WriteMissingBitset(values);
    }
    else
    {
        Meta.WriteLong(-1L);
    }
    Meta.WriteVInt(PackedInts.VERSION_CURRENT);
    Meta.WriteLong(Data.FilePointer);
    Meta.WriteVLong(count);
    Meta.WriteVInt(BLOCK_SIZE);

    switch (format)
    {
        case GCD_COMPRESSED:
            Meta.WriteLong(minValue);
            Meta.WriteLong(gcd);
            BlockPackedWriter quotientWriter = new BlockPackedWriter(Data, BLOCK_SIZE);
            foreach (long? nv in values)
            {
                long value = nv == null ? 0 : nv.Value;
                quotientWriter.Add((value - minValue) / gcd);
            }
            quotientWriter.Finish();
            break;

        case DELTA_COMPRESSED:
            BlockPackedWriter writer = new BlockPackedWriter(Data, BLOCK_SIZE);
            foreach (long? nv in values)
            {
                writer.Add(nv == null ? 0 : nv.Value);
            }
            writer.Finish();
            break;

        case TABLE_COMPRESSED:
            long[] decode = uniqueValues.ToArray(); // LUCENE TO-DO: Java's toArray took a pre-sized array here
            Dictionary<long, int> encode = new Dictionary<long, int>();
            Meta.WriteVInt(decode.Length);
            for (int i = 0; i < decode.Length; i++)
            {
                Meta.WriteLong(decode[i]);
                encode[decode[i]] = i;
            }
            int bitsRequired = PackedInts.BitsRequired(uniqueValues.Count - 1);
            PackedInts.Writer ordsWriter = PackedInts.GetWriterNoHeader(Data, PackedInts.Format.PACKED, (int)count, bitsRequired, PackedInts.DEFAULT_BUFFER_SIZE);
            foreach (long? nv in values)
            {
                ordsWriter.Add(encode[nv == null ? 0 : nv.Value]);
            }
            ordsWriter.Finish();
            break;

        default:
            throw new InvalidOperationException();
    }
}
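The format choice above can be read off directly from the collected statistics: a small distinct-value set wins when its ordinal width beats the delta width, a shared divisor triggers GCD compression, and everything else falls back to block-packed deltas. A worked example with made-up field values:

// Made-up field values: { 1000, 3000, 5000, 3000 }
// minValue = 1000, maxValue = 5000, delta = 4000 -> BitsRequired(4000) = 12
// uniqueValues = { 1000, 3000, 5000 } -> BitsRequired(3 - 1) = 2 < 12 -> TABLE_COMPRESSED,
// so each doc stores a 2-bit ordinal into the 3-entry decode table.
// Had there been more than 256 distinct values, gcd = Gcd(2000, 4000) = 2000 would
// have selected GCD_COMPRESSED instead, storing (v - 1000) / 2000 per document.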
private void FlushFlags(int totalFields, int[] fieldNums)
{
    // check if fields always have the same flags
    bool nonChangingFlags = true;
    int[] fieldFlags = new int[fieldNums.Length];
    Arrays.Fill(fieldFlags, -1);
    bool breakOuterLoop;
    foreach (DocData dd in PendingDocs)
    {
        breakOuterLoop = false;
        foreach (FieldData fd in dd.Fields)
        {
            int fieldNumOff = Array.BinarySearch(fieldNums, fd.FieldNum);
            Debug.Assert(fieldNumOff >= 0);
            if (fieldFlags[fieldNumOff] == -1)
            {
                fieldFlags[fieldNumOff] = fd.Flags;
            }
            else if (fieldFlags[fieldNumOff] != fd.Flags)
            {
                nonChangingFlags = false;
                breakOuterLoop = true; // the flag pair stands in for Java's labeled break
                break;
            }
        }
        if (breakOuterLoop)
        {
            break;
        }
    }

    if (nonChangingFlags)
    {
        // write one flag per field num
        VectorsStream.WriteVInt(0);
        PackedInts.Writer writer = PackedInts.GetWriterNoHeader(VectorsStream, PackedInts.Format.PACKED, fieldFlags.Length, FLAGS_BITS, 1);
        foreach (int flags in fieldFlags)
        {
            Debug.Assert(flags >= 0);
            writer.Add(flags);
        }
        Debug.Assert(writer.Ord() == fieldFlags.Length - 1);
        writer.Finish();
    }
    else
    {
        // write one flag for every field instance
        VectorsStream.WriteVInt(1);
        PackedInts.Writer writer = PackedInts.GetWriterNoHeader(VectorsStream, PackedInts.Format.PACKED, totalFields, FLAGS_BITS, 1);
        foreach (DocData dd in PendingDocs)
        {
            foreach (FieldData fd in dd.Fields)
            {
                writer.Add(fd.Flags);
            }
        }
        Debug.Assert(writer.Ord() == totalFields - 1);
        writer.Finish();
    }
}
internal virtual void AddNumericField(FieldInfo field, IEnumerable<long?> values, bool optimizeStorage)
{
    Meta.WriteVInt(field.Number);
    Meta.WriteByte(Lucene42DocValuesProducer.NUMBER);
    Meta.WriteLong(Data.FilePointer);

    long minValue = long.MaxValue;
    long maxValue = long.MinValue;
    long gcd = 0;
    // TODO: more efficient?
    HashSet<long> uniqueValues = null;
    if (optimizeStorage)
    {
        uniqueValues = new HashSet<long>();

        long count = 0;
        foreach (long? nv in values)
        {
            // TODO: support this as MemoryDVFormat (and be smart about missing maybe)
            long v = nv == null ? 0 : nv.Value;

            if (gcd != 1)
            {
                if (v < long.MinValue / 2 || v > long.MaxValue / 2)
                {
                    // in that case v - minValue might overflow and make the GCD computation return
                    // wrong results. Since these extreme values are unlikely, we just discard
                    // GCD computation for them
                    gcd = 1;
                }
                else if (count != 0) // minValue needs to be set first
                {
                    gcd = MathUtil.Gcd(gcd, v - minValue);
                }
            }

            minValue = Math.Min(minValue, v);
            maxValue = Math.Max(maxValue, v);

            if (uniqueValues != null)
            {
                if (uniqueValues.Add(v))
                {
                    if (uniqueValues.Count > 256)
                    {
                        uniqueValues = null;
                    }
                }
            }

            ++count;
        }
        Debug.Assert(count == MaxDoc);
    }

    if (uniqueValues != null)
    {
        // small number of unique values
        int bitsPerValue = PackedInts.BitsRequired(uniqueValues.Count - 1);
        FormatAndBits formatAndBits = PackedInts.FastestFormatAndBits(MaxDoc, bitsPerValue, AcceptableOverheadRatio);
        if (formatAndBits.bitsPerValue == 8 && minValue >= sbyte.MinValue && maxValue <= sbyte.MaxValue)
        {
            Meta.WriteByte(Lucene42DocValuesProducer.UNCOMPRESSED); // uncompressed
            foreach (long? nv in values)
            {
                Data.WriteByte(nv == null ? (byte)0 : (byte)(sbyte)nv.Value);
            }
        }
        else
        {
            Meta.WriteByte(Lucene42DocValuesProducer.TABLE_COMPRESSED); // table-compressed
            long[] decode = uniqueValues.ToArray();
            Dictionary<long, int> encode = new Dictionary<long, int>();
            Data.WriteVInt(decode.Length);
            for (int i = 0; i < decode.Length; i++)
            {
                Data.WriteLong(decode[i]);
                encode[decode[i]] = i;
            }

            Meta.WriteVInt(PackedInts.VERSION_CURRENT);
            Data.WriteVInt(formatAndBits.format.id);
            Data.WriteVInt(formatAndBits.bitsPerValue);

            PackedInts.Writer writer = PackedInts.GetWriterNoHeader(Data, formatAndBits.format, MaxDoc, formatAndBits.bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
            foreach (long? nv in values)
            {
                writer.Add(encode[nv == null ? 0 : nv.Value]);
            }
            writer.Finish();
        }
    }
    else if (gcd != 0 && gcd != 1)
    {
        Meta.WriteByte(Lucene42DocValuesProducer.GCD_COMPRESSED);
        Meta.WriteVInt(PackedInts.VERSION_CURRENT);
        Data.WriteLong(minValue);
        Data.WriteLong(gcd);
        Data.WriteVInt(Lucene42DocValuesProducer.BLOCK_SIZE);

        BlockPackedWriter writer = new BlockPackedWriter(Data, Lucene42DocValuesProducer.BLOCK_SIZE);
        foreach (long? nv in values)
        {
            long value = nv == null ? 0 : nv.Value;
            writer.Add((value - minValue) / gcd);
        }
        writer.Finish();
    }
    else
    {
        Meta.WriteByte(Lucene42DocValuesProducer.DELTA_COMPRESSED); // delta-compressed
        Meta.WriteVInt(PackedInts.VERSION_CURRENT);
        Data.WriteVInt(Lucene42DocValuesProducer.BLOCK_SIZE);

        BlockPackedWriter writer = new BlockPackedWriter(Data, Lucene42DocValuesProducer.BLOCK_SIZE);
        foreach (long? nv in values)
        {
            writer.Add(nv == null ? 0 : nv.Value);
        }
        writer.Finish();
    }
}
public override void AddNumericField(FieldInfo field, IEnumerable<long?> values)
{
    Meta.WriteVInt(field.Number);
    Meta.WriteByte((byte)NUMBER);
    Meta.WriteLong(Data.FilePointer);

    long minValue = long.MaxValue;
    long maxValue = long.MinValue;
    long gcd = 0;
    // TODO: more efficient?
    HashSet<long> uniqueValues = null;
    if (true) // storage optimization is unconditional in this consumer
    {
        uniqueValues = new HashSet<long>();

        long count = 0;
        foreach (long? nv in values)
        {
            Debug.Assert(nv != null);
            long v = nv.Value;

            if (gcd != 1)
            {
                if (v < long.MinValue / 2 || v > long.MaxValue / 2)
                {
                    // in that case v - minValue might overflow and make the GCD computation return
                    // wrong results. Since these extreme values are unlikely, we just discard
                    // GCD computation for them
                    gcd = 1;
                }
                else if (count != 0) // minValue needs to be set first
                {
                    gcd = MathUtil.Gcd(gcd, v - minValue);
                }
            }

            minValue = Math.Min(minValue, v);
            maxValue = Math.Max(maxValue, v);

            if (uniqueValues != null)
            {
                if (uniqueValues.Add(v))
                {
                    if (uniqueValues.Count > 256)
                    {
                        uniqueValues = null;
                    }
                }
            }

            ++count;
        }
        Debug.Assert(count == MaxDoc);
    }

    if (uniqueValues != null)
    {
        // small number of unique values
        int bitsPerValue = PackedInts.BitsRequired(uniqueValues.Count - 1);
        FormatAndBits formatAndBits = PackedInts.FastestFormatAndBits(MaxDoc, bitsPerValue, AcceptableOverheadRatio);
        if (formatAndBits.bitsPerValue == 8 && minValue >= sbyte.MinValue && maxValue <= sbyte.MaxValue)
        {
            Meta.WriteByte((byte)UNCOMPRESSED); // uncompressed
            foreach (long? nv in values)
            {
                Data.WriteByte(nv == null ? (byte)0 : (byte)(sbyte)nv.Value);
            }
        }
        else
        {
            Meta.WriteByte((byte)TABLE_COMPRESSED); // table-compressed
            var decode = uniqueValues.ToArray(); // LUCENE TO-DO: Java's toArray took a pre-sized array here
            var encode = new Dictionary<long, int>();
            Data.WriteVInt(decode.Length);
            for (int i = 0; i < decode.Length; i++)
            {
                Data.WriteLong(decode[i]);
                encode[decode[i]] = i;
            }

            Meta.WriteVInt(PackedInts.VERSION_CURRENT);
            Data.WriteVInt(formatAndBits.format.id);
            Data.WriteVInt(formatAndBits.bitsPerValue);

            PackedInts.Writer writer = PackedInts.GetWriterNoHeader(Data, formatAndBits.format, MaxDoc, formatAndBits.bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
            foreach (long? nv in values)
            {
                writer.Add(encode[nv == null ? 0 : nv.Value]);
            }
            writer.Finish();
        }
    }
    else if (gcd != 0 && gcd != 1)
    {
        Meta.WriteByte((byte)GCD_COMPRESSED);
        Meta.WriteVInt(PackedInts.VERSION_CURRENT);
        Data.WriteLong(minValue);
        Data.WriteLong(gcd);
        Data.WriteVInt(BLOCK_SIZE);

        var writer = new BlockPackedWriter(Data, BLOCK_SIZE);
        foreach (long? nv in values)
        {
            long value = nv == null ? 0 : nv.Value;
            writer.Add((value - minValue) / gcd);
        }
        writer.Finish();
    }
    else
    {
        Meta.WriteByte((byte)DELTA_COMPRESSED); // delta-compressed
        Meta.WriteVInt(PackedInts.VERSION_CURRENT);
        Data.WriteVInt(BLOCK_SIZE);

        var writer = new BlockPackedWriter(Data, BLOCK_SIZE);
        foreach (long? nv in values)
        {
            writer.Add(nv == null ? 0 : nv.Value);
        }
        writer.Finish();
    }
}
private void WriteBlock()
{
    Debug.Assert(BlockChunks > 0);
    FieldsIndexOut.WriteVInt(BlockChunks);

    // The trick here is that we only store the difference from the average start
    // pointer or doc base, this helps save bits per value.
    // And in order to prevent a few chunks that would be far from the average to
    // raise the number of bits per value for all of them, we only encode blocks
    // of 1024 chunks at once
    // See LUCENE-4512

    // doc bases
    int avgChunkDocs;
    if (BlockChunks == 1)
    {
        avgChunkDocs = 0;
    }
    else
    {
        avgChunkDocs = (int)Math.Round((float)(BlockDocs - DocBaseDeltas[BlockChunks - 1]) / (BlockChunks - 1));
    }
    FieldsIndexOut.WriteVInt(TotalDocs - BlockDocs); // docBase
    FieldsIndexOut.WriteVInt(avgChunkDocs);
    int docBase = 0;
    long maxDelta = 0;
    for (int i = 0; i < BlockChunks; ++i)
    {
        int delta = docBase - avgChunkDocs * i;
        maxDelta |= MoveSignToLowOrderBit(delta);
        docBase += DocBaseDeltas[i];
    }

    int bitsPerDocBase = PackedInts.BitsRequired(maxDelta);
    FieldsIndexOut.WriteVInt(bitsPerDocBase);
    PackedInts.Writer writer = PackedInts.GetWriterNoHeader(FieldsIndexOut, PackedInts.Format.PACKED, BlockChunks, bitsPerDocBase, 1);
    docBase = 0;
    for (int i = 0; i < BlockChunks; ++i)
    {
        long delta = docBase - avgChunkDocs * i;
        Debug.Assert(PackedInts.BitsRequired(MoveSignToLowOrderBit(delta)) <= writer.BitsPerValue());
        writer.Add(MoveSignToLowOrderBit(delta));
        docBase += DocBaseDeltas[i];
    }
    writer.Finish();

    // start pointers
    FieldsIndexOut.WriteVLong(FirstStartPointer);
    long avgChunkSize;
    if (BlockChunks == 1)
    {
        avgChunkSize = 0;
    }
    else
    {
        avgChunkSize = (MaxStartPointer - FirstStartPointer) / (BlockChunks - 1);
    }
    FieldsIndexOut.WriteVLong(avgChunkSize);
    long startPointer = 0;
    maxDelta = 0;
    for (int i = 0; i < BlockChunks; ++i)
    {
        startPointer += StartPointerDeltas[i];
        long delta = startPointer - avgChunkSize * i;
        maxDelta |= MoveSignToLowOrderBit(delta);
    }

    int bitsPerStartPointer = PackedInts.BitsRequired(maxDelta);
    FieldsIndexOut.WriteVInt(bitsPerStartPointer);
    writer = PackedInts.GetWriterNoHeader(FieldsIndexOut, PackedInts.Format.PACKED, BlockChunks, bitsPerStartPointer, 1);
    startPointer = 0;
    for (int i = 0; i < BlockChunks; ++i)
    {
        startPointer += StartPointerDeltas[i];
        long delta = startPointer - avgChunkSize * i;
        Debug.Assert(PackedInts.BitsRequired(MoveSignToLowOrderBit(delta)) <= writer.BitsPerValue());
        writer.Add(MoveSignToLowOrderBit(delta));
    }
    writer.Finish();
}
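WriteBlock stores each chunk's doc base and start pointer as a signed delta from a per-block average, which only pays off because MoveSignToLowOrderBit folds the sign into the low bit so small negative deltas stay small when packed. A sketch of that transform, under the assumption that the helper (whose body is not shown above) is the standard zig-zag mapping:

// Standard zig-zag mapping: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
// Values near zero, positive or negative, then need few packed bits,
// and OR-ing the mapped deltas yields a tight bits-per-value bound.
private static long MoveSignToLowOrderBit(long n)
{
    return (n >> 63) ^ (n << 1);
}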