private void AddFixedSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd, int length)
{
    field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_FIXED_SORTED.Name);

    CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);
    CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);

    /* values */
    data.WriteInt(length);
    int valueCount = 0;
    foreach (BytesRef v in values)
    {
        data.WriteBytes(v.Bytes, v.Offset, v.Length);
        valueCount++;
    }

    /* ordinals */
    index.WriteInt(valueCount);
    int maxDoc = State.SegmentInfo.DocCount;
    Debug.Assert(valueCount > 0);
    PackedInts.Writer w = PackedInts.GetWriter(index, maxDoc, PackedInts.BitsRequired(valueCount - 1), PackedInts.DEFAULT);
    foreach (long n in docToOrd)
    {
        w.Add(n);
    }
    w.Finish();
}
internal virtual LongValues GetNumeric(NumericEntry entry)
{
    IndexInput data = (IndexInput)this.Data.Clone();
    data.Seek(entry.Offset);

    switch (entry.Format)
    {
        case Lucene45DocValuesConsumer.DELTA_COMPRESSED:
            BlockPackedReader reader = new BlockPackedReader(data, entry.PackedIntsVersion, entry.BlockSize, entry.Count, true);
            return reader;

        case Lucene45DocValuesConsumer.GCD_COMPRESSED:
            long min = entry.MinValue;
            long mult = entry.Gcd;
            BlockPackedReader quotientReader = new BlockPackedReader(data, entry.PackedIntsVersion, entry.BlockSize, entry.Count, true);
            return new LongValuesAnonymousInnerClassHelper(this, min, mult, quotientReader);

        case Lucene45DocValuesConsumer.TABLE_COMPRESSED:
            long[] table = entry.Table;
            int bitsRequired = PackedInts.BitsRequired(table.Length - 1);
            PackedInts.Reader ords = PackedInts.GetDirectReaderNoHeader(data, PackedInts.Format.PACKED, entry.PackedIntsVersion, (int)entry.Count, bitsRequired);
            return new LongValuesAnonymousInnerClassHelper2(this, table, ords);

        default:
            throw new InvalidOperationException("unknown numeric format: " + entry.Format);
    }
}
protected internal override void Flush()
{
    Debug.Assert(Off > 0);

    // TODO: perform a true linear regression?
    long min = Values[0];
    float avg = Off == 1 ? 0f : (float)(Values[Off - 1] - min) / (Off - 1);

    long maxZigZagDelta = 0;
    for (int i = 0; i < Off; ++i)
    {
        Values[i] = ZigZagEncode(Values[i] - min - (long)(avg * i));
        maxZigZagDelta = Math.Max(maxZigZagDelta, Values[i]);
    }

    @out.WriteVLong(min);
    @out.WriteInt(Number.FloatToIntBits(avg));
    if (maxZigZagDelta == 0)
    {
        @out.WriteVInt(0);
    }
    else
    {
        int bitsRequired = PackedInts.BitsRequired(maxZigZagDelta);
        @out.WriteVInt(bitsRequired);
        WriteValues(bitsRequired);
    }

    Off = 0;
}
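The flush above fits a line `min + avg * i` through the block and zig-zag encodes the residuals so they become small non-negative numbers before packing. A minimal standalone sketch of the zig-zag pair, assuming the usual Lucene/protobuf definition (the real helper lives in BitUtil in Lucene.NET):

using System;

// Zig-zag interleaves signs: ..., -2, -1, 0, 1, 2, ... maps to ..., 3, 1, 0, 2, 4, ...
// so residuals near zero need few bits regardless of sign.
static long ZigZagEncode(long l) => (l >> 63) ^ (l << 1);          // arithmetic shift smears the sign bit
static long ZigZagDecode(long l) => (long)((ulong)l >> 1) ^ -(l & 1); // exact inverse

Console.WriteLine(ZigZagEncode(-1));                       // 1
Console.WriteLine(ZigZagDecode(ZigZagEncode(-42)) == -42); // True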
public override void Encode(int[] values, int valuesOffset, byte[] blocks, int blocksOffset, int iterations)
{
    int nextBlock = 0;
    int bitsLeft = 8;
    for (int i = 0; i < ByteValueCount_Renamed * iterations; ++i)
    {
        int v = values[valuesOffset++];
        Debug.Assert(PackedInts.BitsRequired(v & 0xFFFFFFFFL) <= BitsPerValue);
        if (BitsPerValue < bitsLeft)
        {
            // just buffer
            nextBlock |= v << (bitsLeft - BitsPerValue);
            bitsLeft -= BitsPerValue;
        }
        else
        {
            // flush as many blocks as possible
            int bits = BitsPerValue - bitsLeft;
            blocks[blocksOffset++] = (byte)(nextBlock | ((int)((uint)v >> bits)));
            while (bits >= 8)
            {
                bits -= 8;
                blocks[blocksOffset++] = (byte)((int)((uint)v >> bits));
            }
            // then buffer
            bitsLeft = 8 - bits;
            nextBlock = (v & ((1 << bits) - 1)) << bitsLeft;
        }
    }
    Debug.Assert(bitsLeft == 8);
}
// [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
public virtual void TestDateCompression()
{
    Directory dir = new RAMDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    IndexWriter iwriter = new IndexWriter(dir, iwc);

    const long @base = 13; // prime
    long day = 1000L * 60 * 60 * 24;

    Document doc = new Document();
    NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
    doc.Add(dvf);
    for (int i = 0; i < 300; ++i)
    {
        dvf.LongValue = @base + Random().Next(1000) * day;
        iwriter.AddDocument(doc);
    }
    iwriter.ForceMerge(1);
    long size1 = DirSize(dir);

    for (int i = 0; i < 50; ++i)
    {
        dvf.LongValue = @base + Random().Next(1000) * day;
        iwriter.AddDocument(doc);
    }
    iwriter.ForceMerge(1);
    long size2 = DirSize(dir);

    // make sure the new longs cost less than if they had only been packed
    Assert.IsTrue(size2 < size1 + (PackedInts.BitsRequired(day) * 50) / 8);
}
internal override void PackPendingValues()
{
    // compute max delta
    long minValue = Pending[0];
    long maxValue = Pending[0];
    for (int i = 1; i < PendingOff; ++i)
    {
        minValue = Math.Min(minValue, Pending[i]);
        maxValue = Math.Max(maxValue, Pending[i]);
    }
    long delta = maxValue - minValue;

    MinValues[ValuesOff] = minValue;
    if (delta == 0)
    {
        Values[ValuesOff] = new PackedInts.NullReader(PendingOff);
    }
    else
    {
        // build a new packed reader
        int bitsRequired = delta < 0 ? 64 : PackedInts.BitsRequired(delta);
        for (int i = 0; i < PendingOff; ++i)
        {
            Pending[i] -= minValue;
        }
        PackedInts.Mutable mutable = PackedInts.GetMutable(PendingOff, bitsRequired, AcceptableOverheadRatio);
        for (int i = 0; i < PendingOff;)
        {
            i += mutable.Set(i, Pending, i, PendingOff - i);
        }
        Values[ValuesOff] = mutable;
    }
}
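A standalone illustration of the min-subtract step above: large absolute values that sit close together pack into very few bits once the page minimum is removed. This sketch uses .NET's BitOperations directly in place of PackedInts.BitsRequired (same result for non-negative inputs):

using System;
using System.Numerics;

long[] pending = { 1_000_000_000L, 1_000_000_007L, 1_000_000_003L };
long min = long.MaxValue, max = long.MinValue;
foreach (long v in pending)
{
    min = Math.Min(min, v);
    max = Math.Max(max, v);
}

Console.WriteLine(64 - BitOperations.LeadingZeroCount((ulong)max));         // 30 bits per value raw
Console.WriteLine(64 - BitOperations.LeadingZeroCount((ulong)(max - min))); // 3 bits after subtracting min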
private void FlushNumTerms(int totalFields)
{
    int maxNumTerms = 0;
    foreach (DocData dd in PendingDocs)
    {
        foreach (FieldData fd in dd.Fields)
        {
            maxNumTerms |= fd.NumTerms;
        }
    }
    int bitsRequired = PackedInts.BitsRequired(maxNumTerms);
    VectorsStream.WriteVInt(bitsRequired);
    PackedInts.Writer writer = PackedInts.GetWriterNoHeader(VectorsStream, PackedInts.Format.PACKED, totalFields, bitsRequired, 1);
    foreach (DocData dd in PendingDocs)
    {
        foreach (FieldData fd in dd.Fields)
        {
            writer.Add(fd.NumTerms);
        }
    }
    Debug.Assert(writer.Ord() == totalFields - 1);
    writer.Finish();
}
public override void Fill(int fromIndex, int toIndex, long val)
{
    Debug.Assert(PackedInts.BitsRequired(val) <= BitsPerValue);
    Debug.Assert(fromIndex <= toIndex);

    // minimum number of values that use an exact number of full blocks
    int nAlignedValues = 64 / Gcd(64, bitsPerValue);
    int span = toIndex - fromIndex;
    if (span <= 3 * nAlignedValues)
    {
        // there need to be at least 2 * nAlignedValues aligned values for the
        // block approach to be worth trying
        base.Fill(fromIndex, toIndex, val);
        return;
    }

    // fill the first values naively until the next block start
    int fromIndexModNAlignedValues = fromIndex % nAlignedValues;
    if (fromIndexModNAlignedValues != 0)
    {
        for (int i = fromIndexModNAlignedValues; i < nAlignedValues; ++i)
        {
            Set(fromIndex++, val);
        }
    }
    Debug.Assert(fromIndex % nAlignedValues == 0);

    // compute the long[] blocks for nAlignedValues consecutive values and
    // use them to set as many values as possible without applying any mask
    // or shift
    int nAlignedBlocks = (nAlignedValues * bitsPerValue) >> 6;
    long[] nAlignedValuesBlocks;
    {
        Packed64 values = new Packed64(nAlignedValues, bitsPerValue);
        for (int i = 0; i < nAlignedValues; ++i)
        {
            values.Set(i, val);
        }
        nAlignedValuesBlocks = values.Blocks;
        Debug.Assert(nAlignedBlocks <= nAlignedValuesBlocks.Length);
    }
    int startBlock = (int)((ulong)((long)fromIndex * bitsPerValue) >> 6);
    int endBlock = (int)((ulong)((long)toIndex * bitsPerValue) >> 6);
    for (int block = startBlock; block < endBlock; ++block)
    {
        long blockValue = nAlignedValuesBlocks[block % nAlignedBlocks];
        Blocks[block] = blockValue;
    }

    // fill the gap
    for (int i = (int)(((long)endBlock << 6) / bitsPerValue); i < toIndex; ++i)
    {
        Set(i, val);
    }
}
public NumericDocValuesFieldUpdates(string field, int maxDoc)
    : base(field, Type_e.NUMERIC)
{
    DocsWithField = new FixedBitSet(64);
    Docs = new PagedMutable(1, 1024, PackedInts.BitsRequired(maxDoc - 1), PackedInts.COMPACT);
    Values = new PagedGrowableWriter(1, 1024, 1, PackedInts.FAST);
    Size = 0;
}
public BinaryDocValuesFieldUpdates(string field, int maxDoc)
    : base(field, Type_e.BINARY)
{
    DocsWithField = new FixedBitSet(64);
    Docs = new PagedMutable(1, 1024, PackedInts.BitsRequired(maxDoc - 1), PackedInts.COMPACT);
    Offsets = new PagedGrowableWriter(1, 1024, 1, PackedInts.FAST);
    Lengths = new PagedGrowableWriter(1, 1024, 1, PackedInts.FAST);
    Values = new BytesRef(16); // start small
    Size = 0;
}
/// <summary>
/// Compress <code>bytes[off:off+len]</code> into <code>out</code> using
/// at most 16KB of memory. <code>ht</code> shouldn't be shared across threads
/// but can safely be reused.
/// </summary>
public static void Compress(byte[] bytes, int off, int len, DataOutput @out, HashTable ht)
{
    int @base = off;
    int end = off + len;

    int anchor = off++;

    if (len > LAST_LITERALS + MIN_MATCH)
    {
        int limit = end - LAST_LITERALS;
        int matchLimit = limit - MIN_MATCH;
        ht.Reset(len);
        int hashLog = ht.HashLog;
        PackedInts.Mutable hashTable = ht.hashTable;

        while (off <= limit)
        {
            // find a match
            int @ref;
            while (true)
            {
                if (off >= matchLimit)
                {
                    goto mainBreak;
                }
                int v = ReadInt(bytes, off);
                int h = Hash(v, hashLog);
                @ref = @base + (int)hashTable.Get(h);
                Debug.Assert(PackedInts.BitsRequired(off - @base) <= hashTable.BitsPerValue);
                hashTable.Set(h, off - @base);
                if (off - @ref < MAX_DISTANCE && ReadInt(bytes, @ref) == v)
                {
                    break;
                }
                ++off;
            }

            // compute match length
            int matchLen = MIN_MATCH + CommonBytes(bytes, @ref + MIN_MATCH, off + MIN_MATCH, limit);

            EncodeSequence(bytes, anchor, @ref, off, matchLen, @out);
            off += matchLen;
            anchor = off;
        }
        mainBreak: ;
    }

    // last literals
    int literalLen = end - anchor;
    Debug.Assert(literalLen >= LAST_LITERALS || literalLen == len);
    EncodeLastLiterals(bytes, anchor, end - anchor, @out);
}
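The inner loop above is LZ4's standard match finder: hash four input bytes, look up the last position that hashed to the same slot, and accept it as a match if it is near enough and the four bytes agree. A sketch of the multiplicative hash follows; the constant is the classic LZ4 one (2654435761, i.e. unchecked((int)2654435761) == -1640531535), and treating it as the exact one used by this port is an assumption:

// Hypothetical sketch of the Hash(v, hashLog) helper: multiply by a large odd
// constant and keep the top hashLog bits. Integer overflow wrapping is intended.
static int Hash(int v, int hashLog)
{
    return (int)((uint)unchecked(v * -1640531535) >> (32 - hashLog));
}

System.Console.WriteLine(Hash(0x04030201, 12)); // a 12-bit slot index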
/// <summary>
/// Compute the number of bits required to serialize any of the values in
/// <code>data</code>.
/// </summary>
private static int BitsRequired(int[] data)
{
    long or = 0;
    for (int i = 0; i < Lucene41PostingsFormat.BLOCK_SIZE; ++i)
    {
        Debug.Assert(data[i] >= 0);
        or |= (uint)data[i];
    }
    return PackedInts.BitsRequired(or);
}
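The OR accumulation works because BitsRequired depends only on the highest set bit, so the OR of all (non-negative) values needs exactly as many bits as the widest value. A standalone stand-in, assuming Lucene's definition of PackedInts.BitsRequired as max(1, 64 - numberOfLeadingZeros):

using System;
using System.Numerics;

static int BitsRequired(long maxValue) => Math.Max(1, 64 - BitOperations.LeadingZeroCount((ulong)maxValue));

long or = 0;
foreach (int v in new[] { 3, 17, 255, 64 })
{
    or |= (uint)v;
}
Console.WriteLine(BitsRequired(or)); // 8 -- the same as the max over the individual values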
private void AddVarSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd)
{
    field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_VAR_SORTED.Name);

    CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
    CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

    /* values */
    long startPos = data.FilePointer;
    int valueCount = 0;
    foreach (BytesRef v in values)
    {
        data.WriteBytes(v.Bytes, v.Offset, v.Length);
        valueCount++;
    }

    /* addresses */
    long maxAddress = data.FilePointer - startPos;
    index.WriteLong(maxAddress);
    Debug.Assert(valueCount != int.MaxValue); // unsupported by the 4.0 impl

    PackedInts.Writer w = PackedInts.GetWriter(index, valueCount + 1, PackedInts.BitsRequired(maxAddress), PackedInts.DEFAULT);
    long currentPosition = 0;
    foreach (BytesRef v in values)
    {
        w.Add(currentPosition);
        currentPosition += v.Length;
    }
    // write sentinel
    Debug.Assert(currentPosition == maxAddress);
    w.Add(currentPosition);
    w.Finish();

    /* ordinals */
    int maxDoc = State.SegmentInfo.DocCount;
    Debug.Assert(valueCount > 0);
    PackedInts.Writer ords = PackedInts.GetWriter(index, maxDoc, PackedInts.BitsRequired(valueCount - 1), PackedInts.DEFAULT);
    foreach (long n in docToOrd)
    {
        ords.Add(n);
    }
    ords.Finish();
}
public override void AddNumericField(FieldInfo field, IEnumerable<long?> values)
{
    // examine the values to determine best type to use
    long minValue = long.MaxValue;
    long maxValue = long.MinValue;
    foreach (long? n in values)
    {
        long v = n == null ? 0 : (long)n;
        minValue = Math.Min(minValue, v);
        maxValue = Math.Max(maxValue, v);
    }

    string fileName = IndexFileNames.SegmentFileName(State.SegmentInfo.Name + "_" + Convert.ToString(field.Number), SegmentSuffix, "dat");
    IndexOutput data = Dir.CreateOutput(fileName, State.Context);
    bool success = false;
    try
    {
        if (minValue >= sbyte.MinValue && maxValue <= sbyte.MaxValue && PackedInts.BitsRequired(maxValue - minValue) > 4)
        {
            // fits in a byte[], would be more than 4bpv, just write byte[]
            AddBytesField(field, data, values);
        }
        else if (minValue >= short.MinValue && maxValue <= short.MaxValue && PackedInts.BitsRequired(maxValue - minValue) > 8)
        {
            // fits in a short[], would be more than 8bpv, just write short[]
            AddShortsField(field, data, values);
        }
        else if (minValue >= int.MinValue && maxValue <= int.MaxValue && PackedInts.BitsRequired(maxValue - minValue) > 16)
        {
            // fits in an int[], would be more than 16bpv, just write int[]
            AddIntsField(field, data, values);
        }
        else
        {
            AddVarIntsField(field, data, values, minValue, maxValue);
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Close(data);
        }
        else
        {
            IOUtils.CloseWhileHandlingException(data);
        }
    }
}
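The thresholds above trade packing density against simpler fixed-width arrays: packing only pays when it needs strictly fewer bits than the smallest primitive that can hold the range. A quick boundary check, repeating the one-line BitsRequired stand-in from the earlier sketch (values assumed to fit in sbyte in both cases):

using System;
using System.Numerics;

static int BitsRequired(long v) => Math.Max(1, 64 - BitOperations.LeadingZeroCount((ulong)v));

Console.WriteLine(BitsRequired(200)); // 8 -> "> 4" holds, so the plain byte[] branch fires
Console.WriteLine(BitsRequired(10));  // 4 -> not > 4, falls through to AddVarIntsField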
internal void Reset(int len)
{
    int bitsPerOffset = PackedInts.BitsRequired(len - LAST_LITERALS);
    int bitsPerOffsetLog = 32 - Number.NumberOfLeadingZeros(bitsPerOffset - 1);
    HashLog = MEMORY_USAGE + 3 - bitsPerOffsetLog;
    if (hashTable == null || hashTable.Size() < 1 << HashLog || hashTable.BitsPerValue < bitsPerOffset)
    {
        hashTable = PackedInts.GetMutable(1 << HashLog, bitsPerOffset, PackedInts.DEFAULT);
    }
    else
    {
        hashTable.Clear();
    }
}
private void Rehash()
{
    PagedGrowableWriter oldTable = table;
    table = new PagedGrowableWriter(2 * oldTable.Size(), 1 << 30, PackedInts.BitsRequired(count), PackedInts.COMPACT);
    mask = table.Size() - 1;
    for (long idx = 0; idx < oldTable.Size(); idx++)
    {
        long address = oldTable.Get(idx);
        if (address != 0)
        {
            AddNew(address);
        }
    }
}
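Doubling the table and recomputing `mask = Size - 1` relies on the size staying a power of two, which turns the slot computation into a single AND. A minimal demonstration of that invariant:

// With size a power of two, (hash & mask) == hash % size for non-negative
// hashes, so doubling keeps the cheap AND-based slot lookup valid.
long size = 1024;       // always a power of two
long mask = size - 1;
long hash = 123456789L;
System.Diagnostics.Debug.Assert((hash & mask) == hash % size);
System.Console.WriteLine(hash & mask); // 277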
public override void Fill(int fromIndex, int toIndex, long val)
{
    Debug.Assert(fromIndex >= 0);
    Debug.Assert(fromIndex <= toIndex);
    Debug.Assert(PackedInts.BitsRequired(val) <= bitsPerValue);

    int valuesPerBlock = 64 / bitsPerValue;
    if (toIndex - fromIndex <= valuesPerBlock << 1)
    {
        // there needs to be at least one full block to set for the block
        // approach to be worth trying
        base.Fill(fromIndex, toIndex, val);
        return;
    }

    // set values naively until the next block start
    int fromOffsetInBlock = fromIndex % valuesPerBlock;
    if (fromOffsetInBlock != 0)
    {
        for (int i = fromOffsetInBlock; i < valuesPerBlock; ++i)
        {
            Set(fromIndex++, val);
        }
        Debug.Assert(fromIndex % valuesPerBlock == 0);
    }

    // bulk set of the inner blocks
    int fromBlock = fromIndex / valuesPerBlock;
    int toBlock = toIndex / valuesPerBlock;
    Debug.Assert(fromBlock * valuesPerBlock == fromIndex);
    long blockValue = 0L;
    for (int i = 0; i < valuesPerBlock; ++i)
    {
        blockValue = blockValue | (val << (i * bitsPerValue));
    }
    Arrays.Fill(Blocks, fromBlock, toBlock, blockValue);

    // fill the gap
    for (int i = valuesPerBlock * toBlock; i < toIndex; ++i)
    {
        Set(i, val);
    }
}
protected internal override void Flush()
{
    Debug.Assert(Off > 0);

    long min = long.MaxValue, max = long.MinValue;
    for (int i = 0; i < Off; ++i)
    {
        min = Math.Min(Values[i], min);
        max = Math.Max(Values[i], max);
    }

    long delta = max - min;
    int bitsRequired = delta < 0 ? 64 : delta == 0L ? 0 : PackedInts.BitsRequired(delta);
    if (bitsRequired == 64)
    {
        // no need to delta-encode
        min = 0L;
    }
    else if (min > 0L)
    {
        // make min as small as possible so that writeVLong requires fewer bytes
        min = Math.Max(0L, max - PackedInts.MaxValue(bitsRequired));
    }

    int token = (bitsRequired << BPV_SHIFT) | (min == 0 ? MIN_VALUE_EQUALS_0 : 0);
    @out.WriteByte((byte)(sbyte)token);

    if (min != 0)
    {
        WriteVLong(@out, ZigZagEncode(min) - 1);
    }
    if (bitsRequired > 0)
    {
        if (min != 0)
        {
            for (int i = 0; i < Off; ++i)
            {
                Values[i] -= min;
            }
        }
        WriteValues(bitsRequired);
    }

    Off = 0;
}
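The token byte packs the bit width together with a "min is zero" flag so that the common case skips the zig-zag VLong entirely. A pure-arithmetic round trip, assuming BPV_SHIFT = 1 and MIN_VALUE_EQUALS_0 = 1 as in Lucene's BlockPackedWriter/Reader pair (an assumption, since the constants are not shown in this snippet):

int bitsRequired = 7;
long min = 0;
int token = (bitsRequired << 1) | (min == 0 ? 1 : 0);

// reader side
int decodedBits = token >> 1;       // 7
bool minEquals0 = (token & 1) != 0; // true: no zig-zag VLong for min follows
System.Console.WriteLine(decodedBits + " " + minEquals0);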
private void AddFixedDerefBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, int length)
{
    field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_FIXED_DEREF.Name);

    CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_CURRENT);
    CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_CURRENT);

    // deduplicate
    SortedSet<BytesRef> dictionary = new SortedSet<BytesRef>();
    foreach (BytesRef v in values)
    {
        dictionary.Add(v == null ? new BytesRef() : BytesRef.DeepCopyOf(v));
    }

    /* values */
    data.WriteInt(length);
    foreach (BytesRef v in dictionary)
    {
        data.WriteBytes(v.Bytes, v.Offset, v.Length);
    }

    /* ordinals */
    int valueCount = dictionary.Count;
    Debug.Assert(valueCount > 0);
    index.WriteInt(valueCount);
    int maxDoc = State.SegmentInfo.DocCount;
    PackedInts.Writer w = PackedInts.GetWriter(index, maxDoc, PackedInts.BitsRequired(valueCount - 1), PackedInts.DEFAULT);

    foreach (BytesRef v in values)
    {
        BytesRef bref = v ?? new BytesRef();
        //int ord = dictionary.HeadSet(bref).Size();
        int ord = dictionary.Count(@ref => @ref.CompareTo(bref) < 0);
        w.Add(ord);
    }
    w.Finish();
}
private void EnsureCapacity(long value)
{
    if ((value & CurrentMask) == value)
    {
        return;
    }
    int bitsRequired = value < 0 ? 64 : PackedInts.BitsRequired(value);
    Debug.Assert(bitsRequired > Current.BitsPerValue);
    int valueCount = Size();
    PackedInts.Mutable next = PackedInts.GetMutable(valueCount, bitsRequired, AcceptableOverheadRatio);
    PackedInts.Copy(Current, 0, next, 0, valueCount, PackedInts.DEFAULT_BUFFER_SIZE);
    Current = next;
    CurrentMask = Mask(Current.BitsPerValue);
}
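The fast path checks `(value & CurrentMask) == value`, which is lossless-fit testing via an all-ones mask over the current bit width. A sketch of the Mask helper this relies on (hypothetical body, matching the usual GrowableWriter definition):

static long Mask(int bitsPerValue) => bitsPerValue == 64 ? -1L : (1L << bitsPerValue) - 1;

System.Console.WriteLine(Mask(3));            // 7 (0b111)
System.Console.WriteLine((9 & Mask(3)) == 9); // False: 9 needs 4 bits, so a regrow is triggered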
// NOTE: 4.0 file format docs are crazy/wrong here...
private void AddVarStraightBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values)
{
    field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_VAR_STRAIGHT.Name);

    CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);
    CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);

    /* values */
    long startPos = data.FilePointer;
    foreach (BytesRef v in values)
    {
        if (v != null)
        {
            data.WriteBytes(v.Bytes, v.Offset, v.Length);
        }
    }

    /* addresses */
    long maxAddress = data.FilePointer - startPos;
    index.WriteVLong(maxAddress);
    int maxDoc = State.SegmentInfo.DocCount;
    Debug.Assert(maxDoc != int.MaxValue); // unsupported by the 4.0 impl

    PackedInts.Writer w = PackedInts.GetWriter(index, maxDoc + 1, PackedInts.BitsRequired(maxAddress), PackedInts.DEFAULT);
    long currentPosition = 0;
    foreach (BytesRef v in values)
    {
        w.Add(currentPosition);
        if (v != null)
        {
            currentPosition += v.Length;
        }
    }
    // write sentinel
    Debug.Assert(currentPosition == maxAddress);
    w.Add(currentPosition);
    w.Finish();
}
private void AddVarDerefBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values)
{
    field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_VAR_DEREF.Name);

    CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);
    CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);

    // deduplicate
    SortedSet<BytesRef> dictionary = new SortedSet<BytesRef>();
    foreach (BytesRef v in values)
    {
        dictionary.Add(v == null ? new BytesRef() : BytesRef.DeepCopyOf(v));
    }

    /* values */
    long startPosition = data.FilePointer;
    long currentAddress = 0;
    Dictionary<BytesRef, long> valueToAddress = new Dictionary<BytesRef, long>();
    foreach (BytesRef v in dictionary)
    {
        currentAddress = data.FilePointer - startPosition;
        valueToAddress[v] = currentAddress;
        WriteVShort(data, v.Length);
        data.WriteBytes(v.Bytes, v.Offset, v.Length);
    }

    /* ordinals */
    long totalBytes = data.FilePointer - startPosition;
    index.WriteLong(totalBytes);
    int maxDoc = State.SegmentInfo.DocCount;
    PackedInts.Writer w = PackedInts.GetWriter(index, maxDoc, PackedInts.BitsRequired(currentAddress), PackedInts.DEFAULT);

    foreach (BytesRef v in values)
    {
        w.Add(valueToAddress[v ?? new BytesRef()]);
    }
    w.Finish();
}
private static void SaveInts(int[] values, int length, DataOutput @out)
{
    Debug.Assert(length > 0);
    if (length == 1)
    {
        @out.WriteVInt(values[0]);
    }
    else
    {
        bool allEqual = true;
        for (int i = 1; i < length; ++i)
        {
            if (values[i] != values[0])
            {
                allEqual = false;
                break;
            }
        }
        if (allEqual)
        {
            @out.WriteVInt(0);
            @out.WriteVInt(values[0]);
        }
        else
        {
            long max = 0;
            for (int i = 0; i < length; ++i)
            {
                max |= (uint)values[i];
            }
            int bitsRequired = PackedInts.BitsRequired(max);
            @out.WriteVInt(bitsRequired);
            PackedInts.Writer w = PackedInts.GetWriterNoHeader(@out, PackedInts.Format.PACKED, length, bitsRequired, 1);
            for (int i = 0; i < length; ++i)
            {
                w.Add(values[i]);
            }
            w.Finish();
        }
    }
}
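The writer distinguishes three cases: a single value (no header), a run of equal values (header 0), and a packed block (header = bit width). A hypothetical reader mirroring it might look like the sketch below; the PackedInts API names and the use of VERSION_CURRENT are assumptions here (the real reader would use the version recorded in the file header):

private static int[] ReadInts(DataInput @in, int length)
{
    int[] values = new int[length];
    if (length == 1)
    {
        values[0] = @in.ReadVInt();
    }
    else
    {
        int header = @in.ReadVInt();
        if (header == 0)
        {
            // run of equal values
            Arrays.Fill(values, @in.ReadVInt());
        }
        else
        {
            // header is the bit width of the packed block
            PackedInts.ReaderIterator it = PackedInts.GetReaderIteratorNoHeader(@in, PackedInts.Format.PACKED, PackedInts.VERSION_CURRENT, length, header, 1);
            for (int i = 0; i < length; ++i)
            {
                values[i] = (int)it.Next();
            }
        }
    }
    return values;
}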
internal override void PackPendingValues()
{
    // compute max delta
    long minValue = Pending[0];
    long maxValue = Pending[0];
    for (int i = 1; i < PendingOff; ++i)
    {
        minValue = Math.Min(minValue, Pending[i]);
        maxValue = Math.Max(maxValue, Pending[i]);
    }

    // build a new packed reader
    int bitsRequired = minValue < 0 ? 64 : PackedInts.BitsRequired(maxValue);
    PackedInts.Mutable mutable = PackedInts.GetMutable(PendingOff, bitsRequired, AcceptableOverheadRatio);
    for (int i = 0; i < PendingOff;)
    {
        i += mutable.Set(i, Pending, i, PendingOff - i);
    }
    Values[ValuesOff] = mutable;
}
internal override void PackPendingValues()
{
    Debug.Assert(PendingOff > 0);
    MinValues[ValuesOff] = Pending[0];
    Averages[ValuesOff] = PendingOff == 1 ? 0 : (float)(Pending[PendingOff - 1] - Pending[0]) / (PendingOff - 1);

    for (int i = 0; i < PendingOff; ++i)
    {
        Pending[i] = ZigZagEncode(Pending[i] - MinValues[ValuesOff] - (long)(Averages[ValuesOff] * (long)i));
    }
    long maxDelta = 0;
    for (int i = 0; i < PendingOff; ++i)
    {
        if (Pending[i] < 0)
        {
            maxDelta = -1;
            break;
        }
        else
        {
            maxDelta = Math.Max(maxDelta, Pending[i]);
        }
    }
    if (maxDelta == 0)
    {
        Values[ValuesOff] = new PackedInts.NullReader(PendingOff);
    }
    else
    {
        int bitsRequired = maxDelta < 0 ? 64 : PackedInts.BitsRequired(maxDelta);
        PackedInts.Mutable mutable = PackedInts.GetMutable(PendingOff, bitsRequired, AcceptableOverheadRatio);
        for (int i = 0; i < PendingOff;)
        {
            i += mutable.Set(i, Pending, i, PendingOff - i);
        }
        Values[ValuesOff] = mutable;
    }
}
/// <summary>
/// Returns a sorted array containing unique field numbers </summary>
private int[] FlushFieldNums()
{
    SortedSet<int> fieldNums = new SortedSet<int>();
    foreach (DocData dd in PendingDocs)
    {
        foreach (FieldData fd in dd.Fields)
        {
            fieldNums.Add(fd.FieldNum);
        }
    }

    int numDistinctFields = fieldNums.Count;
    Debug.Assert(numDistinctFields > 0);
    int bitsRequired = PackedInts.BitsRequired(fieldNums.Last());
    int token = (Math.Min(numDistinctFields - 1, 0x07) << 5) | bitsRequired;
    VectorsStream.WriteByte((byte)(sbyte)token);
    if (numDistinctFields - 1 >= 0x07)
    {
        VectorsStream.WriteVInt(numDistinctFields - 1 - 0x07);
    }
    PackedInts.Writer writer = PackedInts.GetWriterNoHeader(VectorsStream, PackedInts.Format.PACKED, fieldNums.Count, bitsRequired, 1);
    foreach (int fieldNum in fieldNums)
    {
        writer.Add(fieldNum);
    }
    writer.Finish();

    int[] fns = new int[fieldNums.Count];
    int i = 0;
    foreach (int key in fieldNums)
    {
        fns[i++] = key;
    }
    return fns;
}
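The header token here packs two quantities into one byte: the low 5 bits carry bitsRequired for the packed field numbers (at most 31 for non-negative ints), and the high 3 bits carry min(numDistinctFields - 1, 7), with an extra VInt only when the count saturates. A pure-arithmetic round trip grounded in the writer above:

int numDistinctFields = 12;
int bitsRequired = 6;
int token = (System.Math.Min(numDistinctFields - 1, 0x07) << 5) | bitsRequired;

// reader side
int decodedBits = token & 0x1F;        // 6: width used for the packed field numbers
int totalDistinctFields = token >> 5;  // 7: saturated, so a VInt follows
if (totalDistinctFields == 0x07)
{
    totalDistinctFields += 4;          // the VInt written above: 12 - 1 - 0x07
}
System.Console.WriteLine(totalDistinctFields + 1); // 12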
public override void Finish(long termsFilePointer)
{
    // write primary terms dict offsets
    PackedIndexStart = _fgtiw.Output.FilePointer;

    PackedInts.Writer w = PackedInts.GetWriter(_fgtiw.Output, NumIndexTerms, PackedInts.BitsRequired(termsFilePointer), PackedInts.DEFAULT);

    // relative to our indexStart
    long upto = 0;
    for (int i = 0; i < NumIndexTerms; i++)
    {
        upto += _termsPointerDeltas[i];
        w.Add(upto);
    }
    w.Finish();

    PackedOffsetsStart = _fgtiw.Output.FilePointer;

    // write offsets into the byte[] terms
    w = PackedInts.GetWriter(_fgtiw.Output, 1 + NumIndexTerms, PackedInts.BitsRequired(_totTermLength), PackedInts.DEFAULT);
    upto = 0;
    for (int i = 0; i < NumIndexTerms; i++)
    {
        w.Add(upto);
        upto += _termLengths[i];
    }
    w.Add(upto);
    w.Finish();

    // our referrer holds onto us, while other fields are
    // being written, so don't tie up this RAM:
    _termLengths = null;
    _termsPointerDeltas = null;
}
internal virtual void AddNumericField(FieldInfo field, IEnumerable<long?> values, bool optimizeStorage)
{
    long count = 0;
    long minValue = long.MaxValue;
    long maxValue = long.MinValue;
    long gcd = 0;
    bool missing = false;
    // TODO: more efficient?
    HashSet<long> uniqueValues = null;

    if (optimizeStorage)
    {
        uniqueValues = new HashSet<long>();

        foreach (long? nv in values)
        {
            long v;
            if (nv == null)
            {
                v = 0;
                missing = true;
            }
            else
            {
                v = nv.Value;
            }

            if (gcd != 1)
            {
                if (v < long.MinValue / 2 || v > long.MaxValue / 2)
                {
                    // in that case v - minValue might overflow and make the GCD computation return
                    // wrong results. Since these extreme values are unlikely, we just discard
                    // GCD computation for them
                    gcd = 1;
                }
                else if (count != 0) // minValue needs to be set first
                {
                    gcd = MathUtil.Gcd(gcd, v - minValue);
                }
            }

            minValue = Math.Min(minValue, v);
            maxValue = Math.Max(maxValue, v);

            if (uniqueValues != null)
            {
                if (uniqueValues.Add(v))
                {
                    if (uniqueValues.Count > 256)
                    {
                        uniqueValues = null;
                    }
                }
            }

            ++count;
        }
    }
    else
    {
        foreach (long? nv in values)
        {
            ++count;
        }
    }

    long delta = maxValue - minValue;

    int format;
    if (uniqueValues != null && (delta < 0L || PackedInts.BitsRequired(uniqueValues.Count - 1) < PackedInts.BitsRequired(delta)) && count <= int.MaxValue)
    {
        format = TABLE_COMPRESSED;
    }
    else if (gcd != 0 && gcd != 1)
    {
        format = GCD_COMPRESSED;
    }
    else
    {
        format = DELTA_COMPRESSED;
    }

    Meta.WriteVInt(field.Number);
    Meta.WriteByte((byte)Lucene45DocValuesFormat.NUMERIC);
    Meta.WriteVInt(format);
    if (missing)
    {
        Meta.WriteLong(Data.FilePointer);
        WriteMissingBitset(values);
    }
    else
    {
        Meta.WriteLong(-1L);
    }
    Meta.WriteVInt(PackedInts.VERSION_CURRENT);
    Meta.WriteLong(Data.FilePointer);
    Meta.WriteVLong(count);
    Meta.WriteVInt(BLOCK_SIZE);

    switch (format)
    {
        case GCD_COMPRESSED:
            Meta.WriteLong(minValue);
            Meta.WriteLong(gcd);
            BlockPackedWriter quotientWriter = new BlockPackedWriter(Data, BLOCK_SIZE);
            foreach (long? nv in values)
            {
                long value = nv == null ? 0 : nv.Value;
                quotientWriter.Add((value - minValue) / gcd);
            }
            quotientWriter.Finish();
            break;

        case DELTA_COMPRESSED:
            BlockPackedWriter writer = new BlockPackedWriter(Data, BLOCK_SIZE);
            foreach (long? nv in values)
            {
                writer.Add(nv == null ? 0 : nv.Value);
            }
            writer.Finish();
            break;

        case TABLE_COMPRESSED:
            long[] decode = uniqueValues.ToArray(); // LUCENENET: Java's toArray took a sizing parameter
            Dictionary<long, int> encode = new Dictionary<long, int>();
            Meta.WriteVInt(decode.Length);
            for (int i = 0; i < decode.Length; i++)
            {
                Meta.WriteLong(decode[i]);
                encode[decode[i]] = i;
            }
            int bitsRequired = PackedInts.BitsRequired(uniqueValues.Count - 1);
            PackedInts.Writer ordsWriter = PackedInts.GetWriterNoHeader(Data, PackedInts.Format.PACKED, (int)count, bitsRequired, PackedInts.DEFAULT_BUFFER_SIZE);
            foreach (long? nv in values)
            {
                ordsWriter.Add(encode[nv == null ? 0 : nv.Value]);
            }
            ordsWriter.Finish();
            break;

        default:
            throw new InvalidOperationException();
    }
}
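A worked example of the TABLE vs DELTA decision above: table compression wins whenever few distinct values span a wide range, because the per-document cost becomes BitsRequired(uniqueCount - 1) instead of BitsRequired(maxValue - minValue). Reusing the one-line BitsRequired stand-in from the earlier sketch:

using System;
using System.Numerics;

static int BitsRequired(long v) => Math.Max(1, 64 - BitOperations.LeadingZeroCount((ulong)v));

long delta = 2L * 86_400_000L; // maxValue - minValue: two days in milliseconds
int uniqueCount = 3;           // only three distinct timestamps in the segment

Console.WriteLine(BitsRequired(delta));           // 28 bpv if DELTA_COMPRESSED
Console.WriteLine(BitsRequired(uniqueCount - 1)); // 2 bpv via TABLE_COMPRESSED ordinals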
private void FlushFields(int totalFields, int[] fieldNums)
{
    PackedInts.Writer writer = PackedInts.GetWriterNoHeader(VectorsStream, PackedInts.Format.PACKED, totalFields, PackedInts.BitsRequired(fieldNums.Length - 1), 1);
    foreach (DocData dd in PendingDocs)
    {
        foreach (FieldData fd in dd.Fields)
        {
            int fieldNumIndex = Array.BinarySearch(fieldNums, fd.FieldNum);
            Debug.Assert(fieldNumIndex >= 0);
            writer.Add(fieldNumIndex);
        }
    }
    writer.Finish();
}
internal virtual void AddNumericField(FieldInfo field, IEnumerable<long?> values, bool optimizeStorage)
{
    Meta.WriteVInt(field.Number);
    Meta.WriteByte(Lucene42DocValuesProducer.NUMBER);
    Meta.WriteLong(Data.FilePointer);

    long minValue = long.MaxValue;
    long maxValue = long.MinValue;
    long gcd = 0;
    // TODO: more efficient?
    HashSet<long> uniqueValues = null;

    if (optimizeStorage)
    {
        uniqueValues = new HashSet<long>();

        long count = 0;
        foreach (long? nv in values)
        {
            // TODO: support this as MemoryDVFormat (and be smart about missing maybe)
            long v = nv == null ? 0 : (long)nv;

            if (gcd != 1)
            {
                if (v < long.MinValue / 2 || v > long.MaxValue / 2)
                {
                    // in that case v - minValue might overflow and make the GCD computation return
                    // wrong results. Since these extreme values are unlikely, we just discard
                    // GCD computation for them
                    gcd = 1;
                }
                else if (count != 0) // minValue needs to be set first
                {
                    gcd = MathUtil.Gcd(gcd, v - minValue);
                }
            }

            minValue = Math.Min(minValue, v);
            maxValue = Math.Max(maxValue, v);

            if (uniqueValues != null)
            {
                if (uniqueValues.Add(v))
                {
                    if (uniqueValues.Count > 256)
                    {
                        uniqueValues = null;
                    }
                }
            }

            ++count;
        }
        Debug.Assert(count == MaxDoc);
    }

    if (uniqueValues != null)
    {
        // small number of unique values
        int bitsPerValue = PackedInts.BitsRequired(uniqueValues.Count - 1);
        FormatAndBits formatAndBits = PackedInts.FastestFormatAndBits(MaxDoc, bitsPerValue, AcceptableOverheadRatio);
        if (formatAndBits.bitsPerValue == 8 && minValue >= sbyte.MinValue && maxValue <= sbyte.MaxValue)
        {
            Meta.WriteByte(Lucene42DocValuesProducer.UNCOMPRESSED); // uncompressed
            foreach (long? nv in values)
            {
                Data.WriteByte(nv == null ? (byte)0 : (byte)nv);
            }
        }
        else
        {
            Meta.WriteByte(Lucene42DocValuesProducer.TABLE_COMPRESSED); // table-compressed
            long[] decode = uniqueValues.ToArray(/*new long?[uniqueValues.Count]*/);
            Dictionary<long, int> encode = new Dictionary<long, int>();
            Data.WriteVInt(decode.Length);
            for (int i = 0; i < decode.Length; i++)
            {
                Data.WriteLong(decode[i]);
                encode[decode[i]] = i;
            }

            Meta.WriteVInt(PackedInts.VERSION_CURRENT);
            Data.WriteVInt(formatAndBits.format.id);
            Data.WriteVInt(formatAndBits.bitsPerValue);

            PackedInts.Writer writer = PackedInts.GetWriterNoHeader(Data, formatAndBits.format, MaxDoc, formatAndBits.bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
            foreach (long? nv in values)
            {
                writer.Add(encode[nv == null ? 0 : (long)nv]);
            }
            writer.Finish();
        }
    }
    else if (gcd != 0 && gcd != 1)
    {
        Meta.WriteByte(Lucene42DocValuesProducer.GCD_COMPRESSED);
        Meta.WriteVInt(PackedInts.VERSION_CURRENT);
        Data.WriteLong(minValue);
        Data.WriteLong(gcd);
        Data.WriteVInt(Lucene42DocValuesProducer.BLOCK_SIZE);

        BlockPackedWriter writer = new BlockPackedWriter(Data, Lucene42DocValuesProducer.BLOCK_SIZE);
        foreach (long? nv in values)
        {
            long value = nv == null ? 0 : (long)nv;
            writer.Add((value - minValue) / gcd);
        }
        writer.Finish();
    }
    else
    {
        Meta.WriteByte(Lucene42DocValuesProducer.DELTA_COMPRESSED); // delta-compressed
        Meta.WriteVInt(PackedInts.VERSION_CURRENT);
        Data.WriteVInt(Lucene42DocValuesProducer.BLOCK_SIZE);

        BlockPackedWriter writer = new BlockPackedWriter(Data, Lucene42DocValuesProducer.BLOCK_SIZE);
        foreach (long? nv in values)
        {
            writer.Add(nv == null ? 0 : (long)nv);
        }
        writer.Finish();
    }
}