/// <summary>
/// Builds a <see cref="ForUtil"/> for writing and records its configuration in
/// <paramref name="out"/>: first <c>PackedInt32s.VERSION_CURRENT</c>, then, for every
/// bit width from 1 to 32, one VInt holding the chosen format id (shifted left by 5)
/// combined with the chosen bits-per-value minus one.
/// </summary>
/// <param name="acceptableOverheadRatio"> Overhead ratio passed to <c>PackedInt32s.FastestFormatAndBits</c> when picking a format for each bit width. </param>
/// <param name="out"> Destination that receives the version and the per-bit-width format codes. </param>
internal ForUtil(float acceptableOverheadRatio, DataOutput @out)
{
    // Tables are indexed directly by bits-per-value (1..32); slot 0 is never assigned.
    const int tableSize = 33;

    @out.WriteVInt32(PackedInt32s.VERSION_CURRENT);

    encodedSizes = new int[tableSize];
    encoders = new PackedInt32s.IEncoder[tableSize];
    decoders = new PackedInt32s.IDecoder[tableSize];
    iterations = new int[tableSize];

    for (int requestedBits = 1; requestedBits < tableSize; requestedBits++)
    {
        var chosen = PackedInt32s.FastestFormatAndBits(
            Lucene41PostingsFormat.BLOCK_SIZE, requestedBits, acceptableOverheadRatio);
        var format = chosen.Format;
        var actualBits = chosen.BitsPerValue;

        Debug.Assert(format.IsSupported(actualBits));
        Debug.Assert(actualBits <= 32);

        encodedSizes[requestedBits] = EncodedSize(format, PackedInt32s.VERSION_CURRENT, actualBits);
        encoders[requestedBits] = PackedInt32s.GetEncoder(format, PackedInt32s.VERSION_CURRENT, actualBits);

        var decoder = PackedInt32s.GetDecoder(format, PackedInt32s.VERSION_CURRENT, actualBits);
        decoders[requestedBits] = decoder;
        iterations[requestedBits] = ComputeIterations(decoder);

        // Format id goes above the low 5 bits; the low 5 bits carry (bits - 1), i.e. 0..31.
        @out.WriteVInt32((format.Id << 5) | (actualBits - 1));
    }
}
/// <summary>
/// Initializes a <see cref="PagedMutable"/>. The packed format and effective bit width are
/// selected by <c>PackedInt32s.FastestFormatAndBits</c> from <paramref name="pageSize"/>,
/// <paramref name="bitsPerValue"/> and <paramref name="acceptableOverheadRatio"/>; the
/// result is forwarded to the format-based constructor and the pages are then filled.
/// </summary>
/// <param name="size"> Total number of values to hold. </param>
/// <param name="pageSize"> Number of values stored in each page. </param>
/// <param name="bitsPerValue"> Requested number of bits for each value. </param>
/// <param name="acceptableOverheadRatio"> Overhead ratio the format selection is allowed to use. </param>
public PagedMutable(long size, int pageSize, int bitsPerValue, float acceptableOverheadRatio)
    : this(
        size,
        pageSize,
        PackedInt32s.FastestFormatAndBits(pageSize, bitsPerValue, acceptableOverheadRatio))
{
    // The chained constructor only records the layout; the pages are populated here.
    FillPages();
}
/// <summary>
/// Writes one numeric doc-values field. The metadata stream receives the field number,
/// the <c>NUMBER</c> type marker, the current data file pointer, the missing-bitset
/// location (or -1), and a marker for the encoding that was picked; the data stream
/// receives the encoded values.
/// <para/>
/// Encoding choice, in priority order:
/// <list type="number">
///   <item><description>at most 256 distinct values and an 8-bit format with all values in the <see cref="sbyte"/> range: one raw byte per value;</description></item>
///   <item><description>at most 256 distinct values otherwise: a lookup table plus packed table indices;</description></item>
///   <item><description>a common divisor other than 0 or 1: block-packed <c>(value - min) / gcd</c>;</description></item>
///   <item><description>anything else: block-packed raw values.</description></item>
/// </list>
/// </summary>
/// <param name="field"> Field whose <c>Number</c> is recorded in the metadata stream. </param>
/// <param name="values"> Per-document values. The sequence is enumerated more than once. <c>null</c> entries are written as 0. </param>
/// <param name="optimizeStorage">
/// When <c>true</c>, a first pass gathers min/max, GCD, distinct values and whether any entry is
/// <c>null</c>. When <c>false</c>, no statistics are gathered, so no missing bitset is written and
/// the values are always written with the block-packed raw encoding.
/// </param>
internal virtual void AddNumericField(FieldInfo field, IEnumerable<long?> values, bool optimizeStorage)
{
    meta.WriteVInt32(field.Number);
    meta.WriteByte(MemoryDocValuesProducer.NUMBER);
    meta.WriteInt64(data.GetFilePointer());

    long minValue = long.MaxValue;
    long maxValue = long.MinValue;
    long gcd = 0;
    bool missing = false;

    // TODO: more efficient?
    // Holds plain longs: null entries are mapped to 0 before they are recorded, so nullable
    // elements would only add wrapping on every Add and lookup.
    ISet<long> uniqueValues = null;
    if (optimizeStorage)
    {
        uniqueValues = new JCG.HashSet<long>();

        long count = 0;
        foreach (var nv in values)
        {
            long v;
            if (nv == null)
            {
                v = 0;
                missing = true;
            }
            else
            {
                v = nv.Value;
            }

            if (gcd != 1)
            {
                if (v < long.MinValue / 2 || v > long.MaxValue / 2)
                {
                    // in that case v - minValue might overflow and make the GCD computation return
                    // wrong results. Since these extreme values are unlikely, we just discard
                    // GCD computation for them
                    gcd = 1;
                }
                else if (count != 0) // minValue needs to be set first
                {
                    gcd = MathUtil.Gcd(gcd, v - minValue);
                }
            }

            minValue = Math.Min(minValue, v);
            maxValue = Math.Max(maxValue, v);

            if (uniqueValues != null)
            {
                if (uniqueValues.Add(v))
                {
                    // Past 256 distinct values the table encodings are abandoned.
                    if (uniqueValues.Count > 256)
                    {
                        uniqueValues = null;
                    }
                }
            }

            ++count;
        }

        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(count == maxDoc);
        }
    }

    if (missing)
    {
        // Record where the missing bitset starts in the data stream and how long it is.
        long start = data.GetFilePointer();
        WriteMissingBitset(values);
        meta.WriteInt64(start);
        meta.WriteInt64(data.GetFilePointer() - start);
    }
    else
    {
        meta.WriteInt64(-1L);
    }

    if (uniqueValues != null)
    {
        // small number of unique values
        int bitsPerValue = PackedInt32s.BitsRequired(uniqueValues.Count - 1);
        FormatAndBits formatAndBits = PackedInt32s.FastestFormatAndBits(maxDoc, bitsPerValue, acceptableOverheadRatio);
        if (formatAndBits.BitsPerValue == 8 && minValue >= sbyte.MinValue && maxValue <= sbyte.MaxValue)
        {
            meta.WriteByte(MemoryDocValuesProducer.UNCOMPRESSED); // uncompressed
            foreach (var nv in values)
            {
                // Values may be negative (down to sbyte.MinValue); the narrowing must wrap
                // rather than throw, regardless of the project's checked-arithmetic setting.
                data.WriteByte(unchecked((byte)nv.GetValueOrDefault()));
            }
        }
        else
        {
            meta.WriteByte(MemoryDocValuesProducer.TABLE_COMPRESSED); // table-compressed
            long[] decode = new long[uniqueValues.Count];
            uniqueValues.CopyTo(decode, 0);

            // value -> index into the table written below
            var encode = new Dictionary<long, int>(decode.Length);
            data.WriteVInt32(decode.Length);
            for (int i = 0; i < decode.Length; i++)
            {
                data.WriteInt64(decode[i]);
                encode[decode[i]] = i;
            }

            meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
            data.WriteVInt32(formatAndBits.Format.Id);
            data.WriteVInt32(formatAndBits.BitsPerValue);

            PackedInt32s.Writer writer = PackedInt32s.GetWriterNoHeader(data, formatAndBits.Format, maxDoc, formatAndBits.BitsPerValue, PackedInt32s.DEFAULT_BUFFER_SIZE);
            foreach (var nv in values)
            {
                // null entries look up 0, which the first pass added to the table.
                writer.Add(encode[nv.GetValueOrDefault()]);
            }
            writer.Finish();
        }
    }
    else if (gcd != 0 && gcd != 1)
    {
        meta.WriteByte(MemoryDocValuesProducer.GCD_COMPRESSED);
        meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
        data.WriteInt64(minValue);
        data.WriteInt64(gcd);
        data.WriteVInt32(MemoryDocValuesProducer.BLOCK_SIZE);

        var writer = new BlockPackedWriter(data, MemoryDocValuesProducer.BLOCK_SIZE);
        foreach (var nv in values)
        {
            writer.Add((nv.GetValueOrDefault() - minValue) / gcd);
        }
        writer.Finish();
    }
    else
    {
        meta.WriteByte(MemoryDocValuesProducer.DELTA_COMPRESSED); // delta-compressed

        meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
        data.WriteVInt32(MemoryDocValuesProducer.BLOCK_SIZE);

        var writer = new BlockPackedWriter(data, MemoryDocValuesProducer.BLOCK_SIZE);
        foreach (var nv in values)
        {
            writer.Add(nv.GetValueOrDefault());
        }
        writer.Finish();
    }
}
/// <summary>
/// Writes one numeric doc-values field. A first pass over <paramref name="values"/> collects
/// the minimum, maximum, a running GCD of the offsets from the minimum, and up to 256
/// distinct values; a second pass writes the values with the encoding those statistics select
/// (raw bytes, lookup table, GCD-compressed blocks, or delta-compressed blocks).
/// </summary>
/// <param name="field"> Field whose <c>Number</c> is written to the metadata stream. </param>
/// <param name="values">
/// Per-document values, enumerated twice. Entries are expected to be non-null: the first pass
/// asserts this and reads <c>Value</c> directly.
/// </param>
public override void AddNumericField(FieldInfo field, IEnumerable<long?> values)
{
    meta.WriteVInt32(field.Number);
    meta.WriteByte((byte)NUMBER);
    meta.WriteInt64(data.GetFilePointer());

    long lowest = long.MaxValue;
    long highest = long.MinValue;
    long divisor = 0;
    long seen = 0;

    // TODO: more efficient?
    HashSet<long> distinct = new HashSet<long>();

    // ---- pass 1: statistics ----
    foreach (long? item in values)
    {
        Debug.Assert(item != null);
        long current = item.Value;

        if (divisor != 1)
        {
            bool extreme = current < long.MinValue / 2 || current > long.MaxValue / 2;
            if (extreme)
            {
                // current - lowest could overflow and corrupt the GCD; such values are
                // unlikely, so GCD compression is simply given up for this field.
                divisor = 1;
            }
            else if (seen != 0)
            {
                // Only from the second value on: lowest must already hold a real value.
                divisor = MathUtil.Gcd(divisor, current - lowest);
            }
        }

        lowest = Math.Min(lowest, current);
        highest = Math.Max(highest, current);

        // Stop tracking distinct values once there are more than 256 of them.
        if (distinct != null && distinct.Add(current) && distinct.Count > 256)
        {
            distinct = null;
        }

        ++seen;
    }
    Debug.Assert(seen == maxDoc);

    // ---- pass 2: encode ----
    if (distinct != null)
    {
        // small number of unique values
        int tableBits = PackedInt32s.BitsRequired(distinct.Count - 1);
        FormatAndBits packing = PackedInt32s.FastestFormatAndBits(maxDoc, tableBits, acceptableOverheadRatio);

        bool fitsInSignedByte = lowest >= sbyte.MinValue && highest <= sbyte.MaxValue;
        if (packing.BitsPerValue == 8 && fitsInSignedByte)
        {
            meta.WriteByte((byte)UNCOMPRESSED); // uncompressed
            foreach (long? item in values)
            {
                data.WriteByte((byte)item.GetValueOrDefault());
            }
            return;
        }

        meta.WriteByte((byte)TABLE_COMPRESSED); // table-compressed
        var table = distinct.ToArray();
        var ordinalOf = new Dictionary<long, int>();
        data.WriteVInt32(table.Length);
        for (int ord = 0; ord < table.Length; ord++)
        {
            long entry = table[ord];
            data.WriteInt64(entry);
            ordinalOf[entry] = ord;
        }

        meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
        data.WriteVInt32(packing.Format.Id);
        data.WriteVInt32(packing.BitsPerValue);

        PackedInt32s.Writer tableWriter = PackedInt32s.GetWriterNoHeader(
            data, packing.Format, maxDoc, packing.BitsPerValue, PackedInt32s.DEFAULT_BUFFER_SIZE);
        foreach (long? item in values)
        {
            tableWriter.Add(ordinalOf[item.GetValueOrDefault()]);
        }
        tableWriter.Finish();
        return;
    }

    if (divisor != 0 && divisor != 1)
    {
        meta.WriteByte((byte)GCD_COMPRESSED);
        meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
        data.WriteInt64(lowest);
        data.WriteInt64(divisor);
        data.WriteVInt32(BLOCK_SIZE);

        var gcdWriter = new BlockPackedWriter(data, BLOCK_SIZE);
        foreach (long? item in values)
        {
            long offset = item.GetValueOrDefault() - lowest;
            gcdWriter.Add(offset / divisor);
        }
        gcdWriter.Finish();
        return;
    }

    meta.WriteByte((byte)DELTA_COMPRESSED); // delta-compressed
    meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
    data.WriteVInt32(BLOCK_SIZE);

    var deltaWriter = new BlockPackedWriter(data, BLOCK_SIZE);
    foreach (long? item in values)
    {
        deltaWriter.Add(item.GetValueOrDefault());
    }
    deltaWriter.Finish();
}
/// <summary>
/// Writes one numeric doc-values field to <c>Meta</c> and <c>Data</c>, selecting an
/// uncompressed, table-compressed, GCD-compressed or delta-compressed layout.
/// </summary>
/// <param name="field"> Field whose <c>Number</c> is written to the metadata stream. </param>
/// <param name="values"> Per-document values, enumerated more than once; <c>null</c> entries are treated as 0. </param>
/// <param name="optimizeStorage">
/// <c>true</c> to scan the values first (min, max, GCD, up to 256 distinct values) so a compact
/// layout can be chosen; <c>false</c> to skip the scan, in which case the delta-compressed
/// layout is always used.
/// </param>
internal virtual void AddNumericField(FieldInfo field, IEnumerable<long?> values, bool optimizeStorage)
{
    Meta.WriteVInt32(field.Number);
    Meta.WriteByte((byte)Lucene42DocValuesProducer.NUMBER);
    Meta.WriteInt64(Data.GetFilePointer());

    long smallest = long.MaxValue;
    long largest = long.MinValue;
    long commonDivisor = 0;

    // TODO: more efficient?
    HashSet<long> valueSet = null;

    if (optimizeStorage)
    {
        valueSet = new HashSet<long>();
        long docCount = 0;

        foreach (long? boxed in values)
        {
            // TODO: support this as MemoryDVFormat (and be smart about missing maybe)
            long value = boxed ?? 0L;

            if (commonDivisor != 1)
            {
                if (value < long.MinValue / 2 || value > long.MaxValue / 2)
                {
                    // value - smallest might overflow here and yield a wrong GCD; these
                    // extremes are unlikely, so the GCD computation is just dropped.
                    commonDivisor = 1;
                }
                else if (docCount != 0)
                {
                    // smallest has to be set by a previous value first
                    commonDivisor = MathUtil.Gcd(commonDivisor, value - smallest);
                }
            }

            smallest = Math.Min(smallest, value);
            largest = Math.Max(largest, value);

            // More than 256 distinct values rules out the table-based layouts.
            if (valueSet != null && valueSet.Add(value) && valueSet.Count > 256)
            {
                valueSet = null;
            }

            ++docCount;
        }

        Debug.Assert(docCount == MaxDoc);
    }

    if (valueSet != null)
    {
        // small number of unique values
        int requiredBits = PackedInt32s.BitsRequired(valueSet.Count - 1);
        FormatAndBits selection = PackedInt32s.FastestFormatAndBits(MaxDoc, requiredBits, AcceptableOverheadRatio);

        bool byteSized = selection.BitsPerValue == 8;
        bool inSByteRange = smallest >= sbyte.MinValue && largest <= sbyte.MaxValue;

        if (byteSized && inSByteRange)
        {
            Meta.WriteByte((byte)Lucene42DocValuesProducer.UNCOMPRESSED); // uncompressed
            foreach (long? boxed in values)
            {
                Data.WriteByte((byte)(boxed ?? 0L));
            }
        }
        else
        {
            Meta.WriteByte((byte)Lucene42DocValuesProducer.TABLE_COMPRESSED); // table-compressed

            long[] lookup = valueSet.ToArray();
            var indexByValue = new Dictionary<long, int>();

            Data.WriteVInt32(lookup.Length);
            for (int slot = 0; slot < lookup.Length; slot++)
            {
                Data.WriteInt64(lookup[slot]);
                indexByValue[lookup[slot]] = slot;
            }

            Meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
            Data.WriteVInt32(selection.Format.Id);
            Data.WriteVInt32(selection.BitsPerValue);

            PackedInt32s.Writer packed = PackedInt32s.GetWriterNoHeader(
                Data, selection.Format, MaxDoc, selection.BitsPerValue, PackedInt32s.DEFAULT_BUFFER_SIZE);
            foreach (long? boxed in values)
            {
                packed.Add(indexByValue[boxed ?? 0L]);
            }
            packed.Finish();
        }
    }
    else if (!(commonDivisor == 0 || commonDivisor == 1))
    {
        Meta.WriteByte((byte)Lucene42DocValuesProducer.GCD_COMPRESSED);
        Meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
        Data.WriteInt64(smallest);
        Data.WriteInt64(commonDivisor);
        Data.WriteVInt32(Lucene42DocValuesProducer.BLOCK_SIZE);

        BlockPackedWriter blocks = new BlockPackedWriter(Data, Lucene42DocValuesProducer.BLOCK_SIZE);
        foreach (long? boxed in values)
        {
            blocks.Add(((boxed ?? 0L) - smallest) / commonDivisor);
        }
        blocks.Finish();
    }
    else
    {
        Meta.WriteByte((byte)Lucene42DocValuesProducer.DELTA_COMPRESSED); // delta-compressed
        Meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
        Data.WriteVInt32(Lucene42DocValuesProducer.BLOCK_SIZE);

        BlockPackedWriter blocks = new BlockPackedWriter(Data, Lucene42DocValuesProducer.BLOCK_SIZE);
        foreach (long? boxed in values)
        {
            blocks.Add(boxed ?? 0L);
        }
        blocks.Finish();
    }
}