public override void AddNumericField(FieldInfo field, IEnumerable<long?> values)
{
    Meta.WriteVInt(field.Number);
    Meta.WriteByte((byte)NUMBER);
    Meta.WriteLong(Data.FilePointer);

    long minValue = long.MaxValue;
    long maxValue = long.MinValue;
    long gcd = 0;
    // TODO: more efficient?
    HashSet<long> uniqueValues = null;
    if (true)
    {
        uniqueValues = new HashSet<long>();

        long count = 0;
        foreach (long? nv in values)
        {
            Debug.Assert(nv != null);
            long v = nv.Value;

            if (gcd != 1)
            {
                if (v < long.MinValue / 2 || v > long.MaxValue / 2)
                {
                    // in that case v - minValue might overflow and make the GCD computation return
                    // wrong results. Since these extreme values are unlikely, we just discard
                    // GCD computation for them
                    gcd = 1;
                }
                else if (count != 0) // minValue needs to be set first
                {
                    gcd = MathUtil.Gcd(gcd, v - minValue);
                }
            }

            minValue = Math.Min(minValue, v);
            maxValue = Math.Max(maxValue, v);

            if (uniqueValues != null)
            {
                if (uniqueValues.Add(v))
                {
                    if (uniqueValues.Count > 256)
                    {
                        // too many distinct values for table compression; give up tracking them
                        uniqueValues = null;
                    }
                }
            }

            ++count;
        }
        Debug.Assert(count == MaxDoc);
    }

    if (uniqueValues != null)
    {
        // small number of unique values
        int bitsPerValue = PackedInts.BitsRequired(uniqueValues.Count - 1);
        FormatAndBits formatAndBits = PackedInts.FastestFormatAndBits(MaxDoc, bitsPerValue, AcceptableOverheadRatio);
        if (formatAndBits.bitsPerValue == 8 && minValue >= sbyte.MinValue && maxValue <= sbyte.MaxValue)
        {
            Meta.WriteByte((byte)UNCOMPRESSED); // uncompressed
            foreach (long? nv in values)
            {
                Data.WriteByte(nv == null ? (byte)0 : (byte)(sbyte)nv.Value);
            }
        }
        else
        {
            Meta.WriteByte((byte)TABLE_COMPRESSED); // table-compressed
            // LUCENE TO-DO: ToArray had a parameter in the Java original
            var decode = uniqueValues.ToArray();
            var encode = new Dictionary<long, int>();
            Data.WriteVInt(decode.Length);
            for (int i = 0; i < decode.Length; i++)
            {
                Data.WriteLong(decode[i]);
                encode[decode[i]] = i;
            }

            Meta.WriteVInt(PackedInts.VERSION_CURRENT);
            Data.WriteVInt(formatAndBits.format.id);
            Data.WriteVInt(formatAndBits.bitsPerValue);

            PackedInts.Writer writer = PackedInts.GetWriterNoHeader(Data, formatAndBits.format, MaxDoc, formatAndBits.bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
            foreach (long? nv in values)
            {
                writer.Add(encode[nv == null ? 0 : nv.Value]);
            }
            writer.Finish();
        }
    }
    else if (gcd != 0 && gcd != 1)
    {
        Meta.WriteByte((byte)GCD_COMPRESSED);
        Meta.WriteVInt(PackedInts.VERSION_CURRENT);
        Data.WriteLong(minValue);
        Data.WriteLong(gcd);
        Data.WriteVInt(BLOCK_SIZE);

        var writer = new BlockPackedWriter(Data, BLOCK_SIZE);
        foreach (long? nv in values)
        {
            long value = nv == null ? 0 : nv.Value;
            writer.Add((value - minValue) / gcd);
        }
        writer.Finish();
    }
    else
    {
        Meta.WriteByte((byte)DELTA_COMPRESSED); // delta-compressed
        Meta.WriteVInt(PackedInts.VERSION_CURRENT);
        Data.WriteVInt(BLOCK_SIZE);

        var writer = new BlockPackedWriter(Data, BLOCK_SIZE);
        foreach (long? nv in values)
        {
            writer.Add(nv == null ? 0 : nv.Value);
        }
        writer.Finish();
    }
}
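// --- Illustrative sketch (not part of the original consumer) ----------------
// The GCD_COMPRESSED branch above stores each value as (value - minValue) / gcd
// so the BlockPackedWriter only has to pack small quotients. The helpers below
// use hypothetical names invented for this example; they only demonstrate that
// the transform is lossless, assuming gcd divides every (value - minValue),
// which is how gcd is computed above.
private static long[] GcdEncode(long[] values, long minValue, long gcd)
{
    var quotients = new long[values.Length];
    for (int i = 0; i < values.Length; i++)
    {
        // exact division: gcd was derived from the deltas, so there is no remainder
        quotients[i] = (values[i] - minValue) / gcd;
    }
    return quotients;
}

private static long[] GcdDecode(long[] quotients, long minValue, long gcd)
{
    var values = new long[quotients.Length];
    for (int i = 0; i < quotients.Length; i++)
    {
        values[i] = minValue + gcd * quotients[i];
    }
    return values;
}
// Example: { 100, 130, 160 } gives minValue = 100 and gcd = 30, so only
// { 0, 1, 2 } needs to be block-packed; decoding multiplies by gcd and
// re-adds minValue.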
internal virtual void AddNumericField(FieldInfo field, IEnumerable<long?> values, bool optimizeStorage)
{
    long count = 0;
    long minValue = long.MaxValue;
    long maxValue = long.MinValue;
    long gcd = 0;
    bool missing = false;
    // TODO: more efficient?
    HashSet<long> uniqueValues = null;
    if (optimizeStorage)
    {
        uniqueValues = new HashSet<long>();

        foreach (long? nv in values)
        {
            long v;
            if (nv == null)
            {
                v = 0;
                missing = true;
            }
            else
            {
                v = nv.Value;
            }

            if (gcd != 1)
            {
                if (v < long.MinValue / 2 || v > long.MaxValue / 2)
                {
                    // in that case v - minValue might overflow and make the GCD computation return
                    // wrong results. Since these extreme values are unlikely, we just discard
                    // GCD computation for them
                    gcd = 1;
                }
                else if (count != 0) // minValue needs to be set first
                {
                    gcd = MathUtil.Gcd(gcd, v - minValue);
                }
            }

            minValue = Math.Min(minValue, v);
            maxValue = Math.Max(maxValue, v);

            if (uniqueValues != null)
            {
                if (uniqueValues.Add(v))
                {
                    if (uniqueValues.Count > 256)
                    {
                        // too many distinct values for table compression; give up tracking them
                        uniqueValues = null;
                    }
                }
            }

            ++count;
        }
    }
    else
    {
        foreach (var nv in values)
        {
            ++count;
        }
    }

    long delta = maxValue - minValue;

    int format;
    if (uniqueValues != null
        && (delta < 0L || PackedInts.BitsRequired(uniqueValues.Count - 1) < PackedInts.BitsRequired(delta))
        && count <= int.MaxValue)
    {
        format = TABLE_COMPRESSED;
    }
    else if (gcd != 0 && gcd != 1)
    {
        format = GCD_COMPRESSED;
    }
    else
    {
        format = DELTA_COMPRESSED;
    }

    Meta.WriteVInt(field.Number);
    Meta.WriteByte((byte)Lucene45DocValuesFormat.NUMERIC);
    Meta.WriteVInt(format);
    if (missing)
    {
        Meta.WriteLong(Data.FilePointer);
        WriteMissingBitset(values);
    }
    else
    {
        Meta.WriteLong(-1L);
    }
    Meta.WriteVInt(PackedInts.VERSION_CURRENT);
    Meta.WriteLong(Data.FilePointer);
    Meta.WriteVLong(count);
    Meta.WriteVInt(BLOCK_SIZE);

    switch (format)
    {
        case GCD_COMPRESSED:
            Meta.WriteLong(minValue);
            Meta.WriteLong(gcd);
            BlockPackedWriter quotientWriter = new BlockPackedWriter(Data, BLOCK_SIZE);
            foreach (long? nv in values)
            {
                long value = nv == null ? 0 : nv.Value;
                quotientWriter.Add((value - minValue) / gcd);
            }
            quotientWriter.Finish();
            break;

        case DELTA_COMPRESSED:
            BlockPackedWriter writer = new BlockPackedWriter(Data, BLOCK_SIZE);
            foreach (long? nv in values)
            {
                writer.Add(nv == null ? 0 : nv.Value);
            }
            writer.Finish();
            break;

        case TABLE_COMPRESSED:
            // LUCENE TO-DO: ToArray had a parameter in the Java original
            long[] decode = uniqueValues.ToArray();
            Dictionary<long, int> encode = new Dictionary<long, int>();
            Meta.WriteVInt(decode.Length);
            for (int i = 0; i < decode.Length; i++)
            {
                Meta.WriteLong(decode[i]);
                encode[decode[i]] = i;
            }

            int bitsRequired = PackedInts.BitsRequired(uniqueValues.Count - 1);
            PackedInts.Writer ordsWriter = PackedInts.GetWriterNoHeader(Data, PackedInts.Format.PACKED, (int)count, bitsRequired, PackedInts.DEFAULT_BUFFER_SIZE);
            foreach (long? nv in values)
            {
                ordsWriter.Add(encode[nv == null ? 0 : nv.Value]);
            }
            ordsWriter.Finish();
            break;

        default:
            throw new InvalidOperationException();
    }
}
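// --- Illustrative sketch (not part of the original consumer) ----------------
// The format decision made above, pulled out into a hypothetical helper so the
// three cases are easier to read. TABLE_COMPRESSED wins when packing ordinals
// into the (at most 256-entry) table of unique values needs fewer bits than
// packing the raw range maxValue - minValue, or when that range overflows to a
// negative delta; GCD_COMPRESSED wins when all deltas share a non-trivial
// divisor; otherwise values are block-packed directly as DELTA_COMPRESSED.
// PackedInts and the format constants are the same ones used by the method above;
// uniqueValueCount is null when the table optimization was abandoned.
private static int ChooseNumericFormat(long count, long minValue, long maxValue, long gcd, int? uniqueValueCount)
{
    long delta = maxValue - minValue; // may overflow and go negative for extreme ranges

    if (uniqueValueCount.HasValue
        && (delta < 0L || PackedInts.BitsRequired(uniqueValueCount.Value - 1) < PackedInts.BitsRequired(delta))
        && count <= int.MaxValue)
    {
        return TABLE_COMPRESSED;   // dictionary of unique values + packed ordinals
    }
    if (gcd != 0 && gcd != 1)
    {
        return GCD_COMPRESSED;     // store (value - minValue) / gcd
    }
    return DELTA_COMPRESSED;       // store values directly via BlockPackedWriter
}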