Ejemplo n.º 1
0
        public override void AddNumericField(FieldInfo field, IEnumerable<long?> values)
        {
            Meta.WriteVInt(field.Number);
            Meta.WriteByte((byte)NUMBER);
            Meta.WriteLong(Data.FilePointer);
            long minValue = long.MaxValue;
            long maxValue = long.MinValue;
            long gcd = 0;
            // TODO: more efficient?
            HashSet<long> uniqueValues = null;
            if (true)
            {
                uniqueValues = new HashSet<long>();

                long count = 0;
                foreach (long? nv in values)
                {
                    Debug.Assert(nv != null);
                    long v = nv.Value;

                    if (gcd != 1)
                    {
                        if (v < long.MinValue / 2 || v > long.MaxValue / 2)
                        {
                            // in that case v - minValue might overflow and make the GCD computation return
                            // wrong results. Since these extreme values are unlikely, we just discard
                            // GCD computation for them
                            gcd = 1;
                        } // minValue needs to be set first
                        else if (count != 0)
                        {
                            gcd = MathUtil.Gcd(gcd, v - minValue);
                        }
                    }

                    minValue = Math.Min(minValue, v);
                    maxValue = Math.Max(maxValue, v);

                    if (uniqueValues != null)
                    {
                        if (uniqueValues.Add(v))
                        {
                            if (uniqueValues.Count > 256)
                            {
                                uniqueValues = null;
                            }
                        }
                    }

                    ++count;
                }
                Debug.Assert(count == MaxDoc);
            }

            if (uniqueValues != null)
            {
                // small number of unique values
                int bitsPerValue = PackedInts.BitsRequired(uniqueValues.Count - 1);
                FormatAndBits formatAndBits = PackedInts.FastestFormatAndBits(MaxDoc, bitsPerValue, AcceptableOverheadRatio);
                if (formatAndBits.bitsPerValue == 8 && minValue >= sbyte.MinValue && maxValue <= sbyte.MaxValue)
                {
                    Meta.WriteByte((byte)UNCOMPRESSED); // uncompressed
                    foreach (long? nv in values)
                    {
                        Data.WriteByte(nv == null ? (byte)0 : (byte)(sbyte)nv.Value);
                    }
                }
                else
                {
                    Meta.WriteByte((byte)TABLE_COMPRESSED); // table-compressed
                    //LUCENE TO-DO, ToArray had a parameter to start
                    var decode = uniqueValues.ToArray();
                    var encode = new Dictionary<long, int>();
                    Data.WriteVInt(decode.Length);
                    for (int i = 0; i < decode.Length; i++)
                    {
                        Data.WriteLong(decode[i]);
                        encode[decode[i]] = i;
                    }

                    Meta.WriteVInt(PackedInts.VERSION_CURRENT);
                    Data.WriteVInt(formatAndBits.format.id);
                    Data.WriteVInt(formatAndBits.bitsPerValue);

                    PackedInts.Writer writer = PackedInts.GetWriterNoHeader(Data, formatAndBits.format, MaxDoc, formatAndBits.bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
                    foreach (long? nv in values)
                    {
                        writer.Add(encode[nv == null ? 0 : nv.Value]);
                    }
                    writer.Finish();
                }
            }
            else if (gcd != 0 && gcd != 1)
            {
                Meta.WriteByte((byte)GCD_COMPRESSED);
                Meta.WriteVInt(PackedInts.VERSION_CURRENT);
                Data.WriteLong(minValue);
                Data.WriteLong(gcd);
                Data.WriteVInt(BLOCK_SIZE);

                var writer = new BlockPackedWriter(Data, BLOCK_SIZE);
                foreach (long? nv in values)
                {
                    long value = nv == null ? 0 : nv.Value;
                    writer.Add((value - minValue) / gcd);
                }
                writer.Finish();
            }
            else
            {
                Meta.WriteByte((byte)DELTA_COMPRESSED); // delta-compressed

                Meta.WriteVInt(PackedInts.VERSION_CURRENT);
                Data.WriteVInt(BLOCK_SIZE);

                var writer = new BlockPackedWriter(Data, BLOCK_SIZE);
                foreach (long? nv in values)
                {
                    writer.Add(nv == null ? 0 : nv.Value);
                }
                writer.Finish();
            }
        }
        internal virtual void AddNumericField(FieldInfo field, IEnumerable<long?> values, bool optimizeStorage)
        {
            long count = 0;
            long minValue = long.MaxValue;
            long maxValue = long.MinValue;
            long gcd = 0;
            bool missing = false;
            // TODO: more efficient?
            HashSet<long> uniqueValues = null;

            if (optimizeStorage)
            {
                uniqueValues = new HashSet<long>();

                foreach (long? nv in values)
                {
                    long v;
                    if (nv == null)
                    {
                        v = 0;
                        missing = true;
                    }
                    else
                    {
                        v = nv.Value;
                    }

                    if (gcd != 1)
                    {
                        if (v < long.MinValue / 2 || v > long.MaxValue / 2)
                        {
                            // in that case v - minValue might overflow and make the GCD computation return
                            // wrong results. Since these extreme values are unlikely, we just discard
                            // GCD computation for them
                            gcd = 1;
                        } // minValue needs to be set first
                        else if (count != 0)
                        {
                            gcd = MathUtil.Gcd(gcd, v - minValue);
                        }
                    }

                    minValue = Math.Min(minValue, v);
                    maxValue = Math.Max(maxValue, v);

                    if (uniqueValues != null)
                    {
                        if (uniqueValues.Add(v))
                        {
                            if (uniqueValues.Count > 256)
                            {
                                uniqueValues = null;
                            }
                        }
                    }

                    ++count;
                }
            }
            else
            {
                foreach (var nv in values)
                {
                    ++count;
                }
            }

            long delta = maxValue - minValue;

            int format;
            if (uniqueValues != null && (delta < 0L || PackedInts.BitsRequired(uniqueValues.Count - 1) < PackedInts.BitsRequired(delta)) && count <= int.MaxValue)
            {
                format = TABLE_COMPRESSED;
            }
            else if (gcd != 0 && gcd != 1)
            {
                format = GCD_COMPRESSED;
            }
            else
            {
                format = DELTA_COMPRESSED;
            }
            Meta.WriteVInt(field.Number);
            Meta.WriteByte((byte)Lucene45DocValuesFormat.NUMERIC);
            Meta.WriteVInt(format);
            if (missing)
            {
                Meta.WriteLong(Data.FilePointer);
                WriteMissingBitset(values);
            }
            else
            {
                Meta.WriteLong(-1L);
            }
            Meta.WriteVInt(PackedInts.VERSION_CURRENT);
            Meta.WriteLong(Data.FilePointer);
            Meta.WriteVLong(count);
            Meta.WriteVInt(BLOCK_SIZE);

            switch (format)
            {
                case GCD_COMPRESSED:
                    Meta.WriteLong(minValue);
                    Meta.WriteLong(gcd);
                    BlockPackedWriter quotientWriter = new BlockPackedWriter(Data, BLOCK_SIZE);
                    foreach (long? nv in values)
                    {
                        long value = nv == null ? 0 : nv.Value;
                        quotientWriter.Add((value - minValue) / gcd);
                    }
                    quotientWriter.Finish();
                    break;

                case DELTA_COMPRESSED:
                    BlockPackedWriter writer = new BlockPackedWriter(Data, BLOCK_SIZE);
                    foreach (long? nv in values)
                    {
                        writer.Add(nv == null ? 0 : nv.Value);
                    }
                    writer.Finish();
                    break;

                case TABLE_COMPRESSED:
                    long[] decode = uniqueValues.ToArray();//LUCENE TO-DO Hadd oparamerter before
                    Dictionary<long, int> encode = new Dictionary<long, int>();
                    Meta.WriteVInt(decode.Length);
                    for (int i = 0; i < decode.Length; i++)
                    {
                        Meta.WriteLong(decode[i]);
                        encode[decode[i]] = i;
                    }
                    int bitsRequired = PackedInts.BitsRequired(uniqueValues.Count - 1);
                    PackedInts.Writer ordsWriter = PackedInts.GetWriterNoHeader(Data, PackedInts.Format.PACKED, (int)count, bitsRequired, PackedInts.DEFAULT_BUFFER_SIZE);
                    foreach (long? nv in values)
                    {
                        ordsWriter.Add(encode[nv == null ? 0 : nv.Value]);
                    }
                    ordsWriter.Finish();
                    break;

                default:
                    throw new InvalidOperationException();
            }
        }