コード例 #1
0
ファイル: ForUtil.cs プロジェクト: zhuthree/lucenenet
        /// <summary>
        /// Create a new <see cref="ForUtil"/> instance and save state into <paramref name="out"/>.
        /// </summary>
        internal ForUtil(float acceptableOverheadRatio, DataOutput @out)
        {
            @out.WriteVInt32(PackedInt32s.VERSION_CURRENT);
            encodedSizes = new int[33];
            encoders     = new PackedInt32s.IEncoder[33];
            decoders     = new PackedInt32s.IDecoder[33];
            iterations   = new int[33];

            for (int bpv = 1; bpv <= 32; ++bpv)
            {
                PackedInt32s.FormatAndBits formatAndBits = PackedInt32s.FastestFormatAndBits(Lucene41PostingsFormat.BLOCK_SIZE, bpv, acceptableOverheadRatio);
                Debug.Assert(formatAndBits.Format.IsSupported(formatAndBits.BitsPerValue));
                Debug.Assert(formatAndBits.BitsPerValue <= 32);
                encodedSizes[bpv] = EncodedSize(formatAndBits.Format, PackedInt32s.VERSION_CURRENT, formatAndBits.BitsPerValue);
                encoders[bpv]     = PackedInt32s.GetEncoder(formatAndBits.Format, PackedInt32s.VERSION_CURRENT, formatAndBits.BitsPerValue);
                decoders[bpv]     = PackedInt32s.GetDecoder(formatAndBits.Format, PackedInt32s.VERSION_CURRENT, formatAndBits.BitsPerValue);
                iterations[bpv]   = ComputeIterations(decoders[bpv]);

                @out.WriteVInt32(formatAndBits.Format.Id << 5 | (formatAndBits.BitsPerValue - 1));
            }
        }
コード例 #2
0
ファイル: PagedMutable.cs プロジェクト: harunoknel/YAFNET
 /// <summary>
 /// Create a new <see cref="PagedMutable"/> instance.
 /// </summary>
 /// <param name="size"> The number of values to store. </param>
 /// <param name="pageSize"> The number of values per page. </param>
 /// <param name="bitsPerValue"> The number of bits per value. </param>
 /// <param name="acceptableOverheadRatio"> An acceptable overhead ratio. </param>
 public PagedMutable(long size, int pageSize, int bitsPerValue, float acceptableOverheadRatio)
     : this(size, pageSize, PackedInt32s.FastestFormatAndBits(pageSize, bitsPerValue, acceptableOverheadRatio))
 {
     FillPages();
 }
コード例 #3
0
        internal virtual void AddNumericField(FieldInfo field, IEnumerable <long?> values, bool optimizeStorage)
        {
            meta.WriteVInt32(field.Number);
            meta.WriteByte(MemoryDocValuesProducer.NUMBER);
            meta.WriteInt64(data.GetFilePointer());
            long minValue = long.MaxValue;
            long maxValue = long.MinValue;
            long gcd      = 0;
            bool missing  = false;
            // TODO: more efficient?
            ISet <long?> uniqueValues = null;

            if (optimizeStorage)
            {
                uniqueValues = new JCG.HashSet <long?>();

                long count = 0;
                foreach (var nv in values)
                {
                    long v;
                    if (nv == null)
                    {
                        v       = 0;
                        missing = true;
                    }
                    else
                    {
                        v = nv.Value;
                    }

                    if (gcd != 1)
                    {
                        if (v < long.MinValue / 2 || v > long.MaxValue / 2)
                        {
                            // in that case v - minValue might overflow and make the GCD computation return
                            // wrong results. Since these extreme values are unlikely, we just discard
                            // GCD computation for them
                            gcd = 1;
                        } // minValue needs to be set first
                        else if (count != 0)
                        {
                            gcd = MathUtil.Gcd(gcd, v - minValue);
                        }
                    }

                    minValue = Math.Min(minValue, v);
                    maxValue = Math.Max(maxValue, v);

                    if (uniqueValues != null)
                    {
                        if (uniqueValues.Add(v))
                        {
                            if (uniqueValues.Count > 256)
                            {
                                uniqueValues = null;
                            }
                        }
                    }

                    ++count;
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(count == maxDoc);
                }
            }

            if (missing)
            {
                long start = data.GetFilePointer();
                WriteMissingBitset(values);
                meta.WriteInt64(start);
                meta.WriteInt64(data.GetFilePointer() - start);
            }
            else
            {
                meta.WriteInt64(-1L);
            }

            if (uniqueValues != null)
            {
                // small number of unique values

                int           bitsPerValue  = PackedInt32s.BitsRequired(uniqueValues.Count - 1);
                FormatAndBits formatAndBits = PackedInt32s.FastestFormatAndBits(maxDoc, bitsPerValue,
                                                                                acceptableOverheadRatio);
                if (formatAndBits.BitsPerValue == 8 && minValue >= sbyte.MinValue && maxValue <= sbyte.MaxValue)
                {
                    meta.WriteByte(MemoryDocValuesProducer.UNCOMPRESSED); // uncompressed
                    foreach (var nv in values)
                    {
                        data.WriteByte((byte)nv.GetValueOrDefault());
                    }
                }
                else
                {
                    meta.WriteByte(MemoryDocValuesProducer.TABLE_COMPRESSED); // table-compressed
                    long?[] decode = new long?[uniqueValues.Count];
                    uniqueValues.CopyTo(decode, 0);

                    var encode = new Dictionary <long?, int?>();
                    data.WriteVInt32(decode.Length);
                    for (int i = 0; i < decode.Length; i++)
                    {
                        data.WriteInt64(decode[i].Value);
                        encode[decode[i]] = i;
                    }

                    meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
                    data.WriteVInt32(formatAndBits.Format.Id);
                    data.WriteVInt32(formatAndBits.BitsPerValue);

                    PackedInt32s.Writer writer = PackedInt32s.GetWriterNoHeader(data, formatAndBits.Format, maxDoc,
                                                                                formatAndBits.BitsPerValue, PackedInt32s.DEFAULT_BUFFER_SIZE);
                    foreach (var nv in values)
                    {
                        var v = encode[nv.GetValueOrDefault()];

                        writer.Add((long)v);
                    }
                    writer.Finish();
                }
            }
            else if (gcd != 0 && gcd != 1)
            {
                meta.WriteByte(MemoryDocValuesProducer.GCD_COMPRESSED);
                meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
                data.WriteInt64(minValue);
                data.WriteInt64(gcd);
                data.WriteVInt32(MemoryDocValuesProducer.BLOCK_SIZE);

                var writer = new BlockPackedWriter(data, MemoryDocValuesProducer.BLOCK_SIZE);
                foreach (var nv in values)
                {
                    writer.Add((nv.GetValueOrDefault() - minValue) / gcd);
                }
                writer.Finish();
            }
            else
            {
                meta.WriteByte(MemoryDocValuesProducer.DELTA_COMPRESSED); // delta-compressed

                meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
                data.WriteVInt32(MemoryDocValuesProducer.BLOCK_SIZE);

                var writer = new BlockPackedWriter(data, MemoryDocValuesProducer.BLOCK_SIZE);
                foreach (var nv in values)
                {
                    writer.Add(nv.GetValueOrDefault());
                }
                writer.Finish();
            }
        }
コード例 #4
0
        public override void AddNumericField(FieldInfo field, IEnumerable <long?> values)
        {
            meta.WriteVInt32(field.Number);
            meta.WriteByte((byte)NUMBER);
            meta.WriteInt64(data.GetFilePointer());
            long minValue = long.MaxValue;
            long maxValue = long.MinValue;
            long gcd      = 0;
            // TODO: more efficient?
            HashSet <long> uniqueValues = null;

            if (true)
            {
                uniqueValues = new HashSet <long>();

                long count = 0;
                foreach (long?nv in values)
                {
                    Debug.Assert(nv != null);
                    long v = nv.Value;

                    if (gcd != 1)
                    {
                        if (v < long.MinValue / 2 || v > long.MaxValue / 2)
                        {
                            // in that case v - minValue might overflow and make the GCD computation return
                            // wrong results. Since these extreme values are unlikely, we just discard
                            // GCD computation for them
                            gcd = 1;
                        } // minValue needs to be set first
                        else if (count != 0)
                        {
                            gcd = MathUtil.Gcd(gcd, v - minValue);
                        }
                    }

                    minValue = Math.Min(minValue, v);
                    maxValue = Math.Max(maxValue, v);

                    if (uniqueValues != null)
                    {
                        if (uniqueValues.Add(v))
                        {
                            if (uniqueValues.Count > 256)
                            {
                                uniqueValues = null;
                            }
                        }
                    }

                    ++count;
                }
                Debug.Assert(count == maxDoc);
            }

            if (uniqueValues != null)
            {
                // small number of unique values
                int           bitsPerValue  = PackedInt32s.BitsRequired(uniqueValues.Count - 1);
                FormatAndBits formatAndBits = PackedInt32s.FastestFormatAndBits(maxDoc, bitsPerValue, acceptableOverheadRatio);
                if (formatAndBits.BitsPerValue == 8 && minValue >= sbyte.MinValue && maxValue <= sbyte.MaxValue)
                {
                    meta.WriteByte((byte)UNCOMPRESSED); // uncompressed
                    foreach (long?nv in values)
                    {
                        data.WriteByte((byte)nv.GetValueOrDefault());
                    }
                }
                else
                {
                    meta.WriteByte((byte)TABLE_COMPRESSED); // table-compressed
                    var decode = uniqueValues.ToArray();
                    var encode = new Dictionary <long, int>();
                    data.WriteVInt32(decode.Length);
                    for (int i = 0; i < decode.Length; i++)
                    {
                        data.WriteInt64(decode[i]);
                        encode[decode[i]] = i;
                    }

                    meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
                    data.WriteVInt32(formatAndBits.Format.Id);
                    data.WriteVInt32(formatAndBits.BitsPerValue);

                    PackedInt32s.Writer writer = PackedInt32s.GetWriterNoHeader(data, formatAndBits.Format, maxDoc, formatAndBits.BitsPerValue, PackedInt32s.DEFAULT_BUFFER_SIZE);
                    foreach (long?nv in values)
                    {
                        writer.Add(encode[nv.GetValueOrDefault()]);
                    }
                    writer.Finish();
                }
            }
            else if (gcd != 0 && gcd != 1)
            {
                meta.WriteByte((byte)GCD_COMPRESSED);
                meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
                data.WriteInt64(minValue);
                data.WriteInt64(gcd);
                data.WriteVInt32(BLOCK_SIZE);

                var writer = new BlockPackedWriter(data, BLOCK_SIZE);
                foreach (long?nv in values)
                {
                    writer.Add((nv.GetValueOrDefault() - minValue) / gcd);
                }
                writer.Finish();
            }
            else
            {
                meta.WriteByte((byte)DELTA_COMPRESSED); // delta-compressed

                meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
                data.WriteVInt32(BLOCK_SIZE);

                var writer = new BlockPackedWriter(data, BLOCK_SIZE);
                foreach (long?nv in values)
                {
                    writer.Add(nv.GetValueOrDefault());
                }
                writer.Finish();
            }
        }
コード例 #5
0
        internal virtual void AddNumericField(FieldInfo field, IEnumerable <long?> values, bool optimizeStorage)
        {
            Meta.WriteVInt32(field.Number);
            Meta.WriteByte((byte)Lucene42DocValuesProducer.NUMBER);
            Meta.WriteInt64(Data.GetFilePointer());
            long minValue = long.MaxValue;
            long maxValue = long.MinValue;
            long gcd      = 0;
            // TODO: more efficient?
            HashSet <long> uniqueValues = null;

            if (optimizeStorage)
            {
                uniqueValues = new HashSet <long>();

                long count = 0;
                foreach (long?nv in values)
                {
                    // TODO: support this as MemoryDVFormat (and be smart about missing maybe)
                    long v = nv.GetValueOrDefault();

                    if (gcd != 1)
                    {
                        if (v < long.MinValue / 2 || v > long.MaxValue / 2)
                        {
                            // in that case v - minValue might overflow and make the GCD computation return
                            // wrong results. Since these extreme values are unlikely, we just discard
                            // GCD computation for them
                            gcd = 1;
                        } // minValue needs to be set first
                        else if (count != 0)
                        {
                            gcd = MathUtil.Gcd(gcd, v - minValue);
                        }
                    }

                    minValue = Math.Min(minValue, v);
                    maxValue = Math.Max(maxValue, v);

                    if (uniqueValues != null)
                    {
                        if (uniqueValues.Add(v))
                        {
                            if (uniqueValues.Count > 256)
                            {
                                uniqueValues = null;
                            }
                        }
                    }

                    ++count;
                }
                Debug.Assert(count == MaxDoc);
            }

            if (uniqueValues != null)
            {
                // small number of unique values
                int           bitsPerValue  = PackedInt32s.BitsRequired(uniqueValues.Count - 1);
                FormatAndBits formatAndBits = PackedInt32s.FastestFormatAndBits(MaxDoc, bitsPerValue, AcceptableOverheadRatio);
                if (formatAndBits.BitsPerValue == 8 && minValue >= sbyte.MinValue && maxValue <= sbyte.MaxValue)
                {
                    Meta.WriteByte((byte)Lucene42DocValuesProducer.UNCOMPRESSED); // uncompressed
                    foreach (long?nv in values)
                    {
                        Data.WriteByte((byte)nv.GetValueOrDefault());
                    }
                }
                else
                {
                    Meta.WriteByte((byte)Lucene42DocValuesProducer.TABLE_COMPRESSED); // table-compressed
                    long[] decode = uniqueValues.ToArray(/*new long?[uniqueValues.Count]*/);
                    var    encode = new Dictionary <long, int>();
                    Data.WriteVInt32(decode.Length);
                    for (int i = 0; i < decode.Length; i++)
                    {
                        Data.WriteInt64(decode[i]);
                        encode[decode[i]] = i;
                    }

                    Meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
                    Data.WriteVInt32(formatAndBits.Format.Id);
                    Data.WriteVInt32(formatAndBits.BitsPerValue);

                    PackedInt32s.Writer writer = PackedInt32s.GetWriterNoHeader(Data, formatAndBits.Format, MaxDoc, formatAndBits.BitsPerValue, PackedInt32s.DEFAULT_BUFFER_SIZE);
                    foreach (long?nv in values)
                    {
                        writer.Add(encode[nv.GetValueOrDefault()]);
                    }
                    writer.Finish();
                }
            }
            else if (gcd != 0 && gcd != 1)
            {
                Meta.WriteByte((byte)Lucene42DocValuesProducer.GCD_COMPRESSED);
                Meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
                Data.WriteInt64(minValue);
                Data.WriteInt64(gcd);
                Data.WriteVInt32(Lucene42DocValuesProducer.BLOCK_SIZE);

                BlockPackedWriter writer = new BlockPackedWriter(Data, Lucene42DocValuesProducer.BLOCK_SIZE);
                foreach (long?nv in values)
                {
                    writer.Add((nv.GetValueOrDefault() - minValue) / gcd);
                }
                writer.Finish();
            }
            else
            {
                Meta.WriteByte((byte)Lucene42DocValuesProducer.DELTA_COMPRESSED); // delta-compressed

                Meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
                Data.WriteVInt32(Lucene42DocValuesProducer.BLOCK_SIZE);

                BlockPackedWriter writer = new BlockPackedWriter(Data, Lucene42DocValuesProducer.BLOCK_SIZE);
                foreach (long?nv in values)
                {
                    writer.Add(nv.GetValueOrDefault());
                }
                writer.Finish();
            }
        }