Example #1
        private void AddFixedSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable <BytesRef> values, IEnumerable <long?> docToOrd, int length)
        {
            field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_FIXED_SORTED.Name);

            CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);

            CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);

            /* values */

            data.WriteInt(length);
            int valueCount = 0;

            foreach (BytesRef v in values)
            {
                data.WriteBytes(v.Bytes, v.Offset, v.Length);
                valueCount++;
            }

            /* ordinals */

            index.WriteInt(valueCount);
            int maxDoc = State.SegmentInfo.DocCount;

            Debug.Assert(valueCount > 0);
            PackedInts.Writer w = PackedInts.GetWriter(index, maxDoc, PackedInts.BitsRequired(valueCount - 1), PackedInts.DEFAULT);
            foreach (long n in docToOrd)
            {
                w.Add((long)n);
            }
            w.Finish();
        }
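
The ordinals block above shows the basic pattern every example on this page shares: size the writer to one entry per document, derive the bit width from the largest value that will ever be added (here valueCount - 1, the highest ordinal), add exactly that many values, and call Finish. A minimal sketch, assuming the same Lucene.NET PackedInts API surface used above (GetWriter, BitsRequired, Add, Finish) and a caller-supplied IndexOutput:

        // Minimal sketch (illustrative helper, not part of the original writer):
        // one packed ordinal per document, sized by the highest possible ordinal.
        private static void WritePackedOrds(IndexOutput index, int maxDoc, int valueCount, IEnumerable<long?> docToOrd)
        {
            // every ordinal lies in [0, valueCount - 1], which fixes the bits per value
            PackedInts.Writer w = PackedInts.GetWriter(index, maxDoc, PackedInts.BitsRequired(valueCount - 1), PackedInts.DEFAULT);
            foreach (long? ord in docToOrd)
            {
                w.Add(ord ?? 0);    // the writer above assumes non-null ordinals; 0 here is a defensive default
            }
            w.Finish();             // flushes the final block; required before the stream is complete
        }
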
Example #2
        private void FlushNumTerms(int totalFields)
        {
            int maxNumTerms = 0;

            foreach (DocData dd in PendingDocs)
            {
                foreach (FieldData fd in dd.Fields)
                {
                    maxNumTerms |= fd.NumTerms;
                }
            }
            int bitsRequired = PackedInts.BitsRequired(maxNumTerms);

            VectorsStream.WriteVInt(bitsRequired);
            PackedInts.Writer writer = PackedInts.GetWriterNoHeader(VectorsStream, PackedInts.Format.PACKED, totalFields, bitsRequired, 1);
            foreach (DocData dd in PendingDocs)
            {
                foreach (FieldData fd in dd.Fields)
                {
                    writer.Add(fd.NumTerms);
                }
            }
            Debug.Assert(writer.Ord() == totalFields - 1);
            writer.Finish();
        }
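
The maxNumTerms |= fd.NumTerms line above is worth a note: for non-negative values, OR-folding them yields a number whose highest set bit matches that of the true maximum, so PackedInts.BitsRequired(maxNumTerms) gives exactly the width needed for the largest count without tracking a running Math.Max. The final assert relies on Ord() reporting the index of the last value written, i.e. totalFields - 1 once every field has been added. An illustrative check (the sample counts are hypothetical):

        int[] numTerms = { 3, 17, 9 };          // hypothetical per-field term counts
        int folded = 0, max = 0;
        foreach (int n in numTerms)
        {
            folded |= n;                        // 3 | 17 | 9 == 27
            max = Math.Max(max, n);             // 17
        }
        Debug.Assert(PackedInts.BitsRequired(folded) == PackedInts.BitsRequired(max));  // both need 5 bits
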
Example #3
        private void AddVarIntsField(FieldInfo field, IndexOutput output, IEnumerable <long?> values, long minValue, long maxValue)
        {
            field.PutAttribute(LegacyKey, LegacyDocValuesType.VAR_INTS.Name);

            CodecUtil.WriteHeader(output, Lucene40DocValuesFormat.VAR_INTS_CODEC_NAME, Lucene40DocValuesFormat.VAR_INTS_VERSION_CURRENT);

            long delta = maxValue - minValue;

            if (delta < 0)
            {
                // writes longs
                output.WriteByte((byte)Lucene40DocValuesFormat.VAR_INTS_FIXED_64);
                foreach (long? n in values)
                {
                    output.WriteLong(n == null ? 0 : n.Value);
                }
            }
            else
            {
                // writes packed ints
                output.WriteByte((byte)Lucene40DocValuesFormat.VAR_INTS_PACKED);
                output.WriteLong(minValue);
                output.WriteLong(0 - minValue); // default value (representation of 0)
                PackedInts.Writer writer = PackedInts.GetWriter(output, State.SegmentInfo.DocCount, PackedInts.BitsRequired(delta), PackedInts.DEFAULT);
                foreach (long? n in values)
                {
                    long v = n == null ? 0 : (long)n;
                    writer.Add(v - minValue);
                }
                writer.Finish();
            }
        }
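
The delta test above is an overflow guard: maxValue - minValue is computed in signed 64-bit arithmetic, so a range wider than a long can hold wraps around and comes out negative, in which case the values are written as raw fixed 64-bit longs. Otherwise each value is stored as v - minValue in BitsRequired(delta) bits. A small worked example with hypothetical numbers:

        // Hypothetical: values clustered between 1000 and 1003.
        long minValue = 1000, maxValue = 1003;
        long delta = maxValue - minValue;               // 3, non-negative: packed path
        int bits = PackedInts.BitsRequired(delta);      // 2 bits per document instead of 64
        // A reader reconstructs each value as minValue + packedValue.
        // Had the subtraction overflowed (e.g. minValue == long.MinValue, maxValue == long.MaxValue),
        // delta would be negative and the VAR_INTS_FIXED_64 path would be taken instead.
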
Example #4
        private void AddVarSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable <BytesRef> values, IEnumerable <long?> docToOrd)
        {
            field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_VAR_SORTED.Name);

            CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

            CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

            /* values */

            long startPos = data.FilePointer;

            int valueCount = 0;

            foreach (BytesRef v in values)
            {
                data.WriteBytes(v.Bytes, v.Offset, v.Length);
                valueCount++;
            }

            /* addresses */

            long maxAddress = data.FilePointer - startPos;

            index.WriteLong(maxAddress);

            Debug.Assert(valueCount != int.MaxValue); // unsupported by the 4.0 impl

            PackedInts.Writer w  = PackedInts.GetWriter(index, valueCount + 1, PackedInts.BitsRequired(maxAddress), PackedInts.DEFAULT);
            long currentPosition = 0;

            foreach (BytesRef v in values)
            {
                w.Add(currentPosition);
                currentPosition += v.Length;
            }
            // write sentinel
            Debug.Assert(currentPosition == maxAddress);
            w.Add(currentPosition);
            w.Finish();

            /* ordinals */

            int maxDoc = State.SegmentInfo.DocCount;

            Debug.Assert(valueCount > 0);
            PackedInts.Writer ords = PackedInts.GetWriter(index, maxDoc, PackedInts.BitsRequired(valueCount - 1), PackedInts.DEFAULT);
            foreach (long n in docToOrd)
            {
                ords.Add((long)n);
            }
            ords.Finish();
        }
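
The address table written above has valueCount + 1 entries: the start offset of every value plus a final sentinel equal to maxAddress. That sentinel is what lets a reader recover the length of any value without storing lengths separately. An illustrative helper (not part of the original reader):

        // Given the valueCount + 1 start offsets (last entry == maxAddress),
        // the i-th value's length is simply the gap to the next offset.
        private static long ValueLength(long[] addresses, int i)
        {
            return addresses[i + 1] - addresses[i];
        }
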
Example #5
            public override void Finish(long termsFilePointer)
            {
                // write primary terms dict offsets
                PackedIndexStart = _fgtiw.Output.FilePointer;

                PackedInts.Writer w = PackedInts.GetWriter(_fgtiw.Output, NumIndexTerms,
                                                           PackedInts.BitsRequired(termsFilePointer),
                                                           PackedInts.DEFAULT);

                // relative to our indexStart
                long upto = 0;

                for (int i = 0; i < NumIndexTerms; i++)
                {
                    upto += _termsPointerDeltas[i];
                    w.Add(upto);
                }
                w.Finish();

                PackedOffsetsStart = _fgtiw.Output.FilePointer;

                // write offsets into the byte[] terms
                w = PackedInts.GetWriter(_fgtiw.Output, 1 + NumIndexTerms, PackedInts.BitsRequired(_totTermLength),
                                         PackedInts.DEFAULT);
                upto = 0;
                for (int i = 0; i < NumIndexTerms; i++)
                {
                    w.Add(upto);
                    upto += _termLengths[i];
                }
                w.Add(upto);
                w.Finish();

                // our referrer holds onto us, while other fields are
                // being written, so don't tie up this RAM:
                _termLengths        = null;
                _termsPointerDeltas = null;
            }
Example #6
 private void FlushFields(int totalFields, int[] fieldNums)
 {
     PackedInts.Writer writer = PackedInts.GetWriterNoHeader(VectorsStream, PackedInts.Format.PACKED, totalFields, PackedInts.BitsRequired(fieldNums.Length - 1), 1);
     foreach (DocData dd in PendingDocs)
     {
         foreach (FieldData fd in dd.Fields)
         {
             int fieldNumIndex = Array.BinarySearch(fieldNums, fd.FieldNum);
             Debug.Assert(fieldNumIndex >= 0);
             writer.Add(fieldNumIndex);
         }
     }
     writer.Finish();
 }
Example #7
        private void AddFixedDerefBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable <BytesRef> values, int length)
        {
            field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_FIXED_DEREF.Name);

            CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_CURRENT);

            CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_CURRENT);

            // deduplicate
            SortedSet <BytesRef> dictionary = new SortedSet <BytesRef>();

            foreach (BytesRef v in values)
            {
                dictionary.Add(v == null ? new BytesRef() : BytesRef.DeepCopyOf(v));
            }

            /* values */
            data.WriteInt(length);
            foreach (BytesRef v in dictionary)
            {
                data.WriteBytes(v.Bytes, v.Offset, v.Length);
            }

            /* ordinals */
            int valueCount = dictionary.Count;

            Debug.Assert(valueCount > 0);
            index.WriteInt(valueCount);
            int maxDoc = State.SegmentInfo.DocCount;

            PackedInts.Writer w = PackedInts.GetWriter(index, maxDoc, PackedInts.BitsRequired(valueCount - 1), PackedInts.DEFAULT);

            BytesRef brefDummy;

            foreach (BytesRef v in values)
            {
                brefDummy = v;

                if (v == null)
                {
                    brefDummy = new BytesRef();
                }
                //int ord = dictionary.HeadSet(brefDummy).Size();
                int ord = dictionary.Count(@ref => @ref.CompareTo(brefDummy) < 0);
                w.Add(ord);
            }
            w.Finish();
        }
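
The ordinal lookup above is a direct translation of Java's SortedSet.headSet(value).size(): it counts how many dictionary entries sort before the current value, re-scanning the set once per document. If that cost mattered, the ordinals could be precomputed in a single pass over the sorted set; the sketch below is an illustrative alternative, not the original code, and assumes the value-based Equals/GetHashCode that BytesRef provides in Lucene.NET:

        Dictionary<BytesRef, int> ordinals = new Dictionary<BytesRef, int>();
        int nextOrd = 0;
        foreach (BytesRef v in dictionary)      // SortedSet enumerates in sort order
        {
            ordinals[v] = nextOrd++;
        }
        // per document: w.Add(ordinals[v == null ? new BytesRef() : v]);
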
Example #8
        // NOTE: 4.0 file format docs are crazy/wrong here...
        private void AddVarStraightBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable <BytesRef> values)
        {
            field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_VAR_STRAIGHT.Name);

            CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);

            CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);

            /* values */

            long startPos = data.FilePointer;

            foreach (BytesRef v in values)
            {
                if (v != null)
                {
                    data.WriteBytes(v.Bytes, v.Offset, v.Length);
                }
            }

            /* addresses */

            long maxAddress = data.FilePointer - startPos;

            index.WriteVLong(maxAddress);

            int maxDoc = State.SegmentInfo.DocCount;

            Debug.Assert(maxDoc != int.MaxValue); // unsupported by the 4.0 impl

            PackedInts.Writer w  = PackedInts.GetWriter(index, maxDoc + 1, PackedInts.BitsRequired(maxAddress), PackedInts.DEFAULT);
            long currentPosition = 0;

            foreach (BytesRef v in values)
            {
                w.Add(currentPosition);
                if (v != null)
                {
                    currentPosition += v.Length;
                }
            }
            // write sentinel
            Debug.Assert(currentPosition == maxAddress);
            w.Add(currentPosition);
            w.Finish();
        }
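
This is the same sentinel trick as the sorted variant, but keyed by document: maxDoc + 1 offsets are written and a null value contributes no bytes, so its slice reads back with zero length. A worked illustration with three hypothetical documents whose values have lengths 3, null, and 5:

        long[] addresses = { 0, 3, 3, 8 };      // maxDoc + 1 entries; the final 8 is the sentinel (== maxAddress)
        // doc 1 (the null value): addresses[2] - addresses[1] == 0
        // doc 2:                  addresses[3] - addresses[2] == 5
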
Example #9
        private void AddVarDerefBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable <BytesRef> values)
        {
            field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_VAR_DEREF.Name);

            CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);

            CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);

            // deduplicate
            SortedSet <BytesRef> dictionary = new SortedSet <BytesRef>();

            foreach (BytesRef v in values)
            {
                dictionary.Add(v == null ? new BytesRef() : BytesRef.DeepCopyOf(v));
            }

            /* values */
            long startPosition  = data.FilePointer;
            long currentAddress = 0;
            Dictionary <BytesRef, long> valueToAddress = new Dictionary <BytesRef, long>();

            foreach (BytesRef v in dictionary)
            {
                currentAddress    = data.FilePointer - startPosition;
                valueToAddress[v] = currentAddress;
                WriteVShort(data, v.Length);
                data.WriteBytes(v.Bytes, v.Offset, v.Length);
            }

            /* ordinals */
            long totalBytes = data.FilePointer - startPosition;

            index.WriteLong(totalBytes);
            int maxDoc = State.SegmentInfo.DocCount;

            PackedInts.Writer w = PackedInts.GetWriter(index, maxDoc, PackedInts.BitsRequired(currentAddress), PackedInts.DEFAULT);

            foreach (BytesRef v in values)
            {
                w.Add(valueToAddress[v == null ? new BytesRef() : v]);
            }
            w.Finish();
        }
Example #10
 private static void SaveInts(int[] values, int length, DataOutput @out)
 {
     Debug.Assert(length > 0);
     if (length == 1)
     {
         @out.WriteVInt(values[0]);
     }
     else
     {
         bool allEqual = true;
         for (int i = 1; i < length; ++i)
         {
             if (values[i] != values[0])
             {
                 allEqual = false;
                 break;
             }
         }
         if (allEqual)
         {
             @out.WriteVInt(0);
             @out.WriteVInt(values[0]);
         }
         else
         {
             long max = 0;
             for (int i = 0; i < length; ++i)
             {
                 max |= (uint)values[i];
             }
             int bitsRequired = PackedInts.BitsRequired(max);
             @out.WriteVInt(bitsRequired);
             PackedInts.Writer w = PackedInts.GetWriterNoHeader(@out, PackedInts.Format.PACKED, length, bitsRequired, 1);
             for (int i = 0; i < length; ++i)
             {
                 w.Add(values[i]);
             }
             w.Finish();
         }
     }
 }
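
SaveInts picks between three encodings: a single value is written as one VInt; a run of identical values is written as the marker 0 followed by the value; anything else gets a VInt holding the bit width followed by a headerless packed block. Since BitsRequired is at least 1 for any non-negative input, the 0 marker is unambiguous. A small illustrative check of the packed case:

        int[] sample = { 1, 4, 2 };                         // hypothetical values
        long max = 0;
        foreach (int v in sample) { max |= (uint)v; }       // 1 | 4 | 2 == 7
        Debug.Assert(PackedInts.BitsRequired(max) == 3);    // the block stores three 3-bit values
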
Example #11
        /// <summary>
        /// Returns a sorted array containing unique field numbers </summary>
        private int[] FlushFieldNums()
        {
            SortedSet <int> fieldNums = new SortedSet <int>();

            foreach (DocData dd in PendingDocs)
            {
                foreach (FieldData fd in dd.Fields)
                {
                    fieldNums.Add(fd.FieldNum);
                }
            }

            int numDistinctFields = fieldNums.Count;

            Debug.Assert(numDistinctFields > 0);
            int bitsRequired = PackedInts.BitsRequired(fieldNums.Last());
            int token        = (Math.Min(numDistinctFields - 1, 0x07) << 5) | bitsRequired;

            VectorsStream.WriteByte((byte)(sbyte)token);
            if (numDistinctFields - 1 >= 0x07)
            {
                VectorsStream.WriteVInt(numDistinctFields - 1 - 0x07);
            }
            PackedInts.Writer writer = PackedInts.GetWriterNoHeader(VectorsStream, PackedInts.Format.PACKED, fieldNums.Count, bitsRequired, 1);
            foreach (int fieldNum in fieldNums)
            {
                writer.Add(fieldNum);
            }
            writer.Finish();

            int[] fns = new int[fieldNums.Count];
            int   i   = 0;

            foreach (int key in fieldNums)
            {
                fns[i++] = key;
            }
            return fns;
        }
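
The token byte above packs two things: the low 5 bits hold bitsRequired for the largest field number, and the high 3 bits hold min(numDistinctFields - 1, 7); only when that count saturates at 7 is the remainder written as an extra VInt. A worked example with hypothetical numbers:

        int numDistinctFields = 3;
        int bitsRequired = PackedInts.BitsRequired(100);                        // largest field number 100 -> 7 bits
        int token = (Math.Min(numDistinctFields - 1, 0x07) << 5) | bitsRequired;
        Debug.Assert(token == (2 << 5 | 7));                                    // == 71, written as a single byte
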
Example #12
        internal virtual void AddNumericField(FieldInfo field, IEnumerable <long?> values, bool optimizeStorage)
        {
            long count    = 0;
            long minValue = long.MaxValue;
            long maxValue = long.MinValue;
            long gcd      = 0;
            bool missing  = false;
            // TODO: more efficient?
            HashSet <long> uniqueValues = null;

            if (optimizeStorage)
            {
                uniqueValues = new HashSet <long>();

                foreach (long? nv in values)
                {
                    long v;
                    if (nv == null)
                    {
                        v       = 0;
                        missing = true;
                    }
                    else
                    {
                        v = nv.Value;
                    }

                    if (gcd != 1)
                    {
                        if (v < long.MinValue / 2 || v > long.MaxValue / 2)
                        {
                            // in that case v - minValue might overflow and make the GCD computation return
                            // wrong results. Since these extreme values are unlikely, we just discard
                            // GCD computation for them
                            gcd = 1;
                        } // minValue needs to be set first
                        else if (count != 0)
                        {
                            gcd = MathUtil.Gcd(gcd, v - minValue);
                        }
                    }

                    minValue = Math.Min(minValue, v);
                    maxValue = Math.Max(maxValue, v);

                    if (uniqueValues != null)
                    {
                        if (uniqueValues.Add(v))
                        {
                            if (uniqueValues.Count > 256)
                            {
                                uniqueValues = null;
                            }
                        }
                    }

                    ++count;
                }
            }
            else
            {
                foreach (var nv in values)
                {
                    ++count;
                }
            }

            long delta = maxValue - minValue;

            int format;

            if (uniqueValues != null && (delta < 0L || PackedInts.BitsRequired(uniqueValues.Count - 1) < PackedInts.BitsRequired(delta)) && count <= int.MaxValue)
            {
                format = TABLE_COMPRESSED;
            }
            else if (gcd != 0 && gcd != 1)
            {
                format = GCD_COMPRESSED;
            }
            else
            {
                format = DELTA_COMPRESSED;
            }
            Meta.WriteVInt(field.Number);
            Meta.WriteByte((byte)Lucene45DocValuesFormat.NUMERIC);
            Meta.WriteVInt(format);
            if (missing)
            {
                Meta.WriteLong(Data.FilePointer);
                WriteMissingBitset(values);
            }
            else
            {
                Meta.WriteLong(-1L);
            }
            Meta.WriteVInt(PackedInts.VERSION_CURRENT);
            Meta.WriteLong(Data.FilePointer);
            Meta.WriteVLong(count);
            Meta.WriteVInt(BLOCK_SIZE);

            switch (format)
            {
            case GCD_COMPRESSED:
                Meta.WriteLong(minValue);
                Meta.WriteLong(gcd);
                BlockPackedWriter quotientWriter = new BlockPackedWriter(Data, BLOCK_SIZE);
                foreach (long? nv in values)
                {
                    long value = nv == null ? 0 : nv.Value;
                    quotientWriter.Add((value - minValue) / gcd);
                }
                quotientWriter.Finish();
                break;

            case DELTA_COMPRESSED:
                BlockPackedWriter writer = new BlockPackedWriter(Data, BLOCK_SIZE);
                foreach (long? nv in values)
                {
                    writer.Add(nv == null ? 0 : nv.Value);
                }
                writer.Finish();
                break;

            case TABLE_COMPRESSED:
                long[] decode = uniqueValues.ToArray();    // LUCENE TO-DO: ToArray had a parameter before
                Dictionary <long, int> encode = new Dictionary <long, int>();
                Meta.WriteVInt(decode.Length);
                for (int i = 0; i < decode.Length; i++)
                {
                    Meta.WriteLong(decode[i]);
                    encode[decode[i]] = i;
                }
                int bitsRequired             = PackedInts.BitsRequired(uniqueValues.Count - 1);
                PackedInts.Writer ordsWriter = PackedInts.GetWriterNoHeader(Data, PackedInts.Format.PACKED, (int)count, bitsRequired, PackedInts.DEFAULT_BUFFER_SIZE);
                foreach (long? nv in values)
                {
                    ordsWriter.Add(encode[nv == null ? 0 : nv.Value]);
                }
                ordsWriter.Finish();
                break;

            default:
                throw new InvalidOperationException();
            }
        }
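
The format choice above falls out of the statistics gathered in the first pass: TABLE_COMPRESSED when there are at most 256 distinct values and a table ordinal needs fewer bits than the value range (or the range overflowed), provided the document count fits in an int; GCD_COMPRESSED when the values share a common divisor greater than 1; DELTA_COMPRESSED otherwise. A hypothetical case where the GCD path wins: thousands of distinct millisecond timestamps that are all whole seconds. uniqueValues is discarded once it passes 256 entries, gcd converges on 1000, and each document stores only the quotient (v - minValue) / gcd, which the reader reverses:

        // Hypothetical stored state for one document under GCD_COMPRESSED:
        long minValue = 1_000_000, gcd = 1000;      // written once up front
        long quotient = 42;                         // the per-document packed value
        long v = minValue + gcd * quotient;         // 1_042_000, the original value
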
Example #13
        private void FlushFlags(int totalFields, int[] fieldNums)
        {
            // check if fields always have the same flags
            bool nonChangingFlags = true;

            int[] fieldFlags = new int[fieldNums.Length];
            Arrays.Fill(fieldFlags, -1);
            bool breakOuterLoop;

            foreach (DocData dd in PendingDocs)
            {
                breakOuterLoop = false;
                foreach (FieldData fd in dd.Fields)
                {
                    int fieldNumOff = Array.BinarySearch(fieldNums, fd.FieldNum);
                    Debug.Assert(fieldNumOff >= 0);
                    if (fieldFlags[fieldNumOff] == -1)
                    {
                        fieldFlags[fieldNumOff] = fd.Flags;
                    }
                    else if (fieldFlags[fieldNumOff] != fd.Flags)
                    {
                        nonChangingFlags = false;
                        breakOuterLoop   = true;
                    }
                }
                if (breakOuterLoop)
                {
                    break;
                }
            }

            if (nonChangingFlags)
            {
                // write one flag per field num
                VectorsStream.WriteVInt(0);
                PackedInts.Writer writer = PackedInts.GetWriterNoHeader(VectorsStream, PackedInts.Format.PACKED, fieldFlags.Length, FLAGS_BITS, 1);
                foreach (int flags in fieldFlags)
                {
                    Debug.Assert(flags >= 0);
                    writer.Add(flags);
                }
                Debug.Assert(writer.Ord() == fieldFlags.Length - 1);
                writer.Finish();
            }
            else
            {
                // write one flag for every field instance
                VectorsStream.WriteVInt(1);
                PackedInts.Writer writer = PackedInts.GetWriterNoHeader(VectorsStream, PackedInts.Format.PACKED, totalFields, FLAGS_BITS, 1);
                foreach (DocData dd in PendingDocs)
                {
                    foreach (FieldData fd in dd.Fields)
                    {
                        writer.Add(fd.Flags);
                    }
                }
                Debug.Assert(writer.Ord() == totalFields - 1);
                writer.Finish();
            }
        }
Example #14
        internal virtual void AddNumericField(FieldInfo field, IEnumerable <long> values, bool optimizeStorage)
        {
            Meta.WriteVInt(field.Number);
            Meta.WriteByte(Lucene42DocValuesProducer.NUMBER);
            Meta.WriteLong(Data.FilePointer);
            long minValue = long.MaxValue;
            long maxValue = long.MinValue;
            long gcd      = 0;
            // TODO: more efficient?
            HashSet <long> uniqueValues = null;

            if (optimizeStorage)
            {
                uniqueValues = new HashSet <long>();

                long count = 0;
                foreach (long nv in values)
                {
                    // TODO: support this as MemoryDVFormat (and be smart about missing maybe)
                    long v = nv == null ? 0 : (long)nv;

                    if (gcd != 1)
                    {
                        if (v < long.MinValue / 2 || v > long.MaxValue / 2)
                        {
                            // in that case v - minValue might overflow and make the GCD computation return
                            // wrong results. Since these extreme values are unlikely, we just discard
                            // GCD computation for them
                            gcd = 1;
                        } // minValue needs to be set first
                        else if (count != 0)
                        {
                            gcd = MathUtil.Gcd(gcd, v - minValue);
                        }
                    }

                    minValue = Math.Min(minValue, v);
                    maxValue = Math.Max(maxValue, v);

                    if (uniqueValues != null)
                    {
                        if (uniqueValues.Add(v))
                        {
                            if (uniqueValues.Count > 256)
                            {
                                uniqueValues = null;
                            }
                        }
                    }

                    ++count;
                }
                Debug.Assert(count == MaxDoc);
            }

            if (uniqueValues != null)
            {
                // small number of unique values
                int           bitsPerValue  = PackedInts.BitsRequired(uniqueValues.Count - 1);
                FormatAndBits formatAndBits = PackedInts.FastestFormatAndBits(MaxDoc, bitsPerValue, AcceptableOverheadRatio);
                if (formatAndBits.bitsPerValue == 8 && minValue >= sbyte.MinValue && maxValue <= sbyte.MaxValue)
                {
                    Meta.WriteByte(Lucene42DocValuesProducer.UNCOMPRESSED); // uncompressed
                    foreach (long nv in values)
                    {
                        Data.WriteByte(nv == null ? (byte)0 : (byte)nv);
                    }
                }
                else
                {
                    Meta.WriteByte(Lucene42DocValuesProducer.TABLE_COMPRESSED); // table-compressed
                    long[] decode = uniqueValues.ToArray(/*new long?[uniqueValues.Count]*/);
                    Dictionary <long, int> encode = new Dictionary <long, int>();
                    Data.WriteVInt(decode.Length);
                    for (int i = 0; i < decode.Length; i++)
                    {
                        Data.WriteLong(decode[i]);
                        encode[decode[i]] = i;
                    }

                    Meta.WriteVInt(PackedInts.VERSION_CURRENT);
                    Data.WriteVInt(formatAndBits.format.id);
                    Data.WriteVInt(formatAndBits.bitsPerValue);

                    PackedInts.Writer writer = PackedInts.GetWriterNoHeader(Data, formatAndBits.format, MaxDoc, formatAndBits.bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
                    foreach (long nv in values)
                    {
                        writer.Add(encode[nv == null ? 0 : (long)nv]);
                    }
                    writer.Finish();
                }
            }
            else if (gcd != 0 && gcd != 1)
            {
                Meta.WriteByte(Lucene42DocValuesProducer.GCD_COMPRESSED);
                Meta.WriteVInt(PackedInts.VERSION_CURRENT);
                Data.WriteLong(minValue);
                Data.WriteLong(gcd);
                Data.WriteVInt(Lucene42DocValuesProducer.BLOCK_SIZE);

                BlockPackedWriter writer = new BlockPackedWriter(Data, Lucene42DocValuesProducer.BLOCK_SIZE);
                foreach (long nv in values)
                {
                    long value = nv == null ? 0 : (long)nv;
                    writer.Add((value - minValue) / gcd);
                }
                writer.Finish();
            }
            else
            {
                Meta.WriteByte(Lucene42DocValuesProducer.DELTA_COMPRESSED); // delta-compressed

                Meta.WriteVInt(PackedInts.VERSION_CURRENT);
                Data.WriteVInt(Lucene42DocValuesProducer.BLOCK_SIZE);

                BlockPackedWriter writer = new BlockPackedWriter(Data, Lucene42DocValuesProducer.BLOCK_SIZE);
                foreach (long nv in values)
                {
                    writer.Add(nv == null ? 0 : (long)nv);
                }
                writer.Finish();
            }
        }
Example #15
        public override void AddNumericField(FieldInfo field, IEnumerable <long?> values)
        {
            Meta.WriteVInt(field.Number);
            Meta.WriteByte((byte)NUMBER);
            Meta.WriteLong(Data.FilePointer);
            long minValue = long.MaxValue;
            long maxValue = long.MinValue;
            long gcd      = 0;
            // TODO: more efficient?
            HashSet <long> uniqueValues = null;

            if (true)
            {
                uniqueValues = new HashSet <long>();

                long count = 0;
                foreach (long? nv in values)
                {
                    Debug.Assert(nv != null);
                    long v = nv.Value;

                    if (gcd != 1)
                    {
                        if (v < long.MinValue / 2 || v > long.MaxValue / 2)
                        {
                            // in that case v - minValue might overflow and make the GCD computation return
                            // wrong results. Since these extreme values are unlikely, we just discard
                            // GCD computation for them
                            gcd = 1;
                        } // minValue needs to be set first
                        else if (count != 0)
                        {
                            gcd = MathUtil.Gcd(gcd, v - minValue);
                        }
                    }

                    minValue = Math.Min(minValue, v);
                    maxValue = Math.Max(maxValue, v);

                    if (uniqueValues != null)
                    {
                        if (uniqueValues.Add(v))
                        {
                            if (uniqueValues.Count > 256)
                            {
                                uniqueValues = null;
                            }
                        }
                    }

                    ++count;
                }
                Debug.Assert(count == MaxDoc);
            }

            if (uniqueValues != null)
            {
                // small number of unique values
                int           bitsPerValue  = PackedInts.BitsRequired(uniqueValues.Count - 1);
                FormatAndBits formatAndBits = PackedInts.FastestFormatAndBits(MaxDoc, bitsPerValue, AcceptableOverheadRatio);
                if (formatAndBits.bitsPerValue == 8 && minValue >= sbyte.MinValue && maxValue <= sbyte.MaxValue)
                {
                    Meta.WriteByte((byte)UNCOMPRESSED); // uncompressed
                    foreach (long? nv in values)
                    {
                        Data.WriteByte(nv == null ? (byte)0 : (byte)(sbyte)nv.Value);
                    }
                }
                else
                {
                    Meta.WriteByte((byte)TABLE_COMPRESSED); // table-compressed
                    //LUCENE TO-DO, ToArray had a parameter to start
                    var decode = uniqueValues.ToArray();
                    var encode = new Dictionary <long, int>();
                    Data.WriteVInt(decode.Length);
                    for (int i = 0; i < decode.Length; i++)
                    {
                        Data.WriteLong(decode[i]);
                        encode[decode[i]] = i;
                    }

                    Meta.WriteVInt(PackedInts.VERSION_CURRENT);
                    Data.WriteVInt(formatAndBits.format.id);
                    Data.WriteVInt(formatAndBits.bitsPerValue);

                    PackedInts.Writer writer = PackedInts.GetWriterNoHeader(Data, formatAndBits.format, MaxDoc, formatAndBits.bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
                    foreach (long? nv in values)
                    {
                        writer.Add(encode[nv == null ? 0 : nv.Value]);
                    }
                    writer.Finish();
                }
            }
            else if (gcd != 0 && gcd != 1)
            {
                Meta.WriteByte((byte)GCD_COMPRESSED);
                Meta.WriteVInt(PackedInts.VERSION_CURRENT);
                Data.WriteLong(minValue);
                Data.WriteLong(gcd);
                Data.WriteVInt(BLOCK_SIZE);

                var writer = new BlockPackedWriter(Data, BLOCK_SIZE);
                foreach (long? nv in values)
                {
                    long value = nv == null ? 0 : nv.Value;
                    writer.Add((value - minValue) / gcd);
                }
                writer.Finish();
            }
            else
            {
                Meta.WriteByte((byte)DELTA_COMPRESSED); // delta-compressed

                Meta.WriteVInt(PackedInts.VERSION_CURRENT);
                Data.WriteVInt(BLOCK_SIZE);

                var writer = new BlockPackedWriter(Data, BLOCK_SIZE);
                foreach (long? nv in values)
                {
                    writer.Add(nv == null ? 0 : nv.Value);
                }
                writer.Finish();
            }
        }
Example #16
        private void WriteBlock()
        {
            Debug.Assert(BlockChunks > 0);
            FieldsIndexOut.WriteVInt(BlockChunks);

            // The trick here is that we only store the difference from the average start
            // pointer or doc base, this helps save bits per value.
            // And in order to prevent a few chunks that would be far from the average to
            // raise the number of bits per value for all of them, we only encode blocks
            // of 1024 chunks at once
            // See LUCENE-4512

            // doc bases
            int avgChunkDocs;

            if (BlockChunks == 1)
            {
                avgChunkDocs = 0;
            }
            else
            {
                avgChunkDocs = (int)Math.Round((float)(BlockDocs - DocBaseDeltas[BlockChunks - 1]) / (BlockChunks - 1));
            }
            FieldsIndexOut.WriteVInt(TotalDocs - BlockDocs); // docBase
            FieldsIndexOut.WriteVInt(avgChunkDocs);
            int  docBase  = 0;
            long maxDelta = 0;

            for (int i = 0; i < BlockChunks; ++i)
            {
                int delta = docBase - avgChunkDocs * i;
                maxDelta |= MoveSignToLowOrderBit(delta);
                docBase  += DocBaseDeltas[i];
            }

            int bitsPerDocBase = PackedInts.BitsRequired(maxDelta);

            FieldsIndexOut.WriteVInt(bitsPerDocBase);
            PackedInts.Writer writer = PackedInts.GetWriterNoHeader(FieldsIndexOut, PackedInts.Format.PACKED, BlockChunks, bitsPerDocBase, 1);
            docBase = 0;
            for (int i = 0; i < BlockChunks; ++i)
            {
                long delta = docBase - avgChunkDocs * i;
                Debug.Assert(PackedInts.BitsRequired(MoveSignToLowOrderBit(delta)) <= writer.BitsPerValue());
                writer.Add(MoveSignToLowOrderBit(delta));
                docBase += DocBaseDeltas[i];
            }
            writer.Finish();

            // start pointers
            FieldsIndexOut.WriteVLong(FirstStartPointer);
            long avgChunkSize;

            if (BlockChunks == 1)
            {
                avgChunkSize = 0;
            }
            else
            {
                avgChunkSize = (MaxStartPointer - FirstStartPointer) / (BlockChunks - 1);
            }
            FieldsIndexOut.WriteVLong(avgChunkSize);
            long startPointer = 0;

            maxDelta = 0;
            for (int i = 0; i < BlockChunks; ++i)
            {
                startPointer += StartPointerDeltas[i];
                long delta = startPointer - avgChunkSize * i;
                maxDelta |= MoveSignToLowOrderBit(delta);
            }

            int bitsPerStartPointer = PackedInts.BitsRequired(maxDelta);

            FieldsIndexOut.WriteVInt(bitsPerStartPointer);
            writer       = PackedInts.GetWriterNoHeader(FieldsIndexOut, PackedInts.Format.PACKED, BlockChunks, bitsPerStartPointer, 1);
            startPointer = 0;
            for (int i = 0; i < BlockChunks; ++i)
            {
                startPointer += StartPointerDeltas[i];
                long delta = startPointer - avgChunkSize * i;
                Debug.Assert(PackedInts.BitsRequired(MoveSignToLowOrderBit(delta)) <= writer.BitsPerValue());
                writer.Add(MoveSignToLowOrderBit(delta));
            }
            writer.Finish();
        }
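
Two tricks are combined above (see LUCENE-4512): each chunk stores only its difference from an average doc base or start pointer, and because those differences can be negative they are passed through MoveSignToLowOrderBit before packing. That helper is assumed here to be the usual zig-zag transform (as in Lucene's BitUtil), which interleaves negative and positive values so small deltas of either sign stay small. A minimal sketch under that assumption:

        // Zig-zag transform: -1 -> 1, 1 -> 2, -2 -> 3, ... so |delta| determines the packed width.
        private static long ZigZagEncode(long value)
        {
            return (value << 1) ^ (value >> 63);            // arithmetic shift smears the sign bit
        }

        private static long ZigZagDecode(long encoded)
        {
            return (long)((ulong)encoded >> 1) ^ -(encoded & 1);
        }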