Example #1
        public override void Encode(int[] values, int valuesOffset, byte[] blocks, int blocksOffset, int iterations)
        {
            int nextBlock = 0;
            int bitsLeft  = 8;

            for (int i = 0; i < ByteValueCount_Renamed * iterations; ++i)
            {
                int v = values[valuesOffset++];
                Debug.Assert(PackedInts.BitsRequired(v & 0xFFFFFFFFL) <= BitsPerValue);
                if (BitsPerValue < bitsLeft)
                {
                    // just buffer
                    nextBlock |= v << (bitsLeft - BitsPerValue);
                    bitsLeft  -= BitsPerValue;
                }
                else
                {
                    // flush as many blocks as possible
                    int bits = BitsPerValue - bitsLeft;
                    blocks[blocksOffset++] = (byte)(nextBlock | ((int)((uint)v >> bits)));
                    while (bits >= 8)
                    {
                        bits -= 8;
                        blocks[blocksOffset++] = (byte)((int)((uint)v >> bits));
                    }
                    // then buffer
                    bitsLeft  = 8 - bits;
                    nextBlock = (v & ((1 << bits) - 1)) << bitsLeft;
                }
            }
            Debug.Assert(bitsLeft == 8);
        }
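The encoder above buffers bits MSB-first and flushes whole bytes as they fill. A minimal standalone illustration of that byte layout, assuming BitsPerValue = 4 so two values share one block byte (illustrative values, not from the class above):

        int[] vals = { 0xA, 0x3 };                      // two 4-bit values
        byte packed = (byte)((vals[0] << 4) | vals[1]); // high bits first: 0xA3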
Example #2
        protected internal override void Flush()
        {
            Debug.Assert(Off > 0);

            // TODO: perform a true linear regression?
            long  min = Values[0];
            float avg = Off == 1 ? 0f : (float)(Values[Off - 1] - min) / (Off - 1);

            long maxZigZagDelta = 0;

            for (int i = 0; i < Off; ++i)
            {
                Values[i]      = ZigZagEncode(Values[i] - min - (long)(avg * i));
                maxZigZagDelta = Math.Max(maxZigZagDelta, Values[i]);
            }

            @out.WriteVLong(min);
            @out.WriteInt(Number.FloatToIntBits(avg));
            if (maxZigZagDelta == 0)
            {
                @out.WriteVInt(0);
            }
            else
            {
                int bitsRequired = PackedInts.BitsRequired(maxZigZagDelta);
                @out.WriteVInt(bitsRequired);
                WriteValues(bitsRequired);
            }

            Off = 0;
        }
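The flush above relies on ZigZagEncode to fold signed residuals into small non-negative longs before packing. A minimal sketch of that mapping, assuming the standard zig-zag scheme (hypothetical helpers, not copied from this class):

        static long ZigZagEncode(long l) => (l >> 63) ^ (l << 1);            // -1 -> 1, 1 -> 2
        static long ZigZagDecode(long l) => (long)((ulong)l >> 1) ^ -(l & 1);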
Example #3
        private void AddFixedSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd, int length)
        {
            field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_FIXED_SORTED.Name);

            CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);

            CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);

            /* values */

            data.WriteInt(length);
            int valueCount = 0;

            foreach (BytesRef v in values)
            {
                data.WriteBytes(v.Bytes, v.Offset, v.Length);
                valueCount++;
            }

            /* ordinals */

            index.WriteInt(valueCount);
            int maxDoc = State.SegmentInfo.DocCount;

            Debug.Assert(valueCount > 0);
            PackedInts.Writer w = PackedInts.GetWriter(index, maxDoc, PackedInts.BitsRequired(valueCount - 1), PackedInts.DEFAULT);
            foreach (long n in docToOrd)
            {
                w.Add((long)n);
            }
            w.Finish();
        }
Example #4
        private void AddVarIntsField(FieldInfo field, IndexOutput output, IEnumerable<long?> values, long minValue, long maxValue)
        {
            field.PutAttribute(LegacyKey, LegacyDocValuesType.VAR_INTS.Name);

            CodecUtil.WriteHeader(output, Lucene40DocValuesFormat.VAR_INTS_CODEC_NAME, Lucene40DocValuesFormat.VAR_INTS_VERSION_CURRENT);

            long delta = maxValue - minValue;

            if (delta < 0)
            {
                // writes longs
                output.WriteByte((byte)Lucene40DocValuesFormat.VAR_INTS_FIXED_64);
                foreach (long? n in values)
                {
                    output.WriteLong(n == null ? 0 : n.Value);
                }
            }
            else
            {
                // writes packed ints
                output.WriteByte((byte)Lucene40DocValuesFormat.VAR_INTS_PACKED);
                output.WriteLong(minValue);
                output.WriteLong(0 - minValue); // default value (representation of 0)
                PackedInts.Writer writer = PackedInts.GetWriter(output, State.SegmentInfo.DocCount, PackedInts.BitsRequired(delta), PackedInts.DEFAULT);
                foreach (long? n in values)
                {
                    long v = n == null ? 0 : (long)n;
                    writer.Add(v - minValue);
                }
                writer.Finish();
            }
        }
Example #5
        private NumericDocValues LoadVarIntsField(FieldInfo field, IndexInput input)
        {
            CodecUtil.CheckHeader(input, Lucene40DocValuesFormat.VAR_INTS_CODEC_NAME, Lucene40DocValuesFormat.VAR_INTS_VERSION_START, Lucene40DocValuesFormat.VAR_INTS_VERSION_CURRENT);
            byte header = input.ReadByte();

            if (header == Lucene40DocValuesFormat.VAR_INTS_FIXED_64)
            {
                int    maxDoc = State.SegmentInfo.DocCount;
                long[] values = new long[maxDoc];
                for (int i = 0; i < values.Length; i++)
                {
                    values[i] = input.ReadLong();
                }
                RamBytesUsed_Renamed.AddAndGet(RamUsageEstimator.SizeOf(values));
                return(new NumericDocValuesAnonymousInnerClassHelper(this, values));
            }
            else if (header == Lucene40DocValuesFormat.VAR_INTS_PACKED)
            {
                long minValue            = input.ReadLong();
                long defaultValue        = input.ReadLong();
                PackedInts.Reader reader = PackedInts.GetReader(input);
                RamBytesUsed_Renamed.AddAndGet(reader.RamBytesUsed());
                return(new NumericDocValuesAnonymousInnerClassHelper2(this, minValue, defaultValue, reader));
            }
            else
            {
                throw new CorruptIndexException("invalid VAR_INTS header byte: " + header + " (resource=" + input + ")");
            }
        }
Example #6
        // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestDateCompression()
        {
            Directory         dir     = new RAMDirectory();
            IndexWriterConfig iwc     = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            IndexWriter       iwriter = new IndexWriter(dir, iwc);

            const long @base = 13; // prime
            long       day   = 1000L * 60 * 60 * 24;

            Document doc = new Document();
            NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);

            doc.Add(dvf);
            for (int i = 0; i < 300; ++i)
            {
                dvf.LongValue = @base + Random().Next(1000) * day;
                iwriter.AddDocument(doc);
            }
            iwriter.ForceMerge(1);
            long size1 = DirSize(dir);

            for (int i = 0; i < 50; ++i)
            {
                dvf.LongValue = @base + Random().Next(1000) * day;
                iwriter.AddDocument(doc);
            }
            iwriter.ForceMerge(1);
            long size2 = DirSize(dir);

            // make sure the new longs cost less than if they had only been packed
            Assert.IsTrue(size2 < size1 + (PackedInts.BitsRequired(day) * 50) / 8);
        }
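The assertion holds because every stored value is @base plus a multiple of day, so the codec can factor the common divisor out (GCD compression) and pack only the quotients. Illustrative arithmetic, not part of the test:

        long day = 1000L * 60 * 60 * 24; // 86,400,000
        // raw deltas up to 999 * day would need ~37 bits per value, but the
        // quotients Random().Next(1000) fit in PackedInts.BitsRequired(999) = 10 bits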
Example #7
        internal override void PackPendingValues()
        {
            // compute max delta
            long minValue = Pending[0];
            long maxValue = Pending[0];

            for (int i = 1; i < PendingOff; ++i)
            {
                minValue = Math.Min(minValue, Pending[i]);
                maxValue = Math.Max(maxValue, Pending[i]);
            }
            long delta = maxValue - minValue;

            MinValues[ValuesOff] = minValue;
            if (delta == 0)
            {
                Values[ValuesOff] = new PackedInts.NullReader(PendingOff);
            }
            else
            {
                // build a new packed reader
                int bitsRequired = delta < 0 ? 64 : PackedInts.BitsRequired(delta);
                for (int i = 0; i < PendingOff; ++i)
                {
                    Pending[i] -= minValue;
                }
                PackedInts.Mutable mutable = PackedInts.GetMutable(PendingOff, bitsRequired, AcceptableOverheadRatio);
                for (int i = 0; i < PendingOff;)
                {
                    i += mutable.Set(i, Pending, i, PendingOff - i);
                }
                Values[ValuesOff] = mutable;
            }
        }
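The delta < 0 branch above is an overflow guard rather than a logic error: when the pending values span more than the positive long range, the subtraction wraps, and 64 bits per value is the only safe width. For example:

        long minValue = long.MinValue, maxValue = long.MaxValue;
        long delta = maxValue - minValue; // wraps to -1 in unchecked arithmetic
        // delta < 0 here, so PackedInts.BitsRequired(delta) would be invalid; use 64 bits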
Example #8
        private void FlushNumTerms(int totalFields)
        {
            int maxNumTerms = 0;

            foreach (DocData dd in PendingDocs)
            {
                foreach (FieldData fd in dd.Fields)
                {
                    maxNumTerms |= fd.NumTerms;
                }
            }
            int bitsRequired = PackedInts.BitsRequired(maxNumTerms);

            VectorsStream.WriteVInt(bitsRequired);
            PackedInts.Writer writer = PackedInts.GetWriterNoHeader(VectorsStream, PackedInts.Format.PACKED, totalFields, bitsRequired, 1);
            foreach (DocData dd in PendingDocs)
            {
                foreach (FieldData fd in dd.Fields)
                {
                    writer.Add(fd.NumTerms);
                }
            }
            Debug.Assert(writer.Ord() == totalFields - 1);
            writer.Finish();
        }
Example #9
        internal virtual LongValues GetNumeric(NumericEntry entry)
        {
            IndexInput data = (IndexInput)this.Data.Clone();

            data.Seek(entry.Offset);

            switch (entry.Format)
            {
            case Lucene45DocValuesConsumer.DELTA_COMPRESSED:
                BlockPackedReader reader = new BlockPackedReader(data, entry.PackedIntsVersion, entry.BlockSize, entry.Count, true);
                return(reader);

            case Lucene45DocValuesConsumer.GCD_COMPRESSED:
                long min  = entry.MinValue;
                long mult = entry.Gcd;
                BlockPackedReader quotientReader = new BlockPackedReader(data, entry.PackedIntsVersion, entry.BlockSize, entry.Count, true);
                return(new LongValuesAnonymousInnerClassHelper(this, min, mult, quotientReader));

            case Lucene45DocValuesConsumer.TABLE_COMPRESSED:
                long[]            table        = entry.Table;
                int               bitsRequired = PackedInts.BitsRequired(table.Length - 1);
                PackedInts.Reader ords         = PackedInts.GetDirectReaderNoHeader(data, PackedInts.Format.PACKED, entry.PackedIntsVersion, (int)entry.Count, bitsRequired);
                return(new LongValuesAnonymousInnerClassHelper2(this, table, ords));

            default:
                throw new Exception();
            }
        }
Example #10
        public override void Fill(int fromIndex, int toIndex, long val)
        {
            Debug.Assert(PackedInts.BitsRequired(val) <= BitsPerValue);
            Debug.Assert(fromIndex <= toIndex);

            // minimum number of values that use an exact number of full blocks
            int nAlignedValues = 64 / Gcd(64, bitsPerValue);
            int span           = toIndex - fromIndex;

            if (span <= 3 * nAlignedValues)
            {
                // there need to be at least 2 * nAlignedValues aligned values for the
                // block approach to be worth trying
                base.Fill(fromIndex, toIndex, val);
                return;
            }

            // fill the first values naively until the next block start
            int fromIndexModNAlignedValues = fromIndex % nAlignedValues;

            if (fromIndexModNAlignedValues != 0)
            {
                for (int i = fromIndexModNAlignedValues; i < nAlignedValues; ++i)
                {
                    Set(fromIndex++, val);
                }
            }
            Debug.Assert(fromIndex % nAlignedValues == 0);

            // compute the long[] blocks for nAlignedValues consecutive values and
            // use them to set as many values as possible without applying any mask
            // or shift
            int nAlignedBlocks = (nAlignedValues * bitsPerValue) >> 6;

            long[] nAlignedValuesBlocks;
            {
                Packed64 values = new Packed64(nAlignedValues, bitsPerValue);
                for (int i = 0; i < nAlignedValues; ++i)
                {
                    values.Set(i, val);
                }
                nAlignedValuesBlocks = values.Blocks;
                Debug.Assert(nAlignedBlocks <= nAlignedValuesBlocks.Length);
            }
            int startBlock = (int)((ulong)((long)fromIndex * bitsPerValue) >> 6);
            int endBlock   = (int)((ulong)((long)toIndex * bitsPerValue) >> 6);

            for (int block = startBlock; block < endBlock; ++block)
            {
                long blockValue = nAlignedValuesBlocks[block % nAlignedBlocks];
                Blocks[block] = blockValue;
            }

            // fill the gap
            for (int i = (int)(((long)endBlock << 6) / bitsPerValue); i < toIndex; ++i)
            {
                Set(i, val);
            }
        }
Example #11
        public virtual GrowableWriter Resize(int newSize)
        {
            GrowableWriter next  = new GrowableWriter(BitsPerValue, newSize, AcceptableOverheadRatio);
            int            limit = Math.Min(Size(), newSize);

            PackedInts.Copy(Current, 0, next, 0, limit, PackedInts.DEFAULT_BUFFER_SIZE);
            return(next);
        }
Example #12
 public NumericDocValuesFieldUpdates(string field, int maxDoc)
     : base(field, Type_e.NUMERIC)
 {
     DocsWithField = new FixedBitSet(64);
     Docs          = new PagedMutable(1, 1024, PackedInts.BitsRequired(maxDoc - 1), PackedInts.COMPACT);
     Values        = new PagedGrowableWriter(1, 1024, 1, PackedInts.FAST);
     Size          = 0;
 }
Example #13
 /// <summary>
 /// Sole constructor. </summary>
 /// <param name="blockSize"> the number of values of a block, must be equal to the
 ///                  block size of the <seealso cref="BlockPackedWriter"/> which has
 ///                  been used to write the stream </param>
 public BlockPackedReaderIterator(DataInput @in, int packedIntsVersion, int blockSize, long valueCount)
 {
     PackedInts.CheckBlockSize(blockSize, AbstractBlockPackedWriter.MIN_BLOCK_SIZE, AbstractBlockPackedWriter.MAX_BLOCK_SIZE);
     this.PackedIntsVersion = packedIntsVersion;
     this.BlockSize         = blockSize;
     this.Values            = new long[blockSize];
     this.ValuesRef         = new LongsRef(this.Values, 0, 0);
     Reset(@in, valueCount);
 }
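A sketch of what PackedInts.CheckBlockSize is assumed to enforce here, given that its return value is used as a page shift elsewhere in this section: the block size must be a power of two within the allowed range, and the result is its log2 (a sketch, not the actual implementation):

        static int CheckBlockSize(int blockSize, int minSize, int maxSize)
        {
            if (blockSize < minSize || blockSize > maxSize)
                throw new ArgumentException("blockSize must be in [" + minSize + ", " + maxSize + "], got " + blockSize);
            if ((blockSize & (blockSize - 1)) != 0)
                throw new ArgumentException("blockSize must be a power of two, got " + blockSize);
            int shift = 0;
            while ((1 << shift) < blockSize) shift++;
            return shift; // (1 << shift) == blockSize
        }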
Example #14
 internal AbstractAppendingLongBuffer(int initialBlockCount, int pageSize, float acceptableOverheadRatio)
 {
     Values     = new PackedInts.Reader[initialBlockCount];
     Pending    = new long[pageSize];
     PageShift  = PackedInts.CheckBlockSize(pageSize, MIN_PAGE_SIZE, MAX_PAGE_SIZE);
     PageMask   = pageSize - 1;
     ValuesOff  = 0;
     PendingOff = 0;
     this.AcceptableOverheadRatio = acceptableOverheadRatio;
 }
Example #15
        internal AbstractPagedMutable(int bitsPerValue, long size, int pageSize)
        {
            this.BitsPerValue = bitsPerValue;
            this.Size_Renamed = size;
            PageShift         = PackedInts.CheckBlockSize(pageSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
            PageMask          = pageSize - 1;
            int numPages = PackedInts.NumBlocks(size, pageSize);

            SubMutables = new PackedInts.Mutable[numPages];
        }
Example #16
 public BinaryDocValuesFieldUpdates(string field, int maxDoc)
     : base(field, Type_e.BINARY)
 {
     DocsWithField = new FixedBitSet(64);
     Docs          = new PagedMutable(1, 1024, PackedInts.BitsRequired(maxDoc - 1), PackedInts.COMPACT);
     Offsets       = new PagedGrowableWriter(1, 1024, 1, PackedInts.FAST);
     Lengths       = new PagedGrowableWriter(1, 1024, 1, PackedInts.FAST);
     Values        = new BytesRef(16); // start small
     Size          = 0;
 }
Example #17
        /// <summary>
        /// Compress <code>bytes[off:off+len]</code> into <code>out</code> using
        /// at most 16KB of memory. <code>ht</code> shouldn't be shared across threads
        /// but can safely be reused.
        /// </summary>
        public static void Compress(byte[] bytes, int off, int len, DataOutput @out, HashTable ht)
        {
            int @base = off;
            int end   = off + len;

            int anchor = off++;

            if (len > LAST_LITERALS + MIN_MATCH)
            {
                int limit      = end - LAST_LITERALS;
                int matchLimit = limit - MIN_MATCH;
                ht.Reset(len);
                int hashLog = ht.HashLog;
                PackedInts.Mutable hashTable = ht.hashTable;

                while (off <= limit)
                {
                    // find a match
                    int @ref;
                    while (true)
                    {
                        if (off >= matchLimit)
                        {
                            goto mainBreak;
                        }
                        int v = ReadInt(bytes, off);
                        int h = Hash(v, hashLog);
                        @ref = @base + (int)hashTable.Get(h);
                        Debug.Assert(PackedInts.BitsRequired(off - @base) <= hashTable.BitsPerValue);
                        hashTable.Set(h, off - @base);
                        if (off - @ref < MAX_DISTANCE && ReadInt(bytes, @ref) == v)
                        {
                            break;
                        }
                        ++off;
                    }

                    // compute match length
                    int matchLen = MIN_MATCH + CommonBytes(bytes, @ref + MIN_MATCH, off + MIN_MATCH, limit);

                    EncodeSequence(bytes, anchor, @ref, off, matchLen, @out);
                    off   += matchLen;
                    anchor = off;
                }
                mainBreak: ;
            }

            // last literals
            int literalLen = end - anchor;

            Debug.Assert(literalLen >= LAST_LITERALS || literalLen == len);
            EncodeLastLiterals(bytes, anchor, literalLen, @out);
        }
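ReadInt and Hash are not shown above; a standalone sketch under the assumption that they follow the classic LZ4 scheme (big-endian 4-byte read, multiplicative hash truncated to hashLog bits; the actual Lucene.NET helpers may differ in detail):

        static int ReadInt(byte[] buf, int i)
        {
            return ((buf[i] & 0xFF) << 24) | ((buf[i + 1] & 0xFF) << 16)
                 | ((buf[i + 2] & 0xFF) << 8) | (buf[i + 3] & 0xFF);
        }

        static int Hash(int v, int hashBits)
        {
            return (int)((uint)(v * -1640531535) >> (32 - hashBits)); // 2654435761 as signed int
        }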
Example #18
        protected internal void FillPages()
        {
            int numPages = PackedInts.NumBlocks(Size_Renamed, PageSize());

            for (int i = 0; i < numPages; ++i)
            {
                // do not allocate for more entries than necessary on the last page
                int valueCount = i == numPages - 1 ? LastPageSize(Size_Renamed) : PageSize();
                SubMutables[i] = NewMutable(valueCount, BitsPerValue);
            }
        }
Example #19
        /// <summary>
        /// Compute the number of bits required to serialize any of the longs in
        /// <code>data</code>.
        /// </summary>
        private static int BitsRequired(int[] data)
        {
            long or = 0;

            for (int i = 0; i < Lucene41PostingsFormat.BLOCK_SIZE; ++i)
            {
                Debug.Assert(data[i] >= 0);
                or |= (uint)data[i];
            }
            return(PackedInts.BitsRequired(or));
        }
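For reference, PackedInts.BitsRequired itself amounts to 64 minus the number of leading zeros of the value, clamped to at least 1. A sketch assuming that standard definition (the real method also validates its argument):

        static int BitsRequired(long maxValue)
        {
            // BitsRequired(0) == 1, BitsRequired(255) == 8, BitsRequired(256) == 9
            return Math.Max(1, 64 - System.Numerics.BitOperations.LeadingZeroCount((ulong)maxValue));
        }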
Example #20
        private NumericDocValues LoadNumeric(FieldInfo field)
        {
            NumericEntry entry = numerics[field.Number];

            data.Seek(entry.offset + entry.missingBytes);
            switch (entry.format)
            {
            case TABLE_COMPRESSED:
                int size = data.ReadVInt();
                if (size > 256)
                {
                    throw new CorruptIndexException(
                              "TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + data);
                }
                var decode = new long[size];
                for (int i = 0; i < decode.Length; i++)
                {
                    decode[i] = data.ReadLong();
                }
                int formatID     = data.ReadVInt();
                int bitsPerValue = data.ReadVInt();
                var ordsReader   = PackedInts.GetReaderNoHeader(data, PackedInts.Format.ById(formatID),
                                                                entry.packedIntsVersion, maxDoc, bitsPerValue);
                ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(decode) + ordsReader.RamBytesUsed());
                return(new NumericDocValuesAnonymousInnerClassHelper(this, decode, ordsReader));

            case DELTA_COMPRESSED:
                int blockSize = data.ReadVInt();
                var reader    = new BlockPackedReader(data, entry.packedIntsVersion, blockSize, maxDoc,
                                                      false);
                ramBytesUsed.AddAndGet(reader.RamBytesUsed());
                return(reader);

            case UNCOMPRESSED:
                var bytes = new byte[maxDoc];
                data.ReadBytes(bytes, 0, bytes.Length);
                ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(bytes));
                // LUCENENET: IMPORTANT - some bytes are negative here, so we need to pass as sbyte
                return(new NumericDocValuesAnonymousInnerClassHelper2(this, (sbyte[])(Array)bytes));

            case GCD_COMPRESSED:
                long min  = data.ReadLong();
                long mult = data.ReadLong();
                int  quotientBlockSize = data.ReadVInt();
                var  quotientReader    = new BlockPackedReader(data, entry.packedIntsVersion,
                                                               quotientBlockSize, maxDoc, false);
                ramBytesUsed.AddAndGet(quotientReader.RamBytesUsed());
                return(new NumericDocValuesAnonymousInnerClassHelper3(this, min, mult, quotientReader));

            default:
                throw new InvalidOperationException();
            }
        }
Example #21
        private void AddVarSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd)
        {
            field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_VAR_SORTED.Name);

            CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

            CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

            /* values */

            long startPos = data.FilePointer;

            int valueCount = 0;

            foreach (BytesRef v in values)
            {
                data.WriteBytes(v.Bytes, v.Offset, v.Length);
                valueCount++;
            }

            /* addresses */

            long maxAddress = data.FilePointer - startPos;

            index.WriteLong(maxAddress);

            Debug.Assert(valueCount != int.MaxValue); // unsupported by the 4.0 impl

            PackedInts.Writer w  = PackedInts.GetWriter(index, valueCount + 1, PackedInts.BitsRequired(maxAddress), PackedInts.DEFAULT);
            long currentPosition = 0;

            foreach (BytesRef v in values)
            {
                w.Add(currentPosition);
                currentPosition += v.Length;
            }
            // write sentinel
            Debug.Assert(currentPosition == maxAddress);
            w.Add(currentPosition);
            w.Finish();

            /* ordinals */

            int maxDoc = State.SegmentInfo.DocCount;

            Debug.Assert(valueCount > 0);
            PackedInts.Writer ords = PackedInts.GetWriter(index, maxDoc, PackedInts.BitsRequired(valueCount - 1), PackedInts.DEFAULT);
            foreach (long n in docToOrd)
            {
                ords.Add((long)n);
            }
            ords.Finish();
        }
Example #22
        public override void AddNumericField(FieldInfo field, IEnumerable<long?> values)
        {
            // examine the values to determine best type to use
            long minValue = long.MaxValue;
            long maxValue = long.MinValue;

            foreach (long? n in values)
            {
                long v = n == null ? 0 : (long)n;
                minValue = Math.Min(minValue, v);
                maxValue = Math.Max(maxValue, v);
            }

            string      fileName = IndexFileNames.SegmentFileName(State.SegmentInfo.Name + "_" + Convert.ToString(field.Number), SegmentSuffix, "dat");
            IndexOutput data     = Dir.CreateOutput(fileName, State.Context);
            bool        success  = false;

            try
            {
                if (minValue >= sbyte.MinValue && maxValue <= sbyte.MaxValue && PackedInts.BitsRequired(maxValue - minValue) > 4)
                {
                    // fits in a byte[], would be more than 4bpv, just write byte[]
                    AddBytesField(field, data, values);
                }
                else if (minValue >= short.MinValue && maxValue <= short.MaxValue && PackedInts.BitsRequired(maxValue - minValue) > 8)
                {
                    // fits in a short[], would be more than 8bpv, just write short[]
                    AddShortsField(field, data, values);
                }
                else if (minValue >= int.MinValue && maxValue <= int.MaxValue && PackedInts.BitsRequired(maxValue - minValue) > 16)
                {
                    // fits in an int[], would be more than 16bpv, just write int[]
                    AddIntsField(field, data, values);
                }
                else
                {
                    AddVarIntsField(field, data, values, minValue, maxValue);
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(data);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(data);
                }
            }
        }
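The three guards read slightly inverted at first: a fixed-width array is chosen only when the values fit its range and packing would not beat it anyway. With minValue = -100 and maxValue = 100, for instance, the values fit in a byte and PackedInts.BitsRequired(200) is 8 (more than 4), so the byte[] path wins; with a delta of at most 7, the packed representation would need only 3 bits per value, so control falls through to AddVarIntsField instead (illustrative numbers, not from the source).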
Example #23
 private void FlushFields(int totalFields, int[] fieldNums)
 {
     PackedInts.Writer writer = PackedInts.GetWriterNoHeader(VectorsStream, PackedInts.Format.PACKED, totalFields, PackedInts.BitsRequired(fieldNums.Length - 1), 1);
     foreach (DocData dd in PendingDocs)
     {
         foreach (FieldData fd in dd.Fields)
         {
             int fieldNumIndex = Array.BinarySearch(fieldNums, fd.FieldNum);
             Debug.Assert(fieldNumIndex >= 0);
             writer.Add(fieldNumIndex);
         }
     }
     writer.Finish();
 }
Example #24
            internal void Reset(int len)
            {
                int bitsPerOffset    = PackedInts.BitsRequired(len - LAST_LITERALS);
                int bitsPerOffsetLog = 32 - Number.NumberOfLeadingZeros(bitsPerOffset - 1);

                HashLog = MEMORY_USAGE + 3 - bitsPerOffsetLog;
                if (hashTable == null || hashTable.Size() < 1 << HashLog || hashTable.BitsPerValue < bitsPerOffset)
                {
                    hashTable = PackedInts.GetMutable(1 << HashLog, bitsPerOffset, PackedInts.DEFAULT);
                }
                else
                {
                    hashTable.Clear();
                }
            }
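The 32 - NumberOfLeadingZeros(bitsPerOffset - 1) line is a ceiling-log2 idiom, so the hash log shrinks as offsets get wider. A quick check of the idiom (illustrative):

        // ceil(log2(x)) for x > 1 via leading zeros of (x - 1):
        // x = 5: LeadingZeroCount(4u) == 29, and 32 - 29 == 3, with 1 << 3 == 8 >= 5
        int x = 5;
        int ceilLog2 = 32 - System.Numerics.BitOperations.LeadingZeroCount((uint)(x - 1));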
Example #25
        private void Rehash()
        {
            PagedGrowableWriter oldTable = table;

            table = new PagedGrowableWriter(2 * oldTable.Size(), 1 << 30, PackedInts.BitsRequired(count), PackedInts.COMPACT);
            mask  = table.Size() - 1;
            for (long idx = 0; idx < oldTable.Size(); idx++)
            {
                long address = oldTable.Get(idx);
                if (address != 0)
                {
                    AddNew(address);
                }
            }
        }
Example #26
 public override void Add(long v)
 {
     Debug.Assert(bitsPerValue == 64 || (v >= 0 && v <= PackedInts.MaxValue(bitsPerValue)), bitsPerValue.ToString());
     Debug.Assert(!Finished);
     if (valueCount != -1 && Written >= valueCount)
     {
         throw new System.IO.EndOfStreamException("Writing past end of stream");
     }
     NextValues[Off++] = v;
     if (Off == NextValues.Length)
     {
         Flush();
     }
     ++Written;
 }
Example #27
        protected internal override void Flush()
        {
            Debug.Assert(Off > 0);
            long min = long.MaxValue, max = long.MinValue;

            for (int i = 0; i < Off; ++i)
            {
                min = Math.Min(Values[i], min);
                max = Math.Max(Values[i], max);
            }

            long delta        = max - min;
            int  bitsRequired = delta < 0 ? 64 : delta == 0L ? 0 : PackedInts.BitsRequired(delta);

            if (bitsRequired == 64)
            {
                // no need to delta-encode
                min = 0L;
            }
            else if (min > 0L)
            {
                // make min as small as possible so that writeVLong requires fewer bytes
                min = Math.Max(0L, max - PackedInts.MaxValue(bitsRequired));
            }

            int token = (bitsRequired << BPV_SHIFT) | (min == 0 ? MIN_VALUE_EQUALS_0 : 0);

            @out.WriteByte((byte)(sbyte)token);

            if (min != 0)
            {
                WriteVLong(@out, ZigZagEncode(min) - 1);
            }

            if (bitsRequired > 0)
            {
                if (min != 0)
                {
                    for (int i = 0; i < Off; ++i)
                    {
                        Values[i] -= min;
                    }
                }
                WriteValues(bitsRequired);
            }

            Off = 0;
        }
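On the read side a single token byte is enough to recover both fields. A minimal decoding sketch, assuming BPV_SHIFT = 1 and MIN_VALUE_EQUALS_0 = 1 to match the writer above:

        static (int bitsPerValue, bool minEquals0) DecodeToken(byte token)
        {
            const int BPV_SHIFT = 1;
            const int MIN_VALUE_EQUALS_0 = 1;
            return (token >> BPV_SHIFT, (token & MIN_VALUE_EQUALS_0) != 0);
        }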
Example #28
        public override void Fill(int fromIndex, int toIndex, long val)
        {
            Debug.Assert(fromIndex >= 0);
            Debug.Assert(fromIndex <= toIndex);
            Debug.Assert(PackedInts.BitsRequired(val) <= bitsPerValue);

            int valuesPerBlock = 64 / bitsPerValue;

            if (toIndex - fromIndex <= valuesPerBlock << 1)
            {
                // there needs to be at least one full block to set for the block
                // approach to be worth trying
                base.Fill(fromIndex, toIndex, val);
                return;
            }

            // set values naively until the next block start
            int fromOffsetInBlock = fromIndex % valuesPerBlock;

            if (fromOffsetInBlock != 0)
            {
                for (int i = fromOffsetInBlock; i < valuesPerBlock; ++i)
                {
                    Set(fromIndex++, val);
                }
                Debug.Assert(fromIndex % valuesPerBlock == 0);
            }

            // bulk set of the inner blocks
            int fromBlock = fromIndex / valuesPerBlock;
            int toBlock   = toIndex / valuesPerBlock;

            Debug.Assert(fromBlock * valuesPerBlock == fromIndex);

            long blockValue = 0L;

            for (int i = 0; i < valuesPerBlock; ++i)
            {
                blockValue = blockValue | (val << (i * bitsPerValue));
            }
            Arrays.Fill(Blocks, fromBlock, toBlock, blockValue);

            // fill the gap
            for (int i = valuesPerBlock * toBlock; i < toIndex; ++i)
            {
                Set(i, val);
            }
        }
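The blockValue loop replicates val across a whole 64-bit word so Arrays.Fill can bulk-write entire blocks. Worked example (illustrative):

        long val = 5;                            // 0b0101
        int bitsPerValue = 4;
        int valuesPerBlock = 64 / bitsPerValue;  // 16 values per long
        long blockValue = 0;
        for (int i = 0; i < valuesPerBlock; ++i)
            blockValue |= val << (i * bitsPerValue);
        // blockValue == 0x5555555555555555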
Example #29
        private NumericDocValues LoadNumeric(FieldInfo field)
        {
            NumericEntry entry = Numerics[field.Number];

            Data.Seek(entry.Offset);
            switch (entry.Format)
            {
            case TABLE_COMPRESSED:
                int size = Data.ReadVInt();
                if (size > 256)
                {
                    throw new CorruptIndexException("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + Data);
                }
                var decode = new long[size];
                for (int i = 0; i < decode.Length; i++)
                {
                    decode[i] = Data.ReadLong();
                }
                int formatID                 = Data.ReadVInt();
                int bitsPerValue             = Data.ReadVInt();
                PackedInts.Reader ordsReader = PackedInts.GetReaderNoHeader(Data, PackedInts.Format.ById(formatID), entry.PackedIntsVersion, MaxDoc, bitsPerValue);
                RamBytesUsed_Renamed.AddAndGet(RamUsageEstimator.SizeOf(decode) + ordsReader.RamBytesUsed());
                return(new NumericDocValuesAnonymousInnerClassHelper(decode, ordsReader));

            case DELTA_COMPRESSED:
                int blockSize = Data.ReadVInt();
                var reader    = new BlockPackedReader(Data, entry.PackedIntsVersion, blockSize, MaxDoc, false);
                RamBytesUsed_Renamed.AddAndGet(reader.RamBytesUsed());
                return(reader);

            case UNCOMPRESSED:
                byte[] bytes = new byte[MaxDoc];
                Data.ReadBytes(bytes, 0, bytes.Length);
                RamBytesUsed_Renamed.AddAndGet(RamUsageEstimator.SizeOf(bytes));
                return(new NumericDocValuesAnonymousInnerClassHelper2(this, bytes));

            case GCD_COMPRESSED:
                long min  = Data.ReadLong();
                long mult = Data.ReadLong();
                int  quotientBlockSize           = Data.ReadVInt();
                BlockPackedReader quotientReader = new BlockPackedReader(Data, entry.PackedIntsVersion, quotientBlockSize, MaxDoc, false);
                RamBytesUsed_Renamed.AddAndGet(quotientReader.RamBytesUsed());
                return(new NumericDocValuesAnonymousInnerClassHelper3(min, mult, quotientReader));

            default:
                throw new InvalidOperationException();
            }
        }
Example #30
        private void AddFixedDerefBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, int length)
        {
            field.PutAttribute(LegacyKey, LegacyDocValuesType.BYTES_FIXED_DEREF.Name);

            CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_CURRENT);

            CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_CURRENT);

            // deduplicate
            SortedSet<BytesRef> dictionary = new SortedSet<BytesRef>();

            foreach (BytesRef v in values)
            {
                dictionary.Add(v == null ? new BytesRef() : BytesRef.DeepCopyOf(v));
            }

            /* values */
            data.WriteInt(length);
            foreach (BytesRef v in dictionary)
            {
                data.WriteBytes(v.Bytes, v.Offset, v.Length);
            }

            /* ordinals */
            int valueCount = dictionary.Count;

            Debug.Assert(valueCount > 0);
            index.WriteInt(valueCount);
            int maxDoc = State.SegmentInfo.DocCount;

            PackedInts.Writer w = PackedInts.GetWriter(index, maxDoc, PackedInts.BitsRequired(valueCount - 1), PackedInts.DEFAULT);

            BytesRef brefDummy;

            foreach (BytesRef v in values)
            {
                brefDummy = v;

                if (v == null)
                {
                    brefDummy = new BytesRef();
                }
                //int ord = dictionary.HeadSet(brefDummy).Size();
                int ord = dictionary.Count(@ref => @ref.CompareTo(brefDummy) < 0);
                w.Add(ord);
            }
            w.Finish();
        }
 public SortedDocValuesAnonymousInnerClassHelper(Lucene40DocValuesReader outerInstance, int fixedLength, int valueCount, PagedBytes.Reader bytesReader, PackedInts.Reader reader)
 {
     this.OuterInstance = outerInstance;
     this.FixedLength = fixedLength;
     this.valueCount = valueCount;
     this.BytesReader = bytesReader;
     this.Reader = reader;
 }
 public BinaryDocValuesAnonymousInnerClassHelper4(Lucene40DocValuesReader outerInstance, PagedBytes.Reader bytesReader, PackedInts.Reader reader)
 {
     this.OuterInstance = outerInstance;
     this.BytesReader = bytesReader;
     this.Reader = reader;
 }
 public NumericDocValuesAnonymousInnerClassHelper2(Lucene40DocValuesReader outerInstance, long minValue, long defaultValue, PackedInts.Reader reader)
 {
     this.OuterInstance = outerInstance;
     this.MinValue = minValue;
     this.DefaultValue = defaultValue;
     this.Reader = reader;
 }
 public SortedDocValuesAnonymousInnerClassHelper2(Lucene40DocValuesReader outerInstance, PagedBytes.Reader bytesReader, PackedInts.Reader addressReader, PackedInts.Reader ordsReader, int valueCount)
 {
     this.OuterInstance = outerInstance;
     this.BytesReader = bytesReader;
     this.AddressReader = addressReader;
     this.OrdsReader = ordsReader;
     this.valueCount = valueCount;
 }
 public NumericDocValuesAnonymousInnerClassHelper(Lucene42DocValuesProducer outerInstance, long[] decode, PackedInts.Reader ordsReader)
 {
     this.OuterInstance = outerInstance;
     this.Decode = decode;
     this.OrdsReader = ordsReader;
 }
 public NumericDocValuesAnonymousInnerClassHelper(long[] decode, PackedInts.Reader ordsReader)
 {
     this.Decode = decode;
     this.OrdsReader = ordsReader;
 }