Esempio n. 1
0
        private void AddVarSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable <BytesRef> values, IEnumerable <long?> docToOrd)
        {
            field.PutAttribute(legacyKey, LegacyDocValuesType.BYTES_VAR_SORTED.ToString());

            CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

            CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

            /* values */

            long startPos = data.GetFilePointer();

            int valueCount = 0;

            foreach (BytesRef v in values)
            {
                data.WriteBytes(v.Bytes, v.Offset, v.Length);
                valueCount++;
            }

            /* addresses */

            long maxAddress = data.GetFilePointer() - startPos;

            index.WriteInt64(maxAddress);

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(valueCount != int.MaxValue);                           // unsupported by the 4.0 impl
            }
            PackedInt32s.Writer w = PackedInt32s.GetWriter(index, valueCount + 1, PackedInt32s.BitsRequired(maxAddress), PackedInt32s.DEFAULT);
            long currentPosition  = 0;

            foreach (BytesRef v in values)
            {
                w.Add(currentPosition);
                currentPosition += v.Length;
            }
            // write sentinel
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(currentPosition == maxAddress);
            }
            w.Add(currentPosition);
            w.Finish();

            /* ordinals */

            int maxDoc = state.SegmentInfo.DocCount;

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(valueCount > 0);
            }
            PackedInt32s.Writer ords = PackedInt32s.GetWriter(index, maxDoc, PackedInt32s.BitsRequired(valueCount - 1), PackedInt32s.DEFAULT);
            foreach (long n in docToOrd)
            {
                ords.Add((long)n);
            }
            ords.Finish();
        }
Esempio n. 2
0
        public override void AddNumericField(FieldInfo field, IEnumerable <long?> values)
        {
            meta.WriteVInt32(field.Number);
            meta.WriteByte((byte)NUMBER);
            meta.WriteInt64(data.GetFilePointer());
            long minValue = long.MaxValue;
            long maxValue = long.MinValue;
            long gcd      = 0;

            // TODO: more efficient?
            JCG.HashSet <long> uniqueValues = null;
            if (true)
            {
                uniqueValues = new JCG.HashSet <long>();

                long count = 0;
                foreach (long?nv in values)
                {
                    Debug.Assert(nv != null);
                    long v = nv.Value;

                    if (gcd != 1)
                    {
                        if (v < long.MinValue / 2 || v > long.MaxValue / 2)
                        {
                            // in that case v - minValue might overflow and make the GCD computation return
                            // wrong results. Since these extreme values are unlikely, we just discard
                            // GCD computation for them
                            gcd = 1;
                        } // minValue needs to be set first
                        else if (count != 0)
                        {
                            gcd = MathUtil.Gcd(gcd, v - minValue);
                        }
                    }

                    minValue = Math.Min(minValue, v);
                    maxValue = Math.Max(maxValue, v);

                    if (uniqueValues != null)
                    {
                        if (uniqueValues.Add(v))
                        {
                            if (uniqueValues.Count > 256)
                            {
                                uniqueValues = null;
                            }
                        }
                    }

                    ++count;
                }
                Debug.Assert(count == maxDoc);
            }

            if (uniqueValues != null)
            {
                // small number of unique values
                int           bitsPerValue  = PackedInt32s.BitsRequired(uniqueValues.Count - 1);
                FormatAndBits formatAndBits = PackedInt32s.FastestFormatAndBits(maxDoc, bitsPerValue, acceptableOverheadRatio);
                if (formatAndBits.BitsPerValue == 8 && minValue >= sbyte.MinValue && maxValue <= sbyte.MaxValue)
                {
                    meta.WriteByte((byte)UNCOMPRESSED); // uncompressed
                    foreach (long?nv in values)
                    {
                        data.WriteByte((byte)nv.GetValueOrDefault());
                    }
                }
                else
                {
                    meta.WriteByte((byte)TABLE_COMPRESSED); // table-compressed
                    var decode = new long[uniqueValues.Count];
                    uniqueValues.CopyTo(decode);
                    var encode = new Dictionary <long, int>();
                    data.WriteVInt32(decode.Length);
                    for (int i = 0; i < decode.Length; i++)
                    {
                        data.WriteInt64(decode[i]);
                        encode[decode[i]] = i;
                    }

                    meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
                    data.WriteVInt32(formatAndBits.Format.Id);
                    data.WriteVInt32(formatAndBits.BitsPerValue);

                    PackedInt32s.Writer writer = PackedInt32s.GetWriterNoHeader(data, formatAndBits.Format, maxDoc, formatAndBits.BitsPerValue, PackedInt32s.DEFAULT_BUFFER_SIZE);
                    foreach (long?nv in values)
                    {
                        writer.Add(encode[nv.GetValueOrDefault()]);
                    }
                    writer.Finish();
                }
            }
            else if (gcd != 0 && gcd != 1)
            {
                meta.WriteByte((byte)GCD_COMPRESSED);
                meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
                data.WriteInt64(minValue);
                data.WriteInt64(gcd);
                data.WriteVInt32(BLOCK_SIZE);

                var writer = new BlockPackedWriter(data, BLOCK_SIZE);
                foreach (long?nv in values)
                {
                    writer.Add((nv.GetValueOrDefault() - minValue) / gcd);
                }
                writer.Finish();
            }
            else
            {
                meta.WriteByte((byte)DELTA_COMPRESSED); // delta-compressed

                meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
                data.WriteVInt32(BLOCK_SIZE);

                var writer = new BlockPackedWriter(data, BLOCK_SIZE);
                foreach (long?nv in values)
                {
                    writer.Add(nv.GetValueOrDefault());
                }
                writer.Finish();
            }
        }
Esempio n. 3
0
        [ExceptionToNetNumericConvention] // LUCENENET: Private API, keeping as-is
        private void AddVarIntsField(FieldInfo field, IndexOutput output, IEnumerable <long?> values, long minValue, long maxValue)
        {
            field.PutAttribute(legacyKey, LegacyDocValuesType.VAR_INTS.ToString());

            CodecUtil.WriteHeader(output, Lucene40DocValuesFormat.VAR_INTS_CODEC_NAME, Lucene40DocValuesFormat.VAR_INTS_VERSION_CURRENT);

            long delta = maxValue - minValue;

            if (delta < 0)
            {
                // writes longs
                output.WriteByte((byte)Lucene40DocValuesFormat.VAR_INTS_FIXED_64);
                foreach (long?n in values)
                {
                    output.WriteInt64(n.GetValueOrDefault());
                }
            }
            else
            {
                // writes packed ints
                output.WriteByte((byte)Lucene40DocValuesFormat.VAR_INTS_PACKED);
                output.WriteInt64(minValue);
                output.WriteInt64(0 - minValue); // default value (representation of 0)
                PackedInt32s.Writer writer = PackedInt32s.GetWriter(output, state.SegmentInfo.DocCount, PackedInt32s.BitsRequired(delta), PackedInt32s.DEFAULT);
                foreach (long?n in values)
                {
                    writer.Add(n.GetValueOrDefault() - minValue);
                }
                writer.Finish();
            }
        }
Esempio n. 4
0
            /// <summary>
            /// Go to the chunk containing the provided <paramref name="doc"/> ID.
            /// </summary>
            internal void Next(int doc)
            {
                Debug.Assert(doc >= this.docBase + this.chunkDocs, doc + " " + this.docBase + " " + this.chunkDocs);
                fieldsStream.Seek(outerInstance.indexReader.GetStartPointer(doc));

                int docBase   = fieldsStream.ReadVInt32();
                int chunkDocs = fieldsStream.ReadVInt32();

                if (docBase < this.docBase + this.chunkDocs || docBase + chunkDocs > outerInstance.numDocs)
                {
                    throw new CorruptIndexException("Corrupted: current docBase=" + this.docBase + ", current numDocs=" + this.chunkDocs + ", new docBase=" + docBase + ", new numDocs=" + chunkDocs + " (resource=" + fieldsStream + ")");
                }
                this.docBase   = docBase;
                this.chunkDocs = chunkDocs;

                if (chunkDocs > numStoredFields.Length)
                {
                    int newLength = ArrayUtil.Oversize(chunkDocs, 4);
                    numStoredFields = new int[newLength];
                    lengths         = new int[newLength];
                }

                if (chunkDocs == 1)
                {
                    numStoredFields[0] = fieldsStream.ReadVInt32();
                    lengths[0]         = fieldsStream.ReadVInt32();
                }
                else
                {
                    int bitsPerStoredFields = fieldsStream.ReadVInt32();
                    if (bitsPerStoredFields == 0)
                    {
                        Arrays.Fill(numStoredFields, 0, chunkDocs, fieldsStream.ReadVInt32());
                    }
                    else if (bitsPerStoredFields > 31)
                    {
                        throw new CorruptIndexException("bitsPerStoredFields=" + bitsPerStoredFields + " (resource=" + fieldsStream + ")");
                    }
                    else
                    {
                        PackedInt32s.IReaderIterator it = PackedInt32s.GetReaderIteratorNoHeader(fieldsStream, PackedInt32s.Format.PACKED, outerInstance.packedIntsVersion, chunkDocs, bitsPerStoredFields, 1);
                        for (int i = 0; i < chunkDocs; ++i)
                        {
                            numStoredFields[i] = (int)it.Next();
                        }
                    }

                    int bitsPerLength = fieldsStream.ReadVInt32();
                    if (bitsPerLength == 0)
                    {
                        Arrays.Fill(lengths, 0, chunkDocs, fieldsStream.ReadVInt32());
                    }
                    else if (bitsPerLength > 31)
                    {
                        throw new CorruptIndexException("bitsPerLength=" + bitsPerLength);
                    }
                    else
                    {
                        PackedInt32s.IReaderIterator it = PackedInt32s.GetReaderIteratorNoHeader(fieldsStream, PackedInt32s.Format.PACKED, outerInstance.packedIntsVersion, chunkDocs, bitsPerLength, 1);
                        for (int i = 0; i < chunkDocs; ++i)
                        {
                            lengths[i] = (int)it.Next();
                        }
                    }
                }
            }
Esempio n. 5
0
            public override void Finish(long termsFilePointer)
            {
                // write primary terms dict offsets
                packedIndexStart = outerInstance.m_output.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream

                PackedInt32s.Writer w = PackedInt32s.GetWriter(outerInstance.m_output, numIndexTerms, PackedInt32s.BitsRequired(termsFilePointer), PackedInt32s.DEFAULT);

                // relative to our indexStart
                long upto = 0;

                for (int i = 0; i < numIndexTerms; i++)
                {
                    upto += termsPointerDeltas[i];
                    w.Add(upto);
                }
                w.Finish();

                packedOffsetsStart = outerInstance.m_output.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream

                // write offsets into the byte[] terms
                w    = PackedInt32s.GetWriter(outerInstance.m_output, 1 + numIndexTerms, PackedInt32s.BitsRequired(totTermLength), PackedInt32s.DEFAULT);
                upto = 0;
                for (int i = 0; i < numIndexTerms; i++)
                {
                    w.Add(upto);
                    upto += termLengths[i];
                }
                w.Add(upto);
                w.Finish();

                // our referrer holds onto us, while other fields are
                // being written, so don't tie up this RAM:
                termLengths        = null;
                termsPointerDeltas = null;
            }
Esempio n. 6
0
        private void FlushFlags(int totalFields, int[] fieldNums)
        {
            // check if fields always have the same flags
            bool nonChangingFlags = true;

            int[] fieldFlags = new int[fieldNums.Length];
            Arrays.Fill(fieldFlags, -1);
            bool breakOuterLoop;

            foreach (DocData dd in pendingDocs)
            {
                breakOuterLoop = false;
                foreach (FieldData fd in dd.fields)
                {
                    int fieldNumOff = Array.BinarySearch(fieldNums, fd.fieldNum);
                    Debug.Assert(fieldNumOff >= 0);
                    if (fieldFlags[fieldNumOff] == -1)
                    {
                        fieldFlags[fieldNumOff] = fd.flags;
                    }
                    else if (fieldFlags[fieldNumOff] != fd.flags)
                    {
                        nonChangingFlags = false;
                        breakOuterLoop   = true;
                    }
                }
                if (breakOuterLoop)
                {
                    break;
                }
            }

            if (nonChangingFlags)
            {
                // write one flag per field num
                vectorsStream.WriteVInt32(0);
                PackedInt32s.Writer writer = PackedInt32s.GetWriterNoHeader(vectorsStream, PackedInt32s.Format.PACKED, fieldFlags.Length, FLAGS_BITS, 1);
                foreach (int flags in fieldFlags)
                {
                    Debug.Assert(flags >= 0);
                    writer.Add(flags);
                }
                Debug.Assert(writer.Ord == fieldFlags.Length - 1);
                writer.Finish();
            }
            else
            {
                // write one flag for every field instance
                vectorsStream.WriteVInt32(1);
                PackedInt32s.Writer writer = PackedInt32s.GetWriterNoHeader(vectorsStream, PackedInt32s.Format.PACKED, totalFields, FLAGS_BITS, 1);
                foreach (DocData dd in pendingDocs)
                {
                    foreach (FieldData fd in dd.fields)
                    {
                        writer.Add(fd.flags);
                    }
                }
                Debug.Assert(writer.Ord == totalFields - 1);
                writer.Finish();
            }
        }
Esempio n. 7
0
        public override void VisitDocument(int docID, StoredFieldVisitor visitor)
        {
            fieldsStream.Seek(indexReader.GetStartPointer(docID));

            int docBase   = fieldsStream.ReadVInt32();
            int chunkDocs = fieldsStream.ReadVInt32();

            if (docID < docBase || docID >= docBase + chunkDocs || docBase + chunkDocs > numDocs)
            {
                throw new CorruptIndexException("Corrupted: docID=" + docID + ", docBase=" + docBase + ", chunkDocs=" + chunkDocs + ", numDocs=" + numDocs + " (resource=" + fieldsStream + ")");
            }

            int numStoredFields, offset, length, totalLength;

            if (chunkDocs == 1)
            {
                numStoredFields = fieldsStream.ReadVInt32();
                offset          = 0;
                length          = fieldsStream.ReadVInt32();
                totalLength     = length;
            }
            else
            {
                int bitsPerStoredFields = fieldsStream.ReadVInt32();
                if (bitsPerStoredFields == 0)
                {
                    numStoredFields = fieldsStream.ReadVInt32();
                }
                else if (bitsPerStoredFields > 31)
                {
                    throw new CorruptIndexException("bitsPerStoredFields=" + bitsPerStoredFields + " (resource=" + fieldsStream + ")");
                }
                else
                {
                    long filePointer           = fieldsStream.GetFilePointer();
                    PackedInt32s.Reader reader = PackedInt32s.GetDirectReaderNoHeader(fieldsStream, PackedInt32s.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerStoredFields);
                    numStoredFields = (int)(reader.Get(docID - docBase));
                    fieldsStream.Seek(filePointer + PackedInt32s.Format.PACKED.ByteCount(packedIntsVersion, chunkDocs, bitsPerStoredFields));
                }

                int bitsPerLength = fieldsStream.ReadVInt32();
                if (bitsPerLength == 0)
                {
                    length      = fieldsStream.ReadVInt32();
                    offset      = (docID - docBase) * length;
                    totalLength = chunkDocs * length;
                }
                else if (bitsPerStoredFields > 31)
                {
                    throw new CorruptIndexException("bitsPerLength=" + bitsPerLength + " (resource=" + fieldsStream + ")");
                }
                else
                {
                    PackedInt32s.IReaderIterator it = PackedInt32s.GetReaderIteratorNoHeader(fieldsStream, PackedInt32s.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerLength, 1);
                    int off = 0;
                    for (int i = 0; i < docID - docBase; ++i)
                    {
                        off += (int)it.Next();
                    }
                    offset = off;
                    length = (int)it.Next();
                    off   += length;
                    for (int i = docID - docBase + 1; i < chunkDocs; ++i)
                    {
                        off += (int)it.Next();
                    }
                    totalLength = off;
                }
            }

            if ((length == 0) != (numStoredFields == 0))
            {
                throw new CorruptIndexException("length=" + length + ", numStoredFields=" + numStoredFields + " (resource=" + fieldsStream + ")");
            }
            if (numStoredFields == 0)
            {
                // nothing to do
                return;
            }

            DataInput documentInput;

            if (version >= CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS && totalLength >= 2 * chunkSize)
            {
                Debug.Assert(chunkSize > 0);
                Debug.Assert(offset < chunkSize);

                decompressor.Decompress(fieldsStream, chunkSize, offset, Math.Min(length, chunkSize - offset), bytes);
                documentInput = new DataInputAnonymousInnerClassHelper(this, offset, length);
            }
            else
            {
                BytesRef bytes = totalLength <= BUFFER_REUSE_THRESHOLD ? this.bytes : new BytesRef();
                decompressor.Decompress(fieldsStream, totalLength, offset, length, bytes);
                Debug.Assert(bytes.Length == length);
                documentInput = new ByteArrayDataInput(bytes.Bytes, bytes.Offset, bytes.Length);
            }

            for (int fieldIDX = 0; fieldIDX < numStoredFields; fieldIDX++)
            {
                long      infoAndBits = documentInput.ReadVInt64();
                int       fieldNumber = (int)((long)((ulong)infoAndBits >> CompressingStoredFieldsWriter.TYPE_BITS));
                FieldInfo fieldInfo   = fieldInfos.FieldInfo(fieldNumber);

                int bits = (int)(infoAndBits & CompressingStoredFieldsWriter.TYPE_MASK);
                Debug.Assert(bits <= CompressingStoredFieldsWriter.NUMERIC_DOUBLE, "bits=" + bits.ToString("x"));

                switch (visitor.NeedsField(fieldInfo))
                {
                case StoredFieldVisitor.Status.YES:
                    ReadField(documentInput, visitor, fieldInfo, bits);
                    break;

                case StoredFieldVisitor.Status.NO:
                    SkipField(documentInput, bits);
                    break;

                case StoredFieldVisitor.Status.STOP:
                    return;
                }
            }
        }
        internal virtual void AddNumericField(FieldInfo field, IEnumerable <long?> values, bool optimizeStorage)
        {
            meta.WriteVInt32(field.Number);
            meta.WriteByte(MemoryDocValuesProducer.NUMBER);
            meta.WriteInt64(data.GetFilePointer());
            long minValue = long.MaxValue;
            long maxValue = long.MinValue;
            long gcd      = 0;
            bool missing  = false;
            // TODO: more efficient?
            ISet <long?> uniqueValues = null;

            if (optimizeStorage)
            {
                uniqueValues = new JCG.HashSet <long?>();

                long count = 0;
                foreach (var nv in values)
                {
                    long v;
                    if (nv == null)
                    {
                        v       = 0;
                        missing = true;
                    }
                    else
                    {
                        v = nv.Value;
                    }

                    if (gcd != 1)
                    {
                        if (v < long.MinValue / 2 || v > long.MaxValue / 2)
                        {
                            // in that case v - minValue might overflow and make the GCD computation return
                            // wrong results. Since these extreme values are unlikely, we just discard
                            // GCD computation for them
                            gcd = 1;
                        } // minValue needs to be set first
                        else if (count != 0)
                        {
                            gcd = MathUtil.Gcd(gcd, v - minValue);
                        }
                    }

                    minValue = Math.Min(minValue, v);
                    maxValue = Math.Max(maxValue, v);

                    if (uniqueValues != null)
                    {
                        if (uniqueValues.Add(v))
                        {
                            if (uniqueValues.Count > 256)
                            {
                                uniqueValues = null;
                            }
                        }
                    }

                    ++count;
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(count == maxDoc);
                }
            }

            if (missing)
            {
                long start = data.GetFilePointer();
                WriteMissingBitset(values);
                meta.WriteInt64(start);
                meta.WriteInt64(data.GetFilePointer() - start);
            }
            else
            {
                meta.WriteInt64(-1L);
            }

            if (uniqueValues != null)
            {
                // small number of unique values

                int           bitsPerValue  = PackedInt32s.BitsRequired(uniqueValues.Count - 1);
                FormatAndBits formatAndBits = PackedInt32s.FastestFormatAndBits(maxDoc, bitsPerValue,
                                                                                acceptableOverheadRatio);
                if (formatAndBits.BitsPerValue == 8 && minValue >= sbyte.MinValue && maxValue <= sbyte.MaxValue)
                {
                    meta.WriteByte(MemoryDocValuesProducer.UNCOMPRESSED); // uncompressed
                    foreach (var nv in values)
                    {
                        data.WriteByte((byte)nv.GetValueOrDefault());
                    }
                }
                else
                {
                    meta.WriteByte(MemoryDocValuesProducer.TABLE_COMPRESSED); // table-compressed
                    long?[] decode = new long?[uniqueValues.Count];
                    uniqueValues.CopyTo(decode, 0);

                    var encode = new Dictionary <long?, int?>();
                    data.WriteVInt32(decode.Length);
                    for (int i = 0; i < decode.Length; i++)
                    {
                        data.WriteInt64(decode[i].Value);
                        encode[decode[i]] = i;
                    }

                    meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
                    data.WriteVInt32(formatAndBits.Format.Id);
                    data.WriteVInt32(formatAndBits.BitsPerValue);

                    PackedInt32s.Writer writer = PackedInt32s.GetWriterNoHeader(data, formatAndBits.Format, maxDoc,
                                                                                formatAndBits.BitsPerValue, PackedInt32s.DEFAULT_BUFFER_SIZE);
                    foreach (var nv in values)
                    {
                        var v = encode[nv.GetValueOrDefault()];

                        writer.Add((long)v);
                    }
                    writer.Finish();
                }
            }
            else if (gcd != 0 && gcd != 1)
            {
                meta.WriteByte(MemoryDocValuesProducer.GCD_COMPRESSED);
                meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
                data.WriteInt64(minValue);
                data.WriteInt64(gcd);
                data.WriteVInt32(MemoryDocValuesProducer.BLOCK_SIZE);

                var writer = new BlockPackedWriter(data, MemoryDocValuesProducer.BLOCK_SIZE);
                foreach (var nv in values)
                {
                    writer.Add((nv.GetValueOrDefault() - minValue) / gcd);
                }
                writer.Finish();
            }
            else
            {
                meta.WriteByte(MemoryDocValuesProducer.DELTA_COMPRESSED); // delta-compressed

                meta.WriteVInt32(PackedInt32s.VERSION_CURRENT);
                data.WriteVInt32(MemoryDocValuesProducer.BLOCK_SIZE);

                var writer = new BlockPackedWriter(data, MemoryDocValuesProducer.BLOCK_SIZE);
                foreach (var nv in values)
                {
                    writer.Add(nv.GetValueOrDefault());
                }
                writer.Finish();
            }
        }
Esempio n. 9
0
 private void FlushFields(int totalFields, int[] fieldNums)
 {
     PackedInt32s.Writer writer = PackedInt32s.GetWriterNoHeader(vectorsStream, PackedInt32s.Format.PACKED, totalFields, PackedInt32s.BitsRequired(fieldNums.Length - 1), 1);
     foreach (DocData dd in pendingDocs)
     {
         foreach (FieldData fd in dd.fields)
         {
             int fieldNumIndex = Array.BinarySearch(fieldNums, fd.fieldNum);
             Debug.Assert(fieldNumIndex >= 0);
             writer.Add(fieldNumIndex);
         }
     }
     writer.Finish();
 }
Esempio n. 10
0
 /// <summary>
 /// Sole constructor. </summary>
 /// <param name="blockSize"> the number of values of a single block, must be a multiple of <c>64</c>. </param>
 protected AbstractBlockPackedWriter(DataOutput @out, int blockSize) // LUCENENET specific - marked protected instead of public
 {
     PackedInt32s.CheckBlockSize(blockSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
     Reset(@out);
     m_values = new long[blockSize];
 }
Esempio n. 11
0
        public override void Fill(int fromIndex, int toIndex, long val)
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(PackedInt32s.BitsRequired(val) <= BitsPerValue);
                Debugging.Assert(fromIndex <= toIndex);
            }

            // minimum number of values that use an exact number of full blocks
            int nAlignedValues = 64 / Gcd(64, m_bitsPerValue);
            int span           = toIndex - fromIndex;

            if (span <= 3 * nAlignedValues)
            {
                // there needs be at least 2 * nAlignedValues aligned values for the
                // block approach to be worth trying
                base.Fill(fromIndex, toIndex, val);
                return;
            }

            // fill the first values naively until the next block start
            int fromIndexModNAlignedValues = fromIndex % nAlignedValues;

            if (fromIndexModNAlignedValues != 0)
            {
                for (int i = fromIndexModNAlignedValues; i < nAlignedValues; ++i)
                {
                    Set(fromIndex++, val);
                }
            }
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(fromIndex % nAlignedValues == 0);
            }

            // compute the long[] blocks for nAlignedValues consecutive values and
            // use them to set as many values as possible without applying any mask
            // or shift
            int nAlignedBlocks = (nAlignedValues * m_bitsPerValue) >> 6;

            long[] nAlignedValuesBlocks;
            {
                Packed64 values = new Packed64(nAlignedValues, m_bitsPerValue);
                for (int i = 0; i < nAlignedValues; ++i)
                {
                    values.Set(i, val);
                }
                nAlignedValuesBlocks = values.blocks;
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(nAlignedBlocks <= nAlignedValuesBlocks.Length);
                }
            }
            int startBlock = (int)((ulong)((long)fromIndex * m_bitsPerValue) >> 6);
            int endBlock   = (int)((ulong)((long)toIndex * m_bitsPerValue) >> 6);

            for (int block = startBlock; block < endBlock; ++block)
            {
                long blockValue = nAlignedValuesBlocks[block % nAlignedBlocks];
                blocks[block] = blockValue;
            }

            // fill the gap
            for (int i = (int)(((long)endBlock << 6) / m_bitsPerValue); i < toIndex; ++i)
            {
                Set(i, val);
            }
        }
Esempio n. 12
0
        public virtual void TestEncodeDecode()
        {
            int   iterations = RandomInts.RandomInt32Between(Random, 1, 1000);
            float AcceptableOverheadRatio = (float)Random.NextDouble();

            int[] values = new int[(iterations - 1) * Lucene41PostingsFormat.BLOCK_SIZE + ForUtil.MAX_DATA_SIZE];
            for (int i = 0; i < iterations; ++i)
            {
                int bpv = Random.Next(32);
                if (bpv == 0)
                {
                    int value = RandomInts.RandomInt32Between(Random, 0, int.MaxValue);
                    for (int j = 0; j < Lucene41PostingsFormat.BLOCK_SIZE; ++j)
                    {
                        values[i * Lucene41PostingsFormat.BLOCK_SIZE + j] = value;
                    }
                }
                else
                {
                    for (int j = 0; j < Lucene41PostingsFormat.BLOCK_SIZE; ++j)
                    {
                        values[i * Lucene41PostingsFormat.BLOCK_SIZE + j] = RandomInts.RandomInt32Between(Random, 0, (int)PackedInt32s.MaxValue(bpv));
                    }
                }
            }

            Directory d = new RAMDirectory();
            long      endPointer;

            {
                // encode
                IndexOutput @out    = d.CreateOutput("test.bin", IOContext.DEFAULT);
                ForUtil     forUtil = new ForUtil(AcceptableOverheadRatio, @out);

                for (int i = 0; i < iterations; ++i)
                {
                    forUtil.WriteBlock(Arrays.CopyOfRange(values, i * Lucene41PostingsFormat.BLOCK_SIZE, values.Length), new byte[Lucene41.ForUtil.MAX_ENCODED_SIZE], @out);
                }
                endPointer = @out.GetFilePointer();
                @out.Dispose();
            }

            {
                // decode
                IndexInput @in     = d.OpenInput("test.bin", IOContext.READ_ONCE);
                ForUtil    forUtil = new ForUtil(@in);
                for (int i = 0; i < iterations; ++i)
                {
                    if (Random.NextBoolean())
                    {
                        forUtil.SkipBlock(@in);
                        continue;
                    }
                    int[] restored = new int[Lucene41.ForUtil.MAX_DATA_SIZE];
                    forUtil.ReadBlock(@in, new byte[Lucene41.ForUtil.MAX_ENCODED_SIZE], restored);
                    Assert.AreEqual(Arrays.CopyOfRange(values, i * Lucene41PostingsFormat.BLOCK_SIZE, (i + 1) * Lucene41PostingsFormat.BLOCK_SIZE), Arrays.CopyOf(restored, Lucene41PostingsFormat.BLOCK_SIZE));
                }
                assertEquals(endPointer, @in.GetFilePointer());
                @in.Dispose();
            }
        }
Esempio n. 13
0
 private static long Mask(int bitsPerValue)
 {
     return(bitsPerValue == 64 ? ~0L : PackedInt32s.MaxValue(bitsPerValue));
 }
Esempio n. 14
0
 /// <param name="startBitsPerValue">       the initial number of bits per value, may grow depending on the data </param>
 /// <param name="valueCount">              the number of values </param>
 /// <param name="acceptableOverheadRatio"> an acceptable overhead ratio </param>
 public GrowableWriter(int startBitsPerValue, int valueCount, float acceptableOverheadRatio)
 {
     this.acceptableOverheadRatio = acceptableOverheadRatio;
     current     = PackedInt32s.GetMutable(valueCount, startBitsPerValue, this.acceptableOverheadRatio);
     currentMask = Mask(current.BitsPerValue);
 }
Esempio n. 15
0
        private void DoTest(DocValuesType type)
        {
            Directory         d        = NewDirectory();
            IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
            int   nDocs = AtLeast(50);
            Field id    = new NumericDocValuesField("id", 0);
            Field f;

            switch (type)
            {
            case DocValuesType.BINARY:
                f = new BinaryDocValuesField("dv", new BytesRef());
                break;

            case DocValuesType.SORTED:
                f = new SortedDocValuesField("dv", new BytesRef());
                break;

            case DocValuesType.NUMERIC:
                f = new NumericDocValuesField("dv", 0);
                break;

            default:
                throw AssertionError.Create();
            }
            Document document = new Document();

            document.Add(id);
            document.Add(f);

            object[] vals = new object[nDocs];

            RandomIndexWriter iw = new RandomIndexWriter(Random, d, iwConfig);

            for (int i = 0; i < nDocs; ++i)
            {
                id.SetInt64Value(i);
                switch (type)
                {
                case DocValuesType.SORTED:
                case DocValuesType.BINARY:
                    do
                    {
                        vals[i] = TestUtil.RandomSimpleString(Random, 20);
                    } while (((string)vals[i]).Length == 0);
                    f.SetBytesValue(new BytesRef((string)vals[i]));
                    break;

                case DocValuesType.NUMERIC:
                    int bitsPerValue = RandomInts.RandomInt32Between(Random, 1, 31);     // keep it an int
                    vals[i] = (long)Random.Next((int)PackedInt32s.MaxValue(bitsPerValue));
                    f.SetInt64Value((long)vals[i]);
                    break;
                }
                iw.AddDocument(document);
                if (Random.NextBoolean() && i % 10 == 9)
                {
                    iw.Commit();
                }
            }
            iw.Dispose();

            DirectoryReader rd = DirectoryReader.Open(d);

            foreach (AtomicReaderContext leave in rd.Leaves)
            {
                FunctionValues ids = (new Int64FieldSource("id")).GetValues(null, leave);
                ValueSource    vs;
                switch (type)
                {
                case DocValuesType.BINARY:
                case DocValuesType.SORTED:
                    vs = new BytesRefFieldSource("dv");
                    break;

                case DocValuesType.NUMERIC:
                    vs = new Int64FieldSource("dv");
                    break;

                default:
                    throw AssertionError.Create();
                }
                FunctionValues values = vs.GetValues(null, leave);
                BytesRef       bytes  = new BytesRef();
                for (int i = 0; i < leave.AtomicReader.MaxDoc; ++i)
                {
                    assertTrue(values.Exists(i));
                    if (vs is BytesRefFieldSource)
                    {
                        assertTrue(values.ObjectVal(i) is string);
                    }
                    else if (vs is Int64FieldSource)
                    {
                        assertTrue(values.ObjectVal(i) is long?);
                        assertTrue(values.BytesVal(i, bytes));
                    }
                    else
                    {
                        throw AssertionError.Create();
                    }

                    object expected = vals[ids.Int32Val(i)];
                    switch (type)
                    {
                    case DocValuesType.SORTED:
                        values.OrdVal(i);     // no exception
                        assertTrue(values.NumOrd >= 1);
                        goto case DocValuesType.BINARY;

                    case DocValuesType.BINARY:
                        assertEquals(expected, values.ObjectVal(i));
                        assertEquals(expected, values.StrVal(i));
                        assertEquals(expected, values.ObjectVal(i));
                        assertEquals(expected, values.StrVal(i));
                        assertTrue(values.BytesVal(i, bytes));
                        assertEquals(new BytesRef((string)expected), bytes);
                        break;

                    case DocValuesType.NUMERIC:
                        assertEquals(Convert.ToInt64(expected, CultureInfo.InvariantCulture), values.Int64Val(i));
                        break;
                    }
                }
            }
            rd.Dispose();
            d.Dispose();
        }
 /// <summary>
 /// Sole constructor. </summary>
 /// <param name="blockSize"> the number of values of a single block, must be a multiple of <c>64</c>. </param>
 public AbstractBlockPackedWriter(DataOutput @out, int blockSize)
 {
     PackedInt32s.CheckBlockSize(blockSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
     Reset(@out);
     m_values = new long[blockSize];
 }