Exemple #1
0
        /// <summary>
        /// NOTE: This was loadVarIntsField() in Lucene.
        /// </summary>
        private NumericDocValues LoadVarInt32sField(/*FieldInfo field, // LUCENENET: Never read */ IndexInput input)
        {
            CodecUtil.CheckHeader(input, Lucene40DocValuesFormat.VAR_INTS_CODEC_NAME, Lucene40DocValuesFormat.VAR_INTS_VERSION_START, Lucene40DocValuesFormat.VAR_INTS_VERSION_CURRENT);
            var header = (sbyte)input.ReadByte();

            if (header == Lucene40DocValuesFormat.VAR_INTS_FIXED_64)
            {
                int maxDoc = state.SegmentInfo.DocCount;
                var values = new long[maxDoc];
                for (int i = 0; i < values.Length; i++)
                {
                    values[i] = input.ReadInt64();
                }
                ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(values));
                return(new NumericDocValuesAnonymousClass(values));
            }
            else if (header == Lucene40DocValuesFormat.VAR_INTS_PACKED)
            {
                long minValue              = input.ReadInt64();
                long defaultValue          = input.ReadInt64();
                PackedInt32s.Reader reader = PackedInt32s.GetReader(input);
                ramBytesUsed.AddAndGet(reader.RamBytesUsed());
                return(new NumericDocValuesAnonymousClass2(minValue, defaultValue, reader));
            }
            else
            {
                throw new CorruptIndexException("invalid VAR_INTS header byte: " + header + " (resource=" + input + ")");
            }
        }
        private NumericDocValues LoadNumeric(FieldInfo field)
        {
            NumericEntry entry = numerics[field.Number];

            data.Seek(entry.Offset);
            switch (entry.Format)
            {
            case TABLE_COMPRESSED:
                int size = data.ReadVInt32();
                if (size > 256)
                {
                    throw new CorruptIndexException("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + data);
                }
                var decode = new long[size];
                for (int i = 0; i < decode.Length; i++)
                {
                    decode[i] = data.ReadInt64();
                }
                int formatID     = data.ReadVInt32();
                int bitsPerValue = data.ReadVInt32();
                PackedInt32s.Reader ordsReader = PackedInt32s.GetReaderNoHeader(data, PackedInt32s.Format.ById(formatID), entry.PackedInt32sVersion, maxDoc, bitsPerValue);
                ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(decode) + ordsReader.RamBytesUsed());
                return(new NumericDocValuesAnonymousInnerClassHelper(decode, ordsReader));

            case DELTA_COMPRESSED:
                int blockSize = data.ReadVInt32();
                var reader    = new BlockPackedReader(data, entry.PackedInt32sVersion, blockSize, maxDoc, false);
                ramBytesUsed.AddAndGet(reader.RamBytesUsed());
                return(reader);

            case UNCOMPRESSED:
                byte[] bytes = new byte[maxDoc];
                data.ReadBytes(bytes, 0, bytes.Length);
                ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(bytes));
                return(new NumericDocValuesAnonymousInnerClassHelper2(this, bytes));

            case GCD_COMPRESSED:
                long min  = data.ReadInt64();
                long mult = data.ReadInt64();
                int  quotientBlockSize           = data.ReadVInt32();
                BlockPackedReader quotientReader = new BlockPackedReader(data, entry.PackedInt32sVersion, quotientBlockSize, maxDoc, false);
                ramBytesUsed.AddAndGet(quotientReader.RamBytesUsed());
                return(new NumericDocValuesAnonymousInnerClassHelper3(min, mult, quotientReader));

            default:
                throw new InvalidOperationException();
            }
        }
Exemple #3
0
        private SortedDocValues LoadBytesFixedSorted(/*FieldInfo field, // LUCENENET: Never read */ IndexInput data, IndexInput index)
        {
            CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);
            CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);

            int fixedLength = data.ReadInt32();
            int valueCount  = index.ReadInt32();

            PagedBytes bytes = new PagedBytes(16);

            bytes.Copy(data, fixedLength * (long)valueCount);
            PagedBytes.Reader   bytesReader = bytes.Freeze(true);
            PackedInt32s.Reader reader      = PackedInt32s.GetReader(index);
            ramBytesUsed.AddAndGet(bytes.RamBytesUsed() + reader.RamBytesUsed());

            return(CorrectBuggyOrds(new SortedDocValuesAnonymousClass(fixedLength, valueCount, bytesReader, reader)));
        }
Exemple #4
0
        private SortedDocValues LoadBytesVarSorted(/*FieldInfo field, // LUCENENET: Never read */ IndexInput data, IndexInput index)
        {
            CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
            CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

            long       maxAddress = index.ReadInt64();
            PagedBytes bytes      = new PagedBytes(16);

            bytes.Copy(data, maxAddress);
            PagedBytes.Reader   bytesReader   = bytes.Freeze(true);
            PackedInt32s.Reader addressReader = PackedInt32s.GetReader(index);
            PackedInt32s.Reader ordsReader    = PackedInt32s.GetReader(index);

            int valueCount = addressReader.Count - 1;

            ramBytesUsed.AddAndGet(bytes.RamBytesUsed() + addressReader.RamBytesUsed() + ordsReader.RamBytesUsed());

            return(CorrectBuggyOrds(new SortedDocValuesAnonymousClass2(bytesReader, addressReader, ordsReader, valueCount)));
        }
Exemple #5
0
        private BinaryDocValues LoadBytesFixedDeref(FieldInfo field)
        {
            string     dataName  = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "dat");
            string     indexName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "idx");
            IndexInput data      = null;
            IndexInput index     = null;
            bool       success   = false;

            try
            {
                data = dir.OpenInput(dataName, state.Context);
                CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_START, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_CURRENT);
                index = dir.OpenInput(indexName, state.Context);
                CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_START, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_CURRENT);

                int        fixedLength = data.ReadInt32();
                int        valueCount  = index.ReadInt32();
                PagedBytes bytes       = new PagedBytes(16);
                bytes.Copy(data, fixedLength * (long)valueCount);
                PagedBytes.Reader   bytesReader = bytes.Freeze(true);
                PackedInt32s.Reader reader      = PackedInt32s.GetReader(index);
                CodecUtil.CheckEOF(data);
                CodecUtil.CheckEOF(index);
                ramBytesUsed.AddAndGet(bytes.RamBytesUsed() + reader.RamBytesUsed());
                success = true;
                return(new BinaryDocValuesAnonymousClass3(fixedLength, bytesReader, reader));
            }
            finally
            {
                if (success)
                {
                    IOUtils.Dispose(data, index);
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(data, index);
                }
            }
        }
        private BinaryDocValues LoadBytesVarStraight(FieldInfo field)
        {
            string     dataName  = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "dat");
            string     indexName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "idx");
            IndexInput data      = null;
            IndexInput index     = null;
            bool       success   = false;

            try
            {
                data = dir.OpenInput(dataName, state.Context);
                CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);
                index = dir.OpenInput(indexName, state.Context);
                CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);
                long       totalBytes = index.ReadVInt64();
                PagedBytes bytes      = new PagedBytes(16);
                bytes.Copy(data, totalBytes);
                PagedBytes.Reader   bytesReader = bytes.Freeze(true);
                PackedInt32s.Reader reader      = PackedInt32s.GetReader(index);
                CodecUtil.CheckEOF(data);
                CodecUtil.CheckEOF(index);
                success = true;
                ramBytesUsed.AddAndGet(bytes.RamBytesUsed() + reader.RamBytesUsed());
                return(new BinaryDocValuesAnonymousInnerClassHelper2(bytesReader, reader));
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(data, index);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(data, index);
                }
            }
        }
        /// <summary>
        /// Loads the segment information at segment load time.
        /// </summary>
        /// <param name="indexEnum">
        ///          The term enum. </param>
        /// <param name="indexDivisor">
        ///          The index divisor. </param>
        /// <param name="tiiFileLength">
        ///          The size of the tii file, used to approximate the size of the
        ///          buffer. </param>
        /// <param name="totalIndexInterval">
        ///          The total index interval. </param>
        public TermInfosReaderIndex(SegmentTermEnum indexEnum, int indexDivisor, long tiiFileLength, int totalIndexInterval)
        {
            this.totalIndexInterval = totalIndexInterval;
            indexSize    = 1 + ((int)indexEnum.size - 1) / indexDivisor;
            skipInterval = indexEnum.skipInterval;
            // this is only an inital size, it will be GCed once the build is complete
            long                 initialSize    = (long)(tiiFileLength * 1.5) / indexDivisor;
            PagedBytes           dataPagedBytes = new PagedBytes(EstimatePageBits(initialSize));
            PagedBytesDataOutput dataOutput     = dataPagedBytes.GetDataOutput();

            int            bitEstimate  = 1 + MathUtil.Log(tiiFileLength, 2);
            GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, PackedInt32s.DEFAULT);

            string         currentField = null;
            IList <string> fieldStrs    = new List <string>();
            int            fieldCounter = -1;

            for (int i = 0; indexEnum.Next(); i++)
            {
                Term term = indexEnum.Term();
                if (currentField == null || !currentField.Equals(term.Field, StringComparison.Ordinal))
                {
                    currentField = term.Field;
                    fieldStrs.Add(currentField);
                    fieldCounter++;
                }
                TermInfo termInfo = indexEnum.TermInfo();
                indexToTerms.Set(i, dataOutput.GetPosition());
                dataOutput.WriteVInt32(fieldCounter);
                dataOutput.WriteString(term.Text());
                dataOutput.WriteVInt32(termInfo.DocFreq);
                if (termInfo.DocFreq >= skipInterval)
                {
                    dataOutput.WriteVInt32(termInfo.SkipOffset);
                }
                dataOutput.WriteVInt64(termInfo.FreqPointer);
                dataOutput.WriteVInt64(termInfo.ProxPointer);
                dataOutput.WriteVInt64(indexEnum.indexPointer);
                for (int j = 1; j < indexDivisor; j++)
                {
                    if (!indexEnum.Next())
                    {
                        break;
                    }
                }
            }

            fields = new Term[fieldStrs.Count];
            for (int i = 0; i < fields.Length; i++)
            {
                fields[i] = new Term(fieldStrs[i]);
            }

            dataPagedBytes.Freeze(true);
            dataInput         = dataPagedBytes.GetDataInput();
            indexToDataOffset = indexToTerms.Mutable;

            ramBytesUsed = fields.Length * (RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.ShallowSizeOfInstance(typeof(Term))) + dataPagedBytes.RamBytesUsed() + indexToDataOffset.RamBytesUsed();
        }
 /// <summary>Returns approximate RAM bytes used.</summary>
 public long RamBytesUsed()
 {
     return(((termOffsets != null) ? termOffsets.RamBytesUsed() : 0) +
            ((termsDictOffsets != null) ? termsDictOffsets.RamBytesUsed() : 0));
 }