예제 #1
0
            internal TermsReader(FSTOrdTermsReader outerInstance, FieldInfo fieldInfo, IndexInput blockIn, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST <long> index)
            {
                this.outerInstance    = outerInstance;
                this.fieldInfo        = fieldInfo;
                this.numTerms         = numTerms;
                this.sumTotalTermFreq = sumTotalTermFreq;
                this.sumDocFreq       = sumDocFreq;
                this.docCount         = docCount;
                this.longsSize        = longsSize;
                this.index            = index;

                Debug.Assert((numTerms & (~0xffffffffL)) == 0);
                int numBlocks = (int)(numTerms + INTERVAL - 1) / INTERVAL;

                this.numSkipInfo    = longsSize + 3;
                this.skipInfo       = new long[numBlocks * numSkipInfo];
                this.statsBlock     = new sbyte[(int)blockIn.ReadVLong()];
                this.metaLongsBlock = new sbyte[(int)blockIn.ReadVLong()];
                this.metaBytesBlock = new sbyte[(int)blockIn.ReadVLong()];

                int last = 0, next = 0;

                for (int i = 1; i < numBlocks; i++)
                {
                    next = numSkipInfo * i;
                    for (int j = 0; j < numSkipInfo; j++)
                    {
                        skipInfo[next + j] = skipInfo[last + j] + blockIn.ReadVLong();
                    }
                    last = next;
                }
                blockIn.ReadBytes(statsBlock, 0, statsBlock.Length);
                blockIn.ReadBytes(metaLongsBlock, 0, metaLongsBlock.Length);
                blockIn.ReadBytes(metaBytesBlock, 0, metaBytesBlock.Length);
            }
예제 #2
0
        /// <summary>
        /// Increments the enumeration to the next element.  True if one exists. </summary>
        public bool Next()
        {
            PrevBuffer.Set(TermBuffer);
            //System.out.println("  ste setPrev=" + prev() + " this=" + this);

            if (Position++ >= Size - 1)
            {
                TermBuffer.Reset();
                //System.out.println("    EOF");
                return(false);
            }

            TermBuffer.Read(Input, FieldInfos);
            NewSuffixStart = TermBuffer.NewSuffixStart;

            TermInfo_Renamed.DocFreq      = Input.ReadVInt();  // read doc freq
            TermInfo_Renamed.FreqPointer += Input.ReadVLong(); // read freq pointer
            TermInfo_Renamed.ProxPointer += Input.ReadVLong(); // read prox pointer

            if (TermInfo_Renamed.DocFreq >= SkipInterval)
            {
                TermInfo_Renamed.SkipOffset = Input.ReadVInt();
            }

            if (IsIndex)
            {
                IndexPointer += Input.ReadVLong(); // read index pointer
            }

            //System.out.println("  ste ret term=" + term());
            return(true);
        }
예제 #3
0
        public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader)
        {
            string termsFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTTermsWriter.TERMS_EXTENSION);

            this.postingsReader = postingsReader;
            IndexInput @in = state.Directory.OpenInput(termsFileName, state.Context);

            bool success = false;

            try
            {
                version = ReadHeader(@in);
                if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM)
                {
                    CodecUtil.ChecksumEntireFile(@in);
                }
                this.postingsReader.Init(@in);
                SeekDir(@in);

                FieldInfos fieldInfos = state.FieldInfos;
                int        numFields  = @in.ReadVInt();
                for (int i = 0; i < numFields; i++)
                {
                    int         fieldNumber      = @in.ReadVInt();
                    FieldInfo   fieldInfo        = fieldInfos.FieldInfo(fieldNumber);
                    long        numTerms         = @in.ReadVLong();
                    long        sumTotalTermFreq = fieldInfo.FieldIndexOptions == IndexOptions.DOCS_ONLY ? -1 : @in.ReadVLong();
                    long        sumDocFreq       = @in.ReadVLong();
                    int         docCount         = @in.ReadVInt();
                    int         longsSize        = @in.ReadVInt();
                    TermsReader current          = new TermsReader(this, fieldInfo, @in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
                    TermsReader previous;
                    // LUCENENET NOTE: This simulates a put operation in Java,
                    // getting the prior value first before setting it.
                    fields.TryGetValue(fieldInfo.Name, out previous);
                    fields[fieldInfo.Name] = current;
                    CheckFieldSummary(state.SegmentInfo, @in, current, previous);
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(@in);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(@in);
                }
            }
        }
        public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor,
            String segmentSuffix, IOContext context)
        {
            _input =
                dir.OpenInput(
                    IndexFileNames.SegmentFileName(segment, segmentSuffix,
                        VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION), new IOContext(context, true));
            var success = false;

            Debug.Assert(indexDivisor == -1 || indexDivisor > 0);

            try
            {

                _version = ReadHeader(_input);
                _indexDivisor = indexDivisor;

                if (_version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM)
                    CodecUtil.ChecksumEntireFile(_input);
                
                SeekDir(_input, _dirOffset);

                // Read directory
                var numFields = _input.ReadVInt();
                if (numFields < 0)
                {
                    throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + _input + ")");
                }

                for (var i = 0; i < numFields; i++)
                {
                    var field = _input.ReadVInt();
                    var indexStart = _input.ReadVLong();
                    var fieldInfo = fieldInfos.FieldInfo(field);
                    
                    try
                    {
                        _fields.Add(fieldInfo, new FieldIndexData(indexStart, this));
                    }
                    catch (ArgumentException)
                    {
                        throw new CorruptIndexException(String.Format("Duplicate Field: {0}, Resource: {1}",
                            fieldInfo.Name, _input));
                    }
                }
                success = true;
            }
            finally
            {
                if (indexDivisor > 0)
                {
                    _input.Dispose();
                    _input = null;
                    if (success)
                    {
                        _indexLoaded = true;
                    }
                }
            }
        }
예제 #5
0
        internal static BinaryEntry ReadBinaryEntry(IndexInput meta)
        {
            BinaryEntry entry = new BinaryEntry();

            entry.Format        = meta.ReadVInt();
            entry.MissingOffset = meta.ReadLong();
            entry.MinLength     = meta.ReadVInt();
            entry.MaxLength     = meta.ReadVInt();
            entry.Count         = meta.ReadVLong();
            entry.Offset        = meta.ReadLong();
            switch (entry.Format)
            {
            case Lucene45DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED:
                break;

            case Lucene45DocValuesConsumer.BINARY_PREFIX_COMPRESSED:
                entry.AddressInterval   = meta.ReadVInt();
                entry.AddressesOffset   = meta.ReadLong();
                entry.PackedIntsVersion = meta.ReadVInt();
                entry.BlockSize         = meta.ReadVInt();
                break;

            case Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED:
                entry.AddressesOffset   = meta.ReadLong();
                entry.PackedIntsVersion = meta.ReadVInt();
                entry.BlockSize         = meta.ReadVInt();
                break;

            default:
                throw new Exception("Unknown format: " + entry.Format + ", input=" + meta);
            }
            return(entry);
        }
예제 #6
0
            public TermsReader(FieldInfos fieldInfos, IndexInput @in, int termCount)
            {
                this.termCount = termCount;
                int fieldNumber = @in.ReadVInt();

                field = fieldInfos.FieldInfo(fieldNumber);
                if (field.FieldIndexOptions != IndexOptions.DOCS_ONLY)
                {
                    sumTotalTermFreq = @in.ReadVLong();
                }
                else
                {
                    sumTotalTermFreq = -1;
                }
                sumDocFreq = @in.ReadVLong();
                docCount   = @in.ReadVInt();

                fst = new FST <BytesRef>(@in, outputs);
            }
예제 #7
0
        /// <summary>Increments the enumeration to the next element.  True if one exists.</summary>
        public override bool Next()
        {
            if (position++ >= size - 1)
            {
                prevBuffer.Set(termBuffer);
                termBuffer.Reset();
                return(false);
            }

            prevBuffer.Set(termBuffer);
            termBuffer.Read(input, fieldInfos);

            termInfo.docFreq      = input.ReadVInt();        // read doc freq
            termInfo.freqPointer += input.ReadVLong();       // read freq pointer
            termInfo.proxPointer += input.ReadVLong();       // read prox pointer

            if (format == -1)
            {
                //  just read skipOffset in order to increment  file pointer;
                // value is never used since skipTo is switched off
                if (!isIndex)
                {
                    if (termInfo.docFreq > formatM1SkipInterval)
                    {
                        termInfo.skipOffset = input.ReadVInt();
                    }
                }
            }
            else
            {
                if (termInfo.docFreq >= skipInterval)
                {
                    termInfo.skipOffset = input.ReadVInt();
                }
            }

            if (isIndex)
            {
                indexPointer += input.ReadVLong();                 // read index pointer
            }
            return(true);
        }
예제 #8
0
        internal static NumericEntry ReadNumericEntry(IndexInput meta)
        {
            NumericEntry entry = new NumericEntry();

            entry.Format            = meta.ReadVInt();
            entry.MissingOffset     = meta.ReadLong();
            entry.PackedIntsVersion = meta.ReadVInt();
            entry.Offset            = meta.ReadLong();
            entry.Count             = meta.ReadVLong();
            entry.BlockSize         = meta.ReadVInt();
            switch (entry.Format)
            {
            case Lucene45DocValuesConsumer.GCD_COMPRESSED:
                entry.MinValue = meta.ReadLong();
                entry.Gcd      = meta.ReadLong();
                break;

            case Lucene45DocValuesConsumer.TABLE_COMPRESSED:
                if (entry.Count > int.MaxValue)
                {
                    throw new Exception("Cannot use TABLE_COMPRESSED with more than MAX_VALUE values, input=" + meta);
                }
                int uniqueValues = meta.ReadVInt();
                if (uniqueValues > 256)
                {
                    throw new Exception("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + meta);
                }
                entry.Table = new long[uniqueValues];
                for (int i = 0; i < uniqueValues; ++i)
                {
                    entry.Table[i] = meta.ReadLong();
                }
                break;

            case Lucene45DocValuesConsumer.DELTA_COMPRESSED:
                break;

            default:
                throw new Exception("Unknown format: " + entry.Format + ", input=" + meta);
            }
            return(entry);
        }
예제 #9
0
        /// <summary>
        /// Sole constructor. </summary>
        public MonotonicBlockPackedReader(IndexInput @in, int packedIntsVersion, int blockSize, long valueCount, bool direct)
        {
            this.ValueCount = valueCount;
            BlockShift      = PackedInts.CheckBlockSize(blockSize, AbstractBlockPackedWriter.MIN_BLOCK_SIZE, AbstractBlockPackedWriter.MAX_BLOCK_SIZE);
            BlockMask       = blockSize - 1;
            int numBlocks = PackedInts.NumBlocks(valueCount, blockSize);

            MinValues  = new long[numBlocks];
            Averages   = new float[numBlocks];
            SubReaders = new PackedInts.Reader[numBlocks];
            for (int i = 0; i < numBlocks; ++i)
            {
                MinValues[i] = @in.ReadVLong();
                Averages[i]  = Number.IntBitsToFloat(@in.ReadInt());
                int bitsPerValue = @in.ReadVInt();
                if (bitsPerValue > 64)
                {
                    throw new Exception("Corrupted");
                }
                if (bitsPerValue == 0)
                {
                    SubReaders[i] = new PackedInts.NullReader(blockSize);
                }
                else
                {
                    int size = (int)Math.Min(blockSize, valueCount - (long)i * blockSize);
                    if (direct)
                    {
                        long pointer = @in.FilePointer;
                        SubReaders[i] = PackedInts.GetDirectReaderNoHeader(@in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue);
                        @in.Seek(pointer + PackedInts.Format.PACKED.ByteCount(packedIntsVersion, size, bitsPerValue));
                    }
                    else
                    {
                        SubReaders[i] = PackedInts.GetReaderNoHeader(@in, PackedInts.Format.PACKED, packedIntsVersion, size, bitsPerValue);
                    }
                }
            }
        }
        private BinaryDocValues LoadBytesVarStraight(FieldInfo field)
        {
            string     dataName  = IndexFileNames.SegmentFileName(State.SegmentInfo.Name + "_" + Convert.ToString(field.Number), SegmentSuffix, "dat");
            string     indexName = IndexFileNames.SegmentFileName(State.SegmentInfo.Name + "_" + Convert.ToString(field.Number), SegmentSuffix, "idx");
            IndexInput data      = null;
            IndexInput index     = null;
            bool       success   = false;

            try
            {
                data = Dir.OpenInput(dataName, State.Context);
                CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);
                index = Dir.OpenInput(indexName, State.Context);
                CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);
                long       totalBytes = index.ReadVLong();
                PagedBytes bytes      = new PagedBytes(16);
                bytes.Copy(data, totalBytes);
                PagedBytes.Reader bytesReader = bytes.Freeze(true);
                PackedInts.Reader reader      = PackedInts.GetReader(index);
                CodecUtil.CheckEOF(data);
                CodecUtil.CheckEOF(index);
                success = true;
                RamBytesUsed_Renamed.AddAndGet(bytes.RamBytesUsed() + reader.RamBytesUsed());
                return(new BinaryDocValuesAnonymousInnerClassHelper2(this, bytesReader, reader));
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(data, index);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(data, index);
                }
            }
        }
예제 #11
0
        //static final boolean TEST = false;

        public FSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader)
        {
            string termsIndexFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTOrdTermsWriter.TERMS_INDEX_EXTENSION);
            string termsBlockFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTOrdTermsWriter.TERMS_BLOCK_EXTENSION);

            this.postingsReader = postingsReader;
            ChecksumIndexInput indexIn = null;
            IndexInput         blockIn = null;
            bool success = false;

            try
            {
                indexIn = state.Directory.OpenChecksumInput(termsIndexFileName, state.Context);
                blockIn = state.Directory.OpenInput(termsBlockFileName, state.Context);
                version = ReadHeader(indexIn);
                ReadHeader(blockIn);
                if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM)
                {
                    CodecUtil.ChecksumEntireFile(blockIn);
                }

                this.postingsReader.Init(blockIn);
                SeekDir(blockIn);

                FieldInfos fieldInfos = state.FieldInfos;
                int        numFields  = blockIn.ReadVInt();
                for (int i = 0; i < numFields; i++)
                {
                    FieldInfo fieldInfo        = fieldInfos.FieldInfo(blockIn.ReadVInt());
                    bool      hasFreq          = fieldInfo.IndexOptions != FieldInfo.IndexOptions.DOCS_ONLY;
                    long      numTerms         = blockIn.ReadVLong();
                    long      sumTotalTermFreq = hasFreq ? blockIn.ReadVLong() : -1;
                    long      sumDocFreq       = blockIn.ReadVLong();
                    int       docCount         = blockIn.ReadVInt();
                    int       longsSize        = blockIn.ReadVInt();
                    var       index            = new FST <long>(indexIn, PositiveIntOutputs.Singleton);

                    var current  = new TermsReader(fieldInfo, blockIn, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index);
                    var previous = fields[fieldInfo.Name] = current;
                    CheckFieldSummary(state.SegmentInfo, indexIn, blockIn, current, previous);
                }
                if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM)
                {
                    CodecUtil.CheckFooter(indexIn);
                }
                else
                {
                    CodecUtil.CheckEOF(indexIn);
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(indexIn, blockIn);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(indexIn, blockIn);
                }
            }
        }
예제 #12
0
        /// <summary>
        /// Sole constructor. </summary>
        public BlockTreeTermsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo info, PostingsReaderBase postingsReader, IOContext ioContext, string segmentSuffix, int indexDivisor)
        {
            if (!InstanceFieldsInitialized)
            {
                InitializeInstanceFields();
                InstanceFieldsInitialized = true;
            }

            this.PostingsReader = postingsReader;

            this.Segment = info.Name;
            @in = dir.OpenInput(IndexFileNames.SegmentFileName(Segment, segmentSuffix, BlockTreeTermsWriter.TERMS_EXTENSION), ioContext);

            bool success = false;
            IndexInput indexIn = null;

            try
            {
                Version = ReadHeader(@in);
                if (indexDivisor != -1)
                {
                    indexIn = dir.OpenInput(IndexFileNames.SegmentFileName(Segment, segmentSuffix, BlockTreeTermsWriter.TERMS_INDEX_EXTENSION), ioContext);
                    int indexVersion = ReadIndexHeader(indexIn);
                    if (indexVersion != Version)
                    {
                        throw new CorruptIndexException("mixmatched version files: " + @in + "=" + Version + "," + indexIn + "=" + indexVersion);
                    }
                }

                // verify
                if (indexIn != null && Version >= BlockTreeTermsWriter.VERSION_CHECKSUM)
                {
                    CodecUtil.ChecksumEntireFile(indexIn);
                }

                // Have PostingsReader init itself
                postingsReader.Init(@in);

                // Read per-field details
                SeekDir(@in, DirOffset);
                if (indexDivisor != -1)
                {
                    SeekDir(indexIn, IndexDirOffset);
                }

                int numFields = @in.ReadVInt();
                if (numFields < 0)
                {
                    throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + @in + ")");
                }

                for (int i = 0; i < numFields; i++)
                {
                    int field = @in.ReadVInt();
                    long numTerms = @in.ReadVLong();
                    Debug.Assert(numTerms >= 0);
                    int numBytes = @in.ReadVInt();
                    BytesRef rootCode = new BytesRef(new byte[numBytes]);
                    @in.ReadBytes(rootCode.Bytes, 0, numBytes);
                    rootCode.Length = numBytes;
                    FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                    Debug.Assert(fieldInfo != null, "field=" + field);
                    long sumTotalTermFreq = fieldInfo.FieldIndexOptions == FieldInfo.IndexOptions.DOCS_ONLY ? -1 : @in.ReadVLong();
                    long sumDocFreq = @in.ReadVLong();
                    int docCount = @in.ReadVInt();
                    int longsSize = Version >= BlockTreeTermsWriter.VERSION_META_ARRAY ? @in.ReadVInt() : 0;
                    if (docCount < 0 || docCount > info.DocCount) // #docs with field must be <= #docs
                    {
                        throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.DocCount + " (resource=" + @in + ")");
                    }
                    if (sumDocFreq < docCount) // #postings must be >= #docs with field
                    {
                        throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount + " (resource=" + @in + ")");
                    }
                    if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) // #positions must be >= #postings
                    {
                        throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + @in + ")");
                    }
                    long indexStartFP = indexDivisor != -1 ? indexIn.ReadVLong() : 0;

                    if (Fields.ContainsKey(fieldInfo.Name))
                    {
                        throw new CorruptIndexException("duplicate field: " + fieldInfo.Name + " (resource=" + @in + ")");
                    }
                    else
                    {
                        Fields[fieldInfo.Name] = new FieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, longsSize, indexIn);
                    }
                }
                if (indexDivisor != -1)
                {
                    indexIn.Dispose();
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    // this.close() will close in:
                    IOUtils.CloseWhileHandlingException(indexIn, this);
                }
            }
        }
예제 #13
0
        public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, SegmentInfo info,
            PostingsReaderBase postingsReader, IOContext context,
            String segmentSuffix)
        {
            _postingsReader = postingsReader;

            _input =
                dir.OpenInput(
                    IndexFileNames.SegmentFileName(info.Name, segmentSuffix, BlockTermsWriter.TERMS_EXTENSION),
                    context);

            var success = false;
            try
            {
                _version = ReadHeader(_input);

                // Have PostingsReader init itself
                postingsReader.Init(_input);

                // Read per-field details
                SeekDir(_input, _dirOffset);

                int numFields = _input.ReadVInt();
                if (numFields < 0)
                {
                    throw new CorruptIndexException(String.Format("Invalid number of fields: {0}, Resource: {1}",
                        numFields, _input));
                }

                for (var i = 0; i < numFields; i++)
                {
                    var field = _input.ReadVInt();
                    var numTerms = _input.ReadVLong();

                    Debug.Assert(numTerms >= 0);

                    var termsStartPointer = _input.ReadVLong();
                    var fieldInfo = fieldInfos.FieldInfo(field);
                    var sumTotalTermFreq = fieldInfo.FieldIndexOptions == FieldInfo.IndexOptions.DOCS_ONLY
                        ? -1
                        : _input.ReadVLong();
                    var sumDocFreq = _input.ReadVLong();
                    var docCount = _input.ReadVInt();
                    var longsSize = _version >= BlockTermsWriter.VERSION_META_ARRAY ? _input.ReadVInt() : 0;

                    if (docCount < 0 || docCount > info.DocCount)
                    {
                        // #docs with field must be <= #docs
                        throw new CorruptIndexException(
                            String.Format("Invalid DocCount: {0}, MaxDoc: {1}, Resource: {2}", docCount, info.DocCount,
                                _input));
                    }

                    if (sumDocFreq < docCount)
                    {
                        // #postings must be >= #docs with field
                        throw new CorruptIndexException(
                            String.Format("Invalid sumDocFreq: {0}, DocCount: {1}, Resource: {2}", sumDocFreq, docCount,
                                _input));
                    }

                    if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq)
                    {
                        // #positions must be >= #postings
                        throw new CorruptIndexException(
                            String.Format("Invalid sumTotalTermFreq: {0}, sumDocFreq: {1}, Resource: {2}",
                                sumTotalTermFreq, sumDocFreq, _input));
                    }

                    try
                    {
                        _fields.Add(fieldInfo.Name,
                            new FieldReader(fieldInfo, this, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq,
                                docCount,
                                longsSize));
                    }
                    catch (ArgumentException)
                    {
                        throw new CorruptIndexException(String.Format("Duplicate fields: {0}, Resource: {1}",
                            fieldInfo.Name, _input));
                    }

                }
                success = true;
            }
            finally
            {
                if (!success)
                {
                    _input.Dispose();
                }
            }

            _indexReader = indexReader;
        }
예제 #14
0
        public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, SegmentInfo info,
                                PostingsReaderBase postingsReader, IOContext context,
                                String segmentSuffix)
        {
            _postingsReader = postingsReader;

            _input =
                dir.OpenInput(
                    IndexFileNames.SegmentFileName(info.Name, segmentSuffix, BlockTermsWriter.TERMS_EXTENSION),
                    context);

            var success = false;

            try
            {
                _version = ReadHeader(_input);

                // Have PostingsReader init itself
                postingsReader.Init(_input);

                // Read per-field details
                SeekDir(_input, _dirOffset);

                int numFields = _input.ReadVInt();
                if (numFields < 0)
                {
                    throw new CorruptIndexException(String.Format("Invalid number of fields: {0}, Resource: {1}",
                                                                  numFields, _input));
                }

                for (var i = 0; i < numFields; i++)
                {
                    var field    = _input.ReadVInt();
                    var numTerms = _input.ReadVLong();

                    Debug.Assert(numTerms >= 0);

                    var termsStartPointer = _input.ReadVLong();
                    var fieldInfo         = fieldInfos.FieldInfo(field);
                    var sumTotalTermFreq  = fieldInfo.FieldIndexOptions == FieldInfo.IndexOptions.DOCS_ONLY
                        ? -1
                        : _input.ReadVLong();
                    var sumDocFreq = _input.ReadVLong();
                    var docCount   = _input.ReadVInt();
                    var longsSize  = _version >= BlockTermsWriter.VERSION_META_ARRAY ? _input.ReadVInt() : 0;

                    if (docCount < 0 || docCount > info.DocCount)
                    {
                        // #docs with field must be <= #docs
                        throw new CorruptIndexException(
                                  String.Format("Invalid DocCount: {0}, MaxDoc: {1}, Resource: {2}", docCount, info.DocCount,
                                                _input));
                    }

                    if (sumDocFreq < docCount)
                    {
                        // #postings must be >= #docs with field
                        throw new CorruptIndexException(
                                  String.Format("Invalid sumDocFreq: {0}, DocCount: {1}, Resource: {2}", sumDocFreq, docCount,
                                                _input));
                    }

                    if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq)
                    {
                        // #positions must be >= #postings
                        throw new CorruptIndexException(
                                  String.Format("Invalid sumTotalTermFreq: {0}, sumDocFreq: {1}, Resource: {2}",
                                                sumTotalTermFreq, sumDocFreq, _input));
                    }

                    try
                    {
                        _fields.Add(fieldInfo.Name,
                                    new FieldReader(fieldInfo, this, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq,
                                                    docCount,
                                                    longsSize));
                    }
                    catch (ArgumentException)
                    {
                        throw new CorruptIndexException(String.Format("Duplicate fields: {0}, Resource: {1}",
                                                                      fieldInfo.Name, _input));
                    }
                }
                success = true;
            }
            finally
            {
                if (!success)
                {
                    _input.Dispose();
                }
            }

            _indexReader = indexReader;
        }
예제 #15
0
        public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor,
                                           String segmentSuffix, IOContext context)
        {
            _input =
                dir.OpenInput(
                    IndexFileNames.SegmentFileName(segment, segmentSuffix,
                                                   VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION), new IOContext(context, true));
            var success = false;

            Debug.Assert(indexDivisor == -1 || indexDivisor > 0);

            try
            {
                _version      = ReadHeader(_input);
                _indexDivisor = indexDivisor;

                if (_version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM)
                {
                    CodecUtil.ChecksumEntireFile(_input);
                }

                SeekDir(_input, _dirOffset);

                // Read directory
                var numFields = _input.ReadVInt();
                if (numFields < 0)
                {
                    throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + _input + ")");
                }

                for (var i = 0; i < numFields; i++)
                {
                    var field      = _input.ReadVInt();
                    var indexStart = _input.ReadVLong();
                    var fieldInfo  = fieldInfos.FieldInfo(field);

                    try
                    {
                        _fields.Add(fieldInfo, new FieldIndexData(indexStart, this));
                    }
                    catch (ArgumentException)
                    {
                        throw new CorruptIndexException(String.Format("Duplicate Field: {0}, Resource: {1}",
                                                                      fieldInfo.Name, _input));
                    }
                }
                success = true;
            }
            finally
            {
                if (indexDivisor > 0)
                {
                    _input.Dispose();
                    _input = null;
                    if (success)
                    {
                        _indexLoaded = true;
                    }
                }
            }
        }
예제 #16
0
        private void ReadFields(IndexInput meta, FieldInfos infos)
        {
            int fieldNumber = meta.ReadVInt();

            while (fieldNumber != -1)
            {
                // check should be: infos.fieldInfo(fieldNumber) != null, which incorporates negative check
                // but docvalues updates are currently buggy here (loading extra stuff, etc): LUCENE-5616
                if (fieldNumber < 0)
                {
                    // trickier to validate more: because we re-use for norms, because we use multiple entries
                    // for "composite" types like sortedset, etc.
                    throw new CorruptIndexException("Invalid field number: " + fieldNumber + ", input=" + meta);
                }
                int fieldType = meta.ReadByte();
                if (fieldType == NUMBER)
                {
                    var entry = new NumericEntry {
                        Offset = meta.ReadLong(), Format = (sbyte)meta.ReadByte()
                    };
                    switch (entry.Format)
                    {
                    case DELTA_COMPRESSED:
                    case TABLE_COMPRESSED:
                    case GCD_COMPRESSED:
                    case UNCOMPRESSED:
                        break;

                    default:
                        throw new CorruptIndexException("Unknown format: " + entry.Format + ", input=" + meta);
                    }
                    if (entry.Format != UNCOMPRESSED)
                    {
                        entry.PackedIntsVersion = meta.ReadVInt();
                    }
                    Numerics[fieldNumber] = entry;
                }
                else if (fieldType == BYTES)
                {
                    BinaryEntry entry = new BinaryEntry();
                    entry.Offset    = meta.ReadLong();
                    entry.NumBytes  = meta.ReadLong();
                    entry.MinLength = meta.ReadVInt();
                    entry.MaxLength = meta.ReadVInt();
                    if (entry.MinLength != entry.MaxLength)
                    {
                        entry.PackedIntsVersion = meta.ReadVInt();
                        entry.BlockSize         = meta.ReadVInt();
                    }
                    Binaries[fieldNumber] = entry;
                }
                else if (fieldType == FST)
                {
                    FSTEntry entry = new FSTEntry();
                    entry.Offset      = meta.ReadLong();
                    entry.NumOrds     = meta.ReadVLong();
                    Fsts[fieldNumber] = entry;
                }
                else
                {
                    throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
                }
                fieldNumber = meta.ReadVInt();
            }
        }
예제 #17
0
            public TermsReader(FieldInfos fieldInfos, IndexInput @in, int termCount)
            {
                this.termCount = termCount;
                int fieldNumber = @in.ReadVInt();
                field = fieldInfos.FieldInfo(fieldNumber);
                if (field.FieldIndexOptions != IndexOptions.DOCS_ONLY)
                {
                    sumTotalTermFreq = @in.ReadVLong();
                }
                else
                {
                    sumTotalTermFreq = -1;
                }
                sumDocFreq = @in.ReadVLong();
                docCount = @in.ReadVInt();

                fst = new FST<BytesRef>(@in, outputs);
            }
        internal readonly PackedInts.Reader[] StartPointersDeltas; // delta from the avg

        // It is the responsibility of the caller to close fieldsIndexIn after this constructor
        // has been called
        internal CompressingStoredFieldsIndexReader(IndexInput fieldsIndexIn, SegmentInfo si)
        {
            MaxDoc = si.DocCount;
            int[]  docBases      = new int[16];
            long[] startPointers = new long[16];
            int[]  avgChunkDocs  = new int[16];
            long[] avgChunkSizes = new long[16];
            PackedInts.Reader[] docBasesDeltas      = new PackedInts.Reader[16];
            PackedInts.Reader[] startPointersDeltas = new PackedInts.Reader[16];

            int packedIntsVersion = fieldsIndexIn.ReadVInt();

            int blockCount = 0;

            for (; ;)
            {
                int numChunks = fieldsIndexIn.ReadVInt();
                if (numChunks == 0)
                {
                    break;
                }
                if (blockCount == docBases.Length)
                {
                    int newSize = ArrayUtil.Oversize(blockCount + 1, 8);
                    docBases            = Arrays.CopyOf(docBases, newSize);
                    startPointers       = Arrays.CopyOf(startPointers, newSize);
                    avgChunkDocs        = Arrays.CopyOf(avgChunkDocs, newSize);
                    avgChunkSizes       = Arrays.CopyOf(avgChunkSizes, newSize);
                    docBasesDeltas      = Arrays.CopyOf(docBasesDeltas, newSize);
                    startPointersDeltas = Arrays.CopyOf(startPointersDeltas, newSize);
                }

                // doc bases
                docBases[blockCount]     = fieldsIndexIn.ReadVInt();
                avgChunkDocs[blockCount] = fieldsIndexIn.ReadVInt();
                int bitsPerDocBase = fieldsIndexIn.ReadVInt();
                if (bitsPerDocBase > 32)
                {
                    throw new CorruptIndexException("Corrupted bitsPerDocBase (resource=" + fieldsIndexIn + ")");
                }
                docBasesDeltas[blockCount] = PackedInts.GetReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerDocBase);

                // start pointers
                startPointers[blockCount] = fieldsIndexIn.ReadVLong();
                avgChunkSizes[blockCount] = fieldsIndexIn.ReadVLong();
                int bitsPerStartPointer = fieldsIndexIn.ReadVInt();
                if (bitsPerStartPointer > 64)
                {
                    throw new CorruptIndexException("Corrupted bitsPerStartPointer (resource=" + fieldsIndexIn + ")");
                }
                startPointersDeltas[blockCount] = PackedInts.GetReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerStartPointer);

                ++blockCount;
            }

            this.DocBases            = Arrays.CopyOf(docBases, blockCount);
            this.StartPointers       = Arrays.CopyOf(startPointers, blockCount);
            this.AvgChunkDocs        = Arrays.CopyOf(avgChunkDocs, blockCount);
            this.AvgChunkSizes       = Arrays.CopyOf(avgChunkSizes, blockCount);
            this.DocBasesDeltas      = Arrays.CopyOf(docBasesDeltas, blockCount);
            this.StartPointersDeltas = Arrays.CopyOf(startPointersDeltas, blockCount);
        }
        /// <summary>
        /// Reads the snapshots information from the given <seealso cref="Directory"/>. this
        /// method can be used if the snapshots information is needed, however you
        /// cannot instantiate the deletion policy (because e.g., some other process
        /// keeps a lock on the snapshots directory).
        /// </summary>
        private void LoadPriorSnapshots()
        {
            lock (this)
            {
                long           genLoaded     = -1;
                IOException    ioe           = null;
                IList <string> snapshotFiles = new List <string>();
                foreach (string file in Dir.ListAll())
                {
                    if (file.StartsWith(SNAPSHOTS_PREFIX))
                    {
                        long gen = Convert.ToInt64(file.Substring(SNAPSHOTS_PREFIX.Length));
                        if (genLoaded == -1 || gen > genLoaded)
                        {
                            snapshotFiles.Add(file);
                            IDictionary <long, int> m = new Dictionary <long, int>();
                            IndexInput @in            = Dir.OpenInput(file, IOContext.DEFAULT);
                            try
                            {
                                CodecUtil.CheckHeader(@in, CODEC_NAME, VERSION_START, VERSION_START);
                                int count = @in.ReadVInt();
                                for (int i = 0; i < count; i++)
                                {
                                    long commitGen = @in.ReadVLong();
                                    int  refCount  = @in.ReadVInt();
                                    m[commitGen] = refCount;
                                }
                            }
                            catch (IOException ioe2)
                            {
                                // Save first exception & throw in the end
                                if (ioe == null)
                                {
                                    ioe = ioe2;
                                }
                            }
                            finally
                            {
                                @in.Dispose();
                            }

                            genLoaded = gen;
                            RefCounts.Clear();
                            RefCounts.PutAll(m);
                        }
                    }
                }

                if (genLoaded == -1)
                {
                    // Nothing was loaded...
                    if (ioe != null)
                    {
                        // ... not for lack of trying:
                        throw ioe;
                    }
                }
                else
                {
                    if (snapshotFiles.Count > 1)
                    {
                        // Remove any broken / old snapshot files:
                        string curFileName = SNAPSHOTS_PREFIX + genLoaded;
                        foreach (string file in snapshotFiles)
                        {
                            if (!curFileName.Equals(file))
                            {
                                Dir.DeleteFile(file);
                            }
                        }
                    }
                    NextWriteGen = 1 + genLoaded;
                }
            }
        }
예제 #20
0
        private void ReadFields(IndexInput meta, FieldInfos infos)
        {
            int fieldNumber = meta.ReadVInt();

            while (fieldNumber != -1)
            {
                int fieldType = meta.ReadByte();
                if (fieldType == NUMBER)
                {
                    var entry = new NumericEntry {
                        offset = meta.ReadLong(), missingOffset = meta.ReadLong()
                    };
                    if (entry.missingOffset != -1)
                    {
                        entry.missingBytes = meta.ReadLong();
                    }
                    else
                    {
                        entry.missingBytes = 0;
                    }
                    entry.format = meta.ReadByte();
                    switch (entry.format)
                    {
                    case DELTA_COMPRESSED:
                    case TABLE_COMPRESSED:
                    case GCD_COMPRESSED:
                    case UNCOMPRESSED:
                        break;

                    default:
                        throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta);
                    }
                    if (entry.format != UNCOMPRESSED)
                    {
                        entry.packedIntsVersion = meta.ReadVInt();
                    }
                    numerics[fieldNumber] = entry;
                }
                else if (fieldType == BYTES)
                {
                    var entry = new BinaryEntry
                    {
                        offset        = meta.ReadLong(),
                        numBytes      = meta.ReadLong(),
                        missingOffset = meta.ReadLong()
                    };
                    if (entry.missingOffset != -1)
                    {
                        entry.missingBytes = meta.ReadLong();
                    }
                    else
                    {
                        entry.missingBytes = 0;
                    }
                    entry.minLength = meta.ReadVInt();
                    entry.maxLength = meta.ReadVInt();
                    if (entry.minLength != entry.maxLength)
                    {
                        entry.packedIntsVersion = meta.ReadVInt();
                        entry.blockSize         = meta.ReadVInt();
                    }
                    binaries[fieldNumber] = entry;
                }
                else if (fieldType == FST)
                {
                    var entry = new FSTEntry {
                        offset = meta.ReadLong(), numOrds = meta.ReadVLong()
                    };
                    fsts[fieldNumber] = entry;
                }
                else
                {
                    throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
                }
                fieldNumber = meta.ReadVInt();
            }
        }
예제 #21
0
        public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor,
                                        IComparer <BytesRef> termComp, String segmentSuffix, IOContext context)
        {
            _termComp = termComp;

            Debug.Assert(indexDivisor == -1 || indexDivisor > 0);

            _input =
                dir.OpenInput(
                    IndexFileNames.SegmentFileName(segment, segmentSuffix,
                                                   FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION),
                    context);

            var success = false;

            try
            {
                _version = ReadHeader(_input);

                if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM)
                {
                    CodecUtil.ChecksumEntireFile(_input);
                }

                indexInterval = _input.ReadInt();

                if (indexInterval < 1)
                {
                    throw new CorruptIndexException(String.Format("Invalid indexInterval: {0}, Resource: {1}",
                                                                  indexInterval, _input));
                }

                _indexDivisor = indexDivisor;

                if (indexDivisor < 0)
                {
                    _totalIndexInterval = indexInterval;
                }
                else
                {
                    // In case terms index gets loaded, later, on demand
                    _totalIndexInterval = indexInterval * indexDivisor;
                }

                Debug.Assert(_totalIndexInterval > 0);

                SeekDir(_input, _dirOffset);

                // Read directory
                int numFields = _input.ReadVInt();

                if (numFields < 0)
                {
                    throw new CorruptIndexException(String.Format("Invalid numFields: {0}, Resource: {1}", numFields,
                                                                  _input));
                }

                for (int i = 0; i < numFields; i++)
                {
                    int field         = _input.ReadVInt();
                    int numIndexTerms = _input.ReadVInt();
                    if (numIndexTerms < 0)
                    {
                        throw new CorruptIndexException(String.Format("Invalid numIndexTerms: {0}, Resource: {1}",
                                                                      numIndexTerms,
                                                                      _input));
                    }

                    long termsStart         = _input.ReadVLong();
                    long indexStart         = _input.ReadVLong();
                    long packedIndexStart   = _input.ReadVLong();
                    long packedOffsetsStart = _input.ReadVLong();

                    if (packedIndexStart < indexStart)
                    {
                        throw new CorruptIndexException(
                                  String.Format(
                                      "Invalid packedIndexStart: {0}, IndexStart: {1}, NumIndexTerms: {2}, Resource: {3}",
                                      packedIndexStart,
                                      indexStart, numIndexTerms, _input));
                    }

                    FieldInfo fieldInfo = fieldInfos.FieldInfo(field);

                    try
                    {
                        _fields.Add(fieldInfo,
                                    new FieldIndexData(numIndexTerms, indexStart, termsStart, packedIndexStart,
                                                       packedOffsetsStart, this));
                    }
                    catch (ArgumentException)
                    {
                        throw new CorruptIndexException(String.Format("Duplicate field: {0}, Resource {1}",
                                                                      fieldInfo.Name,
                                                                      _input));
                    }
                }
                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(_input);
                }
                if (indexDivisor > 0)
                {
                    _input.Dispose();
                    _input = null;
                    if (success)
                    {
                        _indexLoaded = true;
                    }

                    _termBytesReader = _termBytes.Freeze(true);
                }
            }
        }
        public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor,
            IComparer<BytesRef> termComp, String segmentSuffix, IOContext context)
        {
            _termComp = termComp;

            Debug.Assert(indexDivisor == -1 || indexDivisor > 0);

            _input =
                dir.OpenInput(
                    IndexFileNames.SegmentFileName(segment, segmentSuffix,
                        FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION),
                    context);

            var success = false;

            try
            {

                _version = ReadHeader(_input);

                if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM)
                    CodecUtil.ChecksumEntireFile(_input);
                
                indexInterval = _input.ReadInt();
                
                if (indexInterval < 1)
                {
                    throw new CorruptIndexException(String.Format("Invalid indexInterval: {0}, Resource: {1}",
                        indexInterval, _input));
                }

                _indexDivisor = indexDivisor;

                if (indexDivisor < 0)
                {
                    _totalIndexInterval = indexInterval;
                }
                else
                {
                    // In case terms index gets loaded, later, on demand
                    _totalIndexInterval = indexInterval*indexDivisor;
                }

                Debug.Assert(_totalIndexInterval > 0);

                SeekDir(_input, _dirOffset);

                // Read directory
                int numFields = _input.ReadVInt();

                if (numFields < 0)
                    throw new CorruptIndexException(String.Format("Invalid numFields: {0}, Resource: {1}", numFields,
                        _input));

                for (int i = 0; i < numFields; i++)
                {
                    int field = _input.ReadVInt();
                    int numIndexTerms = _input.ReadVInt();
                    if (numIndexTerms < 0)
                        throw new CorruptIndexException(String.Format("Invalid numIndexTerms: {0}, Resource: {1}",
                            numIndexTerms,
                            _input));

                    long termsStart = _input.ReadVLong();
                    long indexStart = _input.ReadVLong();
                    long packedIndexStart = _input.ReadVLong();
                    long packedOffsetsStart = _input.ReadVLong();

                    if (packedIndexStart < indexStart)
                        throw new CorruptIndexException(
                            String.Format(
                                "Invalid packedIndexStart: {0}, IndexStart: {1}, NumIndexTerms: {2}, Resource: {3}",
                                packedIndexStart,
                                indexStart, numIndexTerms, _input));

                    FieldInfo fieldInfo = fieldInfos.FieldInfo(field);

                    try
                    {
                        _fields.Add(fieldInfo,
                            new FieldIndexData(numIndexTerms, indexStart, termsStart, packedIndexStart,
                                packedOffsetsStart, this));
                    }
                    catch (ArgumentException)
                    {
                        throw new CorruptIndexException(String.Format("Duplicate field: {0}, Resource {1}",
                            fieldInfo.Name,
                            _input));
                    }


                }
                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(_input);
                }
                if (indexDivisor > 0)
                {
                    _input.Dispose();
                    _input = null;
                    if (success)
                        _indexLoaded = true;

                    _termBytesReader = _termBytes.Freeze(true);
                }
            }
        }
예제 #23
0
        public virtual void  Get(int docNum, System.String field, TermVectorMapper mapper)
        {
            if (tvx != null)
            {
                int fieldNumber = fieldInfos.FieldNumber(field);
                //We need to account for the FORMAT_SIZE at when seeking in the tvx
                //We don't need to do this in other seeks because we already have the
                // file pointer
                //that was written in another file
                SeekTvx(docNum);
                //System.out.println("TVX Pointer: " + tvx.getFilePointer());
                long tvdPosition = tvx.ReadLong();

                tvd.Seek(tvdPosition);
                int fieldCount = tvd.ReadVInt();
                //System.out.println("Num Fields: " + fieldCount);
                // There are only a few fields per document. We opt for a full scan
                // rather then requiring that they be ordered. We need to read through
                // all of the fields anyway to get to the tvf pointers.
                int number = 0;
                int found  = -1;
                for (int i = 0; i < fieldCount; i++)
                {
                    if (format >= FORMAT_VERSION)
                    {
                        number = tvd.ReadVInt();
                    }
                    else
                    {
                        number += tvd.ReadVInt();
                    }

                    if (number == fieldNumber)
                    {
                        found = i;
                    }
                }

                // This field, although valid in the segment, was not found in this
                // document
                if (found != -1)
                {
                    // Compute position in the tvf file
                    long position;
                    if (format >= FORMAT_VERSION2)
                    {
                        position = tvx.ReadLong();
                    }
                    else
                    {
                        position = tvd.ReadVLong();
                    }
                    for (int i = 1; i <= found; i++)
                    {
                        position += tvd.ReadVLong();
                    }

                    mapper.SetDocumentNumber(docNum);
                    ReadTermVector(field, position, mapper);
                }
                else
                {
                    //System.out.println("Fieldable not found");
                }
            }
            else
            {
                //System.out.println("No tvx file");
            }
        }