Example #1
        internal static BinaryEntry ReadBinaryEntry(IndexInput meta)
        {
            BinaryEntry entry = new BinaryEntry();

            entry.format        = meta.ReadVInt32();
            entry.missingOffset = meta.ReadInt64();
            entry.minLength     = meta.ReadVInt32();
            entry.maxLength     = meta.ReadVInt32();
            entry.Count         = meta.ReadVInt64();
            entry.offset        = meta.ReadInt64();
            switch (entry.format)
            {
            case Lucene45DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED:
                break;

            case Lucene45DocValuesConsumer.BINARY_PREFIX_COMPRESSED:
                entry.AddressInterval     = meta.ReadVInt32();
                entry.AddressesOffset     = meta.ReadInt64();
                entry.PackedInt32sVersion = meta.ReadVInt32();
                entry.BlockSize           = meta.ReadVInt32();
                break;

            case Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED:
                entry.AddressesOffset     = meta.ReadInt64();
                entry.PackedInt32sVersion = meta.ReadVInt32();
                entry.BlockSize           = meta.ReadVInt32();
                break;

            default:
                throw new Exception("Unknown format: " + entry.format + ", input=" + meta);
            }
            return(entry);
        }
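For orientation, here is a minimal write-side sketch that mirrors the read order above. It is hypothetical (WriteBinaryEntry is not part of the example's source) and only assumes the same BinaryEntry fields and Lucene45DocValuesConsumer constants the reader already uses:

        // Hypothetical mirror of ReadBinaryEntry: writes the fields in the exact order they are read back.
        internal static void WriteBinaryEntry(IndexOutput meta, BinaryEntry entry)
        {
            meta.WriteVInt32(entry.format);
            meta.WriteInt64(entry.missingOffset);
            meta.WriteVInt32(entry.minLength);
            meta.WriteVInt32(entry.maxLength);
            meta.WriteVInt64(entry.Count);
            meta.WriteInt64(entry.offset);
            if (entry.format == Lucene45DocValuesConsumer.BINARY_PREFIX_COMPRESSED)
            {
                meta.WriteVInt32(entry.AddressInterval); // only prefix-compressed entries record the address interval
            }
            if (entry.format != Lucene45DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED)
            {
                // both compressed variants carry an addresses block
                meta.WriteInt64(entry.AddressesOffset);
                meta.WriteVInt32(entry.PackedInt32sVersion);
                meta.WriteVInt32(entry.BlockSize);
            }
        }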
Example #2
        /// <summary>
        /// NOTE: This was loadVarIntsField() in Lucene.
        /// </summary>
        private NumericDocValues LoadVarInt32sField(/*FieldInfo field, // LUCENENET: Never read */ IndexInput input)
        {
            CodecUtil.CheckHeader(input, Lucene40DocValuesFormat.VAR_INTS_CODEC_NAME, Lucene40DocValuesFormat.VAR_INTS_VERSION_START, Lucene40DocValuesFormat.VAR_INTS_VERSION_CURRENT);
            var header = (sbyte)input.ReadByte();

            if (header == Lucene40DocValuesFormat.VAR_INTS_FIXED_64)
            {
                int maxDoc = state.SegmentInfo.DocCount;
                var values = new long[maxDoc];
                for (int i = 0; i < values.Length; i++)
                {
                    values[i] = input.ReadInt64();
                }
                ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(values));
                return(new NumericDocValuesAnonymousClass(values));
            }
            else if (header == Lucene40DocValuesFormat.VAR_INTS_PACKED)
            {
                long minValue              = input.ReadInt64();
                long defaultValue          = input.ReadInt64();
                PackedInt32s.Reader reader = PackedInt32s.GetReader(input);
                ramBytesUsed.AddAndGet(reader.RamBytesUsed());
                return(new NumericDocValuesAnonymousClass2(minValue, defaultValue, reader));
            }
            else
            {
                throw new CorruptIndexException("invalid VAR_INTS header byte: " + header + " (resource=" + input + ")");
            }
        }
Example #3
        public override void VisitDocument(int n, StoredFieldVisitor visitor)
        {
            SeekIndex(n);
            fieldsStream.Seek(indexStream.ReadInt64());

            int numFields = fieldsStream.ReadVInt32();

            for (int fieldIDX = 0; fieldIDX < numFields; fieldIDX++)
            {
                int       fieldNumber = fieldsStream.ReadVInt32();
                FieldInfo fieldInfo   = fieldInfos.FieldInfo(fieldNumber);

                int bits = fieldsStream.ReadByte() & 0xFF;
                Debug.Assert(bits <= (Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_MASK | Lucene40StoredFieldsWriter.FIELD_IS_BINARY), "bits=" + bits.ToString("x"));

                switch (visitor.NeedsField(fieldInfo))
                {
                case StoredFieldVisitor.Status.YES:
                    ReadField(visitor, fieldInfo, bits);
                    break;

                case StoredFieldVisitor.Status.NO:
                    SkipField(bits);
                    break;

                case StoredFieldVisitor.Status.STOP:
                    return;
                }
            }
        }
Example #4
        /// <summary>
        /// Retrieve the length (in bytes) of the tvd and tvf
        /// entries for the next <paramref name="numDocs"/> starting with
        /// <paramref name="startDocID"/>.  This is used for bulk copying when
        /// merging segments, if the field numbers are
        /// congruent.  Once this returns, the tvf &amp; tvd streams
        /// are seeked to the <paramref name="startDocID"/>.
        /// </summary>
        internal void RawDocs(int[] tvdLengths, int[] tvfLengths, int startDocID, int numDocs)
        {
            if (tvx == null)
            {
                Arrays.Fill(tvdLengths, 0);
                Arrays.Fill(tvfLengths, 0);
                return;
            }

            SeekTvx(startDocID);

            long tvdPosition = tvx.ReadInt64();

            tvd.Seek(tvdPosition);

            long tvfPosition = tvx.ReadInt64();

            tvf.Seek(tvfPosition);

            long lastTvdPosition = tvdPosition;
            long lastTvfPosition = tvfPosition;

            int count = 0;

            while (count < numDocs)
            {
                int docID = startDocID + count + 1;
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(docID <= numTotalDocs);
                }
                if (docID < numTotalDocs)
                {
                    tvdPosition = tvx.ReadInt64();
                    tvfPosition = tvx.ReadInt64();
                }
                else
                {
                    tvdPosition = tvd.Length;
                    tvfPosition = tvf.Length;
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(count == numDocs - 1);
                    }
                }
                tvdLengths[count] = (int)(tvdPosition - lastTvdPosition);
                tvfLengths[count] = (int)(tvfPosition - lastTvfPosition);
                count++;
                lastTvdPosition = tvdPosition;
                lastTvfPosition = tvfPosition;
            }
        }
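As a hedged illustration of how a merge could consume the lengths RawDocs fills in (the names reader, tvdOut, tvfOut, startDocID, docCount and the TvdStream/TvfStream accessors are assumptions for this sketch, not part of the example above):

            // Hypothetical bulk copy: RawDocs leaves the tvd/tvf streams positioned at startDocID,
            // so the per-document byte counts can be summed and the raw bytes copied verbatim.
            var tvdLengths = new int[docCount];
            var tvfLengths = new int[docCount];
            reader.RawDocs(tvdLengths, tvfLengths, startDocID, docCount);

            long tvdBytes = 0, tvfBytes = 0;
            for (int i = 0; i < docCount; i++)
            {
                tvdBytes += tvdLengths[i];
                tvfBytes += tvfLengths[i];
            }
            tvdOut.CopyBytes(reader.TvdStream, tvdBytes); // assumed accessors for the underlying IndexInputs
            tvfOut.CopyBytes(reader.TvfStream, tvfBytes);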
Example #5
 private void SeekDir(IndexInput input, long dirOffset)
 {
     if (version >= BlockTermsWriter.VERSION_CHECKSUM)
     {
         input.Seek(input.Length - CodecUtil.FooterLength() - 8);
         dirOffset = input.ReadInt64();
     }
     else if (version >= BlockTermsWriter.VERSION_APPEND_ONLY)
     {
         input.Seek(input.Length - 8);
         dirOffset = input.ReadInt64();
     }
     input.Seek(dirOffset);
 }
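For context, a hedged sketch of what the matching write side would look like; the helper name is illustrative and not taken from BlockTermsWriter:

 // Illustrative trailer writer: the directory offset is a fixed 8-byte Int64 written
 // just before the checksum footer, which is why the reader above seeks to
 // Length - CodecUtil.FooterLength() - 8 (or Length - 8 for the older append-only format).
 private static void WriteTrailer(IndexOutput output, long dirStart)
 {
     output.WriteInt64(dirStart);
     CodecUtil.WriteFooter(output);
 }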
Example #6
        public override sealed void VisitDocument(int n, StoredFieldVisitor visitor)
        {
            SeekIndex(n);
            fieldsStream.Seek(indexStream.ReadInt64());

            int numFields = fieldsStream.ReadVInt32();

            for (int fieldIDX = 0; fieldIDX < numFields; fieldIDX++)
            {
                int       fieldNumber = fieldsStream.ReadVInt32();
                FieldInfo fieldInfo   = fieldInfos.FieldInfo(fieldNumber);

                int bits = fieldsStream.ReadByte() & 0xFF;
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(bits <= (FIELD_IS_NUMERIC_MASK | FIELD_IS_BINARY), "bits={0:x}", bits);
                }

                switch (visitor.NeedsField(fieldInfo))
                {
                case StoredFieldVisitor.Status.YES:
                    ReadField(visitor, fieldInfo, bits);
                    break;

                case StoredFieldVisitor.Status.NO:
                    SkipField(bits);
                    break;

                case StoredFieldVisitor.Status.STOP:
                    return;
                }
            }
        }
Example #7
        protected override void SeekDir(IndexInput input, long dirOffset)
        {
            input.Seek(input.Length - sizeof(long)); // the trailing Int64 dir offset is 8 bytes; sizeof(long) / 8 would step back only 1 byte
            long offset = input.ReadInt64();

            input.Seek(offset);
        }
Example #8
        public static void ReadLegacyInfos(SegmentInfos infos, Directory directory, IndexInput input, int format)
        {
            infos.Version = input.ReadInt64(); // read version
            infos.Counter = input.ReadInt32(); // read counter
            Lucene3xSegmentInfoReader reader = new Lucene3xSegmentInfoReader();

            for (int i = input.ReadInt32(); i > 0; i--) // read segmentInfos
            {
                SegmentCommitInfo siPerCommit = reader.ReadLegacySegmentInfo(directory, format, input);
                SegmentInfo       si          = siPerCommit.Info;

                if (si.Version == null)
                {
                    // Could be a 3.0 - try to open the doc stores - if it fails, it's a
                    // 2.x segment, and an IndexFormatTooOldException will be thrown,
                    // which is what we want.
                    Directory dir = directory;
                    if (Lucene3xSegmentInfoFormat.GetDocStoreOffset(si) != -1)
                    {
                        if (Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(si))
                        {
                            dir = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(Lucene3xSegmentInfoFormat.GetDocStoreSegment(si), "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), IOContext.READ_ONCE, false);
                        }
                    }
                    else if (si.UseCompoundFile)
                    {
                        dir = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), IOContext.READ_ONCE, false);
                    }

                    try
                    {
                        Lucene3xStoredFieldsReader.CheckCodeVersion(dir, Lucene3xSegmentInfoFormat.GetDocStoreSegment(si));
                    }
                    finally
                    {
                        // If we opened the directory, close it
                        if (dir != directory)
                        {
                            dir.Dispose();
                        }
                    }

                    // Above call succeeded, so it's a 3.0 segment. Upgrade it so the next
                    // time the segment is read, its version won't be null and we won't
                    // need to open FieldsReader every time for each such segment.
                    si.Version = "3.0";
                }
                else if (si.Version.Equals("2.x", StringComparison.Ordinal))
                {
                    // If it's a 3x index touched by 3.1+ code, then segments record their
                    // version, whether they are 2.x ones or not. We detect that and throw
                    // appropriate exception.
                    throw new IndexFormatTooOldException("segment " + si.Name + " in resource " + input, si.Version);
                }
                infos.Add(siPerCommit);
            }

            infos.UserData = input.ReadStringStringMap();
        }
Example #9
        private void ReadField(StoredFieldVisitor visitor, FieldInfo info, int bits)
        {
            int numeric = bits & Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_MASK;

            if (numeric != 0)
            {
                switch (numeric)
                {
                case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_INT:
                    visitor.Int32Field(info, fieldsStream.ReadInt32());
                    return;

                case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_LONG:
                    visitor.Int64Field(info, fieldsStream.ReadInt64());
                    return;

                case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_FLOAT:
                    visitor.SingleField(info, J2N.BitConversion.Int32BitsToSingle(fieldsStream.ReadInt32()));
                    return;

                case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
                    visitor.DoubleField(info, J2N.BitConversion.Int64BitsToDouble(fieldsStream.ReadInt64()));
                    return;

                default:
                    throw new CorruptIndexException("Invalid numeric type: " + numeric.ToString("x"));
                }
            }
            else
            {
                int length = fieldsStream.ReadVInt32();
                var bytes  = new byte[length];
                fieldsStream.ReadBytes(bytes, 0, length);
                if ((bits & Lucene40StoredFieldsWriter.FIELD_IS_BINARY) != 0)
                {
                    visitor.BinaryField(info, bytes);
                }
                else
                {
#pragma warning disable 612, 618
                    visitor.StringField(info, IOUtils.CHARSET_UTF_8.GetString(bytes));
#pragma warning restore 612, 618
                }
            }
        }
Example #10
        private NumericEntry ReadNumericEntry(IndexInput meta)
        {
            var entry = new NumericEntry {
                offset = meta.ReadInt64(), count = meta.ReadInt32(), missingOffset = meta.ReadInt64()
            };

            if (entry.missingOffset != -1)
            {
                entry.missingBytes = meta.ReadInt64();
            }
            else
            {
                entry.missingBytes = 0;
            }
            entry.byteWidth = meta.ReadByte();

            return(entry);
        }
Example #11
        private static NumericEntry ReadNumericEntry(IndexInput meta) // LUCENENET: CA1822: Mark members as static
        {
            var entry = new NumericEntry {
                offset = meta.ReadInt64(), count = meta.ReadInt32(), missingOffset = meta.ReadInt64()
            };

            if (entry.missingOffset != -1)
            {
                entry.missingBytes = meta.ReadInt64();
            }
            else
            {
                entry.missingBytes = 0;
            }
            entry.byteWidth = meta.ReadByte();

            return(entry);
        }
Example #12
        private static BinaryEntry ReadBinaryEntry(IndexInput meta) // LUCENENET: CA1822: Mark members as static
        {
            var entry = new BinaryEntry();

            entry.offset        = meta.ReadInt64();
            entry.numBytes      = meta.ReadInt32();
            entry.count         = meta.ReadInt32();
            entry.missingOffset = meta.ReadInt64();
            if (entry.missingOffset != -1)
            {
                entry.missingBytes = meta.ReadInt64();
            }
            else
            {
                entry.missingBytes = 0;
            }

            return(entry);
        }
Example #13
        private BinaryEntry ReadBinaryEntry(IndexInput meta)
        {
            var entry = new BinaryEntry();

            entry.offset        = meta.ReadInt64();
            entry.numBytes      = meta.ReadInt32();
            entry.count         = meta.ReadInt32();
            entry.missingOffset = meta.ReadInt64();
            if (entry.missingOffset != -1)
            {
                entry.missingBytes = meta.ReadInt64();
            }
            else
            {
                entry.missingBytes = 0;
            }

            return(entry);
        }
Example #14
        internal static NumericEntry ReadNumericEntry(IndexInput meta)
        {
            NumericEntry entry = new NumericEntry();

            entry.format              = meta.ReadVInt32();
            entry.missingOffset       = meta.ReadInt64();
            entry.PackedInt32sVersion = meta.ReadVInt32();
            entry.Offset              = meta.ReadInt64();
            entry.Count     = meta.ReadVInt64();
            entry.BlockSize = meta.ReadVInt32();
            switch (entry.format)
            {
            case Lucene45DocValuesConsumer.GCD_COMPRESSED:
                entry.minValue = meta.ReadInt64();
                entry.gcd      = meta.ReadInt64();
                break;

            case Lucene45DocValuesConsumer.TABLE_COMPRESSED:
                if (entry.Count > int.MaxValue)
                {
                    throw new Exception("Cannot use TABLE_COMPRESSED with more than MAX_VALUE values, input=" + meta);
                }
                int uniqueValues = meta.ReadVInt32();
                if (uniqueValues > 256)
                {
                    throw new Exception("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + meta);
                }
                entry.table = new long[uniqueValues];
                for (int i = 0; i < uniqueValues; ++i)
                {
                    entry.table[i] = meta.ReadInt64();
                }
                break;

            case Lucene45DocValuesConsumer.DELTA_COMPRESSED:
                break;

            default:
                throw new Exception("Unknown format: " + entry.format + ", input=" + meta);
            }
            return(entry);
        }
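To make the GCD_COMPRESSED branch concrete, here is a small hedged sketch of the decode step; the quotient parameter stands in for the packed per-document value that the real reader streams from the data file:

        // Sketch only: GCD compression stores each value as a quotient q so that
        // value = minValue + gcd * q. For example, timestamps 1000, 2000, 3500 with
        // minValue = 1000 and gcd = 500 are stored as q = 0, 2, 5.
        private static long DecodeGcdCompressed(NumericEntry entry, long quotient)
        {
            return entry.minValue + entry.gcd * quotient;
        }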
Example #15
        private int ReadHeader(IndexInput input)
        {
            int version = CodecUtil.CheckHeader(input, VariableGapTermsIndexWriter.CODEC_NAME,
                                                VariableGapTermsIndexWriter.VERSION_START, VariableGapTermsIndexWriter.VERSION_CURRENT);

            if (version < VariableGapTermsIndexWriter.VERSION_APPEND_ONLY)
            {
                dirOffset = input.ReadInt64();
            }
            return(version);
        }
Example #16
 private void SeekDir(IndexInput @in)
 {
     if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM)
     {
         @in.Seek(@in.Length - CodecUtil.FooterLength() - 8);
     }
     else
     {
         @in.Seek(@in.Length - 8);
     }
     @in.Seek(@in.ReadInt64());
 }
Example #17
        public SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi)
        {
            input         = i;
            fieldInfos    = fis;
            isIndex       = isi;
            maxSkipLevels = 1; // use single-level skip lists for formats > -3

            int firstInt = input.ReadInt32();

            if (firstInt >= 0)
            {
                // original-format file, without explicit format version number
                format = 0;
                size   = firstInt;

                // back-compatible settings
                indexInterval = 128;
                skipInterval  = int.MaxValue; // switch off skipTo optimization
            }
            else
            {
                // we have a format version number
                format = firstInt;

                // check that it is a format we can understand
                if (format > FORMAT_MINIMUM)
                {
                    throw new IndexFormatTooOldException(input, format, FORMAT_MINIMUM, FORMAT_CURRENT);
                }
                if (format < FORMAT_CURRENT)
                {
                    throw new IndexFormatTooNewException(input, format, FORMAT_MINIMUM, FORMAT_CURRENT);
                }

                size = input.ReadInt64(); // read the size

                indexInterval = input.ReadInt32();
                skipInterval  = input.ReadInt32();
                maxSkipLevels = input.ReadInt32();
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(indexInterval > 0, "indexInterval={0} is negative; must be > 0", indexInterval);
                    Debugging.Assert(skipInterval > 0, "skipInterval={0} is negative; must be > 0", skipInterval);
                }
            }
        }
Example #18
        public override long Get(int index)
        {
            int  blockOffset = index / valuesPerBlock;
            long skip        = ((long)blockOffset) << 3;

            try
            {
                @in.Seek(startPointer + skip);

                long block         = @in.ReadInt64();
                int  offsetInBlock = index % valuesPerBlock;
                return((block.TripleShift(offsetInBlock * m_bitsPerValue)) & mask);
            }
            catch (Exception e) when(e.IsIOException())
            {
                throw IllegalStateException.Create("failed", e);
            }
        }
Example #19
        public override long Get(int index)
        {
            int  blockOffset = index / valuesPerBlock;
            long skip        = ((long)blockOffset) << 3;

            try
            {
                @in.Seek(startPointer + skip);

                long block         = @in.ReadInt64();
                int  offsetInBlock = index % valuesPerBlock;
                return(((long)((ulong)block >> (offsetInBlock * m_bitsPerValue))) & mask);
            }
            catch (IOException e)
            {
                throw new InvalidOperationException("failed", e);
            }
        }
Example #20
        private SortedDocValues LoadBytesVarSorted(/*FieldInfo field, // LUCENENET: Never read */ IndexInput data, IndexInput index)
        {
            CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
            CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

            long       maxAddress = index.ReadInt64();
            PagedBytes bytes      = new PagedBytes(16);

            bytes.Copy(data, maxAddress);
            PagedBytes.Reader   bytesReader   = bytes.Freeze(true);
            PackedInt32s.Reader addressReader = PackedInt32s.GetReader(index);
            PackedInt32s.Reader ordsReader    = PackedInt32s.GetReader(index);

            int valueCount = addressReader.Count - 1;

            ramBytesUsed.AddAndGet(bytes.RamBytesUsed() + addressReader.RamBytesUsed() + ordsReader.RamBytesUsed());

            return(CorrectBuggyOrds(new SortedDocValuesAnonymousClass2(bytesReader, addressReader, ordsReader, valueCount)));
        }
Example #21
        private NumericDocValues LoadDoubleField(/*FieldInfo field, // LUCENENET: Never read */ IndexInput input)
        {
            CodecUtil.CheckHeader(input, Lucene40DocValuesFormat.FLOATS_CODEC_NAME, Lucene40DocValuesFormat.FLOATS_VERSION_START, Lucene40DocValuesFormat.FLOATS_VERSION_CURRENT);
            int valueSize = input.ReadInt32();

            if (valueSize != 8)
            {
                throw new CorruptIndexException("invalid valueSize: " + valueSize);
            }
            int maxDoc = state.SegmentInfo.DocCount;

            long[] values = new long[maxDoc];
            for (int i = 0; i < values.Length; i++)
            {
                values[i] = input.ReadInt64();
            }
            ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(values));
            return(new NumericDocValuesAnonymousClass8(values));
        }
Example #22
        private BinaryDocValues LoadBytesVarDeref(FieldInfo field)
        {
            string     dataName  = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "dat");
            string     indexName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "idx");
            IndexInput data      = null;
            IndexInput index     = null;
            bool       success   = false;

            try
            {
                data = dir.OpenInput(dataName, state.Context);
                CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);
                index = dir.OpenInput(indexName, state.Context);
                CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);

                long       totalBytes = index.ReadInt64();
                PagedBytes bytes      = new PagedBytes(16);
                bytes.Copy(data, totalBytes);
                PagedBytes.Reader   bytesReader = bytes.Freeze(true);
                PackedInt32s.Reader reader      = PackedInt32s.GetReader(index);
                CodecUtil.CheckEOF(data);
                CodecUtil.CheckEOF(index);
                ramBytesUsed.AddAndGet(bytes.RamBytesUsed() + reader.RamBytesUsed());
                success = true;
                return(new BinaryDocValuesAnonymousClass4(bytesReader, reader));
            }
            finally
            {
                if (success)
                {
                    IOUtils.Dispose(data, index);
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(data, index);
                }
            }
        }
Example #23
        public override FieldsProducer FieldsProducer(SegmentReadState state)
        {
            string     seedFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, SEED_EXT);
            IndexInput @in          = state.Directory.OpenInput(seedFileName, state.Context);
            long       seed         = @in.ReadInt64();

            if (LuceneTestCase.Verbose)
            {
                Console.WriteLine("MockRandomCodec: reading from seg=" + state.SegmentInfo.Name + " formatID=" + state.SegmentSuffix + " seed=" + seed);
            }
            @in.Dispose();

            Random random = new J2N.Randomizer(seed);

            int readBufferSize = TestUtil.NextInt32(random, 1, 4096);

            if (LuceneTestCase.Verbose)
            {
                Console.WriteLine("MockRandomCodec: readBufferSize=" + readBufferSize);
            }

            PostingsReaderBase postingsReader;

            if (random.NextBoolean())
            {
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: reading Sep postings");
                }
                postingsReader = new SepPostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo,
                                                       state.Context, new MockInt32StreamFactory(random), state.SegmentSuffix);
            }
            else
            {
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: reading Standard postings");
                }
                postingsReader = new Lucene41PostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo, state.Context, state.SegmentSuffix);
            }

            if (random.NextBoolean())
            {
                int totTFCutoff = TestUtil.NextInt32(random, 1, 20);
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: reading pulsing postings with totTFCutoff=" + totTFCutoff);
                }
                postingsReader = new PulsingPostingsReader(state, postingsReader);
            }

            FieldsProducer fields;
            int            t1 = random.Next(4);

            if (t1 == 0)
            {
                bool success = false;
                try
                {
                    fields  = new FSTTermsReader(state, postingsReader);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        postingsReader.Dispose();
                    }
                }
            }
            else if (t1 == 1)
            {
                bool success = false;
                try
                {
                    fields  = new FSTOrdTermsReader(state, postingsReader);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        postingsReader.Dispose();
                    }
                }
            }
            else if (t1 == 2)
            {
                // Use BlockTree terms dict
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: reading BlockTree terms dict");
                }

                bool success = false;
                try
                {
                    fields = new BlockTreeTermsReader(state.Directory,
                                                      state.FieldInfos,
                                                      state.SegmentInfo,
                                                      postingsReader,
                                                      state.Context,
                                                      state.SegmentSuffix,
                                                      state.TermsIndexDivisor);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        postingsReader.Dispose();
                    }
                }
            }
            else
            {
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("MockRandomCodec: reading Block terms dict");
                }
                TermsIndexReaderBase indexReader;
                bool success = false;
                try
                {
                    bool doFixedGap = random.NextBoolean();

                    // randomness diverges from writer, here:
                    if (state.TermsIndexDivisor != -1)
                    {
                        state.TermsIndexDivisor = TestUtil.NextInt32(random, 1, 10);
                    }

                    if (doFixedGap)
                    {
                        // if termsIndexDivisor is set to -1, we should not touch it. It means a
                        // test explicitly instructed not to load the terms index.
                        if (LuceneTestCase.Verbose)
                        {
                            Console.WriteLine("MockRandomCodec: fixed-gap terms index (divisor=" + state.TermsIndexDivisor + ")");
                        }
                        indexReader = new FixedGapTermsIndexReader(state.Directory,
                                                                   state.FieldInfos,
                                                                   state.SegmentInfo.Name,
                                                                   state.TermsIndexDivisor,
                                                                   BytesRef.UTF8SortedAsUnicodeComparer,
                                                                   state.SegmentSuffix, state.Context);
                    }
                    else
                    {
                        int n2 = random.Next(3);
                        if (n2 == 1)
                        {
                            random.Next();
                        }
                        else if (n2 == 2)
                        {
                            random.NextInt64();
                        }
                        if (LuceneTestCase.Verbose)
                        {
                            Console.WriteLine("MockRandomCodec: variable-gap terms index (divisor=" + state.TermsIndexDivisor + ")");
                        }
                        indexReader = new VariableGapTermsIndexReader(state.Directory,
                                                                      state.FieldInfos,
                                                                      state.SegmentInfo.Name,
                                                                      state.TermsIndexDivisor,
                                                                      state.SegmentSuffix, state.Context);
                    }

                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        postingsReader.Dispose();
                    }
                }

                success = false;
                try
                {
                    fields = new BlockTermsReader(indexReader,
                                                  state.Directory,
                                                  state.FieldInfos,
                                                  state.SegmentInfo,
                                                  postingsReader,
                                                  state.Context,
                                                  state.SegmentSuffix);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        try
                        {
                            postingsReader.Dispose();
                        }
                        finally
                        {
                            indexReader.Dispose();
                        }
                    }
                }
            }

            return(fields);
        }
Example #24
        private void ReadFields(IndexInput meta, FieldInfos infos)
        {
            int fieldNumber = meta.ReadVInt32();

            while (fieldNumber != -1)
            {
                int fieldType = meta.ReadByte();
                if (fieldType == NUMBER)
                {
                    var entry = new NumericEntry {
                        offset = meta.ReadInt64(), missingOffset = meta.ReadInt64()
                    };
                    if (entry.missingOffset != -1)
                    {
                        entry.missingBytes = meta.ReadInt64();
                    }
                    else
                    {
                        entry.missingBytes = 0;
                    }
                    entry.format = meta.ReadByte();
                    switch (entry.format)
                    {
                    case DELTA_COMPRESSED:
                    case TABLE_COMPRESSED:
                    case GCD_COMPRESSED:
                    case UNCOMPRESSED:
                        break;

                    default:
                        throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta);
                    }
                    if (entry.format != UNCOMPRESSED)
                    {
                        entry.packedIntsVersion = meta.ReadVInt32();
                    }
                    numerics[fieldNumber] = entry;
                }
                else if (fieldType == BYTES)
                {
                    var entry = new BinaryEntry
                    {
                        offset        = meta.ReadInt64(),
                        numBytes      = meta.ReadInt64(),
                        missingOffset = meta.ReadInt64()
                    };
                    if (entry.missingOffset != -1)
                    {
                        entry.missingBytes = meta.ReadInt64();
                    }
                    else
                    {
                        entry.missingBytes = 0;
                    }
                    entry.minLength = meta.ReadVInt32();
                    entry.maxLength = meta.ReadVInt32();
                    if (entry.minLength != entry.maxLength)
                    {
                        entry.packedIntsVersion = meta.ReadVInt32();
                        entry.blockSize         = meta.ReadVInt32();
                    }
                    binaries[fieldNumber] = entry;
                }
                else if (fieldType == FST)
                {
                    var entry = new FSTEntry {
                        offset = meta.ReadInt64(), numOrds = meta.ReadVInt64()
                    };
                    fsts[fieldNumber] = entry;
                }
                else
                {
                    throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
                }
                fieldNumber = meta.ReadVInt32();
            }
        }
Example #25
        /// <summary>
        /// reads from legacy 3.x segments_N </summary>
        private SegmentCommitInfo ReadLegacySegmentInfo(Directory dir, int format, IndexInput input)
        {
            // check that it is a format we can understand
            if (format > Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS)
            {
                throw new IndexFormatTooOldException(input, format, Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1);
            }
            if (format < Lucene3xSegmentInfoFormat.FORMAT_3_1)
            {
                throw new IndexFormatTooNewException(input, format, Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1);
            }
            string version;

            if (format <= Lucene3xSegmentInfoFormat.FORMAT_3_1)
            {
                version = input.ReadString();
            }
            else
            {
                version = null;
            }

            string name = input.ReadString();

            int  docCount = input.ReadInt32();
            long delGen   = input.ReadInt64();

            int docStoreOffset = input.ReadInt32();
            IDictionary <string, string> attributes = new Dictionary <string, string>();

            // parse the docstore stuff and shove it into attributes
            string docStoreSegment;
            bool   docStoreIsCompoundFile;

            if (docStoreOffset != -1)
            {
                docStoreSegment        = input.ReadString();
                docStoreIsCompoundFile = input.ReadByte() == SegmentInfo.YES;
                attributes[Lucene3xSegmentInfoFormat.DS_OFFSET_KEY]   = Convert.ToString(docStoreOffset, CultureInfo.InvariantCulture);
                attributes[Lucene3xSegmentInfoFormat.DS_NAME_KEY]     = docStoreSegment;
                attributes[Lucene3xSegmentInfoFormat.DS_COMPOUND_KEY] = Convert.ToString(docStoreIsCompoundFile, CultureInfo.InvariantCulture);
            }
            else
            {
                docStoreSegment        = name;
                docStoreIsCompoundFile = false;
            }

            // pre-4.0 indexes write a byte if there is a single norms file
            byte b = input.ReadByte();

            //System.out.println("version=" + version + " name=" + name + " docCount=" + docCount + " delGen=" + delGen + " dso=" + docStoreOffset + " dss=" + docStoreSegment + " dssCFs=" + docStoreIsCompoundFile + " b=" + b + " format=" + format);

            Debug.Assert(1 == b, "expected 1 but was: " + b + " format: " + format);
            int numNormGen = input.ReadInt32();
            IDictionary <int, long> normGen;

            if (numNormGen == SegmentInfo.NO)
            {
                normGen = null;
            }
            else
            {
                normGen = new Dictionary <int, long>();
                for (int j = 0; j < numNormGen; j++)
                {
                    normGen[j] = input.ReadInt64();
                }
            }
            bool isCompoundFile = input.ReadByte() == SegmentInfo.YES;

            int delCount = input.ReadInt32();

            Debug.Assert(delCount <= docCount);

            bool hasProx = input.ReadByte() == 1;

            IDictionary <string, string> diagnostics = input.ReadStringStringMap();

            if (format <= Lucene3xSegmentInfoFormat.FORMAT_HAS_VECTORS)
            {
                // NOTE: unused
                int hasVectors = input.ReadByte();
            }

            // Replicate logic from 3.x's SegmentInfo.files():
            ISet <string> files = new HashSet <string>();

            if (isCompoundFile)
            {
                files.Add(IndexFileNames.SegmentFileName(name, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
            }
            else
            {
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xFieldInfosReader.FIELD_INFOS_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.FREQ_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.PROX_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.TERMS_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xNormsProducer.NORMS_EXTENSION));
            }

            if (docStoreOffset != -1)
            {
                if (docStoreIsCompoundFile)
                {
                    files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION));
                }
                else
                {
                    files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
                    files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION));
                    AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION));
                    AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
                    AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
                }
            }
            else if (!isCompoundFile)
            {
                files.Add(IndexFileNames.SegmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
                files.Add(IndexFileNames.SegmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
            }

            // parse the normgen stuff and shove it into attributes
            if (normGen != null)
            {
                attributes[Lucene3xSegmentInfoFormat.NORMGEN_KEY] = Convert.ToString(numNormGen, CultureInfo.InvariantCulture);
                foreach (KeyValuePair <int, long> ent in normGen)
                {
                    long gen = ent.Value;
                    if (gen >= SegmentInfo.YES)
                    {
                        // Definitely a separate norm file, with generation:
                        files.Add(IndexFileNames.FileNameFromGeneration(name, "s" + ent.Key, gen));
                        attributes[Lucene3xSegmentInfoFormat.NORMGEN_PREFIX + ent.Key] = Convert.ToString(gen, CultureInfo.InvariantCulture);
                    }
                    else if (gen == SegmentInfo.NO)
                    {
                        // No separate norm
                    }
                    else
                    {
                        // We should have already hit indexformat too old exception
                        Debug.Assert(false);
                    }
                }
            }

            SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, null, diagnostics, Collections.UnmodifiableMap(attributes));

            info.SetFiles(files);

            SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, delCount, delGen, -1);

            return(infoPerCommit);
        }
Example #26
 /// <summary>
 /// Returns (but does not validate) the checksum previously written by <see cref="CheckFooter(ChecksumIndexInput)"/>. </summary>
 /// <returns> actual checksum value </returns>
 /// <exception cref="IOException"> If the footer is invalid. </exception>
 public static long RetrieveChecksum(IndexInput @in)
 {
     @in.Seek(@in.Length - FooterLength());
     ValidateFooter(@in);
     return(@in.ReadInt64());
 }
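For context, a hedged sketch of the footer this method reads; the layout shown is what ValidateFooter and FooterLength() imply, and checksumIndexInput is only an illustrative name:

 // Assumed footer layout (FooterLength() bytes at the very end of the file):
 //   int  footer magic  -- checked by ValidateFooter
 //   int  algorithm id  -- checked by ValidateFooter
 //   long checksum      -- the Int64 returned above
 //
 // The validating counterpart reads the whole file through a ChecksumIndexInput and then
 // compares the running checksum against the stored one, e.g.:
 // long checksum = CodecUtil.CheckFooter(checksumIndexInput);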
Example #27
        public override long Get(int index)
        {
            long majorBitPos = (long)index * m_bitsPerValue;
            long elementPos  = (long)((ulong)majorBitPos >> 3);

            try
            {
                @in.Seek(startPointer + elementPos);

                int bitPos = (int)(majorBitPos & 7);
                // round up bits to a multiple of 8 to find total bytes needed to read
                int roundedBits = ((bitPos + m_bitsPerValue + 7) & ~7);
                // the number of extra bits read at the end to shift out
                int shiftRightBits = roundedBits - bitPos - m_bitsPerValue;

                long rawValue;
                switch ((int)((uint)roundedBits >> 3))
                {
                case 1:
                    rawValue = @in.ReadByte();
                    break;

                case 2:
                    rawValue = @in.ReadInt16();
                    break;

                case 3:
                    rawValue = ((long)@in.ReadInt16() << 8) | (@in.ReadByte() & 0xFFL);
                    break;

                case 4:
                    rawValue = @in.ReadInt32();
                    break;

                case 5:
                    rawValue = ((long)@in.ReadInt32() << 8) | (@in.ReadByte() & 0xFFL);
                    break;

                case 6:
                    rawValue = ((long)@in.ReadInt32() << 16) | (@in.ReadInt16() & 0xFFFFL);
                    break;

                case 7:
                    rawValue = ((long)@in.ReadInt32() << 24) | ((@in.ReadInt16() & 0xFFFFL) << 8) | (@in.ReadByte() & 0xFFL);
                    break;

                case 8:
                    rawValue = @in.ReadInt64();
                    break;

                case 9:
                    // We must be very careful not to shift out relevant bits. So we account for right shift
                    // we would normally do on return here, and reset it.
                    rawValue       = (@in.ReadInt64() << (8 - shiftRightBits)) | (((uint)(@in.ReadByte() & 0xFFL) >> shiftRightBits));
                    shiftRightBits = 0;
                    break;

                default:
                    throw new InvalidOperationException("bitsPerValue too large: " + m_bitsPerValue);
                }
                return(((long)((ulong)rawValue >> shiftRightBits)) & valueMask);
            }
            catch (IOException ioe)
            {
                throw new InvalidOperationException("failed", ioe);
            }
        }
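A worked example of the bit arithmetic above, using illustrative numbers (m_bitsPerValue = 5, index = 3):

        // Assuming m_bitsPerValue = 5 and index = 3:
        //   majorBitPos    = 3 * 5            = 15
        //   elementPos     = 15 >> 3          = 1   (seek 1 byte past startPointer)
        //   bitPos         = 15 & 7           = 7
        //   roundedBits    = (7 + 5 + 7) & ~7 = 16  -> read 2 bytes via ReadInt16
        //   shiftRightBits = 16 - 7 - 5       = 4
        //   value          = (rawValue >> 4) & valueMask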
Example #28
        private void ReadFields(IndexInput meta, FieldInfos infos)
        {
            int fieldNumber = meta.ReadVInt32();

            while (fieldNumber != -1)
            {
                // check should be: infos.fieldInfo(fieldNumber) != null, which incorporates negative check
                // but docvalues updates are currently buggy here (loading extra stuff, etc): LUCENE-5616
                if (fieldNumber < 0)
                {
                    // trickier to validate more: because we re-use for norms, because we use multiple entries
                    // for "composite" types like sortedset, etc.
                    throw new CorruptIndexException("Invalid field number: " + fieldNumber + ", input=" + meta);
                }
                int fieldType = meta.ReadByte();
                if (fieldType == NUMBER)
                {
                    var entry = new NumericEntry();
                    entry.Offset = meta.ReadInt64();
                    entry.Format = (sbyte)meta.ReadByte();
                    switch (entry.Format)
                    {
                    case DELTA_COMPRESSED:
                    case TABLE_COMPRESSED:
                    case GCD_COMPRESSED:
                    case UNCOMPRESSED:
                        break;

                    default:
                        throw new CorruptIndexException("Unknown format: " + entry.Format + ", input=" + meta);
                    }
                    if (entry.Format != UNCOMPRESSED)
                    {
                        entry.PackedInt32sVersion = meta.ReadVInt32();
                    }
                    numerics[fieldNumber] = entry;
                }
                else if (fieldType == BYTES)
                {
                    BinaryEntry entry = new BinaryEntry();
                    entry.Offset    = meta.ReadInt64();
                    entry.NumBytes  = meta.ReadInt64();
                    entry.MinLength = meta.ReadVInt32();
                    entry.MaxLength = meta.ReadVInt32();
                    if (entry.MinLength != entry.MaxLength)
                    {
                        entry.PackedInt32sVersion = meta.ReadVInt32();
                        entry.BlockSize           = meta.ReadVInt32();
                    }
                    binaries[fieldNumber] = entry;
                }
                else if (fieldType == FST)
                {
                    FSTEntry entry = new FSTEntry();
                    entry.Offset      = meta.ReadInt64();
                    entry.NumOrds     = meta.ReadVInt64();
                    fsts[fieldNumber] = entry;
                }
                else
                {
                    throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
                }
                fieldNumber = meta.ReadVInt32();
            }
        }