예제 #1
0
        /// <summary>
        /// Walks the stored fields of document <paramref name="n"/>, asking
        /// <paramref name="visitor"/> for each field whether it should be read,
        /// skipped, or whether the walk should stop.
        /// </summary>
        /// <param name="n">Document number handed to <c>SeekIndex</c>.</param>
        /// <param name="visitor">Decides per field and receives the fields it asks for.</param>
        public override void VisitDocument(int n, StoredFieldVisitor visitor)
        {
            SeekIndex(n);

            // The index stream yields the position to seek to in the fields stream.
            long docPointer = IndexStream.ReadLong();
            FieldsStream.Seek(docPointer);

            for (int remaining = FieldsStream.ReadVInt(); remaining > 0; remaining--)
            {
                FieldInfo current = FieldInfos.FieldInfo(FieldsStream.ReadVInt());

                // Widen the flag byte to a non-negative int before checking it.
                int flags = FieldsStream.ReadByte() & 0xFF;
                Debug.Assert(flags <= (Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_MASK | Lucene40StoredFieldsWriter.FIELD_IS_BINARY), "bits=" + flags.ToString("x"));

                StoredFieldVisitor.Status verdict = visitor.NeedsField(current);
                if (verdict == StoredFieldVisitor.Status.YES)
                {
                    ReadField(visitor, current, flags);
                }
                else if (verdict == StoredFieldVisitor.Status.NO)
                {
                    SkipField(flags);
                }
                else if (verdict == StoredFieldVisitor.Status.STOP)
                {
                    // The visitor wants nothing further from this document.
                    return;
                }
            }
        }
        /// <summary>
        /// Loads a VAR_INTS numeric field from <paramref name="input"/>. After the
        /// codec header, one byte selects between a raw long per document and a
        /// packed representation that carries a minimum and a default value.
        /// </summary>
        /// <param name="field">Not used by this method.</param>
        /// <param name="input">Stream whose codec header is checked first.</param>
        /// <returns>Doc values backed by the decoded data.</returns>
        /// <exception cref="CorruptIndexException">The header byte matches neither encoding.</exception>
        private NumericDocValues LoadVarIntsField(FieldInfo field, IndexInput input)
        {
            CodecUtil.CheckHeader(input, Lucene40DocValuesFormat.VAR_INTS_CODEC_NAME, Lucene40DocValuesFormat.VAR_INTS_VERSION_START, Lucene40DocValuesFormat.VAR_INTS_VERSION_CURRENT);
            byte header = input.ReadByte();

            if (header == Lucene40DocValuesFormat.VAR_INTS_FIXED_64)
            {
                // One 64-bit value per document in the segment.
                long[] decoded = new long[State.SegmentInfo.DocCount];
                int slot = 0;
                while (slot < decoded.Length)
                {
                    decoded[slot] = input.ReadLong();
                    slot++;
                }
                RamBytesUsed_Renamed.AddAndGet(RamUsageEstimator.SizeOf(decoded));
                return new NumericDocValuesAnonymousInnerClassHelper(this, decoded);
            }

            if (header == Lucene40DocValuesFormat.VAR_INTS_PACKED)
            {
                // Read order is fixed by the file layout: min, default, packed data.
                long floor = input.ReadLong();
                long fallback = input.ReadLong();
                PackedInts.Reader packed = PackedInts.GetReader(input);
                RamBytesUsed_Renamed.AddAndGet(packed.RamBytesUsed());
                return new NumericDocValuesAnonymousInnerClassHelper2(this, floor, fallback, packed);
            }

            throw new CorruptIndexException("invalid VAR_INTS header byte: " + header + " (resource=" + input + ")");
        }
예제 #3
0
        /// <summary>
        /// Reads one binary doc-values metadata entry from <paramref name="meta"/>.
        /// The common fields are read first; the format then decides which
        /// addressing fields follow.
        /// </summary>
        /// <param name="meta">Metadata stream positioned at the entry.</param>
        /// <returns>The populated entry.</returns>
        /// <exception cref="Exception">The format value is not one of the known binary formats.</exception>
        internal static BinaryEntry ReadBinaryEntry(IndexInput meta)
        {
            // Initializer members are evaluated top to bottom, matching the on-disk order.
            BinaryEntry entry = new BinaryEntry
            {
                Format = meta.ReadVInt(),
                MissingOffset = meta.ReadLong(),
                MinLength = meta.ReadVInt(),
                MaxLength = meta.ReadVInt(),
                Count = meta.ReadVLong(),
                Offset = meta.ReadLong()
            };

            if (entry.Format == Lucene45DocValuesConsumer.BINARY_PREFIX_COMPRESSED)
            {
                entry.AddressInterval = meta.ReadVInt();
                entry.AddressesOffset = meta.ReadLong();
                entry.PackedIntsVersion = meta.ReadVInt();
                entry.BlockSize = meta.ReadVInt();
            }
            else if (entry.Format == Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED)
            {
                entry.AddressesOffset = meta.ReadLong();
                entry.PackedIntsVersion = meta.ReadVInt();
                entry.BlockSize = meta.ReadVInt();
            }
            else if (entry.Format != Lucene45DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED)
            {
                // BINARY_FIXED_UNCOMPRESSED carries no extra fields; anything else is unknown.
                throw new Exception("Unknown format: " + entry.Format + ", input=" + meta);
            }

            return entry;
        }
예제 #4
0
        /// <summary>
        /// Read the segments.gen file to get the generation number.
        /// </summary>
        /// <remarks>
        /// The generation is read twice from the file and is only adopted when the
        /// format marker equals <c>SegmentInfos.FORMAT_LOCKLESS</c> and both copies
        /// agree. The input is closed on every path where it was opened; a failure
        /// to open it propagates unchanged.
        /// </remarks>
        private void ReadDirectory()
        {
            IndexInput genInput = null;

            try
            {
                genInput = directory.OpenInput(IndexFileNames.SEGMENTS_GEN);

                if (genInput != null)
                {
                    int version = genInput.ReadInt();
                    if (version == Lucene.Net.Index.SegmentInfos.FORMAT_LOCKLESS)
                    {
                        long gen0 = genInput.ReadLong();
                        long gen1 = genInput.ReadLong();
                        if (gen0 == gen1)
                        {
                            // The file is consistent, use the generation
                            this.generation = gen0;
                        }
                    }
                }
            }
            finally
            {
                // genInput is still null when OpenInput threw (or returned null).
                // Closing unconditionally would raise a NullReferenceException here
                // and hide the original failure from the caller.
                if (genInput != null)
                {
                    genInput.Close();
                }
            }
        }
예제 #5
0
        /// <summary>
        /// Builds the document stored at <paramref name="n"/>, letting
        /// <paramref name="fieldSelector"/> decide per field whether it is loaded,
        /// loaded lazily, recorded by size only, or skipped.
        /// </summary>
        /// <param name="n">Document number handed to <c>SeekIndex</c>.</param>
        /// <param name="fieldSelector">Per-field policy; when null every field is loaded.</param>
        /// <returns>The document populated with the accepted fields.</returns>
        public /*internal*/ Document.Document Doc(int n, FieldSelector fieldSelector)
        {
            SeekIndex(n);
            fieldsStream.Seek(indexStream.ReadLong());

            var result = new Document.Document();

            for (int remaining = fieldsStream.ReadVInt(); remaining > 0; remaining--)
            {
                FieldInfo fi = fieldInfos.FieldInfo(fieldsStream.ReadVInt());

                // The selector is consulted before the flag byte is consumed.
                FieldSelectorResult decision;
                if (fieldSelector == null)
                {
                    decision = FieldSelectorResult.LOAD;
                }
                else
                {
                    decision = fieldSelector.Accept(fi.name);
                }

                byte flags = fieldsStream.ReadByte();
                System.Diagnostics.Debug.Assert(flags <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY);

                bool isCompressed = (flags & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
                System.Diagnostics.Debug.Assert(
                    !(isCompressed && (format >= FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS)),
                    "compressed fields are only allowed in indexes of version <= 2.9");
                bool isTokenized = (flags & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
                bool isBinary = (flags & FieldsWriter.FIELD_IS_BINARY) != 0;

                //TODO: Find an alternative approach here if this list continues to grow beyond the
                //list of 5 or 6 currently here.  See Lucene 762 for discussion
                if (decision.Equals(FieldSelectorResult.LOAD))
                {
                    AddField(result, fi, isBinary, isCompressed, isTokenized);
                }
                else if (decision.Equals(FieldSelectorResult.LOAD_AND_BREAK))
                {
                    AddField(result, fi, isBinary, isCompressed, isTokenized);
                    break; // this field is the last one wanted
                }
                else if (decision.Equals(FieldSelectorResult.LAZY_LOAD))
                {
                    AddFieldLazy(result, fi, isBinary, isCompressed, isTokenized);
                }
                else if (decision.Equals(FieldSelectorResult.SIZE))
                {
                    // Record the size, then skip the number of units AddFieldSize reported.
                    var recordedSize = AddFieldSize(result, fi, isBinary, isCompressed);
                    SkipField(isBinary, isCompressed, recordedSize);
                }
                else if (decision.Equals(FieldSelectorResult.SIZE_AND_BREAK))
                {
                    AddFieldSize(result, fi, isBinary, isCompressed);
                    break;
                }
                else
                {
                    SkipField(isBinary, isCompressed);
                }
            }

            return result;
        }
예제 #6
0
        /// <summary>
        /// Computes, for each of the <paramref name="numDocs"/> documents beginning
        /// at <paramref name="startDocID"/>, the byte length of its tvd and tvf
        /// entries. Used for bulk copying when merging segments whose field
        /// numbers are congruent. On return the tvd &amp; tvf streams are
        /// positioned at <paramref name="startDocID"/>.
        /// </summary>
        /// <param name="tvdLengths">Receives one tvd length per document.</param>
        /// <param name="tvfLengths">Receives one tvf length per document.</param>
        /// <param name="startDocID">First document of the run.</param>
        /// <param name="numDocs">Number of documents in the run.</param>
        /// <exception cref="System.SystemException">The term vector format predates FORMAT_VERSION2.</exception>
        internal void RawDocs(int[] tvdLengths, int[] tvfLengths, int startDocID, int numDocs)
        {
            if (tvx == null)
            {
                // No term vector index is open: report every length as zero.
                System.Array.Clear(tvdLengths, 0, tvdLengths.Length);
                System.Array.Clear(tvfLengths, 0, tvfLengths.Length);
                return;
            }

            // SegmentMerger calls canReadRawDocs() first and should
            // not call us if that returns false.
            if (format < FORMAT_VERSION2)
            {
                throw new System.SystemException("cannot read raw docs with older term vector formats");
            }

            SeekTvx(startDocID);

            long prevTvd = tvx.ReadLong();
            tvd.Seek(prevTvd);

            long prevTvf = tvx.ReadLong();
            tvf.Seek(prevTvf);

            for (int i = 0; i < numDocs; i++)
            {
                // A document's length is the distance to the start of the next one.
                int nextDoc = docStoreOffset + startDocID + i + 1;
                System.Diagnostics.Debug.Assert(nextDoc <= numTotalDocs);

                long nextTvd;
                long nextTvf;
                if (nextDoc < numTotalDocs)
                {
                    nextTvd = tvx.ReadLong();
                    nextTvf = tvx.ReadLong();
                }
                else
                {
                    // No following document: the entry runs to the end of each file.
                    nextTvd = tvd.Length();
                    nextTvf = tvf.Length();
                    System.Diagnostics.Debug.Assert(i == numDocs - 1);
                }

                tvdLengths[i] = (int)(nextTvd - prevTvd);
                tvfLengths[i] = (int)(nextTvf - prevTvf);
                prevTvd = nextTvd;
                prevTvf = nextTvf;
            }
        }
예제 #7
0
 /// <summary>
 /// Positions <paramref name="input"/> at the directory. For versions at or
 /// above VERSION_APPEND_ONLY the offset is read from the end of the file
 /// (ahead of the footer for VERSION_CHECKSUM and later); otherwise the
 /// supplied <paramref name="dirOffset"/> is used as is.
 /// </summary>
 private void SeekDir(IndexInput input, long dirOffset)
 {
     bool hasFooter = _version >= BlockTermsWriter.VERSION_CHECKSUM;

     if (hasFooter || _version >= BlockTermsWriter.VERSION_APPEND_ONLY)
     {
         long pointerPos;
         if (hasFooter)
         {
             pointerPos = input.Length() - CodecUtil.FooterLength() - 8;
         }
         else
         {
             pointerPos = input.Length() - 8;
         }

         input.Seek(pointerPos);
         dirOffset = input.ReadLong();
     }

     input.Seek(dirOffset);
 }
예제 #8
0
        /// <summary>
        /// Positions <paramref name="input"/> at the directory, whose offset is
        /// read as a long from the final 8 bytes of the file.
        /// </summary>
        /// <param name="input">Stream to reposition.</param>
        /// <param name="dirOffset">Ignored; the offset is always taken from the end of the file.</param>
        protected override void SeekDir(IndexInput input, long dirOffset)
        {
            // sizeof(long) is already a byte count (8). The earlier
            // "sizeof(long) / 8" evaluated to 1 -- a mistranslation of Java's
            // bit-based Long.SIZE / 8 -- so the 8-byte read began 1 byte before
            // the end of the file instead of 8.
            input.Seek(input.Length() - sizeof(long));
            long offset = input.ReadLong();

            input.Seek(offset);
        }
예제 #9
0
        /// <summary>
        /// Populates <paramref name="infos"/> from a legacy segments file: the
        /// version, the counter, every segment entry, and finally the user-data map.
        /// </summary>
        /// <remarks>
        /// A segment with no recorded version is probed with
        /// <c>Lucene3xStoredFieldsReader.CheckCodeVersion</c>; when that call
        /// returns normally the segment is stamped "3.0". A segment recorded as
        /// "2.x" is rejected.
        /// </remarks>
        /// <param name="infos">Receives the values read.</param>
        /// <param name="directory">Directory the segments live in.</param>
        /// <param name="input">Stream to read from.</param>
        /// <param name="format">Format value forwarded to the per-segment reader.</param>
        /// <exception cref="IndexFormatTooOldException">A segment's recorded version is "2.x".</exception>
        public static void ReadLegacyInfos(SegmentInfos infos, Directory directory, IndexInput input, int format)
        {
            infos.Version = input.ReadLong(); // read version
            infos.Counter = input.ReadInt();  // read counter
            Lucene3xSegmentInfoReader segmentReader = new Lucene3xSegmentInfoReader();

            int remaining = input.ReadInt(); // number of segmentInfos that follow
            while (remaining > 0)
            {
                SegmentCommitInfo commitInfo = segmentReader.ReadLegacySegmentInfo(directory, format, input);
                SegmentInfo segment = commitInfo.Info;

                if (segment.Version != null)
                {
                    if (segment.Version.Equals("2.x"))
                    {
                        // If it's a 3x index touched by 3.1+ code, then segments record their
                        // version, whether they are 2.x ones or not. We detect that and throw
                        // appropriate exception.
                        throw new IndexFormatTooOldException("segment " + segment.Name + " in resource " + input, segment.Version);
                    }
                }
                else
                {
                    // Could be a 3.0 - try to open the doc stores - if it fails, it's a
                    // 2.x segment, and an IndexFormatTooOldException will be thrown,
                    // which is what we want.
                    Directory probeDir = directory;
                    bool hasDocStoreOffset = Lucene3xSegmentInfoFormat.GetDocStoreOffset(segment) != -1;
                    if (hasDocStoreOffset)
                    {
                        if (Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(segment))
                        {
                            string storeFile = IndexFileNames.SegmentFileName(Lucene3xSegmentInfoFormat.GetDocStoreSegment(segment), "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION);
                            probeDir = new CompoundFileDirectory(directory, storeFile, IOContext.READONCE, false);
                        }
                    }
                    else if (segment.UseCompoundFile)
                    {
                        string compoundFile = IndexFileNames.SegmentFileName(segment.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION);
                        probeDir = new CompoundFileDirectory(directory, compoundFile, IOContext.READONCE, false);
                    }

                    try
                    {
                        Lucene3xStoredFieldsReader.CheckCodeVersion(probeDir, Lucene3xSegmentInfoFormat.GetDocStoreSegment(segment));
                    }
                    finally
                    {
                        // Only dispose a directory this method opened itself.
                        if (probeDir != directory)
                        {
                            probeDir.Dispose();
                        }
                    }

                    // Above call succeeded, so it's a 3.0 segment. Upgrade it so the next
                    // time the segment is read, its version won't be null and we won't
                    // need to open FieldsReader every time for each such segment.
                    segment.Version = "3.0";
                }

                infos.Add(commitInfo);
                remaining--;
            }

            infos.UserData = input.ReadStringStringMap();
        }
        /// <summary>
        /// Computes, for each of the <paramref name="numDocs"/> documents beginning
        /// at <paramref name="startDocID"/>, the byte length of its tvd and tvf
        /// entries. Used for bulk copying when merging segments whose field
        /// numbers are congruent. On return the tvf &amp; tvd streams are
        /// positioned at <paramref name="startDocID"/>.
        /// </summary>
        /// <param name="tvdLengths">Receives one tvd length per document.</param>
        /// <param name="tvfLengths">Receives one tvf length per document.</param>
        /// <param name="startDocID">First document of the run.</param>
        /// <param name="numDocs">Number of documents in the run.</param>
        internal void RawDocs(int[] tvdLengths, int[] tvfLengths, int startDocID, int numDocs)
        {
            if (Tvx == null)
            {
                // No term vector index is open: report every length as zero.
                CollectionsHelper.Fill(tvdLengths, 0);
                CollectionsHelper.Fill(tvfLengths, 0);
                return;
            }

            SeekTvx(startDocID);

            long prevTvd = Tvx.ReadLong();
            Tvd.Seek(prevTvd);

            long prevTvf = Tvx.ReadLong();
            Tvf.Seek(prevTvf);

            for (int i = 0; i < numDocs; i++)
            {
                // A document's length is the distance to the start of the next one.
                int nextDoc = startDocID + i + 1;
                Debug.Assert(nextDoc <= NumTotalDocs);

                long nextTvd;
                long nextTvf;
                if (nextDoc < NumTotalDocs)
                {
                    nextTvd = Tvx.ReadLong();
                    nextTvf = Tvx.ReadLong();
                }
                else
                {
                    // No following document: the entry runs to the end of each file.
                    nextTvd = Tvd.Length();
                    nextTvf = Tvf.Length();
                    Debug.Assert(i == numDocs - 1);
                }

                tvdLengths[i] = (int)(nextTvd - prevTvd);
                tvfLengths[i] = (int)(nextTvf - prevTvf);
                prevTvd = nextTvd;
                prevTvf = nextTvf;
            }
        }
        /// <summary>
        /// Populates <paramref name="infos"/> from a legacy segments file: the
        /// version, the counter, every segment entry, and finally the user-data map.
        /// </summary>
        /// <remarks>
        /// A segment with no recorded version is probed with
        /// <c>Lucene3xStoredFieldsReader.CheckCodeVersion</c>; when that call
        /// returns normally the segment is stamped "3.0". A segment recorded as
        /// "2.x" is rejected.
        /// </remarks>
        /// <param name="infos">Receives the values read.</param>
        /// <param name="directory">Directory the segments live in.</param>
        /// <param name="input">Stream to read from.</param>
        /// <param name="format">Format value forwarded to the per-segment reader.</param>
        /// <exception cref="IndexFormatTooOldException">A segment's recorded version is "2.x".</exception>
        public static void ReadLegacyInfos(SegmentInfos infos, Directory directory, IndexInput input, int format)
        {
            infos.Version = input.ReadLong(); // read version
            infos.Counter = input.ReadInt(); // read counter
            Lucene3xSegmentInfoReader segmentReader = new Lucene3xSegmentInfoReader();

            int remaining = input.ReadInt(); // number of segmentInfos that follow
            while (remaining > 0)
            {
                SegmentCommitInfo commitInfo = segmentReader.ReadLegacySegmentInfo(directory, format, input);
                SegmentInfo segment = commitInfo.Info;

                if (segment.Version != null)
                {
                    if (segment.Version.Equals("2.x"))
                    {
                        // If it's a 3x index touched by 3.1+ code, then segments record their
                        // version, whether they are 2.x ones or not. We detect that and throw
                        // appropriate exception.
                        throw new IndexFormatTooOldException("segment " + segment.Name + " in resource " + input, segment.Version);
                    }
                }
                else
                {
                    // Could be a 3.0 - try to open the doc stores - if it fails, it's a
                    // 2.x segment, and an IndexFormatTooOldException will be thrown,
                    // which is what we want.
                    Directory probeDir = directory;
                    bool hasDocStoreOffset = Lucene3xSegmentInfoFormat.GetDocStoreOffset(segment) != -1;
                    if (hasDocStoreOffset)
                    {
                        if (Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(segment))
                        {
                            string storeFile = IndexFileNames.SegmentFileName(Lucene3xSegmentInfoFormat.GetDocStoreSegment(segment), "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION);
                            probeDir = new CompoundFileDirectory(directory, storeFile, IOContext.READONCE, false);
                        }
                    }
                    else if (segment.UseCompoundFile)
                    {
                        string compoundFile = IndexFileNames.SegmentFileName(segment.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION);
                        probeDir = new CompoundFileDirectory(directory, compoundFile, IOContext.READONCE, false);
                    }

                    try
                    {
                        Lucene3xStoredFieldsReader.CheckCodeVersion(probeDir, Lucene3xSegmentInfoFormat.GetDocStoreSegment(segment));
                    }
                    finally
                    {
                        // Only dispose a directory this method opened itself.
                        if (probeDir != directory)
                        {
                            probeDir.Dispose();
                        }
                    }

                    // Above call succeeded, so it's a 3.0 segment. Upgrade it so the next
                    // time the segment is read, its version won't be null and we won't
                    // need to open FieldsReader every time for each such segment.
                    segment.Version = "3.0";
                }

                infos.Add(commitInfo);
                remaining--;
            }

            infos.UserData = input.ReadStringStringMap();
        }
예제 #12
0
        /// <summary>
        /// Reads one numeric metadata entry from <paramref name="meta"/>: offset,
        /// count, missing offset, the missing-bytes value when a missing offset is
        /// present, and the byte width.
        /// </summary>
        /// <param name="meta">Metadata stream positioned at the entry.</param>
        /// <returns>The populated entry.</returns>
        private static NumericEntry ReadNumericEntry(IndexInput meta)
        {
            var entry = new NumericEntry();

            entry.offset = meta.ReadLong();
            entry.count = meta.ReadInt();
            entry.missingOffset = meta.ReadLong();

            // A missing offset of -1 means no missing-bytes value was written.
            bool hasMissing = entry.missingOffset != -1;
            entry.missingBytes = hasMissing ? meta.ReadLong() : 0;

            entry.byteWidth = meta.ReadByte();
            return entry;
        }
예제 #13
0
        /// <summary>
        /// Reads one binary metadata entry from <paramref name="meta"/>: offset,
        /// byte count, value count, missing offset, and the missing-bytes value
        /// when a missing offset is present.
        /// </summary>
        /// <param name="meta">Metadata stream positioned at the entry.</param>
        /// <returns>The populated entry.</returns>
        private BinaryEntry ReadBinaryEntry(IndexInput meta)
        {
            // Initializer members are evaluated top to bottom, matching the on-disk order.
            var entry = new BinaryEntry
            {
                offset = meta.ReadLong(),
                numBytes = meta.ReadInt(),
                count = meta.ReadInt(),
                missingOffset = meta.ReadLong()
            };

            // A missing offset of -1 means no missing-bytes value was written.
            bool hasMissing = entry.missingOffset != -1;
            entry.missingBytes = hasMissing ? meta.ReadLong() : 0;

            return entry;
        }
예제 #14
0
        /// <summary>
        /// Reads the value of one stored field from the fields stream and hands it
        /// to <paramref name="visitor"/> through the callback matching its type.
        /// </summary>
        /// <param name="visitor">Receives the decoded value.</param>
        /// <param name="info">Field the value belongs to.</param>
        /// <param name="bits">Flag bits selecting numeric type or binary/string payload.</param>
        /// <exception cref="CorruptIndexException">The numeric type bits are not a known type.</exception>
        private void ReadField(StoredFieldVisitor visitor, FieldInfo info, int bits)
        {
            int numericType = bits & FIELD_IS_NUMERIC_MASK;

            if (numericType == 0)
            {
                // Not numeric: a length-prefixed byte payload, either binary or UTF-8 text.
                var payload = new byte[FieldsStream.ReadVInt()];
                FieldsStream.ReadBytes(payload, 0, payload.Length);

                bool isBinary = (bits & FIELD_IS_BINARY) != 0;
                if (isBinary)
                {
                    visitor.BinaryField(info, payload);
                }
                else
                {
                    visitor.StringField(info, IOUtils.CHARSET_UTF_8.GetString(payload));
                }
                return;
            }

            switch (numericType)
            {
                case FIELD_IS_NUMERIC_INT:
                    visitor.IntField(info, FieldsStream.ReadInt());
                    break;

                case FIELD_IS_NUMERIC_LONG:
                    visitor.LongField(info, FieldsStream.ReadLong());
                    break;

                case FIELD_IS_NUMERIC_FLOAT:
                    // Floats are read as their raw 32-bit pattern.
                    visitor.FloatField(info, Number.IntBitsToFloat(FieldsStream.ReadInt()));
                    break;

                case FIELD_IS_NUMERIC_DOUBLE:
                    // Doubles are read as their raw 64-bit pattern.
                    visitor.DoubleField(info, BitConverter.Int64BitsToDouble(FieldsStream.ReadLong()));
                    break;

                default:
                    throw new CorruptIndexException("Invalid numeric type: " + numericType.ToString("x"));
            }
        }
예제 #15
0
        /// <summary>
        /// Reads one numeric doc-values metadata entry from <paramref name="meta"/>.
        /// The common fields are read first; the format then decides which extra
        /// fields follow.
        /// </summary>
        /// <param name="meta">Metadata stream positioned at the entry.</param>
        /// <returns>The populated entry.</returns>
        /// <exception cref="Exception">
        /// The format is unknown, or a TABLE_COMPRESSED entry has a count above
        /// <c>int.MaxValue</c> or more than 256 distinct values.
        /// </exception>
        internal static NumericEntry ReadNumericEntry(IndexInput meta)
        {
            // Initializer members are evaluated top to bottom, matching the on-disk order.
            NumericEntry entry = new NumericEntry
            {
                Format = meta.ReadVInt(),
                MissingOffset = meta.ReadLong(),
                PackedIntsVersion = meta.ReadVInt(),
                Offset = meta.ReadLong(),
                Count = meta.ReadVLong(),
                BlockSize = meta.ReadVInt()
            };

            if (entry.Format == Lucene45DocValuesConsumer.GCD_COMPRESSED)
            {
                entry.MinValue = meta.ReadLong();
                entry.Gcd = meta.ReadLong();
            }
            else if (entry.Format == Lucene45DocValuesConsumer.TABLE_COMPRESSED)
            {
                if (entry.Count > int.MaxValue)
                {
                    throw new Exception("Cannot use TABLE_COMPRESSED with more than MAX_VALUE values, input=" + meta);
                }

                int distinct = meta.ReadVInt();
                if (distinct > 256)
                {
                    throw new Exception("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + meta);
                }

                // The lookup table follows as one long per distinct value.
                entry.Table = new long[distinct];
                int slot = 0;
                while (slot < distinct)
                {
                    entry.Table[slot] = meta.ReadLong();
                    slot++;
                }
            }
            else if (entry.Format != Lucene45DocValuesConsumer.DELTA_COMPRESSED)
            {
                // DELTA_COMPRESSED carries no extra fields; anything else is unknown.
                throw new Exception("Unknown format: " + entry.Format + ", input=" + meta);
            }

            return entry;
        }
예제 #16
0
        /// <summary>
        /// Checks the codec header of <paramref name="input"/> and, for versions
        /// older than VERSION_APPEND_ONLY, reads the directory offset that
        /// follows it into <c>_dirOffset</c>.
        /// </summary>
        /// <param name="input">Stream to read the header from.</param>
        /// <returns>The version reported by the header check.</returns>
        private int ReadHeader(IndexInput input)
        {
            int codecVersion = CodecUtil.CheckHeader(
                input,
                VariableGapTermsIndexWriter.CODEC_NAME,
                VariableGapTermsIndexWriter.VERSION_START,
                VariableGapTermsIndexWriter.VERSION_CURRENT);

            bool offsetInHeader = codecVersion < VariableGapTermsIndexWriter.VERSION_APPEND_ONLY;
            if (offsetInHeader)
            {
                _dirOffset = input.ReadLong();
            }

            return codecVersion;
        }
예제 #17
0
        /// <summary>
        /// Opens the compound file <paramref name="name"/> in <paramref name="dir"/>
        /// and reads its table of contents into <c>entries</c>. Each entry's
        /// length is derived from the offset of the entry that follows it; the
        /// last entry extends to the end of the stream.
        /// </summary>
        /// <param name="dir">Directory holding the compound file.</param>
        /// <param name="name">Name of the compound file.</param>
        /// <param name="readBufferSize">Buffer size used when opening the stream.</param>
        public CompoundFileReader(Directory dir, System.String name, int readBufferSize)
        {
            directory           = dir;
            fileName            = name;
            this.readBufferSize = readBufferSize;

            bool initialized = false;

            try
            {
                stream = dir.OpenInput(name, readBufferSize);

                // read the directory and init files
                FileEntry previous = null;
                for (int remaining = stream.ReadVInt(); remaining > 0; remaining--)
                {
                    long          start = stream.ReadLong();
                    System.String id    = stream.ReadString();

                    if (previous != null)
                    {
                        // The previous entry ends where this one begins.
                        previous.length = start - previous.offset;
                    }

                    var current = new FileEntry();
                    current.offset = start;
                    entries[id] = current;
                    previous = current;
                }

                // The final entry runs to the end of the stream.
                if (previous != null)
                {
                    previous.length = stream.Length() - previous.offset;
                }

                initialized = true;
            }
            finally
            {
                // On failure, close the stream best-effort so the original
                // exception is the one that reaches the caller.
                if (!initialized && (stream != null))
                {
                    try
                    {
                        stream.Close();
                    }
                    catch (System.IO.IOException)
                    {
                    }
                }
            }
        }
예제 #18
0
 /// <summary>
 /// Positions <paramref name="in"/> at the directory: reads the long stored
 /// 8 bytes before the end of the file (or 8 bytes before the footer for
 /// TERMS_VERSION_CHECKSUM and later) and seeks to that value.
 /// </summary>
 private void SeekDir(IndexInput @in)
 {
     long pointerPos = version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM
         ? @in.Length() - CodecUtil.FooterLength() - 8
         : @in.Length() - 8;

     @in.Seek(pointerPos);
     long dirStart = @in.ReadLong();
     @in.Seek(dirStart);
 }
예제 #19
0
        /// <summary>
        /// Creates an enumerator over <paramref name="i"/> and reads its header.
        /// A non-negative first int is treated as the size of an original-format
        /// file; a negative one is a format number followed by the size and the
        /// index/skip settings.
        /// </summary>
        /// <param name="i">Stream to read terms from.</param>
        /// <param name="fis">Field infos stored on the enumerator.</param>
        /// <param name="isi">Stored as the <c>IsIndex</c> flag.</param>
        /// <exception cref="IndexFormatTooOldException">The format is greater than FORMAT_MINIMUM.</exception>
        /// <exception cref="IndexFormatTooNewException">The format is less than FORMAT_CURRENT.</exception>
        public SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi)
        {
            Input         = i;
            FieldInfos    = fis;
            IsIndex       = isi;
            MaxSkipLevels = 1; // use single-level skip lists for formats > -3

            int header = Input.ReadInt();

            if (header >= 0)
            {
                // original-format file, without explicit format version number
                Format = 0;
                Size   = header;

                // back-compatible settings
                IndexInterval = 128;
                SkipInterval  = int.MaxValue; // switch off skipTo optimization
                return;
            }

            // we have a format version number
            Format = header;

            // check that it is a format we can understand
            if (Format > FORMAT_MINIMUM)
            {
                throw new IndexFormatTooOldException(Input, Format, FORMAT_MINIMUM, FORMAT_CURRENT);
            }
            if (Format < FORMAT_CURRENT)
            {
                throw new IndexFormatTooNewException(Input, Format, FORMAT_MINIMUM, FORMAT_CURRENT);
            }

            Size = Input.ReadLong(); // read the size

            IndexInterval = Input.ReadInt();
            SkipInterval  = Input.ReadInt();
            MaxSkipLevels = Input.ReadInt();
            Debug.Assert(IndexInterval > 0, "indexInterval=" + IndexInterval + " is negative; must be > 0");
            Debug.Assert(SkipInterval > 0, "skipInterval=" + SkipInterval + " is negative; must be > 0");
        }
        /// <summary>
        /// Loads a FLOATS field whose values are 8 bytes wide: checks the codec
        /// header and value size, then reads one long per document in the segment.
        /// </summary>
        /// <param name="field">Not used by this method.</param>
        /// <param name="input">Stream whose codec header is checked first.</param>
        /// <returns>Doc values backed by the longs read.</returns>
        /// <exception cref="CorruptIndexException">The recorded value size is not 8.</exception>
        private NumericDocValues LoadDoubleField(FieldInfo field, IndexInput input)
        {
            CodecUtil.CheckHeader(input, Lucene40DocValuesFormat.FLOATS_CODEC_NAME, Lucene40DocValuesFormat.FLOATS_VERSION_START, Lucene40DocValuesFormat.FLOATS_VERSION_CURRENT);

            int bytesPerValue = input.ReadInt();
            if (bytesPerValue != 8)
            {
                throw new CorruptIndexException("invalid valueSize: " + bytesPerValue);
            }

            long[] rawBits = new long[State.SegmentInfo.DocCount];
            int slot = 0;
            while (slot < rawBits.Length)
            {
                rawBits[slot] = input.ReadLong();
                slot++;
            }

            RamBytesUsed_Renamed.AddAndGet(RamUsageEstimator.SizeOf(rawBits));
            return new NumericDocValuesAnonymousInnerClassHelper8(this, rawBits);
        }
        /// <summary>
        /// Loads a variable-length sorted bytes field. After both codec headers
        /// are checked, the value bytes are copied from <paramref name="data"/>
        /// and two packed readers (addresses, then ords) are read from
        /// <paramref name="index"/>.
        /// </summary>
        /// <param name="field">Not used by this method.</param>
        /// <param name="data">Stream holding the value bytes.</param>
        /// <param name="index">Stream holding the byte count, addresses and ords.</param>
        /// <returns>The sorted doc values, passed through <c>CorrectBuggyOrds</c>.</returns>
        private SortedDocValues LoadBytesVarSorted(FieldInfo field, IndexInput data, IndexInput index)
        {
            CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
            CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

            // The index stream states how many bytes to copy from the data stream.
            long byteCount = index.ReadLong();
            PagedBytes pages = new PagedBytes(16);
            pages.Copy(data, byteCount);
            PagedBytes.Reader valueBytes = pages.Freeze(true);

            // Read order matters: addresses come first in the stream, then ords.
            PackedInts.Reader addresses = PackedInts.GetReader(index);
            PackedInts.Reader ords = PackedInts.GetReader(index);

            // One fewer value than addresses.
            int valueCount = addresses.Size() - 1;

            RamBytesUsed_Renamed.AddAndGet(pages.RamBytesUsed() + addresses.RamBytesUsed() + ords.RamBytesUsed());

            SortedDocValues uncorrected = new SortedDocValuesAnonymousInnerClassHelper2(this, valueBytes, addresses, ords, valueCount);
            return CorrectBuggyOrds(uncorrected);
        }
        /// <summary>
        /// Loads a variable-length dereferenced bytes field from its "dat" and
        /// "idx" files. Both files are opened, header-checked, read to the end
        /// and closed before this method returns.
        /// </summary>
        /// <param name="field">Field whose number selects the file names.</param>
        /// <returns>Binary doc values backed by the copied bytes and the packed reader.</returns>
        private BinaryDocValues LoadBytesVarDeref(FieldInfo field)
        {
            // Both file names share the "<segment>_<fieldNumber>" stem.
            string     fileStem  = State.SegmentInfo.Name + "_" + Convert.ToString(field.Number);
            string     dataName  = IndexFileNames.SegmentFileName(fileStem, SegmentSuffix, "dat");
            string     indexName = IndexFileNames.SegmentFileName(fileStem, SegmentSuffix, "idx");
            IndexInput data      = null;
            IndexInput index     = null;
            bool       completed = false;

            try
            {
                data = Dir.OpenInput(dataName, State.Context);
                CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);
                index = Dir.OpenInput(indexName, State.Context);
                CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);

                // The index stream states how many bytes to copy from the data stream.
                long       byteCount = index.ReadLong();
                PagedBytes pages     = new PagedBytes(16);
                pages.Copy(data, byteCount);
                PagedBytes.Reader valueBytes = pages.Freeze(true);
                PackedInts.Reader addresses  = PackedInts.GetReader(index);

                CodecUtil.CheckEOF(data);
                CodecUtil.CheckEOF(index);

                RamBytesUsed_Renamed.AddAndGet(pages.RamBytesUsed() + addresses.RamBytesUsed());
                completed = true;
                return new BinaryDocValuesAnonymousInnerClassHelper4(this, valueBytes, addresses);
            }
            finally
            {
                // On failure use the close variant meant for exception handling;
                // on success use the plain close.
                if (!completed)
                {
                    IOUtils.CloseWhileHandlingException(data, index);
                }
                else
                {
                    IOUtils.Close(data, index);
                }
            }
        }
        /// <summary>
        /// Loads a variable-length sorted bytes field. After both codec headers
        /// are checked, the value bytes are copied from <paramref name="data"/>
        /// and two packed readers (addresses, then ords) are read from
        /// <paramref name="index"/>.
        /// </summary>
        /// <param name="field">Not used by this method.</param>
        /// <param name="data">Stream holding the value bytes.</param>
        /// <param name="index">Stream holding the byte count, addresses and ords.</param>
        /// <returns>The sorted doc values, passed through <c>CorrectBuggyOrds</c>.</returns>
        private SortedDocValues LoadBytesVarSorted(FieldInfo field, IndexInput data, IndexInput index)
        {
            CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
            CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

            // The index stream states how many bytes to copy from the data stream.
            long byteCount = index.ReadLong();
            PagedBytes pages = new PagedBytes(16);
            pages.Copy(data, byteCount);
            PagedBytes.Reader valueBytes = pages.Freeze(true);

            // Read order matters: addresses come first in the stream, then ords.
            PackedInts.Reader addresses = PackedInts.GetReader(index);
            PackedInts.Reader ords = PackedInts.GetReader(index);

            // One fewer value than addresses.
            int valueCount = addresses.Size() - 1;
            RamBytesUsed_Renamed.AddAndGet(pages.RamBytesUsed() + addresses.RamBytesUsed() + ords.RamBytesUsed());

            SortedDocValues uncorrected = new SortedDocValuesAnonymousInnerClassHelper2(this, valueBytes, addresses, ords, valueCount);
            return CorrectBuggyOrds(uncorrected);
        }
예제 #24
0
 /// <summary>
 /// Positions <paramref name="input"/> at the directory offset.
 /// </summary>
 /// <remarks>
 /// For versions at or above <c>VERSION_CHECKSUM</c> or <c>VERSION_APPEND_ONLY</c> the
 /// offset is re-read from an 8-byte long near the end of the file and the supplied
 /// <paramref name="dirOffset"/> is ignored; otherwise the supplied value is used directly.
 /// </remarks>
 /// <param name="input">Input to position.</param>
 /// <param name="dirOffset">Directory offset to use when the file does not store one in its tail.</param>
 protected internal virtual void SeekDir(IndexInput input, long dirOffset)
 {
     long pointerPosition;
     if (Version >= BlockTreeTermsWriter.VERSION_CHECKSUM)
     {
         // The offset sits immediately before the codec footer.
         pointerPosition = input.Length() - CodecUtil.FooterLength() - 8;
     }
     else if (Version >= BlockTreeTermsWriter.VERSION_APPEND_ONLY)
     {
         // No footer: the offset is the last 8 bytes of the file.
         pointerPosition = input.Length() - 8;
     }
     else
     {
         // The caller already knows the offset; nothing to look up.
         input.Seek(dirOffset);
         return;
     }

     input.Seek(pointerPosition);
     long storedOffset = input.ReadLong();
     input.Seek(storedOffset);
 }
 /// <summary>
 /// Validates the variable-gap terms index header and returns the version found in it.
 /// </summary>
 /// <param name="input">Input positioned at the start of the header.</param>
 /// <returns>The version reported by <c>CodecUtil.CheckHeader</c>.</returns>
 private int ReadHeader(IndexInput input)
 {
     int fileVersion = CodecUtil.CheckHeader(
         input,
         VariableGapTermsIndexWriter.CODEC_NAME,
         VariableGapTermsIndexWriter.VERSION_START,
         VariableGapTermsIndexWriter.VERSION_CURRENT);

     // Versions before VERSION_APPEND_ONLY store the directory offset right after the header.
     bool offsetFollowsHeader = fileVersion < VariableGapTermsIndexWriter.VERSION_APPEND_ONLY;
     if (offsetFollowsHeader)
     {
         _dirOffset = input.ReadLong();
     }

     return fileVersion;
 }
예제 #26
0
 /// <summary>
 /// Seeks <paramref name="input"/> to the directory, whose offset is stored as the
 /// final 8-byte long of the file.
 /// </summary>
 /// <param name="input">Input to position.</param>
 /// <param name="dirOffset">Not used; the offset is always re-read from the end of the file.</param>
 protected override void SeekDir(IndexInput input, long dirOffset)
 {
     // sizeof(long) is already a byte count (8). The former "sizeof(long)/8" evaluated to 1,
     // which positioned the input one byte before EOF so the 8-byte ReadLong() below ran
     // past the end of the file.
     input.Seek(input.Length() - sizeof(long));
     long offset = input.ReadLong();
     input.Seek(offset);
 }
        /// <summary>
        /// Reads one segment entry from a legacy 3.x segments_N stream and converts it into a
        /// <see cref="SegmentCommitInfo"/>.
        /// </summary>
        /// <remarks>
        /// Fields are consumed strictly in stream order, so the sequence of <c>input.Read*</c>
        /// calls below must not be reordered. Doc-store settings and norm generations are
        /// copied into the attributes map that is handed to the new <see cref="SegmentInfo"/>.
        /// </remarks>
        /// <param name="dir">Directory of the segment; also passed to <c>AddIfExists</c> when collecting file names.</param>
        /// <param name="format">Format number of the segments file; must lie between FORMAT_3_1 and FORMAT_DIAGNOSTICS inclusive.</param>
        /// <param name="input">Input positioned at the start of the segment entry.</param>
        /// <returns>Commit info wrapping the new <see cref="SegmentInfo"/> together with the parsed delete count and delete generation.</returns>
        /// <exception cref="IndexFormatTooOldException">If <paramref name="format"/> is greater than FORMAT_DIAGNOSTICS.</exception>
        /// <exception cref="IndexFormatTooNewException">If <paramref name="format"/> is less than FORMAT_3_1.</exception>
        private SegmentCommitInfo ReadLegacySegmentInfo(Directory dir, int format, IndexInput input)
        {
            // check that it is a format we can understand
            // (a larger format number is reported as "too old", a smaller one as "too new")
            if (format > Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS)
            {
                throw new IndexFormatTooOldException(input, format, Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1);
            }
            if (format < Lucene3xSegmentInfoFormat.FORMAT_3_1)
            {
                throw new IndexFormatTooNewException(input, format, Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1);
            }
            // Given the range checks above, this condition holds only when format == FORMAT_3_1;
            // for any other accepted format no version string is present in the stream.
            string version;
            if (format <= Lucene3xSegmentInfoFormat.FORMAT_3_1)
            {
                version = input.ReadString();
            }
            else
            {
                version = null;
            }

            string name = input.ReadString();

            int docCount = input.ReadInt();
            long delGen = input.ReadLong();

            int docStoreOffset = input.ReadInt();
            IDictionary<string, string> attributes = new Dictionary<string, string>();

            // parse the docstore stuff and shove it into attributes
            // (-1 means no doc-store name/flag follows; the segment's own name is used instead)
            string docStoreSegment;
            bool docStoreIsCompoundFile;
            if (docStoreOffset != -1)
            {
                docStoreSegment = input.ReadString();
                docStoreIsCompoundFile = input.ReadByte() == SegmentInfo.YES;
                attributes[Lucene3xSegmentInfoFormat.DS_OFFSET_KEY] = Convert.ToString(docStoreOffset);
                attributes[Lucene3xSegmentInfoFormat.DS_NAME_KEY] = docStoreSegment;
                attributes[Lucene3xSegmentInfoFormat.DS_COMPOUND_KEY] = Convert.ToString(docStoreIsCompoundFile);
            }
            else
            {
                docStoreSegment = name;
                docStoreIsCompoundFile = false;
            }

            // pre-4.0 indexes write a byte if there is a single norms file
            byte b = input.ReadByte();

            //System.out.println("version=" + version + " name=" + name + " docCount=" + docCount + " delGen=" + delGen + " dso=" + docStoreOffset + " dss=" + docStoreSegment + " dssCFs=" + docStoreIsCompoundFile + " b=" + b + " format=" + format);

            // The byte is only sanity-checked (debug builds); its value is not used afterwards.
            Debug.Assert(1 == b, "expected 1 but was: " + b + " format: " + format);
            // numNormGen == SegmentInfo.NO means no generations follow; otherwise that many longs
            // follow, stored here keyed by their position in the stream.
            int numNormGen = input.ReadInt();
            IDictionary<int, long> normGen;
            if (numNormGen == SegmentInfo.NO)
            {
                normGen = null;
            }
            else
            {
                normGen = new Dictionary<int, long>();
                for (int j = 0; j < numNormGen; j++)
                {
                    normGen[j] = input.ReadLong();
                }
            }
            bool isCompoundFile = input.ReadByte() == SegmentInfo.YES;

            int delCount = input.ReadInt();
            Debug.Assert(delCount <= docCount);

            // hasProx is not used below; the byte still has to be consumed to stay aligned with the stream.
            bool hasProx = input.ReadByte() == 1;

            IDictionary<string, string> diagnostics = input.ReadStringStringMap();

            if (format <= Lucene3xSegmentInfoFormat.FORMAT_HAS_VECTORS)
            {
                // NOTE: unused -- read only to advance past the byte
                int hasVectors = input.ReadByte();
            }

            // Replicate logic from 3.x's SegmentInfo.files():
            // a compound segment contributes its single compound file; otherwise each per-segment
            // file is offered to AddIfExists.
            ISet<string> files = new HashSet<string>();
            if (isCompoundFile)
            {
                files.Add(IndexFileNames.SegmentFileName(name, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
            }
            else
            {
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xFieldInfosReader.FIELD_INFOS_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.FREQ_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.PROX_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.TERMS_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xNormsProducer.NORMS_EXTENSION));
            }

            // Stored-fields and term-vector files: named after the doc-store segment when one was
            // serialized, otherwise after this segment (and only when it is not a compound file).
            // The stored-fields files are added unconditionally; the vector files go through AddIfExists.
            if (docStoreOffset != -1)
            {
                if (docStoreIsCompoundFile)
                {
                    files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION));
                }
                else
                {
                    files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
                    files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION));
                    AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION));
                    AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
                    AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
                }
            }
            else if (!isCompoundFile)
            {
                files.Add(IndexFileNames.SegmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
                files.Add(IndexFileNames.SegmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
            }

            // parse the normgen stuff and shove it into attributes
            if (normGen != null)
            {
                attributes[Lucene3xSegmentInfoFormat.NORMGEN_KEY] = Convert.ToString(numNormGen);
                foreach (KeyValuePair<int, long> ent in normGen)
                {
                    long gen = ent.Value;
                    if (gen >= SegmentInfo.YES)
                    {
                        // Definitely a separate norm file, with generation:
                        files.Add(IndexFileNames.FileNameFromGeneration(name, "s" + ent.Key, gen));
                        attributes[Lucene3xSegmentInfoFormat.NORMGEN_PREFIX + ent.Key] = Convert.ToString(gen);
                    }
                    else if (gen == SegmentInfo.NO)
                    {
                        // No separate norm
                    }
                    else
                    {
                        // We should have already hit indexformat too old exception
                        Debug.Assert(false);
                    }
                }
            }

            // The attributes map is wrapped as unmodifiable before being handed over.
            SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, null, diagnostics, CollectionsHelper.UnmodifiableMap(attributes));
            info.Files = files;

            SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, delCount, delGen, -1);
            return infoPerCommit;
        }
예제 #28
0
        /// <summary>
        /// Builds a SegmentInfo by deserializing a previously saved entry from
        /// <paramref name="input"/>.
        /// </summary>
        /// <remarks>
        /// Which fields are present in the stream depends on <paramref name="format"/>: each
        /// optional field is read only when <c>format</c> is less than or equal to the
        /// corresponding <c>SegmentInfos.FORMAT_*</c> constant, and otherwise gets a default.
        /// </remarks>
        /// <param name="dir">directory to load from</param>
        /// <param name="format">format of the segments info file</param>
        /// <param name="input">input handle to read segment info from</param>
        internal SegmentInfo(Directory dir, int format, IndexInput input)
        {
            this.dir = dir;
            name     = input.ReadString();
            docCount = input.ReadInt();

            if (format > SegmentInfos.FORMAT_LOCKLESS)
            {
                // Nothing beyond name and doc count is serialized for this format;
                // fall back to defaults for every remaining field.
                delGen                 = CHECK_DIR;
                normGen                = null;
                isCompoundFile         = (sbyte)(CHECK_DIR);
                preLockless            = true;
                hasSingleNormFile      = false;
                docStoreOffset         = -1;
                docStoreIsCompoundFile = false;
                docStoreSegment        = null;
                delCount               = -1;
                hasProx                = true;
                diagnostics            = new Dictionary <string, string>();
                return;
            }

            delGen = input.ReadLong();

            // An offset of -1 (read or defaulted) means the segment's own name is the doc store.
            docStoreOffset = format <= SegmentInfos.FORMAT_SHARED_DOC_STORE ? input.ReadInt() : -1;
            if (docStoreOffset == -1)
            {
                docStoreSegment        = name;
                docStoreIsCompoundFile = false;
            }
            else
            {
                docStoreSegment        = input.ReadString();
                docStoreIsCompoundFile = (1 == input.ReadByte());
            }

            // Short-circuit keeps the byte unread when the format does not carry it.
            hasSingleNormFile = format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE && 1 == input.ReadByte();

            int normGenCount = input.ReadInt();
            if (normGenCount == NO)
            {
                normGen = null;
            }
            else
            {
                normGen = new long[normGenCount];
                int slot = 0;
                while (slot < normGenCount)
                {
                    normGen[slot] = input.ReadLong();
                    slot++;
                }
            }

            isCompoundFile = (sbyte)input.ReadByte();
            preLockless    = (isCompoundFile == CHECK_DIR);

            if (format > SegmentInfos.FORMAT_DEL_COUNT)
            {
                delCount = -1;
            }
            else
            {
                delCount = input.ReadInt();
                System.Diagnostics.Debug.Assert(delCount <= docCount);
            }

            // Defaults to true when the format has no hasProx byte.
            hasProx = format > SegmentInfos.FORMAT_HAS_PROX || input.ReadByte() == 1;

            if (format > SegmentInfos.FORMAT_DIAGNOSTICS)
            {
                diagnostics = new Dictionary <string, string>();
            }
            else
            {
                diagnostics = input.ReadStringStringMap();
            }
        }
예제 #29
0
        /// <summary>
        /// Reads per-field metadata entries from <paramref name="meta"/> until a field number
        /// of -1 is encountered, storing each entry in <c>Numerics</c>, <c>Binaries</c> or
        /// <c>Fsts</c> according to its type byte.
        /// </summary>
        /// <param name="meta">Metadata input, positioned at the first field number.</param>
        /// <param name="infos">Field infos; not consulted by this method.</param>
        /// <exception cref="CorruptIndexException">On a negative field number other than -1, an unknown numeric format, or an unknown entry type.</exception>
        private void ReadFields(IndexInput meta, FieldInfos infos)
        {
            for (int fieldNumber = meta.ReadVInt(); fieldNumber != -1; fieldNumber = meta.ReadVInt())
            {
                // check should be: infos.fieldInfo(fieldNumber) != null, which incorporates negative check
                // but docvalues updates are currently buggy here (loading extra stuff, etc): LUCENE-5616
                if (fieldNumber < 0)
                {
                    // trickier to validate more: because we re-use for norms, because we use multiple entries
                    // for "composite" types like sortedset, etc.
                    throw new CorruptIndexException("Invalid field number: " + fieldNumber + ", input=" + meta);
                }

                int entryType = meta.ReadByte();
                if (entryType == NUMBER)
                {
                    var numeric = new NumericEntry();
                    numeric.Offset = meta.ReadLong();
                    numeric.Format = (sbyte)meta.ReadByte();

                    bool knownFormat = numeric.Format == DELTA_COMPRESSED
                                       || numeric.Format == TABLE_COMPRESSED
                                       || numeric.Format == GCD_COMPRESSED
                                       || numeric.Format == UNCOMPRESSED;
                    if (!knownFormat)
                    {
                        throw new CorruptIndexException("Unknown format: " + numeric.Format + ", input=" + meta);
                    }

                    // Only the compressed formats carry a packed-ints version.
                    if (numeric.Format != UNCOMPRESSED)
                    {
                        numeric.PackedIntsVersion = meta.ReadVInt();
                    }
                    Numerics[fieldNumber] = numeric;
                }
                else if (entryType == BYTES)
                {
                    var binary = new BinaryEntry
                    {
                        Offset    = meta.ReadLong(),
                        NumBytes  = meta.ReadLong(),
                        MinLength = meta.ReadVInt(),
                        MaxLength = meta.ReadVInt()
                    };

                    // Variable-length values carry two extra vints.
                    if (binary.MinLength != binary.MaxLength)
                    {
                        binary.PackedIntsVersion = meta.ReadVInt();
                        binary.BlockSize         = meta.ReadVInt();
                    }
                    Binaries[fieldNumber] = binary;
                }
                else if (entryType == FST)
                {
                    Fsts[fieldNumber] = new FSTEntry
                    {
                        Offset  = meta.ReadLong(),
                        NumOrds = meta.ReadVLong()
                    };
                }
                else
                {
                    throw new CorruptIndexException("invalid entry type: " + entryType + ", input=" + meta);
                }
            }
        }
예제 #30
0
파일: CodecUtil.cs 프로젝트: wwb/lucenenet
 /// <summary>
 /// Returns the checksum stored in the footer of <paramref name="in"/>. The footer is
 /// checked with <c>ValidateFooter</c>, but the checksum value itself is returned as read
 /// and is not compared against anything here.
 /// </summary>
 /// <param name="in">Input whose final <c>FooterLength()</c> bytes hold the footer.</param>
 /// <returns> the checksum value read from the footer </returns>
 /// <exception cref="IOException"> if the footer is invalid </exception>
 public static long RetrieveChecksum(IndexInput @in)
 {
     long footerStart = @in.Length() - FooterLength();
     @in.Seek(footerStart);
     ValidateFooter(@in);

     long storedChecksum = @in.ReadLong();
     return storedChecksum;
 }
예제 #31
0
        /// <summary>
        /// Creates a term enumerator over <paramref name="i"/> and reads the file header.
        /// </summary>
        /// <remarks>
        /// A non-negative first int means the file has no explicit format number and that int
        /// is the size; a negative first int is the format number, followed by a long size and
        /// the interval settings.
        /// </remarks>
        /// <param name="i">Input positioned at the start of the header.</param>
        /// <param name="fis">Field infos stored on the enumerator.</param>
        /// <param name="isi">Stored as the <c>isIndex</c> flag.</param>
        /// <exception cref="CorruptIndexException">If the format number is below <c>TermInfosWriter.FORMAT_CURRENT</c>.</exception>
        internal SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi)
        {
            input         = i;
            fieldInfos    = fis;
            isIndex       = isi;
            maxSkipLevels = 1;             // use single-level skip lists for formats > -3

            int header = input.ReadInt();

            if (header < 0)
            {
                // we have a format version number
                format = header;

                // check that it is a format we can understand
                if (format < TermInfosWriter.FORMAT_CURRENT)
                {
                    throw new CorruptIndexException("Unknown format version:" + format + " expected " + TermInfosWriter.FORMAT_CURRENT + " or higher");
                }

                size = input.ReadLong();                 // read the size

                if (format != -1)
                {
                    indexInterval = input.ReadInt();
                    skipInterval  = input.ReadInt();
                    if (format <= TermInfosWriter.FORMAT)
                    {
                        // this new format introduces multi-level skipping
                        maxSkipLevels = input.ReadInt();
                    }
                }
                else
                {
                    if (!isIndex)
                    {
                        indexInterval        = input.ReadInt();
                        formatM1SkipInterval = input.ReadInt();
                    }
                    // switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in
                    // skipTo implementation of these versions
                    skipInterval = int.MaxValue;
                }

                System.Diagnostics.Debug.Assert(indexInterval > 0, "indexInterval=" + indexInterval + " is negative; must be > 0");
                System.Diagnostics.Debug.Assert(skipInterval > 0, "skipInterval=" + skipInterval + " is negative; must be > 0");
            }
            else
            {
                // original-format file, without explicit format version number
                format = 0;
                size   = header;

                // back-compatible settings
                indexInterval = 128;
                skipInterval  = int.MaxValue;                // switch off skipTo optimization
            }

            bool usesPreUtf8Strings = format > TermInfosWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES;
            if (usesPreUtf8Strings)
            {
                termBuffer.SetPreUTF8Strings();
                scanBuffer.SetPreUTF8Strings();
                prevBuffer.SetPreUTF8Strings();
            }
        }
예제 #32
0
 /// <summary>
 /// Validates the terms index file header and returns its version.
 /// </summary>
 /// <param name="input">Input positioned at the start of the index header.</param>
 /// <returns>The version reported by <c>CodecUtil.CheckHeader</c>.</returns>
 protected internal virtual int ReadIndexHeader(IndexInput input)
 {
     int headerVersion = CodecUtil.CheckHeader(
         input,
         BlockTreeTermsWriter.TERMS_INDEX_CODEC_NAME,
         BlockTreeTermsWriter.VERSION_START,
         BlockTreeTermsWriter.VERSION_CURRENT);

     // Versions before VERSION_APPEND_ONLY store the index directory offset right after the header.
     bool offsetFollowsHeader = headerVersion < BlockTreeTermsWriter.VERSION_APPEND_ONLY;
     if (offsetFollowsHeader)
     {
         IndexDirOffset = input.ReadLong();
     }

     return headerVersion;
 }
 /// <summary>
 /// Loads a floats-codec field whose values are 8 bytes wide, reading one raw long per document.
 /// </summary>
 /// <param name="field">Field being loaded; not consulted by this method.</param>
 /// <param name="input">Input positioned at the codec header.</param>
 /// <returns>Numeric doc values backed by the loaded long array.</returns>
 /// <exception cref="CorruptIndexException">If the stored value size is not 8.</exception>
 private NumericDocValues LoadDoubleField(FieldInfo field, IndexInput input)
 {
     CodecUtil.CheckHeader(input, Lucene40DocValuesFormat.FLOATS_CODEC_NAME, Lucene40DocValuesFormat.FLOATS_VERSION_START, Lucene40DocValuesFormat.FLOATS_VERSION_CURRENT);

     int valueSize = input.ReadInt();
     bool isEightBytes = valueSize == 8;
     if (!isEightBytes)
     {
         throw new CorruptIndexException("invalid valueSize: " + valueSize);
     }

     // One long per document, stored as raw bits.
     long[] rawBits = new long[State.SegmentInfo.DocCount];
     int doc = 0;
     while (doc < rawBits.Length)
     {
         rawBits[doc] = input.ReadLong();
         doc++;
     }

     RamBytesUsed_Renamed.AddAndGet(RamUsageEstimator.SizeOf(rawBits));
     return new NumericDocValuesAnonymousInnerClassHelper8(this, rawBits);
 }
예제 #34
0
        /// <summary>
        /// Reads per-field metadata entries from <paramref name="meta"/> until a field number
        /// of -1 is encountered, storing each entry in <c>numerics</c>, <c>binaries</c> or
        /// <c>fsts</c> according to its type byte.
        /// </summary>
        /// <param name="meta">Metadata input, positioned at the first field number.</param>
        /// <param name="infos">Field infos; not consulted by this method.</param>
        /// <exception cref="CorruptIndexException">On an unknown numeric format or an unknown entry type.</exception>
        private void ReadFields(IndexInput meta, FieldInfos infos)
        {
            for (int fieldNumber = meta.ReadVInt(); fieldNumber != -1; fieldNumber = meta.ReadVInt())
            {
                int entryType = meta.ReadByte();
                if (entryType == NUMBER)
                {
                    var numeric = new NumericEntry();
                    numeric.offset        = meta.ReadLong();
                    numeric.missingOffset = meta.ReadLong();

                    // A missing-offset of -1 means no missing-bytes length follows.
                    numeric.missingBytes = numeric.missingOffset != -1 ? meta.ReadLong() : 0;

                    numeric.format = meta.ReadByte();
                    bool knownFormat = numeric.format == DELTA_COMPRESSED
                                       || numeric.format == TABLE_COMPRESSED
                                       || numeric.format == GCD_COMPRESSED
                                       || numeric.format == UNCOMPRESSED;
                    if (!knownFormat)
                    {
                        throw new CorruptIndexException("Unknown format: " + numeric.format + ", input=" + meta);
                    }

                    // Only the compressed formats carry a packed-ints version.
                    if (numeric.format != UNCOMPRESSED)
                    {
                        numeric.packedIntsVersion = meta.ReadVInt();
                    }
                    numerics[fieldNumber] = numeric;
                }
                else if (entryType == BYTES)
                {
                    var binary = new BinaryEntry();
                    binary.offset        = meta.ReadLong();
                    binary.numBytes      = meta.ReadLong();
                    binary.missingOffset = meta.ReadLong();

                    // A missing-offset of -1 means no missing-bytes length follows.
                    binary.missingBytes = binary.missingOffset != -1 ? meta.ReadLong() : 0;

                    binary.minLength = meta.ReadVInt();
                    binary.maxLength = meta.ReadVInt();

                    // Variable-length values carry two extra vints.
                    if (binary.minLength != binary.maxLength)
                    {
                        binary.packedIntsVersion = meta.ReadVInt();
                        binary.blockSize         = meta.ReadVInt();
                    }
                    binaries[fieldNumber] = binary;
                }
                else if (entryType == FST)
                {
                    var fst = new FSTEntry();
                    fst.offset  = meta.ReadLong();
                    fst.numOrds = meta.ReadVLong();
                    fsts[fieldNumber] = fst;
                }
                else
                {
                    throw new CorruptIndexException("invalid entry type: " + entryType + ", input=" + meta);
                }
            }
        }
예제 #35
0
        /// <summary>
        /// Creates a term enumerator over <paramref name="i"/> and reads the file header.
        /// </summary>
        /// <remarks>
        /// A non-negative first int means the file has no explicit format number and that int
        /// is the size. A negative first int is the format number; it must lie between
        /// <c>FORMAT_CURRENT</c> and <c>FORMAT_MINIMUM</c>, and is followed by the size and the
        /// index interval, skip interval and max skip levels.
        /// </remarks>
        /// <param name="i">Input positioned at the start of the header.</param>
        /// <param name="fis">Field infos stored on the enumerator.</param>
        /// <param name="isi">Stored as the <c>IsIndex</c> flag.</param>
        /// <exception cref="IndexFormatTooOldException">If the format number is greater than <c>FORMAT_MINIMUM</c>.</exception>
        /// <exception cref="IndexFormatTooNewException">If the format number is less than <c>FORMAT_CURRENT</c>.</exception>
        public SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi)
        {
            Input = i;
            FieldInfos = fis;
            IsIndex = isi;
            MaxSkipLevels = 1; // use single-level skip lists for formats > -3

            int header = Input.ReadInt();
            if (header < 0)
            {
                // we have a format version number
                Format = header;

                // check that it is a format we can understand
                if (Format > FORMAT_MINIMUM)
                {
                    throw new IndexFormatTooOldException(Input, Format, FORMAT_MINIMUM, FORMAT_CURRENT);
                }
                if (Format < FORMAT_CURRENT)
                {
                    throw new IndexFormatTooNewException(Input, Format, FORMAT_MINIMUM, FORMAT_CURRENT);
                }

                Size = Input.ReadLong(); // read the size

                IndexInterval = Input.ReadInt();
                SkipInterval = Input.ReadInt();
                MaxSkipLevels = Input.ReadInt();
                Debug.Assert(IndexInterval > 0, "indexInterval=" + IndexInterval + " is negative; must be > 0");
                Debug.Assert(SkipInterval > 0, "skipInterval=" + SkipInterval + " is negative; must be > 0");
                return;
            }

            // original-format file, without explicit format version number
            Format = 0;
            Size = header;

            // back-compatible settings
            IndexInterval = 128;
            SkipInterval = int.MaxValue; // switch off skipTo optimization
        }
예제 #36
0
        /// <summary>
        /// Reads one segment entry from a legacy 3.x segments_N stream and converts it into a
        /// <see cref="SegmentCommitInfo"/>.
        /// </summary>
        /// <remarks>
        /// Fields are consumed strictly in stream order, so the sequence of <c>input.Read*</c>
        /// calls below must not be reordered. Doc-store settings and norm generations are
        /// copied into the attributes map that is handed to the new <see cref="SegmentInfo"/>.
        /// </remarks>
        /// <param name="dir">Directory of the segment; also passed to <c>AddIfExists</c> when collecting file names.</param>
        /// <param name="format">Format number of the segments file; must lie between FORMAT_3_1 and FORMAT_DIAGNOSTICS inclusive.</param>
        /// <param name="input">Input positioned at the start of the segment entry.</param>
        /// <returns>Commit info wrapping the new <see cref="SegmentInfo"/> together with the parsed delete count and delete generation.</returns>
        /// <exception cref="IndexFormatTooOldException">If <paramref name="format"/> is greater than FORMAT_DIAGNOSTICS.</exception>
        /// <exception cref="IndexFormatTooNewException">If <paramref name="format"/> is less than FORMAT_3_1.</exception>
        private SegmentCommitInfo ReadLegacySegmentInfo(Directory dir, int format, IndexInput input)
        {
            // check that it is a format we can understand
            // (a larger format number is reported as "too old", a smaller one as "too new")
            if (format > Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS)
            {
                throw new IndexFormatTooOldException(input, format, Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1);
            }
            if (format < Lucene3xSegmentInfoFormat.FORMAT_3_1)
            {
                throw new IndexFormatTooNewException(input, format, Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1);
            }
            // Given the range checks above, the condition below holds only when
            // format == FORMAT_3_1; for any other accepted format no version string is present.
            string version;

            if (format <= Lucene3xSegmentInfoFormat.FORMAT_3_1)
            {
                version = input.ReadString();
            }
            else
            {
                version = null;
            }

            string name = input.ReadString();

            int  docCount = input.ReadInt();
            long delGen   = input.ReadLong();

            int docStoreOffset = input.ReadInt();
            IDictionary <string, string> attributes = new Dictionary <string, string>();

            // parse the docstore stuff and shove it into attributes
            // (-1 means no doc-store name/flag follows; the segment's own name is used instead)
            string docStoreSegment;
            bool   docStoreIsCompoundFile;

            if (docStoreOffset != -1)
            {
                docStoreSegment        = input.ReadString();
                docStoreIsCompoundFile = input.ReadByte() == SegmentInfo.YES;
                attributes[Lucene3xSegmentInfoFormat.DS_OFFSET_KEY]   = Convert.ToString(docStoreOffset);
                attributes[Lucene3xSegmentInfoFormat.DS_NAME_KEY]     = docStoreSegment;
                attributes[Lucene3xSegmentInfoFormat.DS_COMPOUND_KEY] = Convert.ToString(docStoreIsCompoundFile);
            }
            else
            {
                docStoreSegment        = name;
                docStoreIsCompoundFile = false;
            }

            // pre-4.0 indexes write a byte if there is a single norms file
            byte b = input.ReadByte();

            //System.out.println("version=" + version + " name=" + name + " docCount=" + docCount + " delGen=" + delGen + " dso=" + docStoreOffset + " dss=" + docStoreSegment + " dssCFs=" + docStoreIsCompoundFile + " b=" + b + " format=" + format);

            // The byte is only sanity-checked (debug builds); its value is not used afterwards.
            Debug.Assert(1 == b, "expected 1 but was: " + b + " format: " + format);
            // numNormGen == SegmentInfo.NO means no generations follow; otherwise that many longs
            // follow, stored here keyed by their position in the stream.
            int numNormGen = input.ReadInt();
            IDictionary <int, long> normGen;

            if (numNormGen == SegmentInfo.NO)
            {
                normGen = null;
            }
            else
            {
                normGen = new Dictionary <int, long>();
                for (int j = 0; j < numNormGen; j++)
                {
                    normGen[j] = input.ReadLong();
                }
            }
            bool isCompoundFile = input.ReadByte() == SegmentInfo.YES;

            int delCount = input.ReadInt();

            Debug.Assert(delCount <= docCount);

            // hasProx is not used below; the byte still has to be consumed to stay aligned with the stream.
            bool hasProx = input.ReadByte() == 1;

            IDictionary <string, string> diagnostics = input.ReadStringStringMap();

            if (format <= Lucene3xSegmentInfoFormat.FORMAT_HAS_VECTORS)
            {
                // NOTE: unused -- read only to advance past the byte
                int hasVectors = input.ReadByte();
            }

            // Replicate logic from 3.x's SegmentInfo.files():
            // a compound segment contributes its single compound file; otherwise each per-segment
            // file is offered to AddIfExists.
            ISet <string> files = new HashSet <string>();

            if (isCompoundFile)
            {
                files.Add(IndexFileNames.SegmentFileName(name, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
            }
            else
            {
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xFieldInfosReader.FIELD_INFOS_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.FREQ_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.PROX_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.TERMS_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xNormsProducer.NORMS_EXTENSION));
            }

            // Stored-fields and term-vector files: named after the doc-store segment when one was
            // serialized, otherwise after this segment (and only when it is not a compound file).
            // The stored-fields files are added unconditionally; the vector files go through AddIfExists.
            if (docStoreOffset != -1)
            {
                if (docStoreIsCompoundFile)
                {
                    files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION));
                }
                else
                {
                    files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
                    files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION));
                    AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION));
                    AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
                    AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
                }
            }
            else if (!isCompoundFile)
            {
                files.Add(IndexFileNames.SegmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
                files.Add(IndexFileNames.SegmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
                AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
            }

            // parse the normgen stuff and shove it into attributes
            if (normGen != null)
            {
                attributes[Lucene3xSegmentInfoFormat.NORMGEN_KEY] = Convert.ToString(numNormGen);
                foreach (KeyValuePair <int, long> ent in normGen)
                {
                    long gen = ent.Value;
                    if (gen >= SegmentInfo.YES)
                    {
                        // Definitely a separate norm file, with generation:
                        files.Add(IndexFileNames.FileNameFromGeneration(name, "s" + ent.Key, gen));
                        attributes[Lucene3xSegmentInfoFormat.NORMGEN_PREFIX + ent.Key] = Convert.ToString(gen);
                    }
                    else if (gen == SegmentInfo.NO)
                    {
                        // No separate norm
                    }
                    else
                    {
                        // We should have already hit indexformat too old exception
                        Debug.Assert(false);
                    }
                }
            }

            // The attributes map is wrapped as unmodifiable before being handed over.
            SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, null, diagnostics, CollectionsHelper.UnmodifiableMap(attributes));

            info.Files = files;

            SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, delCount, delGen, -1);

            return(infoPerCommit);
        }
 /// <summary>
 /// Loads a VAR_INTS doc-values field. After validating the codec header, a single
 /// header byte selects between the fixed 64-bit layout and the packed layout.
 /// </summary>
 /// <param name="field">The field being loaded; not consulted by this method.</param>
 /// <param name="input">Stream to read the codec header, the header byte and the values from.</param>
 /// <returns>A <see cref="NumericDocValues"/> wrapper around the values that were read.</returns>
 /// <exception cref="CorruptIndexException">
 /// The header byte is neither <c>VAR_INTS_FIXED_64</c> nor <c>VAR_INTS_PACKED</c>.
 /// </exception>
 private NumericDocValues LoadVarIntsField(FieldInfo field, IndexInput input)
 {
     CodecUtil.CheckHeader(input, Lucene40DocValuesFormat.VAR_INTS_CODEC_NAME, Lucene40DocValuesFormat.VAR_INTS_VERSION_START, Lucene40DocValuesFormat.VAR_INTS_VERSION_CURRENT);
     byte encoding = input.ReadByte();

     if (encoding == Lucene40DocValuesFormat.VAR_INTS_FIXED_64)
     {
         // Fixed layout: one raw long per document, read eagerly into an array.
         long[] decoded = new long[State.SegmentInfo.DocCount];
         for (int docID = 0; docID < decoded.Length; docID++)
         {
             decoded[docID] = input.ReadLong();
         }
         RamBytesUsed_Renamed.AddAndGet(RamUsageEstimator.SizeOf(decoded));
         return new NumericDocValuesAnonymousInnerClassHelper(this, decoded);
     }

     if (encoding != Lucene40DocValuesFormat.VAR_INTS_PACKED)
     {
         throw new CorruptIndexException("invalid VAR_INTS header byte: " + encoding + " (resource=" + input + ")");
     }

     // Packed layout: two longs (minimum and default value) precede the packed reader.
     long minimum = input.ReadLong();
     long fallback = input.ReadLong();
     PackedInts.Reader packed = PackedInts.GetReader(input);
     RamBytesUsed_Renamed.AddAndGet(packed.RamBytesUsed());
     return new NumericDocValuesAnonymousInnerClassHelper2(this, minimum, fallback, packed);
 }
        /// <summary>
        /// Seeks <paramref name="input"/> to the directory offset. For versions at or
        /// above <c>VERSION_APPEND_ONLY</c> the offset is not taken from the argument
        /// but read back from a long stored near the end of the file.
        /// </summary>
        /// <param name="input">Stream to reposition.</param>
        /// <param name="dirOffset">
        /// Offset to seek to; used as-is only when the version is below
        /// <c>VERSION_APPEND_ONLY</c>, otherwise overwritten by the stored value.
        /// </param>
        private void SeekDir(IndexInput input, long dirOffset)
        {
            bool hasFooter = _version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM;

            if (hasFooter || _version >= FixedGapTermsIndexWriter.VERSION_APPEND_ONLY)
            {
                // The stored offset occupies the last 8 bytes of the file, or the
                // 8 bytes immediately before the codec footer when one is present.
                long pointerPosition = input.Length() - 8;
                if (hasFooter)
                {
                    pointerPosition -= CodecUtil.FooterLength();
                }

                input.Seek(pointerPosition);
                dirOffset = input.ReadLong();
            }

            input.Seek(dirOffset);
        }
예제 #39
0
            /// <summary>
            /// Finds the segments_N file to use and runs <c>DoBody</c> on it, retrying
            /// while each attempt makes forward progress on the generation being tried.
            /// </summary>
            /// <param name="commit">
            /// When non-null, <c>DoBody</c> is run directly on the commit's segments file
            /// and no generation search is performed.
            /// </param>
            /// <returns>The value returned by the successful <c>DoBody</c> call.</returns>
            /// <exception cref="System.IO.FileNotFoundException">
            /// Neither the directory listing nor segments.gen yielded a generation.
            /// </exception>
            /// <exception cref="System.IO.IOException">
            /// <paramref name="commit"/> belongs to a different directory, or the same
            /// generation was tried twice in a row without success (the first
            /// exception caught from <c>DoBody</c> is then thrown).
            /// </exception>
            public System.Object Run(IndexCommit commit)
            {
                if (commit != null)
                {
                    if (directory != commit.Directory)
                    {
                        throw new System.IO.IOException("the specified commit does not match the specified Directory");
                    }
                    return(DoBody(commit.SegmentsFileName));
                }

                System.String segmentFileName   = null;
                long          lastGen           = -1;
                long          gen               = 0;
                int           genLookaheadCount = 0;

                // First IOException seen from DoBody; thrown if retries are exhausted.
                System.IO.IOException exc = null;
                bool retry = false;

                // 0: derive the generation from the directory listing and segments.gen;
                // 1: blindly advance the generation (look-ahead fallback).
                int method = 0;

                // Loop until we succeed in calling doBody() without
                // hitting an IOException.  An IOException most likely
                // means a commit was in process and has finished, in
                // the time it took us to load the now-old infos files
                // (and segments files).  It's also possible it's a
                // true error (corrupt index).  To distinguish these,
                // on each retry we must see "forward progress" on
                // which generation we are trying to load.  If we
                // don't, then the original error is real and we throw
                // it.

                // We have three methods for determining the current
                // generation.  We try the first two in parallel, and
                // fall back to the third when necessary.

                while (true)
                {
                    if (0 == method)
                    {
                        // Method 1: list the directory and use the highest
                        // segments_N file.  This method works well as long
                        // as there is no stale caching on the directory
                        // contents (NOTE: NFS clients often have such stale
                        // caching):
                        System.String[] files = null;

                        long genA = -1;

                        files = directory.ListAll();

                        if (files != null)
                        {
                            genA = Lucene.Net.Index.SegmentInfos.GetCurrentSegmentGeneration(files);
                        }

                        Lucene.Net.Index.SegmentInfos.Message("directory listing genA=" + genA);

                        // Method 2: open segments.gen and read its
                        // contents.  Then we take the larger of the two
                        // gens.  This way, if either approach is hitting
                        // a stale cache (NFS) we have a better chance of
                        // getting the right generation.
                        long genB = -1;
                        for (int i = 0; i < Lucene.Net.Index.SegmentInfos.defaultGenFileRetryCount; i++)
                        {
                            IndexInput genInput = null;
                            try
                            {
                                genInput = directory.OpenInput(IndexFileNames.SEGMENTS_GEN);
                            }
                            catch (System.IO.FileNotFoundException e)
                            {
                                // No segments.gen at all: stop retrying this method.
                                Lucene.Net.Index.SegmentInfos.Message("segments.gen open: FileNotFoundException " + e);
                                break;
                            }
                            catch (System.IO.IOException e)
                            {
                                // Other open failures fall through to the pause and retry.
                                Lucene.Net.Index.SegmentInfos.Message("segments.gen open: IOException " + e);
                            }

                            if (genInput != null)
                            {
                                try
                                {
                                    int version = genInput.ReadInt();
                                    if (version == Lucene.Net.Index.SegmentInfos.FORMAT_LOCKLESS)
                                    {
                                        // The generation is stored twice; the copies only
                                        // agree when the file was read consistently.
                                        long gen0 = genInput.ReadLong();
                                        long gen1 = genInput.ReadLong();
                                        Lucene.Net.Index.SegmentInfos.Message("fallback check: " + gen0 + "; " + gen1);
                                        if (gen0 == gen1)
                                        {
                                            // The file is consistent.
                                            genB = gen0;
                                            break;
                                        }
                                    }
                                }
                                catch (System.IO.IOException)
                                {
                                    // will retry
                                }
                                finally
                                {
                                    genInput.Close();
                                }
                            }

                            // TimeSpan ticks are 100ns units, so 10000 ticks == 1 millisecond.
                            System.Threading.Thread.Sleep(new TimeSpan((System.Int64) 10000 * Lucene.Net.Index.SegmentInfos.defaultGenFileRetryPauseMsec));
                        }

                        Lucene.Net.Index.SegmentInfos.Message(IndexFileNames.SEGMENTS_GEN + " check: genB=" + genB);

                        // Pick the larger of the two gen's:
                        if (genA > genB)
                        {
                            gen = genA;
                        }
                        else
                        {
                            gen = genB;
                        }

                        if (gen == -1)
                        {
                            // Neither approach found a generation.  The listing may be
                            // null here (it is null-checked above), and string.Join
                            // rejects a null array with ArgumentNullException, which
                            // would mask the FileNotFoundException we mean to throw.
                            System.String listing = (files != null) ? string.Join(" ", files) : "null";
                            throw new System.IO.FileNotFoundException("no segments* file found in " + directory + ": files:" + listing);
                        }
                    }

                    // Third method (fallback if first & second methods
                    // are not reliable): since both directory cache and
                    // file contents cache seem to be stale, just
                    // advance the generation.
                    if (1 == method || (0 == method && lastGen == gen && retry))
                    {
                        method = 1;

                        if (genLookaheadCount < Lucene.Net.Index.SegmentInfos.defaultGenLookaheadCount)
                        {
                            gen++;
                            genLookaheadCount++;
                            Lucene.Net.Index.SegmentInfos.Message("look ahead increment gen to " + gen);
                        }
                    }

                    if (lastGen == gen)
                    {
                        // This means we're about to try the same
                        // segments_N last tried.  This is allowed,
                        // exactly once, because writer could have been in
                        // the process of writing segments_N last time.

                        if (retry)
                        {
                            // OK, we've tried the same segments_N file
                            // twice in a row, so this must be a real
                            // error.  We throw the original exception we
                            // got.
                            throw exc;
                        }

                        retry = true;
                    }
                    else if (0 == method)
                    {
                        // Segment file has advanced since our last loop, so
                        // reset retry:
                        retry = false;
                    }

                    lastGen = gen;

                    segmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);

                    try
                    {
                        System.Object v = DoBody(segmentFileName);
                        Lucene.Net.Index.SegmentInfos.Message("success on " + segmentFileName);

                        return(v);
                    }
                    catch (System.IO.IOException err)
                    {
                        // Save the original root cause:
                        if (exc == null)
                        {
                            exc = err;
                        }

                        Lucene.Net.Index.SegmentInfos.Message("primary Exception on '" + segmentFileName + "': " + err + "'; will retry: retry=" + retry + "; gen = " + gen);

                        if (!retry && gen > 1)
                        {
                            // This is our first time trying this segments
                            // file (because retry is false), and, there is
                            // possibly a segments_(N-1) (because gen > 1).
                            // So, check if the segments_(N-1) exists and
                            // try it if so:
                            System.String prevSegmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen - 1);

                            bool prevExists;
                            prevExists = directory.FileExists(prevSegmentFileName);

                            if (prevExists)
                            {
                                Lucene.Net.Index.SegmentInfos.Message("fallback to prior segment file '" + prevSegmentFileName + "'");
                                try
                                {
                                    System.Object v = DoBody(prevSegmentFileName);
                                    if (exc != null)
                                    {
                                        Lucene.Net.Index.SegmentInfos.Message("success on fallback " + prevSegmentFileName);
                                    }
                                    return(v);
                                }
                                catch (System.IO.IOException err2)
                                {
                                    // Deliberately not rethrown: the outer loop goes
                                    // around again and decides whether to give up.
                                    Lucene.Net.Index.SegmentInfos.Message("secondary Exception on '" + prevSegmentFileName + "': " + err2 + "'; will retry");
                                }
                            }
                        }
                    }
                }
            }