Example #1
        /// <summary>
        /// Retrieve the length (in bytes) of the tvd and tvf
        ///  entries for the next numDocs starting with
        ///  startDocID.  This is used for bulk copying when
        ///  merging segments, if the field numbers are
        ///  congruent.  Once this returns, the tvf and tvd streams
        ///  are positioned at startDocID.
        /// </summary>
        internal void RawDocs(int[] tvdLengths, int[] tvfLengths, int startDocID, int numDocs)
        {
            if (Tvx == null)
            {
                CollectionsHelper.Fill(tvdLengths, 0);
                CollectionsHelper.Fill(tvfLengths, 0);
                return;
            }

            SeekTvx(startDocID);

            long tvdPosition = Tvx.ReadLong();

            Tvd.Seek(tvdPosition);

            long tvfPosition = Tvx.ReadLong();

            Tvf.Seek(tvfPosition);

            long lastTvdPosition = tvdPosition;
            long lastTvfPosition = tvfPosition;

            int count = 0;

            while (count < numDocs)
            {
                int docID = startDocID + count + 1;
                Debug.Assert(docID <= NumTotalDocs);
                if (docID < NumTotalDocs)
                {
                    tvdPosition = Tvx.ReadLong();
                    tvfPosition = Tvx.ReadLong();
                }
                else
                {
                    tvdPosition = Tvd.Length();
                    tvfPosition = Tvf.Length();
                    Debug.Assert(count == numDocs - 1);
                }
                tvdLengths[count] = (int)(tvdPosition - lastTvdPosition);
                tvfLengths[count] = (int)(tvfPosition - lastTvfPosition);
                count++;
                lastTvdPosition = tvdPosition;
                lastTvfPosition = tvfPosition;
            }
        }
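A hedged sketch of how a merge might consume RawDocs for raw bulk copying. Here reader, tvdOut, and tvfOut are hypothetical names, and CopyBytes mirrors the DataOutput-style copy API rather than anything shown above:

        // Sketch only: stream numDocs worth of raw tvd/tvf bytes to the target
        // segment without re-parsing any term vectors.
        int[] tvdLengths = new int[numDocs];
        int[] tvfLengths = new int[numDocs];
        reader.RawDocs(tvdLengths, tvfLengths, startDocID, numDocs);
        // RawDocs leaves reader.Tvd and reader.Tvf positioned at startDocID,
        // so each document's bytes can be copied verbatim:
        for (int i = 0; i < numDocs; i++)
        {
            tvdOut.CopyBytes(reader.Tvd, tvdLengths[i]);
            tvfOut.CopyBytes(reader.Tvf, tvfLengths[i]);
        }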
Example #2
        /// <summary>
        /// Sole constructor. </summary>
        public Lucene40StoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context)
        {
            string segment = si.Name;
            bool   success = false;

            FieldInfos = fn;
            try
            {
                FieldsStream = d.OpenInput(IndexFileNames.SegmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION), context);
                string indexStreamFN = IndexFileNames.SegmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
                IndexStream = d.OpenInput(indexStreamFN, context);

                CodecUtil.CheckHeader(IndexStream, Lucene40StoredFieldsWriter.CODEC_NAME_IDX, Lucene40StoredFieldsWriter.VERSION_START, Lucene40StoredFieldsWriter.VERSION_CURRENT);
                CodecUtil.CheckHeader(FieldsStream, Lucene40StoredFieldsWriter.CODEC_NAME_DAT, Lucene40StoredFieldsWriter.VERSION_START, Lucene40StoredFieldsWriter.VERSION_CURRENT);
                Debug.Assert(Lucene40StoredFieldsWriter.HEADER_LENGTH_DAT == FieldsStream.FilePointer);
                Debug.Assert(Lucene40StoredFieldsWriter.HEADER_LENGTH_IDX == IndexStream.FilePointer);
                long indexSize = IndexStream.Length() - Lucene40StoredFieldsWriter.HEADER_LENGTH_IDX;
                this.Size_Renamed = (int)(indexSize >> 3);
                // Verify two sources of "maxDoc" agree:
                if (this.Size_Renamed != si.DocCount)
                {
                    throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.Size_Renamed + " but segmentInfo shows " + si.DocCount);
                }
                NumTotalDocs = (int)(indexSize >> 3);
                success      = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    try
                    {
                        Dispose();
                    } // ensure we throw our original exception
                    catch (Exception)
                    {
                    }
                }
            }
        }
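The indexSize >> 3 above recovers the document count because each .fdx index entry is a single 8-byte pointer per document. A minimal illustration of that arithmetic, under the same assumption:

        // Sketch only: 8 bytes (one long) per document in the .fdx file.
        long indexSize = 10 * 8;               // assume 10 documents
        int docCount = (int)(indexSize >> 3);  // == (int)(indexSize / 8) == 10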
        public virtual void CopyFile(Directory dir, string src, string dest)
        {
            IndexInput  @in  = dir.OpenInput(src, NewIOContext(Random()));
            IndexOutput @out = dir.CreateOutput(dest, NewIOContext(Random()));

            sbyte[] b         = new sbyte[1024];
            long    remainder = @in.Length();

            while (remainder > 0)
            {
                int len = (int)Math.Min(b.Length, remainder);
                @in.ReadBytes(b, 0, len);
                @out.WriteBytes(b, len);
                remainder -= len;
            }
            @in.Dispose();
            @out.Dispose();
        }
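A hedged usage sketch for CopyFile; the RAMDirectory and file names are illustrative only:

        // Sketch only: duplicate a file within one directory, then compare sizes.
        Directory dir = new RAMDirectory();
        // ... write "a.bin" into dir first ...
        CopyFile(dir, "a.bin", "a.bin.copy");
        Assert.AreEqual(dir.FileLength("a.bin"), dir.FileLength("a.bin.copy"));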
Example #4
        public virtual void TestAppend()
        {
            Directory newDir = NewDirectory();
            CompoundFileDirectory csw = new CompoundFileDirectory(newDir, "d.cfs", NewIOContext(Random()), true);
            int size = 5 + Random().Next(128);
            for (int j = 0; j < 2; j++)
            {
                IndexOutput os = csw.CreateOutput("seg_" + j + "_foo.txt", NewIOContext(Random()));
                for (int i = 0; i < size; i++)
                {
                    os.WriteInt(i * j);
                }
                os.Dispose();
                string[] listAll = newDir.ListAll();
                Assert.AreEqual(1, listAll.Length);
                Assert.AreEqual("d.cfs", listAll[0]);
            }
            CreateSequenceFile(Dir, "d1", (sbyte)0, 15);
            Dir.Copy(csw, "d1", "d1", NewIOContext(Random()));
            string[] listAll_ = newDir.ListAll();
            Assert.AreEqual(1, listAll_.Length);
            Assert.AreEqual("d.cfs", listAll_[0]);
            csw.Dispose();
            CompoundFileDirectory csr = new CompoundFileDirectory(newDir, "d.cfs", NewIOContext(Random()), false);
            for (int j = 0; j < 2; j++)
            {
                IndexInput openInput = csr.OpenInput("seg_" + j + "_foo.txt", NewIOContext(Random()));
                Assert.AreEqual(size * 4, openInput.Length());
                for (int i = 0; i < size; i++)
                {
                    Assert.AreEqual(i * j, openInput.ReadInt());
                }

                openInput.Dispose();
            }
            IndexInput expected = Dir.OpenInput("d1", NewIOContext(Random()));
            IndexInput actual = csr.OpenInput("d1", NewIOContext(Random()));
            AssertSameStreams("d1", expected, actual);
            AssertSameSeekBehavior("d1", expected, actual);
            expected.Dispose();
            actual.Dispose();
            csr.Dispose();
            newDir.Dispose();
        }
Example #5
 public bool MoveNext()
 {
     if (input.FilePointer < input.Length())
     {
         int code = input.ReadVInt();
         if ((code & 1) != 0)
         {
             field = input.ReadString();
         }
         int prefix = Number.URShift(code, 1);
         int suffix = input.ReadVInt();
         bytes.Grow(prefix + suffix);
         input.ReadBytes(bytes.Bytes, prefix, suffix);
         bytes.Length = prefix + suffix;
         term.Set(field, bytes);
         return(true);
     }
     return(false);
 }
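MoveNext decodes a shared-prefix (delta) term encoding: the low bit of the first vint flags a field change, the remaining bits say how many leading bytes are reused from the previous term, and a second vint gives the suffix length. A hedged sketch of the matching writer side; output, prevBytes, termBytes, field, and fieldChanged are hypothetical:

 // Sketch only: the inverse of the decoding performed in MoveNext.
 int stop = Math.Min(prevBytes.Length, termBytes.Length);
 int prefix = 0;
 while (prefix < stop && prevBytes[prefix] == termBytes[prefix])
 {
     prefix++;
 }
 int suffix = termBytes.Length - prefix;
 output.WriteVInt((prefix << 1) | (fieldChanged ? 1 : 0));
 if (fieldChanged)
 {
     output.WriteString(field);
 }
 output.WriteVInt(suffix);
 output.WriteBytes(termBytes, prefix, suffix);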
Example #6
        private MemoryStream CompressStream(string fileName, long originalLength)
        {
            // unfortunately, deflate stream doesn't allow seek, and we need a seekable stream
            // to pass to the blob storage stuff, so we compress into a memory stream
            MemoryStream compressedStream = new MemoryStream();

            IndexInput indexInput = null;

            try
            {
                indexInput = CacheDirectory.OpenInput(fileName);
                using (var compressor = new DeflateStream(compressedStream, CompressionMode.Compress, true))
                {
                    // compress into the in-memory compressedStream
                    byte[] bytes = new byte[indexInput.Length()];
                    indexInput.ReadBytes(bytes, 0, (int)bytes.Length);
                    compressor.Write(bytes, 0, (int)bytes.Length);
                }

                // seek back to the beginning of the compressed stream
                compressedStream.Seek(0, SeekOrigin.Begin);

                Debug.WriteLine(string.Format("COMPRESSED {0} -> {1} {2}% to {3}",
                                              originalLength,
                                              compressedStream.Length,
                                              ((float)compressedStream.Length / (float)originalLength) * 100,
                                              _name));
            }
            catch
            {
                // release the compressed stream resources if an error occurs
                compressedStream.Dispose();
                throw;
            }
            finally
            {
                if (indexInput != null)
                {
                    indexInput.Close();
                }
            }
            return(compressedStream);
        }
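A hedged sketch of the read-back path for a stream produced by CompressStream; DecompressStream is not part of the code above:

        private static byte[] DecompressStream(Stream compressed)
        {
            // DeflateStream in Decompress mode reads what Compress mode wrote.
            using (var inflater = new DeflateStream(compressed, CompressionMode.Decompress, true))
            using (var result = new MemoryStream())
            {
                inflater.CopyTo(result);
                return result.ToArray();
            }
        }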
Example #7
        private void AssertSameStreams(string msg, IndexInput expected, IndexInput test)
        {
            Assert.IsNotNull(expected, msg + " null expected");
            Assert.IsNotNull(test, msg + " null test");
            Assert.AreEqual(expected.Length(), test.Length(), msg + " length");
            Assert.AreEqual(expected.FilePointer, test.FilePointer, msg + " position");

            var expectedBuffer = new byte[512];
            var testBuffer = new byte[expectedBuffer.Length];

            long remainder = expected.Length() - expected.FilePointer;
            while (remainder > 0)
            {
                int readLen = (int)Math.Min(remainder, expectedBuffer.Length);
                expected.ReadBytes(expectedBuffer, 0, readLen);
                test.ReadBytes(testBuffer, 0, readLen);
                AssertEqualArrays(msg + ", remainder " + remainder, expectedBuffer, testBuffer, 0, readLen);
                remainder -= readLen;
            }
        }
Example #8
        public virtual void TestDataInputOutput()
        {
            Random random = Random();

            for (int iter = 0; iter < 5 * RANDOM_MULTIPLIER; iter++)
            {
                BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("testOverflow"));
                if (dir is MockDirectoryWrapper)
                {
                    ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
                }
                int         blockBits = TestUtil.NextInt(random, 1, 20);
                int         blockSize = 1 << blockBits;
                PagedBytes  p         = new PagedBytes(blockBits);
                IndexOutput @out      = dir.CreateOutput("foo", IOContext.DEFAULT);
                int         numBytes  = TestUtil.NextInt(Random(), 2, 10000000);

                byte[] answer = new byte[numBytes];
                Random().NextBytes(answer);
                int written = 0;
                while (written < numBytes)
                {
                    if (Random().Next(10) == 7)
                    {
                        @out.WriteByte(answer[written++]);
                    }
                    else
                    {
                        int chunk = Math.Min(Random().Next(1000), numBytes - written);
                        @out.WriteBytes(answer, written, chunk);
                        written += chunk;
                    }
                }

                @out.Dispose();
                IndexInput input = dir.OpenInput("foo", IOContext.DEFAULT);
                DataInput  @in   = (DataInput)input.Clone();

                p.Copy(input, input.Length());
                PagedBytes.Reader reader = p.Freeze(random.NextBoolean());

                byte[] verify = new byte[numBytes];
                int    read   = 0;
                while (read < numBytes)
                {
                    if (Random().Next(10) == 7)
                    {
                        verify[read++] = @in.ReadByte();
                    }
                    else
                    {
                        int chunk = Math.Min(Random().Next(1000), numBytes - read);
                        @in.ReadBytes(verify, read, chunk);
                        read += chunk;
                    }
                }
                Assert.IsTrue(Arrays.Equals(answer, verify));

                BytesRef slice = new BytesRef();
                for (int iter2 = 0; iter2 < 100; iter2++)
                {
                    int pos = random.Next(numBytes - 1);
                    int len = random.Next(Math.Min(blockSize + 1, numBytes - pos));
                    reader.FillSlice(slice, pos, len);
                    for (int byteUpto = 0; byteUpto < len; byteUpto++)
                    {
                        Assert.AreEqual(answer[pos + byteUpto], (byte)slice.Bytes[slice.Offset + byteUpto]);
                    }
                }
                input.Dispose();
                dir.Dispose();
            }
        }
        public Lucene3xStoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context)
        {
            string segment = Lucene3xSegmentInfoFormat.GetDocStoreSegment(si);
            int docStoreOffset = Lucene3xSegmentInfoFormat.GetDocStoreOffset(si);
            int size = si.DocCount;
            bool success = false;
            FieldInfos = fn;
            try
            {
                if (docStoreOffset != -1 && Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(si))
                {
                    d = StoreCFSReader = new CompoundFileDirectory(si.Dir, IndexFileNames.SegmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false);
                }
                else
                {
                    StoreCFSReader = null;
                }
                FieldsStream = d.OpenInput(IndexFileNames.SegmentFileName(segment, "", FIELDS_EXTENSION), context);
                string indexStreamFN = IndexFileNames.SegmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
                IndexStream = d.OpenInput(indexStreamFN, context);

                Format = IndexStream.ReadInt();

                if (Format < FORMAT_MINIMUM)
                {
                    throw new IndexFormatTooOldException(IndexStream, Format, FORMAT_MINIMUM, FORMAT_CURRENT);
                }
                if (Format > FORMAT_CURRENT)
                {
                    throw new IndexFormatTooNewException(IndexStream, Format, FORMAT_MINIMUM, FORMAT_CURRENT);
                }

                long indexSize = IndexStream.Length() - FORMAT_SIZE;

                if (docStoreOffset != -1)
                {
                    // We read only a slice out of this shared fields file
                    this.DocStoreOffset = docStoreOffset;
                    this.Size = size;

                    // Verify the file is long enough to hold all of our
                    // docs
                    Debug.Assert(((int)(indexSize / 8)) >= size + this.DocStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
                }
                else
                {
                    this.DocStoreOffset = 0;
                    this.Size = (int)(indexSize >> 3);
                    // Verify two sources of "maxDoc" agree:
                    if (this.Size != si.DocCount)
                    {
                        throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.Size + " but segmentInfo shows " + si.DocCount);
                    }
                }
                NumTotalDocs = (int)(indexSize >> 3);
                success = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    try
                    {
                        Dispose();
                    } // keep our original exception
                    catch (Exception)
                    {
                    }
                }
            }
        }
Example #10
 public override long Length()
 {
     return(ii.Length());
 }
        protected override void Dispose(bool disposing)
        {
            _fileMutex.WaitOne();
            try
            {
                string fileName = _name;

                // make sure it's all written out
                _indexOutput.Flush();

                long originalLength = _indexOutput.Length;
                _indexOutput.Dispose();

                Stream blobStream;
#if COMPRESSBLOBS
                // optionally put a compressor around the blob stream
                if (_azureDirectory.ShouldCompressFile(_name))
                {
                    // unfortunately, deflate stream doesn't allow seek, and we need a seekable stream
                    // to pass to the blob storage stuff, so we compress into a memory stream
                    MemoryStream compressedStream = new MemoryStream();

                    try
                    {
                        IndexInput indexInput = CacheDirectory.OpenInput(fileName);
                        using (DeflateStream compressor = new DeflateStream(compressedStream, CompressionMode.Compress, true))
                        {
                            // compress into the in-memory compressedStream
                            byte[] bytes = new byte[indexInput.Length()];
                            indexInput.ReadBytes(bytes, 0, (int)bytes.Length);
                            compressor.Write(bytes, 0, (int)bytes.Length);
                        }
                        indexInput.Close();

                        // seek back to the beginning of the compressed stream
                        compressedStream.Seek(0, SeekOrigin.Begin);

                        Debug.WriteLine(string.Format("COMPRESSED {0} -> {1} {2}% to {3}",
                                                      originalLength,
                                                      compressedStream.Length,
                                                      ((float)compressedStream.Length / (float)originalLength) * 100,
                                                      _name));
                    }
                    catch
                    {
                        // release the compressed stream resources if an error occurs
                        compressedStream.Dispose();
                        throw;
                    }

                    blobStream = compressedStream;
                }
                else
#endif
                {
                    blobStream = new StreamInput(CacheDirectory.OpenInput(fileName));
                }

                try
                {
                    // push the blobStream up to the cloud
                    _blob.UploadFromStream(blobStream);

                    // set the metadata with the original index file properties
                    _blob.Metadata["CachedLength"]       = originalLength.ToString();
                    _blob.Metadata["CachedLastModified"] = CacheDirectory.FileModified(fileName).ToString();
                    _blob.SetMetadata();

                    Debug.WriteLine(string.Format("PUT {1} bytes to {0} in cloud", _name, blobStream.Length));
                }
                finally
                {
                    blobStream.Dispose();
                }

#if FULLDEBUG
                Debug.WriteLine(string.Format("CLOSED WRITESTREAM {0}", _name));
#endif
                // clean up
                _indexOutput   = null;
                _blobContainer = null;
                _blob          = null;
                GC.SuppressFinalize(this);
            }
            finally
            {
                _fileMutex.ReleaseMutex();
            }
        }
        public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext)
        {
            string     fileName = IndexFileNames.SegmentFileName(segmentName, "", FIELD_INFOS_EXTENSION);
            IndexInput input    = directory.OpenInput(fileName, iocontext);

            bool success = false;

            try
            {
                int format = input.ReadVInt();

                if (format > FORMAT_MINIMUM)
                {
                    throw new IndexFormatTooOldException(input, format, FORMAT_MINIMUM, FORMAT_CURRENT);
                }
                if (format < FORMAT_CURRENT)
                {
                    throw new IndexFormatTooNewException(input, format, FORMAT_MINIMUM, FORMAT_CURRENT);
                }

                int         size  = input.ReadVInt(); //read in the size
                FieldInfo[] infos = new FieldInfo[size];

                for (int i = 0; i < size; i++)
                {
                    string name            = input.ReadString();
                    int    fieldNumber     = i;
                    byte   bits            = input.ReadByte();
                    bool   isIndexed       = (bits & IS_INDEXED) != 0;
                    bool   storeTermVector = (bits & STORE_TERMVECTOR) != 0;
                    bool   omitNorms       = (bits & OMIT_NORMS) != 0;
                    bool   storePayloads   = (bits & STORE_PAYLOADS) != 0;
                    FieldInfo.IndexOptions indexOptions;
                    if (!isIndexed)
                    {
                        indexOptions = default(FieldInfo.IndexOptions);
                    }
                    else if ((bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0)
                    {
                        indexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
                    }
                    else if ((bits & OMIT_POSITIONS) != 0)
                    {
                        if (format <= FORMAT_OMIT_POSITIONS)
                        {
                            indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
                        }
                        else
                        {
                            throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")");
                        }
                    }
                    else
                    {
                        indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
                    }

                    // LUCENE-3027: past indices were able to write
                    // storePayloads=true when omitTFAP is also true,
                    // which is invalid.  We correct that, here:
                    if (indexOptions != FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
                    {
                        storePayloads = false;
                    }
                    // LUCENE TO-DO
                    infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, null, isIndexed && !omitNorms ? FieldInfo.DocValuesType_e.NUMERIC : default(FieldInfo.DocValuesType_e), CollectionsHelper.EmptyMap <string, string>());
                }

                if (input.FilePointer != input.Length())
                {
                    throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.FilePointer + " vs size " + input.Length() + " (resource: " + input + ")");
                }
                FieldInfos fieldInfos = new FieldInfos(infos);
                success = true;
                return(fieldInfos);
            }
            finally
            {
                if (success)
                {
                    input.Dispose();
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(input);
                }
            }
        }
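Each field entry packs its booleans into one flag byte; the mask values below are illustrative assumptions, only the decoding pattern matches Read above:

        // Sketch only: decoding a hypothetical flag byte.
        const byte IS_INDEXED = 0x1;
        const byte STORE_TERMVECTOR = 0x2;
        const byte OMIT_NORMS = 0x10;
        byte bits = 0x13;                                       // indexed + term vectors + no norms
        bool isIndexed = (bits & IS_INDEXED) != 0;              // true
        bool storeTermVector = (bits & STORE_TERMVECTOR) != 0;  // true
        bool omitNorms = (bits & OMIT_NORMS) != 0;              // true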
        internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize, int docStoreOffset, int size)
        {
            bool success = false;

            try
            {
                if (d.FileExists(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION))
                {
                    tvx    = d.OpenInput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION, readBufferSize);
                    format = CheckValidFormat(tvx);
                    tvd    = d.OpenInput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION, readBufferSize);
                    int tvdFormat = CheckValidFormat(tvd);
                    tvf = d.OpenInput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION, readBufferSize);
                    int tvfFormat = CheckValidFormat(tvf);

                    System.Diagnostics.Debug.Assert(format == tvdFormat);
                    System.Diagnostics.Debug.Assert(format == tvfFormat);

                    if (format >= FORMAT_VERSION2)
                    {
                        System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 16 == 0);
                        numTotalDocs = (int)(tvx.Length() >> 4);
                    }
                    else
                    {
                        System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 8 == 0);
                        numTotalDocs = (int)(tvx.Length() >> 3);
                    }

                    if (-1 == docStoreOffset)
                    {
                        this.docStoreOffset = 0;
                        this.size           = numTotalDocs;
                        System.Diagnostics.Debug.Assert(size == 0 || numTotalDocs == size);
                    }
                    else
                    {
                        this.docStoreOffset = docStoreOffset;
                        this.size           = size;
                        // Verify the file is long enough to hold all of our
                        // docs
                        System.Diagnostics.Debug.Assert(numTotalDocs >= size + docStoreOffset, "numTotalDocs=" + numTotalDocs + " size=" + size + " docStoreOffset=" + docStoreOffset);
                    }
                }
                else
                {
                    // If all documents flushed in a segment had hit
                    // non-aborting exceptions, it's possible that
                    // FieldInfos.hasVectors returns true yet the term
                    // vector files don't exist.
                    format = 0;
                }


                this.fieldInfos = fieldInfos;
                success         = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    Dispose();
                }
            }
        }
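The two shifts above encode the tvx record size: FORMAT_VERSION2 stores two 8-byte pointers per document (16 bytes, shift by 4), while older formats store one (8 bytes, shift by 3). A minimal illustration, assuming a 4-byte FORMAT_SIZE header:

        // Sketch only: tvx sizing arithmetic for both format generations.
        const int FORMAT_SIZE = 4;               // assumed header size
        long tvxLenV2 = FORMAT_SIZE + 10 * 16;   // 10 docs at 16 bytes each
        int docsV2 = (int)(tvxLenV2 >> 4);       // == 10 (the header bytes floor away)
        long tvxLenV1 = FORMAT_SIZE + 10 * 8;    // 10 docs at 8 bytes each
        int docsV1 = (int)(tvxLenV1 >> 3);       // == 10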
        /// <summary>
        /// Sole constructor. </summary>
        public CompressingStoredFieldsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode)
        {
            this.CompressionMode_Renamed = compressionMode;
            string segment = si.Name;
            bool success = false;
            FieldInfos = fn;
            NumDocs = si.DocCount;
            ChecksumIndexInput indexStream = null;
            try
            {
                string indexStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
                string fieldsStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_EXTENSION);
                // Load the index into memory
                indexStream = d.OpenChecksumInput(indexStreamFN, context);
                string codecNameIdx = formatName + CompressingStoredFieldsWriter.CODEC_SFX_IDX;
                Version_Renamed = CodecUtil.CheckHeader(indexStream, codecNameIdx, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT);
                Debug.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.FilePointer);
                IndexReader = new CompressingStoredFieldsIndexReader(indexStream, si);

                long maxPointer = -1;

                if (Version_Renamed >= CompressingStoredFieldsWriter.VERSION_CHECKSUM)
                {
                    maxPointer = indexStream.ReadVLong();
                    CodecUtil.CheckFooter(indexStream);
                }
                else
                {
                    CodecUtil.CheckEOF(indexStream);
                }
                indexStream.Dispose();
                indexStream = null;

                // Open the data file and read metadata
                FieldsStream = d.OpenInput(fieldsStreamFN, context);
                if (Version_Renamed >= CompressingStoredFieldsWriter.VERSION_CHECKSUM)
                {
                    if (maxPointer + CodecUtil.FooterLength() != FieldsStream.Length())
                    {
                        throw new CorruptIndexException("Invalid fieldsStream maxPointer (file truncated?): maxPointer=" + maxPointer + ", length=" + FieldsStream.Length());
                    }
                }
                else
                {
                    maxPointer = FieldsStream.Length();
                }
                this.MaxPointer = maxPointer;
                string codecNameDat = formatName + CompressingStoredFieldsWriter.CODEC_SFX_DAT;
                int fieldsVersion = CodecUtil.CheckHeader(FieldsStream, codecNameDat, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT);
                if (Version_Renamed != fieldsVersion)
                {
                    throw new CorruptIndexException("Version mismatch between stored fields index and data: " + Version_Renamed + " != " + fieldsVersion);
                }
                Debug.Assert(CodecUtil.HeaderLength(codecNameDat) == FieldsStream.FilePointer);

                if (Version_Renamed >= CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS)
                {
                    ChunkSize_Renamed = FieldsStream.ReadVInt();
                }
                else
                {
                    ChunkSize_Renamed = -1;
                }
                PackedIntsVersion = FieldsStream.ReadVInt();
                Decompressor = compressionMode.NewDecompressor();
                this.Bytes = new BytesRef();

                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(this, indexStream);
                }
            }
        }
Example #15
 public override long Length()
 {
     return(main.Length());
 }
Example #17
 public override long Length()
 {
     return(_cacheDirIndexInput.Length());
 }
 /// <summary>
 /// Seek <c>input</c> to the directory offset. </summary>
 protected internal virtual void SeekDir(IndexInput input, long dirOffset)
 {
     if (Version >= BlockTreeTermsWriter.VERSION_CHECKSUM)
     {
         input.Seek(input.Length() - CodecUtil.FooterLength() - 8);
         dirOffset = input.ReadLong();
     }
     else if (Version >= BlockTreeTermsWriter.VERSION_APPEND_ONLY)
     {
         input.Seek(input.Length() - 8);
         dirOffset = input.ReadLong();
     }
     input.Seek(dirOffset);
 }
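SeekDir relies on a fixed tail layout: the 8-byte dirOffset long sits immediately before the codec footer (or is simply the last long in append-only files). A hedged sketch of the checksum-era case; treating CodecUtil.FooterLength() as 16 bytes is an assumption:

 // Sketch only, tail of a VERSION_CHECKSUM terms file:
 //   [ ... terms index data ... ][ dirOffset: 8 bytes ][ footer: FooterLength() bytes ]
 long footerLength = 16;                                 // assumed FooterLength()
 long dirOffsetPos = input.Length() - footerLength - 8;  // where SeekDir seeks first
 input.Seek(dirOffsetPos);
 long dirOffset = input.ReadLong();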
Example #19
 /// <summary>
 /// Returns (but does not validate) the checksum previously written by <seealso cref="CheckFooter"/>. </summary>
 /// <returns> actual checksum value </returns>
 /// <exception cref="IOException"> if the footer is invalid </exception>
 public static long RetrieveChecksum(IndexInput @in)
 {
     @in.Seek(@in.Length() - FooterLength());
     ValidateFooter(@in);
     return(@in.ReadLong());
 }
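A hedged usage sketch; the directory, file name, and IOContext.DEFAULT are illustrative:

 // Sketch only: read the stored footer checksum without re-hashing the file.
 using (IndexInput @in = dir.OpenInput("example.dat", IOContext.DEFAULT))
 {
     long stored = RetrieveChecksum(@in); // checks footer structure, returns the checksum
 }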
Example #20
        internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size)
        {
            bool success = false;

            isOriginal = true;
            try
            {
                fieldInfos = fn;

                cloneableFieldsStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_EXTENSION, readBufferSize);
                cloneableIndexStream  = d.OpenInput(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION, readBufferSize);

                // First version of fdx did not include a format
                // header, but, the first int will always be 0 in that
                // case
                int firstInt = cloneableIndexStream.ReadInt();
                format = firstInt == 0 ? 0 : firstInt;

                if (format > FieldsWriter.FORMAT_CURRENT)
                {
                    throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FieldsWriter.FORMAT_CURRENT + " or lower");
                }

                formatSize = format > FieldsWriter.FORMAT ? 4 : 0;

                if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
                {
                    cloneableFieldsStream.SetModifiedUTF8StringsMode();
                }

                fieldsStream = (IndexInput)cloneableFieldsStream.Clone();

                long indexSize = cloneableIndexStream.Length() - formatSize;

                if (docStoreOffset != -1)
                {
                    // We read only a slice out of this shared fields file
                    this.docStoreOffset = docStoreOffset;
                    this.size           = size;

                    // Verify the file is long enough to hold all of our
                    // docs
                    System.Diagnostics.Debug.Assert(((int)(indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
                }
                else
                {
                    this.docStoreOffset = 0;
                    this.size           = (int)(indexSize >> 3);
                }

                indexStream  = (IndexInput)cloneableIndexStream.Clone();
                numTotalDocs = (int)(indexSize >> 3);
                success      = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    Dispose();
                }
            }
        }
 protected override void SeekDir(IndexInput input, long dirOffset)
 {
     input.Seek(input.Length() - sizeof(long)); // sizeof(long) == 8; the dirOffset long is the last thing in the file
     long offset = input.ReadLong();
     input.Seek(offset);
 }
Example #22
        // note: just like segmentreader in 3.x, we open up all the files here (including separate norms) up front.
        // but we just don't do any seeks or reading yet.
        public Lucene3xNormsProducer(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context)
        {
            Directory separateNormsDir = info.Dir; // separate norms are never inside CFS

            Maxdoc = info.DocCount;
            string segmentName = info.Name;
            bool   success     = false;

            try
            {
                long nextNormSeek = NORMS_HEADER.Length; //skip header (header unused for now)
                foreach (FieldInfo fi in fields)
                {
                    if (fi.HasNorms())
                    {
                        string    fileName = GetNormFilename(info, fi.Number);
                        Directory d        = HasSeparateNorms(info, fi.Number) ? separateNormsDir : dir;

                        // singleNormFile means multiple norms share this file
                        bool       singleNormFile = IndexFileNames.MatchesExtension(fileName, NORMS_EXTENSION);
                        IndexInput normInput      = null;
                        long       normSeek;

                        if (singleNormFile)
                        {
                            normSeek = nextNormSeek;
                            if (SingleNormStream == null)
                            {
                                SingleNormStream = d.OpenInput(fileName, context);
                                OpenFiles.Add(SingleNormStream);
                            }
                            // All norms in the .nrm file can share a single IndexInput since
                            // they are only used in a synchronized context.
                            // If this were to change in the future, a clone could be done here.
                            normInput = SingleNormStream;
                        }
                        else
                        {
                            normInput = d.OpenInput(fileName, context);
                            OpenFiles.Add(normInput);
                            // if the segment was created in 3.2 or after, we wrote the header for sure,
                            // and don't need to do the sketchy file size check. otherwise, we check
                            // if the size is exactly equal to maxDoc to detect a headerless file.
                            // NOTE: remove this check in Lucene 5.0!
                            string version       = info.Version;
                            bool   isUnversioned = (version == null || StringHelper.VersionComparator.Compare(version, "3.2") < 0) && normInput.Length() == Maxdoc;
                            if (isUnversioned)
                            {
                                normSeek = 0;
                            }
                            else
                            {
                                normSeek = NORMS_HEADER.Length;
                            }
                        }
                        NormsDocValues norm = new NormsDocValues(this, normInput, normSeek);
                        Norms[fi.Name] = norm;
                        nextNormSeek  += Maxdoc; // increment also if some norms are separate
                    }
                }
                // TODO: change to a real check? see LUCENE-3619
                Debug.Assert(SingleNormStream == null || nextNormSeek == SingleNormStream.Length(), SingleNormStream != null ? "len: " + SingleNormStream.Length() + " expected: " + nextNormSeek : "null");
                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(OpenFiles);
                }
            }
            ramBytesUsed = new AtomicLong();
        }
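The nextNormSeek += Maxdoc step reflects the shared .nrm layout: after the header, each field with norms contributes exactly one byte per document. A hedged helper illustrating the resulting offsets; NormOffset is hypothetical:

        // Sketch only: offset of the n-th norms field inside a shared .nrm file.
        private static long NormOffset(int fieldOrdinal, int maxDoc, int headerLength)
        {
            return headerLength + (long)fieldOrdinal * maxDoc; // one byte per doc per field
        }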
        private void SeekDir(IndexInput input, long dirOffset)
        {
            if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM)
            {
                input.Seek(input.Length() - CodecUtil.FooterLength() - 8);
                dirOffset = input.ReadLong();

            }
            else if (_version >= FixedGapTermsIndexWriter.VERSION_APPEND_ONLY)
            {
                input.Seek(input.Length() - 8);
                dirOffset = input.ReadLong();
            }

            input.Seek(dirOffset);
        }
Example #24
 public override long Length()
 {
     return(_indexInput.Length());
 }
        public Lucene3xTermVectorsReader(Directory d, SegmentInfo si, FieldInfos fieldInfos, IOContext context)
        {
            string segment        = Lucene3xSegmentInfoFormat.GetDocStoreSegment(si);
            int    docStoreOffset = Lucene3xSegmentInfoFormat.GetDocStoreOffset(si);
            int    size           = si.DocCount;

            bool success = false;

            try
            {
                if (docStoreOffset != -1 && Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(si))
                {
                    d = StoreCFSReader = new CompoundFileDirectory(si.Dir, IndexFileNames.SegmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false);
                }
                else
                {
                    StoreCFSReader = null;
                }
                string idxName = IndexFileNames.SegmentFileName(segment, "", VECTORS_INDEX_EXTENSION);
                Tvx    = d.OpenInput(idxName, context);
                Format = CheckValidFormat(Tvx);
                string fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_DOCUMENTS_EXTENSION);
                Tvd = d.OpenInput(fn, context);
                int tvdFormat = CheckValidFormat(Tvd);
                fn  = IndexFileNames.SegmentFileName(segment, "", VECTORS_FIELDS_EXTENSION);
                Tvf = d.OpenInput(fn, context);
                int tvfFormat = CheckValidFormat(Tvf);

                Debug.Assert(Format == tvdFormat);
                Debug.Assert(Format == tvfFormat);

                NumTotalDocs = (int)(Tvx.Length() >> 4);

                if (-1 == docStoreOffset)
                {
                    this.DocStoreOffset = 0;
                    this.Size_Renamed   = NumTotalDocs;
                    Debug.Assert(size == 0 || NumTotalDocs == size);
                }
                else
                {
                    this.DocStoreOffset = docStoreOffset;
                    this.Size_Renamed   = size;
                    // Verify the file is long enough to hold all of our
                    // docs
                    Debug.Assert(NumTotalDocs >= size + docStoreOffset, "numTotalDocs=" + NumTotalDocs + " size=" + size + " docStoreOffset=" + docStoreOffset);
                }

                this.FieldInfos = fieldInfos;
                success         = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    try
                    {
                        Dispose();
                    } // keep our original exception
                    catch (Exception)
                    {
                    }
                }
            }
        }
Example #26
 public override long Length()
 {
     return(@delegate.Length());
 }
        /// <summary>
        /// Sole constructor. </summary>
        public Lucene40TermVectorsReader(Directory d, SegmentInfo si, FieldInfos fieldInfos, IOContext context)
        {
            string segment = si.Name;
            int size = si.DocCount;

            bool success = false;

            try
            {
                string idxName = IndexFileNames.SegmentFileName(segment, "", VECTORS_INDEX_EXTENSION);
                Tvx = d.OpenInput(idxName, context);
                int tvxVersion = CodecUtil.CheckHeader(Tvx, CODEC_NAME_INDEX, VERSION_START, VERSION_CURRENT);

                string fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_DOCUMENTS_EXTENSION);
                Tvd = d.OpenInput(fn, context);
                int tvdVersion = CodecUtil.CheckHeader(Tvd, CODEC_NAME_DOCS, VERSION_START, VERSION_CURRENT);
                fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_FIELDS_EXTENSION);
                Tvf = d.OpenInput(fn, context);
                int tvfVersion = CodecUtil.CheckHeader(Tvf, CODEC_NAME_FIELDS, VERSION_START, VERSION_CURRENT);
                Debug.Assert(HEADER_LENGTH_INDEX == Tvx.FilePointer);
                Debug.Assert(HEADER_LENGTH_DOCS == Tvd.FilePointer);
                Debug.Assert(HEADER_LENGTH_FIELDS == Tvf.FilePointer);
                Debug.Assert(tvxVersion == tvdVersion);
                Debug.Assert(tvxVersion == tvfVersion);

                NumTotalDocs = (int)((Tvx.Length() - HEADER_LENGTH_INDEX) >> 4);

                this.Size_Renamed = NumTotalDocs;
                Debug.Assert(size == 0 || NumTotalDocs == size);

                this.FieldInfos = fieldInfos;
                success = true;
            }
            finally
            {
                // With lock-less commits, it's entirely possible (and
                // fine) to hit a FileNotFound exception above. In
                // this case, we want to explicitly close any subset
                // of things that were opened so that we don't have to
                // wait for a GC to do so.
                if (!success)
                {
                    try
                    {
                        Dispose();
                    } // ensure we throw our original exception
                    catch (Exception)
                    {
                    }
                }
            }
        }
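The shift by 4 above reflects the tvx record size: after the header, each document owns two 8-byte pointers (one into tvd, one into tvf), 16 bytes in total. A minimal illustration; headerLength stands in for HEADER_LENGTH_INDEX:

        // Sketch only: doc count from the tvx file size.
        long headerLength = 16;                                     // stand-in for HEADER_LENGTH_INDEX
        long tvxLength = headerLength + 10 * 16;                    // assume 10 documents
        int numTotalDocs = (int)((tvxLength - headerLength) >> 4);  // == 10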