private IDictionary<string, long?> ReadFields(IndexInput @in)
        {
            ChecksumIndexInput input = new BufferedChecksumIndexInput(@in);
            var scratch = new BytesRef(10);

            // LUCENENET specific: Use StringComparer.Ordinal to get the same ordering as Java
            var fields = new JCG.SortedDictionary<string, long?>(StringComparer.Ordinal);

            while (true)
            {
                SimpleTextUtil.ReadLine(input, scratch);
                if (scratch.Equals(SimpleTextFieldsWriter.END))
                {
                    SimpleTextUtil.CheckFooter(input);
                    return fields;
                }

                if (StringHelper.StartsWith(scratch, SimpleTextFieldsWriter.FIELD))
                {
                    var fieldName = Encoding.UTF8.GetString(scratch.Bytes, scratch.Offset + SimpleTextFieldsWriter.FIELD.Length,
                                                            scratch.Length - SimpleTextFieldsWriter.FIELD.Length);
                    fields[fieldName] = input.GetFilePointer();
                }
            }
        }
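A hypothetical usage sketch of the map this method returns: each value is the file pointer just past a "field" line, so a reader can seek straight to that field's term data. The SeekToField helper and the _input/_fieldOffsets members are illustrative, not part of the original class.

        // Sketch: seek the underlying input to the start of a field's term block
        // using the file pointers captured by ReadFields above.
        private IndexInput _input;                        // assumed: the same input ReadFields scanned
        private IDictionary<string, long?> _fieldOffsets; // assumed: the result of ReadFields(_input)

        private void SeekToField(string field)
        {
            if (_fieldOffsets.TryGetValue(field, out long? offset) && offset.HasValue)
            {
                _input.Seek(offset.Value); // positioned just after the "field <name>" header line
            }
            else
            {
                throw new ArgumentException("Unknown field: " + field);
            }
        }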
Example #2
        /// <summary>
        /// Clones the provided input, reads all bytes from the file, and calls <see cref="CheckFooter(ChecksumIndexInput)"/>
        /// <para/>
        /// Note that this method may be slow, as it must process the entire file.
        /// If you just need to extract the checksum value, call <see cref="RetrieveChecksum(IndexInput)"/>.
        /// </summary>
        public static long ChecksumEntireFile(IndexInput input)
        {
            IndexInput clone = (IndexInput)input.Clone();

            clone.Seek(0);
            ChecksumIndexInput @in = new BufferedChecksumIndexInput(clone);

            Debug.Assert(@in.GetFilePointer() == 0);
            @in.Seek(@in.Length - FooterLength());
            return CheckFooter(@in);
        }
Example #3
        /// <summary>
        /// Clones the provided input, reads all bytes from the file, and calls <see cref="CheckFooter(ChecksumIndexInput)"/>
        /// <para/>
        /// Note that this method may be slow, as it must process the entire file.
        /// If you just need to extract the checksum value, call <see cref="RetrieveChecksum(IndexInput)"/>.
        /// </summary>
        public static long ChecksumEntireFile(IndexInput input)
        {
            IndexInput clone = (IndexInput)input.Clone();

            clone.Seek(0);
            ChecksumIndexInput @in = new BufferedChecksumIndexInput(clone);

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(@in.Position == 0);                           // LUCENENET specific: Renamed from getFilePointer() to match FileStream
            }
            @in.Seek(@in.Length - FooterLength());
            return CheckFooter(@in);
        }
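A minimal usage sketch for ChecksumEntireFile, assuming a Directory instance named dir and a file name "_0.fld" (both illustrative):

        // Sketch: verify a file's footer checksum by reading it end-to-end.
        // CheckFooter throws a CorruptIndexException on a mismatch.
        using (IndexInput input = dir.OpenInput("_0.fld", IOContext.READ_ONCE))
        {
            long checksum = ChecksumEntireFile(input);
            Console.WriteLine("checksum=0x" + checksum.ToString("x"));
        }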
Example #4

        public override void CheckIntegrity()
        {
            BytesRef scratch = new BytesRef();
            IndexInput clone = (IndexInput)data.Clone();

            clone.Seek(0);
            ChecksumIndexInput input = new BufferedChecksumIndexInput(clone);

            while (true)
            {
                SimpleTextUtil.ReadLine(input, scratch);
                if (scratch.Equals(END))
                {
                    SimpleTextUtil.CheckFooter(input);
                    break;
                }
            }
        }
Example #5
        /// <remarks>
        /// We don't actually write a .fdx-like index; instead we read the
        /// stored fields file in its entirety up-front and save the offsets
        /// so we can seek to the documents later.
        /// </remarks>
        private void ReadIndex(int size)
        {
            ChecksumIndexInput input = new BufferedChecksumIndexInput(_input);

            _offsets = new long[size];
            var upto = 0;

            while (!_scratch.Equals(SimpleTextStoredFieldsWriter.END))
            {
                SimpleTextUtil.ReadLine(input, _scratch);
                if (StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.DOC))
                {
                    _offsets[upto] = input.GetFilePointer();
                    upto++;
                }
            }
            SimpleTextUtil.CheckFooter(input);
            Debug.Assert(upto == _offsets.Length);
        }
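With the offsets captured above, random access to a stored document is a single seek; a hedged sketch of how a visit method could use them (the method name and parsing step are illustrative):

        // Sketch: jump to the bytes recorded for document docId and parse from there.
        public void VisitDocumentSketch(int docId)
        {
            _input.Seek(_offsets[docId]); // lands just after this document's "doc" line
            // ... read the field lines that follow for this document ...
        }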
Example #6
        // We don't actually write a .fdx-like index; instead we read the
        // stored fields file in its entirety up-front and save the offsets
        // so we can seek to the documents later.
        private void ReadIndex(int size)
        {
            ChecksumIndexInput input = new BufferedChecksumIndexInput(@in);

            offsets = new long[size];
            int upto = 0;

            while (!scratch.Equals(END))
            {
                SimpleTextUtil.ReadLine(input, scratch);
                if (StringHelper.StartsWith(scratch, DOC))
                {
                    offsets[upto] = input.FilePointer;
                    upto++;
                }
            }
            SimpleTextUtil.CheckFooter(input);
            Debug.Assert(upto == offsets.Length);
        }
Example #7
        // We don't actually write a .tvx-like index; instead we read the
        // vectors file in its entirety up-front and save the offsets
        // so we can seek to the data later.
        private void ReadIndex(int maxDoc)
        {
            ChecksumIndexInput input = new BufferedChecksumIndexInput(_input);

            _offsets = new long[maxDoc];
            int upto = 0;

            while (!_scratch.Equals(SimpleTextTermVectorsWriter.END))
            {
                SimpleTextUtil.ReadLine(input, _scratch);
                if (StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.DOC))
                {
                    _offsets[upto] = input.FilePointer;
                    upto++;
                }
            }
            SimpleTextUtil.CheckFooter(input);
            Debug.Assert(upto == _offsets.Length);
        }
Example #8

        public override void CheckIntegrity()
        {
            var iScratch = new BytesRef();
            var clone    = (IndexInput)data.Clone();

            clone.Seek(0);
            ChecksumIndexInput input = new BufferedChecksumIndexInput(clone);

            while (true)
            {
                SimpleTextUtil.ReadLine(input, iScratch);
                if (!iScratch.Equals(SimpleTextDocValuesWriter.END))
                {
                    continue;
                }

                SimpleTextUtil.CheckFooter(input);
                break;
            }
        }
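The scan-to-END-then-verify pattern above recurs throughout these examples; a minimal standalone helper capturing it (a sketch, assuming the SimpleText codec's line format and END marker):

        // Sketch: run every byte of a SimpleText file through the checksum by
        // reading lines until the END marker, then validate the footer.
        private static void ScanAndCheckFooter(IndexInput raw, BytesRef endMarker)
        {
            var scratch = new BytesRef();
            ChecksumIndexInput input = new BufferedChecksumIndexInput(raw);
            do
            {
                SimpleTextUtil.ReadLine(input, scratch);
            } while (!scratch.Equals(endMarker));
            SimpleTextUtil.CheckFooter(input);
        }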
Example #9
        // We don't actually write a .tvx-like index; instead we read the
        // vectors file in its entirety up-front and save the offsets
        // so we can seek to the data later.
        private void ReadIndex(int maxDoc)
        {
            ChecksumIndexInput input = new BufferedChecksumIndexInput(_input);

            _offsets = new long[maxDoc];
            int upto = 0;

            while (!_scratch.Equals(SimpleTextTermVectorsWriter.END))
            {
                SimpleTextUtil.ReadLine(input, _scratch);
                if (StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.DOC))
                {
                    _offsets[upto] = input.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                    upto++;
                }
            }
            SimpleTextUtil.CheckFooter(input);
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(upto == _offsets.Length);
            }
        }
Example #10
        private SortedDictionary<string, long?> ReadFields(IndexInput @in)
        {
            ChecksumIndexInput input   = new BufferedChecksumIndexInput(@in);
            BytesRef           scratch = new BytesRef(10);
            SortedDictionary<string, long?> fields = new SortedDictionary<string, long?>();

            while (true)
            {
                SimpleTextUtil.ReadLine(input, scratch);
                if (scratch.Equals(END))
                {
                    SimpleTextUtil.CheckFooter(input);
                    return fields;
                }
                else if (StringHelper.StartsWith(scratch, FIELD))
                {
                    string fieldName = Encoding.UTF8.GetString(scratch.Bytes, scratch.Offset + FIELD.Length,
                                                               scratch.Length - FIELD.Length);
                    fields[fieldName] = input.FilePointer;
                }
            }
        }
Example #11
        public override int Merge(MergeState mergeState)
        {
            int docCount = 0;
            int idx      = 0;

            foreach (AtomicReader reader in mergeState.Readers)
            {
                SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++];
                CompressingTermVectorsReader matchingVectorsReader = null;
                if (matchingSegmentReader != null)
                {
                    TermVectorsReader vectorsReader = matchingSegmentReader.TermVectorsReader;
                    // we can only bulk-copy if the matching reader is also a CompressingTermVectorsReader
                    if (vectorsReader != null && vectorsReader is CompressingTermVectorsReader)
                    {
                        matchingVectorsReader = (CompressingTermVectorsReader)vectorsReader;
                    }
                }

                int  maxDoc   = reader.MaxDoc;
                Bits liveDocs = reader.LiveDocs;

                if (matchingVectorsReader == null || matchingVectorsReader.Version != VERSION_CURRENT || matchingVectorsReader.CompressionMode != CompressionMode || matchingVectorsReader.ChunkSize != ChunkSize || matchingVectorsReader.PackedIntsVersion != PackedInts.VERSION_CURRENT)
                {
                    // naive merge...
                    for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
                    {
                        Fields vectors = reader.GetTermVectors(i);
                        AddAllDocVectors(vectors, mergeState);
                        ++docCount;
                        mergeState.checkAbort.Work(300);
                    }
                }
                else
                {
                    CompressingStoredFieldsIndexReader index = matchingVectorsReader.Index;
                    IndexInput vectorsStreamOrig             = matchingVectorsReader.VectorsStream;
                    vectorsStreamOrig.Seek(0);
                    ChecksumIndexInput vectorsStream = new BufferedChecksumIndexInput((IndexInput)vectorsStreamOrig.Clone());

                    for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc;)
                    {
                        // We make sure to move the checksum input in any case, otherwise the final
                        // integrity check might need to read the whole file a second time
                        long startPointer = index.GetStartPointer(i);
                        if (startPointer > vectorsStream.FilePointer)
                        {
                            vectorsStream.Seek(startPointer);
                        }
                        if ((PendingDocs.Count == 0) && (i == 0 || index.GetStartPointer(i - 1) < startPointer)) // start of a chunk
                        {
                            int docBase   = vectorsStream.ReadVInt();
                            int chunkDocs = vectorsStream.ReadVInt();
                            Debug.Assert(docBase + chunkDocs <= matchingSegmentReader.MaxDoc);
                            if (docBase + chunkDocs < matchingSegmentReader.MaxDoc && NextDeletedDoc(docBase, liveDocs, docBase + chunkDocs) == docBase + chunkDocs)
                            {
                                long chunkEnd    = index.GetStartPointer(docBase + chunkDocs);
                                long chunkLength = chunkEnd - vectorsStream.FilePointer;
                                IndexWriter.WriteIndex(chunkDocs, this.VectorsStream.FilePointer);
                                this.VectorsStream.WriteVInt(docCount);
                                this.VectorsStream.WriteVInt(chunkDocs);
                                this.VectorsStream.CopyBytes(vectorsStream, chunkLength);
                                docCount     += chunkDocs;
                                this.NumDocs += chunkDocs;
                                mergeState.checkAbort.Work(300 * chunkDocs);
                                i = NextLiveDoc(docBase + chunkDocs, liveDocs, maxDoc);
                            }
                            else
                            {
                                for (; i < docBase + chunkDocs; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
                                {
                                    Fields vectors = reader.GetTermVectors(i);
                                    AddAllDocVectors(vectors, mergeState);
                                    ++docCount;
                                    mergeState.checkAbort.Work(300);
                                }
                            }
                        }
                        else
                        {
                            Fields vectors = reader.GetTermVectors(i);
                            AddAllDocVectors(vectors, mergeState);
                            ++docCount;
                            mergeState.checkAbort.Work(300);
                            i = NextLiveDoc(i + 1, liveDocs, maxDoc);
                        }
                    }

                    vectorsStream.Seek(vectorsStream.Length() - CodecUtil.FooterLength());
                    CodecUtil.CheckFooter(vectorsStream);
                }
            }
            Finish(mergeState.FieldInfos, docCount);
            return docCount;
        }
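The merge loop above leans on two small cursor helpers; a sketch of their usual shape (hedged, matching the Bits type used in the example and assuming a Get(int) accessor, where liveDocs == null means the segment has no deletions):

        // Sketch: advance to the next live (non-deleted) document at or after doc.
        private static int NextLiveDoc(int doc, Bits liveDocs, int maxDoc)
        {
            if (liveDocs == null) return doc; // no deletions: every doc is live
            while (doc < maxDoc && !liveDocs.Get(doc)) ++doc;
            return doc;
        }

        // Sketch: advance to the next deleted document at or after doc, or maxDoc.
        private static int NextDeletedDoc(int doc, Bits liveDocs, int maxDoc)
        {
            if (liveDocs == null) return maxDoc; // no deletions at all
            while (doc < maxDoc && liveDocs.Get(doc)) ++doc;
            return doc;
        }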