Пример #1
0
 /// <summary>
 /// Builds the <see cref="MonotonicBlockPackedReader"/> used for the addresses of a binary entry.
 /// </summary>
 /// <param name="data">Input that is seeked to <c>bytes.AddressesOffset</c>; a clone of it is handed to the reader.</param>
 /// <param name="field">Not read by this implementation.</param>
 /// <param name="bytes">Entry supplying the offset, packed-ints version, block size and count.</param>
 /// <returns>A new reader constructed over a clone of <paramref name="data"/>.</returns>
 protected override MonotonicBlockPackedReader GetAddressInstance(IndexInput data, FieldInfo field,
     BinaryEntry bytes)
 {
     // Seek before cloning, exactly as the reader construction expects.
     data.Seek(bytes.AddressesOffset);
     var addressInput = (IndexInput)data.Clone();

     return new MonotonicBlockPackedReader(
         addressInput,
         bytes.PackedIntsVersion,
         bytes.BlockSize,
         bytes.Count,
         true);
 }
Пример #2
0
 /// <summary>
 /// Builds the <see cref="MonotonicBlockPackedReader"/> used for the ord index of a numeric entry.
 /// </summary>
 /// <param name="data">Input that is seeked to <c>entry.Offset</c>; a clone of it is handed to the reader.</param>
 /// <param name="field">Not read by this implementation.</param>
 /// <param name="entry">Entry supplying the offset, packed-ints version, block size and count.</param>
 /// <returns>A new reader constructed over a clone of <paramref name="data"/>.</returns>
 protected override MonotonicBlockPackedReader GetOrdIndexInstance(IndexInput data, FieldInfo field,
     NumericEntry entry)
 {
     // Seek before cloning, exactly as the reader construction expects.
     data.Seek(entry.Offset);
     var ordIndexInput = (IndexInput)data.Clone();

     return new MonotonicBlockPackedReader(
         ordIndexInput,
         entry.PackedIntsVersion,
         entry.BlockSize,
         entry.Count,
         true);
 }
        /// <summary>
        /// Reads document <paramref name="n"/> from the text input and offers each of its
        /// stored fields to <paramref name="visitor"/>.
        /// </summary>
        /// <param name="n">Index into the per-document offsets table.</param>
        /// <param name="visitor">Decides per field whether the value is read, skipped, or iteration stops.</param>
        /// <exception cref="Exception">The type line does not match any known field type.</exception>
        public override void VisitDocument(int n, StoredFieldVisitor visitor)
        {
            // Candidate field types, probed in the same order as the type line is matched.
            BytesRef[] knownTypes =
            {
                SimpleTextStoredFieldsWriter.TYPE_STRING,
                SimpleTextStoredFieldsWriter.TYPE_BINARY,
                SimpleTextStoredFieldsWriter.TYPE_INT,
                SimpleTextStoredFieldsWriter.TYPE_LONG,
                SimpleTextStoredFieldsWriter.TYPE_FLOAT,
                SimpleTextStoredFieldsWriter.TYPE_DOUBLE
            };

            _input.Seek(_offsets[n]);

            // Header line: number of stored fields in this document.
            ReadLine();
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.NUM));
            }
            int fieldCount = ParseInt32At(SimpleTextStoredFieldsWriter.NUM.Length);

            for (int fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++)
            {
                // Field number line.
                ReadLine();
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.FIELD));
                }
                FieldInfo fieldInfo = _fieldInfos.FieldInfo(ParseInt32At(SimpleTextStoredFieldsWriter.FIELD.Length));

                // Field name line: only checked, its content is not used here.
                ReadLine();
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.NAME));
                }

                // Field type line.
                ReadLine();
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.TYPE));
                }

                BytesRef type = null;
                bool typeKnown = false;
                foreach (BytesRef candidate in knownTypes)
                {
                    if (EqualsAt(candidate, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length))
                    {
                        type = candidate;
                        typeKnown = true;
                        break;
                    }
                }
                if (!typeKnown)
                {
                    throw new Exception("unknown field type");
                }

                StoredFieldVisitor.Status status = visitor.NeedsField(fieldInfo);
                if (status == StoredFieldVisitor.Status.STOP)
                {
                    return;
                }

                if (status == StoredFieldVisitor.Status.YES)
                {
                    ReadField(type, fieldInfo, visitor);
                }
                else if (status == StoredFieldVisitor.Status.NO)
                {
                    // Consume the value line without handing it to the visitor.
                    ReadLine();
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.VALUE));
                    }
                }
            }
        }
Пример #4
0
            /// <summary>
            /// Decodes the next position for the current document from the prox input,
            /// first skipping any pending payload bytes and any positions that belong to
            /// documents iterated without reading their positions.
            /// </summary>
            /// <returns>The accumulated position after applying the decoded delta.</returns>
            public override int NextPosition()
            {
                // Perform the deferred seek, if one is outstanding.
                if (lazyProxPointer != -1)
                {
                    proxIn.Seek(lazyProxPointer);
                    lazyProxPointer = -1;
                }

                // The previous position's payload was never fetched: step over its bytes.
                if (payloadPending && payloadLength > 0)
                {
                    proxIn.Seek(proxIn.Position + payloadLength); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                    payloadPending = false;
                }

                // Discard positions left over from documents whose positions were not consumed.
                while (posPendingCount > freq)
                {
                    int skippedCode = proxIn.ReadVInt32();

                    if (storePayloads)
                    {
                        // Low bit set: a new payload length follows.
                        if ((skippedCode & 1) != 0)
                        {
                            payloadLength = proxIn.ReadVInt32();
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert(payloadLength >= 0);
                            }
                        }
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(payloadLength != -1);
                        }
                    }

                    // Low bit of the offset code set: a new offset length follows.
                    if (storeOffsets && (proxIn.ReadVInt32() & 1) != 0)
                    {
                        offsetLength = proxIn.ReadVInt32();
                    }

                    if (storePayloads)
                    {
                        proxIn.Seek(proxIn.Position + payloadLength); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                    }

                    posPendingCount--;
                    position       = 0;
                    startOffset    = 0;
                    payloadPending = false;
                }

                // Payload of the last returned position was not retrieved: skip it before reading on.
                if (payloadPending && payloadLength > 0)
                {
                    proxIn.Seek(proxIn.Position + payloadLength); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                }

                int positionDelta = proxIn.ReadVInt32();

                if (storePayloads)
                {
                    // Low bit set: a new payload length follows.
                    if ((positionDelta & 1) != 0)
                    {
                        payloadLength = proxIn.ReadVInt32();
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(payloadLength >= 0);
                        }
                    }
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(payloadLength != -1);
                    }

                    payloadPending = true;
                    // Drop the flag bit to obtain the actual delta.
                    positionDelta = positionDelta.TripleShift(1);
                }
                position += positionDelta;

                if (storeOffsets)
                {
                    int offsetCode = proxIn.ReadVInt32();
                    if ((offsetCode & 1) != 0)
                    {
                        offsetLength = proxIn.ReadVInt32();
                    }
                    startOffset += offsetCode.TripleShift(1);
                }

                posPendingCount--;

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(posPendingCount >= 0, "NextPosition() was called too many times (more than Freq times) posPendingCount={0}", posPendingCount);
                }

                return position;
            }
Пример #5
0
 /// <summary>
 /// Fills <paramref name="b"/> from the wrapped input after invoking <c>SimOutage()</c>
 /// and seeking the wrapped input to this input's current file pointer.
 /// </summary>
 /// <param name="b">Destination buffer.</param>
 /// <param name="offset">Index in <paramref name="b"/> at which to start writing.</param>
 /// <param name="length">Number of bytes to read.</param>
 protected override void ReadInternal(byte[] b, int offset, int length)
 {
     SimOutage();

     // Align the wrapped input with our own position before delegating the read.
     long currentPosition = GetFilePointer();
     @delegate.Seek(currentPosition);
     @delegate.ReadBytes(b, offset, length);
 }
Пример #6
0
            /// <summary>
            /// Scans forward line by line from the remembered start offset until a complete
            /// document entry has been read whose id passes the live-docs check.
            /// </summary>
            /// <returns>
            /// The id of the next accepted document, or <c>NO_MORE_DOCS</c> once a term, field
            /// or end line is reached without an accepted document pending.
            /// </returns>
            public override int NextDoc()
            {
                _in.Seek(_nextDocStart);

                bool docPending     = false; // becomes true once a doc line has been parsed
                long positionsStart = 0;     // file pointer recorded right after the freq line

                for (;;)
                {
                    long lineStart = _in.GetFilePointer();
                    SimpleTextUtil.ReadLine(_in, _scratch);

                    if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC))
                    {
                        // A new doc line terminates the pending document; report it if it is live.
                        if (docPending && (_liveDocs == null || _liveDocs.Get(_docId)))
                        {
                            _nextDocStart = lineStart;
                            _in.Seek(positionsStart);
                            return _docId;
                        }

                        UnicodeUtil.UTF8toUTF16(
                            _scratch.Bytes,
                            _scratch.Offset + SimpleTextFieldsWriter.DOC.Length,
                            _scratch.Length - SimpleTextFieldsWriter.DOC.Length,
                            _scratchUtf16);
                        _docId     = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                        _tf        = 0;
                        docPending = true;
                        continue;
                    }

                    if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
                    {
                        UnicodeUtil.UTF8toUTF16(
                            _scratch.Bytes,
                            _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length,
                            _scratch.Length - SimpleTextFieldsWriter.FREQ.Length,
                            _scratchUtf16);
                        _tf            = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                        positionsStart = _in.GetFilePointer();
                        continue;
                    }

                    // Position, offset and payload lines carry nothing needed here.
                    if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS) ||
                        StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET) ||
                        StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET) ||
                        StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
                    {
                        continue;
                    }

                    // Anything else must close the postings of the current term.
                    Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD) ||
                                 StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END));

                    if (docPending && (_liveDocs == null || _liveDocs.Get(_docId)))
                    {
                        _nextDocStart = lineStart;
                        _in.Seek(positionsStart);
                        return _docId;
                    }

                    _docId = NO_MORE_DOCS;
                    return _docId;
                }
            }
Пример #7
0
 /// <summary>
 /// Seeks <c>tvx</c> to the entry for <paramref name="docNum"/>: 16 bytes per document
 /// (after adding <c>docStoreOffset</c>), placed after <c>FORMAT_SIZE</c> bytes.
 /// </summary>
 /// <param name="docNum">Document number, relative to <c>docStoreOffset</c>.</param>
 internal virtual void SeekTvx(int docNum)
 {
     var entryPosition = (docNum + docStoreOffset) * 16L + FORMAT_SIZE;
     tvx.Seek(entryPosition);
 }
Пример #8
0
 /// <summary>
 /// Forwards the seek to the wrapped input <c>ii</c>.
 /// </summary>
 /// <param name="pos">Target position passed through unchanged.</param>
 public override void Seek(long pos) => ii.Seek(pos);
Пример #9
0
                /// <summary>
                /// Loads the terms index for one field from clones of the owning reader's input.
                /// With an index divisor of 1 the term bytes and both packed offset tables are
                /// read as stored; with a larger divisor only every <c>indexDivisor</c>-th index
                /// term is kept and the two offset tables are rebuilt in memory.
                /// </summary>
                /// <param name="outerInstance">Enclosing field data; its own outer instance supplies the input, term bytes and index divisor.</param>
                /// <param name="indexStart">Position in the input where this field's term bytes begin.</param>
                /// <param name="termsStart">Stored in <c>this.termsStart</c>.</param>
                /// <param name="packedIndexStart">Position of the packed terms-dict offsets.</param>
                /// <param name="packedOffsetsStart">Position of the packed term-byte offsets.</param>
                /// <param name="numIndexTerms">Number of index terms on disk, before subsampling.</param>
                public CoreFieldIndex(FieldIndexData outerInstance, long indexStart, long termsStart, long packedIndexStart, long packedOffsetsStart,
                                      int numIndexTerms)
                {
                    this.termsStart = termsStart;
                    // Remember where this field's bytes will start inside the shared term-bytes store.
                    termBytesStart  = outerInstance.outerInstance.termBytes.Pointer;

                    IndexInput clone = (IndexInput)outerInstance.outerInstance.input.Clone();

                    clone.Seek(indexStart);

                    // -1 is passed to mean "don't load term index", but
                    // if we are then later loaded it's overwritten with
                    // a real value
                    Debug.Assert(outerInstance.outerInstance.indexDivisor > 0);

                    // Number of index terms kept after subsampling by the divisor.
                    this.numIndexTerms = 1 + (numIndexTerms - 1) / outerInstance.outerInstance.indexDivisor;

                    Debug.Assert(this.numIndexTerms > 0, "numIndexTerms=" + numIndexTerms + " indexDivisor=" + outerInstance.outerInstance.indexDivisor);

                    if (outerInstance.outerInstance.indexDivisor == 1)
                    {
                        // Default (load all index terms) is fast -- slurp in the images from disk:

                        try
                        {
                            // Everything between indexStart and packedIndexStart is term bytes.
                            long numTermBytes = packedIndexStart - indexStart;
                            outerInstance.outerInstance.termBytes.Copy(clone, numTermBytes);

                            // records offsets into main terms dict file
                            termsDictOffsets = PackedInt32s.GetReader(clone);
                            Debug.Assert(termsDictOffsets.Count == numIndexTerms);

                            // records offsets into byte[] term data
                            termOffsets = PackedInt32s.GetReader(clone);
                            Debug.Assert(termOffsets.Count == 1 + numIndexTerms);
                        }
                        finally
                        {
                            clone.Dispose();
                        }
                    }
                    else
                    {
                        // Get packed iterators
                        IndexInput clone1 = (IndexInput)outerInstance.outerInstance.input.Clone();
                        IndexInput clone2 = (IndexInput)outerInstance.outerInstance.input.Clone();

                        try
                        {
                            // Subsample the index terms
                            clone1.Seek(packedIndexStart);
                            PackedInt32s.IReaderIterator termsDictOffsetsIter = PackedInt32s.GetReaderIterator(clone1, PackedInt32s.DEFAULT_BUFFER_SIZE);

                            clone2.Seek(packedOffsetsStart);
                            PackedInt32s.IReaderIterator termOffsetsIter = PackedInt32s.GetReaderIterator(clone2, PackedInt32s.DEFAULT_BUFFER_SIZE);

                            // TODO: often we can get by w/ fewer bits per
                            // value, below.. .but this'd be more complex:
                            // we'd have to try @ fewer bits and then grow
                            // if we overflowed it.

                            PackedInt32s.Mutable termsDictOffsetsM = PackedInt32s.GetMutable(this.numIndexTerms, termsDictOffsetsIter.BitsPerValue, PackedInt32s.DEFAULT);
                            PackedInt32s.Mutable termOffsetsM      = PackedInt32s.GetMutable(this.numIndexTerms + 1, termOffsetsIter.BitsPerValue, PackedInt32s.DEFAULT);

                            termsDictOffsets = termsDictOffsetsM;
                            termOffsets      = termOffsetsM;

                            // Index of the next kept term in the rebuilt tables.
                            int upto = 0;

                            // Running offset of the kept terms' bytes (sum of the lengths copied so far).
                            long termOffsetUpto = 0;

                            while (upto < this.numIndexTerms)
                            {
                                // main file offset copies straight over
                                termsDictOffsetsM.Set(upto, termsDictOffsetsIter.Next());

                                termOffsetsM.Set(upto, termOffsetUpto);

                                // Two consecutive on-disk offsets delimit this term's bytes.
                                long termOffset     = termOffsetsIter.Next();
                                long nextTermOffset = termOffsetsIter.Next();
                                int  numTermBytes   = (int)(nextTermOffset - termOffset);

                                clone.Seek(indexStart + termOffset);
                                Debug.Assert(indexStart + termOffset < clone.Length, "indexStart=" + indexStart + " termOffset=" + termOffset + " len=" + clone.Length);
                                Debug.Assert(indexStart + termOffset + numTermBytes < clone.Length);

                                outerInstance.outerInstance.termBytes.Copy(clone, numTermBytes);
                                termOffsetUpto += numTermBytes;

                                upto++;
                                if (upto == this.numIndexTerms)
                                {
                                    break;
                                }

                                // skip terms:
                                // termOffsetsIter is already one entry ahead (nextTermOffset was read above),
                                // so termsDictOffsetsIter takes one extra step here; both iterators end up
                                // advanced by indexDivisor entries per kept term.
                                termsDictOffsetsIter.Next();
                                for (int i = 0; i < outerInstance.outerInstance.indexDivisor - 2; i++)
                                {
                                    termOffsetsIter.Next();
                                    termsDictOffsetsIter.Next();
                                }
                            }
                            // Final sentinel entry: end offset of the last kept term.
                            termOffsetsM.Set(upto, termOffsetUpto);
                        }
                        finally
                        {
                            clone1.Dispose();
                            clone2.Dispose();
                            clone.Dispose();
                        }
                    }
                }
Пример #10
0
            /// <summary>
            /// Decodes the next position for the current document from the prox input,
            /// first skipping any pending payload bytes and any positions that belong to
            /// documents iterated without reading their positions.
            /// </summary>
            /// <returns>The accumulated position after applying the decoded delta.</returns>
            public override int NextPosition()
            {
                // Perform the deferred seek, if one is outstanding.
                if (LazyProxPointer != -1)
                {
                    ProxIn.Seek(LazyProxPointer);
                    LazyProxPointer = -1;
                }

                // The previous position's payload was never fetched: step over its bytes.
                if (PayloadPending && PayloadLength > 0)
                {
                    ProxIn.Seek(ProxIn.FilePointer + PayloadLength);
                    PayloadPending = false;
                }

                // Discard positions left over from documents whose positions were not consumed.
                while (PosPendingCount > Freq_Renamed)
                {
                    int skippedCode = ProxIn.ReadVInt();

                    if (StorePayloads)
                    {
                        // Low bit set: a new payload length follows.
                        if ((skippedCode & 1) != 0)
                        {
                            PayloadLength = ProxIn.ReadVInt();
                            Debug.Assert(PayloadLength >= 0);
                        }
                        Debug.Assert(PayloadLength != -1);
                    }

                    // Low bit of the offset code set: a new offset length follows.
                    if (StoreOffsets && (ProxIn.ReadVInt() & 1) != 0)
                    {
                        OffsetLength = ProxIn.ReadVInt();
                    }

                    if (StorePayloads)
                    {
                        ProxIn.Seek(ProxIn.FilePointer + PayloadLength);
                    }

                    PosPendingCount--;
                    Position            = 0;
                    StartOffset_Renamed = 0;
                    PayloadPending      = false;
                }

                // Payload of the last returned position was not retrieved: skip it before reading on.
                if (PayloadPending && PayloadLength > 0)
                {
                    ProxIn.Seek(ProxIn.FilePointer + PayloadLength);
                }

                int positionDelta = ProxIn.ReadVInt();

                if (StorePayloads)
                {
                    // Low bit set: a new payload length follows.
                    if ((positionDelta & 1) != 0)
                    {
                        PayloadLength = ProxIn.ReadVInt();
                        Debug.Assert(PayloadLength >= 0);
                    }
                    Debug.Assert(PayloadLength != -1);

                    PayloadPending = true;
                    // Unsigned shift drops the flag bit and leaves the actual delta.
                    positionDelta = (int)((uint)positionDelta >> 1);
                }
                Position += positionDelta;

                if (StoreOffsets)
                {
                    int offsetCode = ProxIn.ReadVInt();
                    if ((offsetCode & 1) != 0)
                    {
                        OffsetLength = ProxIn.ReadVInt();
                    }
                    StartOffset_Renamed += (int)((uint)offsetCode >> 1);
                }

                PosPendingCount--;

                Debug.Assert(PosPendingCount >= 0, "nextPosition() was called too many times (more than freq() times) posPendingCount=" + PosPendingCount);

                return Position;
            }
Пример #11
0
 /// <summary>
 /// Forwards the seek to the wrapped input <c>_cacheDirIndexInput</c>.
 /// </summary>
 /// <param name="pos">Target position passed through unchanged.</param>
 public override void Seek(long pos) => _cacheDirIndexInput.Seek(pos);
Пример #12
0
            /// <summary>
            /// Scans this field's lines from <c>termsStart</c> and builds the FST that maps each
            /// term to the file pointer following its term line, paired with its doc frequency
            /// and total term frequency.
            /// </summary>
            /// <remarks>
            /// Also accumulates <c>sumDocFreq</c>, <c>sumTotalTermFreq</c> and <c>termCount</c>,
            /// and sets <c>docCount</c> to the number of distinct doc ids seen. Scanning stops at
            /// the END line or at the next FIELD line.
            /// NOTE(review): the lowercase <c>.length</c> members used below look like leftovers
            /// of the Java port; confirm they exist on the project's types.
            /// </remarks>
            internal virtual void LoadTerms()
            {
                PositiveIntOutputs posIntOutputs = PositiveIntOutputs.Singleton;
                PairOutputs<long?, long?> outputsInner = new PairOutputs<long?, long?>(posIntOutputs, posIntOutputs);
                PairOutputs<long?, PairOutputs.Pair<long?, long?>> outputs =
                    new PairOutputs<long?, PairOutputs.Pair<long?, long?>>(posIntOutputs, outputsInner);

                // C# has no diamond operator: the type arguments must be spelled out.
                Builder<PairOutputs.Pair<long?, PairOutputs.Pair<long?, long?>>> b =
                    new Builder<PairOutputs.Pair<long?, PairOutputs.Pair<long?, long?>>>(FST.INPUT_TYPE.BYTE1, outputs);
                IndexInput @in = (IndexInput)outerInstance._input.Clone();

                @in.Seek(termsStart);

                BytesRef    lastTerm      = new BytesRef(10);
                long        lastDocsStart = -1; // -1 until the first term line has been seen
                int         docFreq       = 0;
                long        totalTermFreq = 0;
                FixedBitSet visitedDocs   = new FixedBitSet(maxDoc);

                IntsRef scratchIntsRef = new IntsRef();

                while (true)
                {
                    SimpleTextUtil.ReadLine(@in, scratch);
                    if (scratch.Equals(END) || StringHelper.StartsWith(scratch, FIELD))
                    {
                        // End of this field: flush the last pending term, if any.
                        if (lastDocsStart != -1)
                        {
                            b.Add(Util.ToIntsRef(lastTerm, scratchIntsRef),
                                  outputs.NewPair(lastDocsStart, outputsInner.NewPair((long)docFreq, totalTermFreq)));
                            sumTotalTermFreq += totalTermFreq;
                        }
                        break;
                    }
                    else if (StringHelper.StartsWith(scratch, DOC))
                    {
                        docFreq++;
                        sumDocFreq++;
                        UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + DOC.length, scratch.Length - DOC.length,
                                                scratchUTF16);
                        int docID = ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.length);
                        visitedDocs.Set(docID);
                    }
                    else if (StringHelper.StartsWith(scratch, FREQ))
                    {
                        UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + FREQ.length,
                                                scratch.Length - FREQ.length, scratchUTF16);
                        totalTermFreq += ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.length);
                    }
                    else if (StringHelper.StartsWith(scratch, TERM))
                    {
                        // A new term line closes the previous term: emit it before starting over.
                        if (lastDocsStart != -1)
                        {
                            b.Add(Util.ToIntsRef(lastTerm, scratchIntsRef),
                                  outputs.NewPair(lastDocsStart, outputsInner.NewPair((long)docFreq, totalTermFreq)));
                        }
                        // The file pointer is now just past the term line.
                        lastDocsStart = @in.FilePointer;
                        int len = scratch.Length - TERM.length;
                        if (len > lastTerm.Length)
                        {
                            lastTerm.Grow(len);
                        }
                        Array.Copy(scratch.Bytes, TERM.length, lastTerm.Bytes, 0, len);
                        lastTerm.Length   = len;
                        docFreq           = 0;
                        sumTotalTermFreq += totalTermFreq;
                        totalTermFreq     = 0;
                        termCount++;
                    }
                }
                docCount = visitedDocs.Cardinality();
                fst      = b.Finish();
            }
Пример #13
0
            /// <summary>
            /// Scans forward line by line from the remembered start offset until a complete
            /// document entry has been read whose id passes the live-docs check.
            /// </summary>
            /// <returns>
            /// The id of the next accepted document, or <c>NO_MORE_DOCS</c> once a term, field
            /// or end line is reached without an accepted document pending.
            /// </returns>
            public override int NextDoc()
            {
                @in.Seek(nextDocStart);

                bool docPending     = false; // becomes true once a doc line has been parsed
                long positionsStart = 0;     // file pointer recorded right after the freq line

                for (;;)
                {
                    long lineStart = @in.FilePointer;
                    SimpleTextUtil.ReadLine(@in, scratch);

                    if (StringHelper.StartsWith(scratch, DOC))
                    {
                        // A new doc line terminates the pending document; report it if it is live.
                        if (docPending && (liveDocs == null || liveDocs.Get(docID_Renamed)))
                        {
                            nextDocStart = lineStart;
                            @in.Seek(positionsStart);
                            return docID_Renamed;
                        }

                        UnicodeUtil.UTF8toUTF16(
                            scratch.Bytes,
                            scratch.Offset + DOC.length,
                            scratch.Length - DOC.length,
                            scratchUTF16);
                        docID_Renamed = ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.length);
                        tf            = 0;
                        docPending    = true;
                        continue;
                    }

                    if (StringHelper.StartsWith(scratch, FREQ))
                    {
                        UnicodeUtil.UTF8toUTF16(
                            scratch.Bytes,
                            scratch.Offset + FREQ.length,
                            scratch.Length - FREQ.length,
                            scratchUTF16);
                        tf             = ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.length);
                        positionsStart = @in.FilePointer;
                        continue;
                    }

                    // Position, offset and payload lines carry nothing needed here.
                    if (StringHelper.StartsWith(scratch, POS) ||
                        StringHelper.StartsWith(scratch, START_OFFSET) ||
                        StringHelper.StartsWith(scratch, END_OFFSET) ||
                        StringHelper.StartsWith(scratch, PAYLOAD))
                    {
                        continue;
                    }

                    // Anything else must close the postings of the current term.
                    Debug.Assert(StringHelper.StartsWith(scratch, TERM) || StringHelper.StartsWith(scratch, FIELD) ||
                                 StringHelper.StartsWith(scratch, END));

                    if (docPending && (liveDocs == null || liveDocs.Get(docID_Renamed)))
                    {
                        nextDocStart = lineStart;
                        @in.Seek(positionsStart);
                        return docID_Renamed;
                    }

                    docID_Renamed = NO_MORE_DOCS;
                    return docID_Renamed;
                }
            }
Пример #14
0
 /// <summary>
 /// Seeks <c>indexStream</c> to the entry for <paramref name="docID"/>: 8 bytes per document
 /// (after adding <c>docStoreOffset</c>), placed after <c>formatSize</c> bytes.
 /// </summary>
 /// <param name="docID">Document id, relative to <c>docStoreOffset</c>.</param>
 private void SeekIndex(int docID)
 {
     var entryPosition = formatSize + (docID + docStoreOffset) * 8L;
     indexStream.Seek(entryPosition);
 }
Пример #15
0
                /// <remarks>
                /// TODO: we may want an alternate mode here which is
                /// "if you are about to return NOT_FOUND I won't use
                /// the terms data from that"; eg FuzzyTermsEnum will
                /// (usually) just immediately call seek again if we
                /// return NOT_FOUND so it's a waste for us to fill in
                /// the term that was actually NOT_FOUND
                /// </remarks>
                public override SeekStatus SeekCeil(BytesRef target)
                {
                    if (_indexEnum == null)
                    {
                        throw new InvalidOperationException("terms index was not loaded");
                    }

                    var doSeek = true;

                    // See if we can avoid seeking, because target term
                    // is after current term but before next index term:
                    if (_indexIsCurrent)
                    {
                        var cmp = BytesRef.UTF8SortedAsUnicodeComparer.Compare(_term, target);

                        if (cmp == 0)
                        {
                            return(SeekStatus.FOUND);     // Already at the requested term
                        }
                        if (cmp < 0)
                        {
                            // Target term is after current term
                            if (!_didIndexNext)
                            {
                                _nextIndexTerm = _indexEnum.Next == -1 ? null : _indexEnum.Term;
                                _didIndexNext  = true;
                            }

                            if (_nextIndexTerm == null ||
                                BytesRef.UTF8SortedAsUnicodeComparer.Compare(target, _nextIndexTerm) < 0)
                            {
                                // Optimization: requested term is within the
                                // same term block we are now in; skip seeking
                                // (but do scanning):
                                doSeek = false;
                            }
                        }
                    }

                    if (doSeek)
                    {
                        //System.out.println("  seek");

                        // Ask terms index to find biggest indexed term (=
                        // first term in a block) that's <= our text:
                        _input.Seek(_indexEnum.Seek(target).Value);
                        var result = NextBlock();

                        // Block must exist since, at least, the indexed term
                        // is in the block:
                        Debug.Assert(result);

                        _indexIsCurrent  = true;
                        _didIndexNext    = false;
                        _blocksSinceSeek = 0;

                        if (_doOrd)
                        {
                            _state.Ord = _indexEnum.Ord - 1;
                        }

                        _term.CopyBytes(_indexEnum.Term);
                    }
                    else
                    {
                        if (_state.TermBlockOrd == _blockTermCount && !NextBlock())
                        {
                            _indexIsCurrent = false;
                            return(SeekStatus.END);
                        }
                    }

                    _seekPending = false;

                    var common = 0;

                    // Scan within block.  We could do this by calling
                    // _next() and testing the resulting term, but this
                    // is wasteful.  Instead, we first confirm the
                    // target matches the common prefix of this block,
                    // and then we scan the term bytes directly from the
                    // termSuffixesreader's byte[], saving a copy into
                    // the BytesRef term per term.  Only when we return
                    // do we then copy the bytes into the term.

                    while (true)
                    {
                        // First, see if target term matches common prefix
                        // in this block:
                        if (common < _termBlockPrefix)
                        {
                            var cmp = (_term.Bytes[common] & 0xFF) - (target.Bytes[target.Offset + common] & 0xFF);
                            if (cmp < 0)
                            {
                                // TODO: maybe we should store common prefix
                                // in block header?  (instead of relying on
                                // last term of previous block)

                                // Target's prefix is after the common block
                                // prefix, so term cannot be in this block
                                // but it could be in next block.  We
                                // must scan to end-of-block to set common
                                // prefix for next block:
                                if (_state.TermBlockOrd < _blockTermCount)
                                {
                                    while (_state.TermBlockOrd < _blockTermCount - 1)
                                    {
                                        _state.TermBlockOrd++;
                                        _state.Ord++;
                                        _termSuffixesReader.SkipBytes(_termSuffixesReader.ReadVInt());
                                    }
                                    var suffix = _termSuffixesReader.ReadVInt();
                                    _term.Length = _termBlockPrefix + suffix;
                                    if (_term.Bytes.Length < _term.Length)
                                    {
                                        _term.Grow(_term.Length);
                                    }
                                    _termSuffixesReader.ReadBytes(_term.Bytes, _termBlockPrefix, suffix);
                                }
                                _state.Ord++;

                                if (!NextBlock())
                                {
                                    _indexIsCurrent = false;
                                    return(SeekStatus.END);
                                }
                                common = 0;
                            }
                            else if (cmp > 0)
                            {
                                // Target's prefix is before the common prefix
                                // of this block, so we position to start of
                                // block and return NOT_FOUND:
                                Debug.Assert(_state.TermBlockOrd == 0);

                                var suffix = _termSuffixesReader.ReadVInt();
                                _term.Length = _termBlockPrefix + suffix;
                                if (_term.Bytes.Length < _term.Length)
                                {
                                    _term.Grow(_term.Length);
                                }
                                _termSuffixesReader.ReadBytes(_term.Bytes, _termBlockPrefix, suffix);
                                return(SeekStatus.NOT_FOUND);
                            }
                            else
                            {
                                common++;
                            }

                            continue;
                        }

                        // Test every term in this block
                        while (true)
                        {
                            _state.TermBlockOrd++;
                            _state.Ord++;

                            var suffix = _termSuffixesReader.ReadVInt();

                            // We know the prefix matches, so just compare the new suffix:

                            var termLen = _termBlockPrefix + suffix;
                            var bytePos = _termSuffixesReader.Position;

                            var next = false;

                            var limit     = target.Offset + (termLen < target.Length ? termLen : target.Length);
                            var targetPos = target.Offset + _termBlockPrefix;
                            while (targetPos < limit)
                            {
                                var cmp = (_termSuffixes[bytePos++] & 0xFF) - (target.Bytes[targetPos++] & 0xFF);
                                if (cmp < 0)
                                {
                                    // Current term is still before the target;
                                    // keep scanning
                                    next = true;
                                    break;
                                }

                                if (cmp <= 0)
                                {
                                    continue;
                                }

                                // Done!  Current term is after target. Stop
                                // here, fill in real term, return NOT_FOUND.
                                _term.Length = _termBlockPrefix + suffix;
                                if (_term.Bytes.Length < _term.Length)
                                {
                                    _term.Grow(_term.Length);
                                }
                                _termSuffixesReader.ReadBytes(_term.Bytes, _termBlockPrefix, suffix);
                                return(SeekStatus.NOT_FOUND);
                            }

                            if (!next && target.Length <= termLen)
                            {
                                _term.Length = _termBlockPrefix + suffix;
                                if (_term.Bytes.Length < _term.Length)
                                {
                                    _term.Grow(_term.Length);
                                }
                                _termSuffixesReader.ReadBytes(_term.Bytes, _termBlockPrefix, suffix);

                                return(target.Length == termLen ? SeekStatus.FOUND : SeekStatus.NOT_FOUND);
                            }

                            if (_state.TermBlockOrd == _blockTermCount)
                            {
                                // Must pre-fill term for next block's common prefix
                                _term.Length = _termBlockPrefix + suffix;
                                if (_term.Bytes.Length < _term.Length)
                                {
                                    _term.Grow(_term.Length);
                                }
                                _termSuffixesReader.ReadBytes(_term.Bytes, _termBlockPrefix, suffix);
                                break;
                            }

                            _termSuffixesReader.SkipBytes(suffix);
                        }

                        // The purpose of the terms dict index is to seek
                        // the enum to the closest index term before the
                        // term we are looking for.  So, we should never
                        // cross another index term (besides the first
                        // one) while we are scanning:

                        Debug.Assert(_indexIsCurrent);

                        if (!NextBlock())
                        {
                            _indexIsCurrent = false;
                            return(SeekStatus.END);
                        }
                        common = 0;
                    }
                }
Пример #16
0
 /// <summary>
 /// Positions <paramref name="input"/> at the offset that is stored as a
 /// <see cref="long"/> in the last 8 bytes of the file.
 /// </summary>
 /// <param name="input">The input to reposition.</param>
 /// <param name="dirOffset">Not used by this override; the offset is read from the file itself.</param>
 protected override void SeekDir(IndexInput input, long dirOffset)
 {
     // sizeof(long) is already a byte count (8). The former "sizeof(long)/8" evaluated to 1,
     // which positioned the input one byte before the end and left too few bytes for ReadLong().
     input.Seek(input.Length() - sizeof(long));
     long offset = input.ReadLong();
     input.Seek(offset);
 }
Пример #17
0
                /// <summary>
                /// Creates a terms enum for the field described by <paramref name="fieldReader"/>,
                /// reading through its own clone of the input held by <paramref name="blockTermsReader"/>.
                /// </summary>
                /// <param name="fieldReader">Supplies the field info, the terms start pointer and the longs size.</param>
                /// <param name="blockTermsReader">Supplies the input to clone, the terms index reader and the postings reader.</param>
                public SegmentTermsEnum(FieldReader fieldReader, BlockTermsReader blockTermsReader)
                {
                    _fieldReader      = fieldReader;
                    _blockTermsReader = blockTermsReader;

                    // Clone the shared input and move the clone to where this field's terms start.
                    _input = (IndexInput) blockTermsReader._input.Clone();
                    _input.Seek(fieldReader._termsStartPointer);

                    // Terms-index enum for this field, and whether the index supports ords.
                    var termsIndex = blockTermsReader._indexReader;
                    var fieldInfo  = fieldReader._fieldInfo;
                    _indexEnum       = termsIndex.GetFieldEnum(fieldInfo);
                    _doOrd           = termsIndex.SupportsOrd;
                    _fieldTerm.Field = fieldInfo.Name;

                    // Fresh term state with TotalTermFreq and Ord both initialised to -1.
                    _state = blockTermsReader._postingsReader.NewTermState();
                    _state.TotalTermFreq = -1;
                    _state.Ord           = -1;

                    // Initial scratch buffers.
                    _termSuffixes = new byte[128];
                    _docFreqBytes = new byte[64];
                    _longs        = new long[fieldReader._longsSize];
                }
Пример #18
0
        /// <summary>
        /// Merges the term vectors of every reader in <paramref name="mergeState"/> into this writer.
        /// <para/>
        /// For each reader, either every live document is re-added one at a time (the "naive" path),
        /// or, when the matching segment reader uses the same format settings as this writer,
        /// whole chunks are copied byte-for-byte where possible.
        /// </summary>
        /// <param name="mergeState">Supplies the readers, their matching segment readers, the field infos and the abort checker.</param>
        /// <returns>The number of documents merged; the same count is passed to <c>Finish</c>.</returns>
        public override int Merge(MergeState mergeState)
        {
            int docCount = 0;
            int idx      = 0;

            foreach (AtomicReader reader in mergeState.Readers)
            {
                // MatchingSegmentReaders is indexed in step with the Readers enumeration.
                SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++];
                CompressingTermVectorsReader matchingVectorsReader = null;
                if (matchingSegmentReader != null)
                {
                    TermVectorsReader vectorsReader = matchingSegmentReader.TermVectorsReader;
                    // we can only bulk-copy if the matching reader is also a CompressingTermVectorsReader
                    if (vectorsReader != null && vectorsReader is CompressingTermVectorsReader compressingTermVectorsReader)
                    {
                        matchingVectorsReader = compressingTermVectorsReader;
                    }
                }

                int   maxDoc   = reader.MaxDoc;
                IBits liveDocs = reader.LiveDocs;

                // Bulk copy additionally requires the same version, compression mode, chunk size
                // and packed-ints version as this writer; otherwise fall back to the naive path.
                if (matchingVectorsReader == null || matchingVectorsReader.Version != VERSION_CURRENT || matchingVectorsReader.CompressionMode != compressionMode || matchingVectorsReader.ChunkSize != chunkSize || matchingVectorsReader.PackedInt32sVersion != PackedInt32s.VERSION_CURRENT)
                {
                    // naive merge...
                    for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
                    {
                        Fields vectors = reader.GetTermVectors(i);
                        AddAllDocVectors(vectors, mergeState);
                        ++docCount;
                        mergeState.CheckAbort.Work(300);
                    }
                }
                else
                {
                    CompressingStoredFieldsIndexReader index = matchingVectorsReader.Index;
                    IndexInput vectorsStreamOrig             = matchingVectorsReader.VectorsStream;
                    vectorsStreamOrig.Seek(0);
                    // NOTE: the local 'vectorsStream' is the checksumming SOURCE being read from;
                    // 'this.vectorsStream' is the member this writer writes to.
                    ChecksumIndexInput vectorsStream = new BufferedChecksumIndexInput((IndexInput)vectorsStreamOrig.Clone());

                    // 'i' is advanced inside the body because each branch moves it differently.
                    for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc;)
                    {
                        // We make sure to move the checksum input in any case, otherwise the final
                        // integrity check might need to read the whole file a second time
                        long startPointer = index.GetStartPointer(i);
                        if (startPointer > vectorsStream.GetFilePointer())
                        {
                            vectorsStream.Seek(startPointer);
                        }
                        // Only attempt a bulk copy when nothing is buffered in pendingDocs and
                        // document i begins a new chunk (its start pointer differs from doc i - 1).
                        if ((pendingDocs.Count == 0) && (i == 0 || index.GetStartPointer(i - 1) < startPointer)) // start of a chunk
                        {
                            // Chunk header: first doc of the chunk and number of docs in it.
                            int docBase   = vectorsStream.ReadVInt32();
                            int chunkDocs = vectorsStream.ReadVInt32();
                            if (Debugging.AssertsEnabled)
                            {
                                Debugging.Assert(docBase + chunkDocs <= matchingSegmentReader.MaxDoc);
                            }
                            // Copy the raw chunk only if it is not the last one (a following start pointer
                            // is needed to compute its length) and NextDeletedDoc reports no deletion
                            // inside [docBase, docBase + chunkDocs).
                            if (docBase + chunkDocs < matchingSegmentReader.MaxDoc && NextDeletedDoc(docBase, liveDocs, docBase + chunkDocs) == docBase + chunkDocs)
                            {
                                long chunkEnd    = index.GetStartPointer(docBase + chunkDocs);
                                long chunkLength = chunkEnd - vectorsStream.GetFilePointer();
                                indexWriter.WriteIndex(chunkDocs, this.vectorsStream.GetFilePointer());
                                // Re-write the chunk header with the doc base of the merged output (docCount),
                                // then copy the remaining chunk bytes unchanged.
                                this.vectorsStream.WriteVInt32(docCount);
                                this.vectorsStream.WriteVInt32(chunkDocs);
                                this.vectorsStream.CopyBytes(vectorsStream, chunkLength);
                                docCount     += chunkDocs;
                                this.numDocs += chunkDocs;
                                mergeState.CheckAbort.Work(300 * chunkDocs);
                                i = NextLiveDoc(docBase + chunkDocs, liveDocs, maxDoc);
                            }
                            else
                            {
                                // Chunk cannot be copied as a whole: re-add its live docs one by one.
                                for (; i < docBase + chunkDocs; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
                                {
                                    Fields vectors = reader.GetTermVectors(i);
                                    AddAllDocVectors(vectors, mergeState);
                                    ++docCount;
                                    mergeState.CheckAbort.Work(300);
                                }
                            }
                        }
                        else
                        {
                            // Not at a chunk boundary (or docs are still pending): add this single document.
                            Fields vectors = reader.GetTermVectors(i);
                            AddAllDocVectors(vectors, mergeState);
                            ++docCount;
                            mergeState.CheckAbort.Work(300);
                            i = NextLiveDoc(i + 1, liveDocs, maxDoc);
                        }
                    }

                    // Move the checksumming input to the footer and verify it.
                    vectorsStream.Seek(vectorsStream.Length - CodecUtil.FooterLength());
                    CodecUtil.CheckFooter(vectorsStream);
                }
            }
            Finish(mergeState.FieldInfos, docCount);
            return(docCount);
        }
Пример #19
0
        // LUCENENET NOTE: Changed from public to internal because the class had to be made public, but is not for public use.
        /// <summary>
        /// Opens the doc-values file of the segment and reads the header of every field
        /// (type, value pattern, lengths), recording where each field's data starts and
        /// skipping over the data itself until the END marker is read.
        /// </summary>
        /// <param name="state">Supplies the directory, segment info, segment suffix and IO context.</param>
        /// <param name="ext">File extension of the doc-values file to open.</param>
        /// <exception cref="ArgumentOutOfRangeException">The file declares a doc-values type this reader does not handle.</exception>
        internal SimpleTextDocValuesReader(SegmentReadState state, string ext)
        {
            data = state.Directory.OpenInput(
                IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, ext), state.Context);
            maxDoc = state.SegmentInfo.DocCount;

            while (true)
            {
                ReadLine();
                if (scratch.Equals(SimpleTextDocValuesWriter.END))
                {
                    break;
                }
                Debug.Assert(StartsWith(SimpleTextDocValuesWriter.FIELD), scratch.Utf8ToString());
                var fieldName = StripPrefix(SimpleTextDocValuesWriter.FIELD);
                var field     = new OneField();

                fields[fieldName] = field;

                ReadLine();
                Debug.Assert(StartsWith(SimpleTextDocValuesWriter.TYPE), scratch.Utf8ToString());

                var dvType =
                    (DocValuesType)
                    Enum.Parse(typeof(DocValuesType), StripPrefix(SimpleTextDocValuesWriter.TYPE));

                // In every branch below the skip distance is "bytes per entry * number of entries".
                // The per-document multiplications are done in 64-bit arithmetic: with int operands
                // the product overflows once it exceeds int.MaxValue, which would seek to a wrong
                // (possibly negative) position on large segments.
                if (dvType == DocValuesType.NUMERIC)
                {
                    ReadLine();
                    Debug.Assert(StartsWith(SimpleTextDocValuesWriter.MINVALUE),
                                 "got " + scratch.Utf8ToString() + " field=" + fieldName + " ext=" + ext);
                    field.MinValue = Convert.ToInt64(StripPrefix(SimpleTextDocValuesWriter.MINVALUE), CultureInfo.InvariantCulture);
                    ReadLine();
                    Debug.Assert(StartsWith(SimpleTextDocValuesWriter.PATTERN));
                    field.Pattern = StripPrefix(SimpleTextDocValuesWriter.PATTERN);
                    field.DataStartFilePointer = data.GetFilePointer();
                    data.Seek(data.GetFilePointer() + (1 + field.Pattern.Length + 2) * (long)maxDoc);
                }
                else if (dvType == DocValuesType.BINARY)
                {
                    ReadLine();
                    Debug.Assert(StartsWith(SimpleTextDocValuesWriter.MAXLENGTH));
                    field.MaxLength = Convert.ToInt32(StripPrefix(SimpleTextDocValuesWriter.MAXLENGTH), CultureInfo.InvariantCulture);
                    ReadLine();
                    Debug.Assert(StartsWith(SimpleTextDocValuesWriter.PATTERN));
                    field.Pattern = StripPrefix(SimpleTextDocValuesWriter.PATTERN);
                    field.DataStartFilePointer = data.GetFilePointer();
                    data.Seek(data.GetFilePointer() + (9 + field.Pattern.Length + field.MaxLength + 2) * (long)maxDoc);
                }
                else if (dvType == DocValuesType.SORTED || dvType == DocValuesType.SORTED_SET)
                {
                    ReadLine();
                    Debug.Assert(StartsWith(SimpleTextDocValuesWriter.NUMVALUES));
                    field.NumValues = Convert.ToInt64(StripPrefix(SimpleTextDocValuesWriter.NUMVALUES), CultureInfo.InvariantCulture);
                    ReadLine();
                    Debug.Assert(StartsWith(SimpleTextDocValuesWriter.MAXLENGTH));
                    field.MaxLength = Convert.ToInt32(StripPrefix(SimpleTextDocValuesWriter.MAXLENGTH), CultureInfo.InvariantCulture);
                    ReadLine();
                    Debug.Assert(StartsWith(SimpleTextDocValuesWriter.PATTERN));
                    field.Pattern = StripPrefix(SimpleTextDocValuesWriter.PATTERN);
                    ReadLine();
                    Debug.Assert(StartsWith(SimpleTextDocValuesWriter.ORDPATTERN));
                    field.OrdPattern           = StripPrefix(SimpleTextDocValuesWriter.ORDPATTERN);
                    field.DataStartFilePointer = data.GetFilePointer();
                    // Skip the value entries (one per distinct value) followed by the ord entries (one per document).
                    data.Seek(data.GetFilePointer() + (9 + field.Pattern.Length + field.MaxLength) * field.NumValues +
                              (1 + field.OrdPattern.Length) * (long)maxDoc);
                }
                else
                {
                    throw new ArgumentOutOfRangeException();
                }
            }

            // We should only be called from above if at least one
            // field has DVs:
            Debug.Assert(fields.Count > 0);
        }
Пример #20
0
 /// <summary>
 /// Forwards the seek to the wrapped <c>cacheInput</c>.
 /// </summary>
 /// <param name="pos">Position to seek to.</param>
 public override void Seek(long pos) => cacheInput.Seek(pos);
Пример #21
0
            /// <summary>
            /// Moves <paramref name="input"/> to <c>bytes.offset</c> and returns a new
            /// terms enum that reads from it.
            /// </summary>
            /// <param name="input">Input the returned enum reads from; it is repositioned by this call.</param>
            /// <returns>A new <c>TermsEnumAnonymousClass</c> bound to this instance and <paramref name="input"/>.</returns>
            internal virtual TermsEnum GetTermsEnum(IndexInput input)
            {
                // The input must be positioned before the enum is constructed on top of it.
                input.Seek(bytes.offset);

                var termsEnum = new TermsEnumAnonymousClass(this, input);
                return termsEnum;
            }
Пример #22
0
 // Not private to avoid synthetic access$NNN methods
 /// <summary>
 /// Positions <c>tvx</c> at <c>HEADER_LENGTH_INDEX + 16 * docNum</c>.
 /// </summary>
 /// <param name="docNum">Document number whose entry should be sought.</param>
 internal virtual void SeekTvx(int docNum)
 {
     // 16L forces the multiplication into 64-bit arithmetic.
     long entryPosition = docNum * 16L + HEADER_LENGTH_INDEX;
     tvx.Seek(entryPosition);
 }
Пример #23
0
 /// <summary>
 /// Positions <c>indexStream</c> at <c>FORMAT_SIZE + 8 * (docID + docStoreOffset)</c>.
 /// </summary>
 /// <param name="docID">Document ID, relative to <c>docStoreOffset</c>.</param>
 private void SeekIndex(int docID)
 {
     // 8L forces the multiplication into 64-bit arithmetic.
     long entryOffset = (docID + docStoreOffset) * 8L;
     indexStream.Seek(FORMAT_SIZE + entryOffset);
 }
Пример #24
0
        /// <summary>
        /// Reads the value at <paramref name="index"/> directly from the underlying input.
        /// The value occupies <c>m_bitsPerValue</c> bits starting at bit
        /// <c>index * m_bitsPerValue</c> (relative to <c>startPointer</c>); the covering
        /// bytes are read, the surplus trailing bits are shifted out and the result is
        /// masked with <c>valueMask</c>.
        /// </summary>
        /// <param name="index">Index of the value to read.</param>
        /// <returns>The decoded value.</returns>
        /// <exception cref="InvalidOperationException">
        /// The value would span more than 9 bytes, or the underlying input threw an <see cref="IOException"/>.
        /// </exception>
        public override long Get(int index)
        {
            // Split the absolute bit offset into a byte offset and a bit offset inside that byte.
            long bitOffset  = (long)index * m_bitsPerValue;
            long byteOffset = bitOffset.TripleShift(3);
            int  bitInByte  = (int)(bitOffset & 7);

            // Number of bits covered by the bytes that must be read (rounded up to a multiple of 8) ...
            int spanBits = (bitInByte + m_bitsPerValue + 7) & ~7;
            // ... and how many of those bits trail the value and must be shifted out at the end.
            int trailingBits = spanBits - bitInByte - m_bitsPerValue;

            try
            {
                @in.Seek(startPointer + byteOffset);

                long raw;
                // Dispatch on the number of bytes to read.
                switch (spanBits.TripleShift(3))
                {
                    case 1:
                        raw = @in.ReadByte();
                        break;

                    case 2:
                        raw = @in.ReadInt16();
                        break;

                    case 3:
                        raw = ((long)@in.ReadInt16() << 8) | (@in.ReadByte() & 0xFFL);
                        break;

                    case 4:
                        raw = @in.ReadInt32();
                        break;

                    case 5:
                        raw = ((long)@in.ReadInt32() << 8) | (@in.ReadByte() & 0xFFL);
                        break;

                    case 6:
                        raw = ((long)@in.ReadInt32() << 16) | (@in.ReadInt16() & 0xFFFFL);
                        break;

                    case 7:
                        raw = ((long)@in.ReadInt32() << 24) | ((@in.ReadInt16() & 0xFFFFL) << 8) | (@in.ReadByte() & 0xFFL);
                        break;

                    case 8:
                        raw = @in.ReadInt64();
                        break;

                    case 9:
                        // Nine bytes do not fit into a long, so relevant bits must not be shifted out.
                        // The right shift normally applied on return is folded in here and then reset.
                        raw          = (@in.ReadInt64() << (8 - trailingBits)) | (((uint)(@in.ReadByte() & 0xFFL) >> trailingBits));
                        trailingBits = 0;
                        break;

                    default:
                        throw new InvalidOperationException("bitsPerValue too large: " + m_bitsPerValue);
                }

                return raw.TripleShift(trailingBits) & valueMask;
            }
            catch (IOException ioe)
            {
                throw new InvalidOperationException("failed", ioe);
            }
        }
Пример #25
0
            /// <summary>
            /// Decodes the next position from <c>proxIn</c> and returns it.
            /// Before decoding, a deferred seek is performed if one is pending, an unread payload
            /// is skipped, and position entries beyond the current <c>freq</c> are skipped.
            /// </summary>
            /// <returns>The current position after adding the decoded delta.</returns>
            public override int NextPosition()
            {
                // Perform the deferred seek, if one is pending.
                if (lazyProxPointer != -1)
                {
                    proxIn.Seek(lazyProxPointer);
                    lazyProxPointer = -1;
                }

                // The payload of the last position was never retrieved -- step over it.
                bool lastPayloadUnread = payloadPending && payloadLength > 0;
                if (lastPayloadUnread)
                {
                    proxIn.Seek(proxIn.GetFilePointer() + payloadLength);
                    payloadPending = false;
                }

                // Skip the positions of documents that were iterated without reading their positions.
                while (posPendingCount > freq)
                {
                    int skippedCode = proxIn.ReadVInt32();

                    if (storePayloads)
                    {
                        // Low bit set: a new payload length follows.
                        if ((skippedCode & 1) != 0)
                        {
                            payloadLength = proxIn.ReadVInt32();
                            Debug.Assert(payloadLength >= 0);
                        }
                        Debug.Assert(payloadLength != -1);
                    }

                    // Low bit of the offset code set: a new offset length follows.
                    if (storeOffsets && (proxIn.ReadVInt32() & 1) != 0)
                    {
                        offsetLength = proxIn.ReadVInt32();
                    }

                    if (storePayloads)
                    {
                        proxIn.Seek(proxIn.GetFilePointer() + payloadLength);
                    }

                    posPendingCount--;
                    position       = 0;
                    startOffset    = 0;
                    payloadPending = false;
                }

                // Read the next position; first step over a payload that was not retrieved.
                if (payloadPending && payloadLength > 0)
                {
                    proxIn.Seek(proxIn.GetFilePointer() + payloadLength);
                }

                int positionCode = proxIn.ReadVInt32();

                if (storePayloads)
                {
                    // Low bit set: a new payload length follows.
                    if ((positionCode & 1) != 0)
                    {
                        payloadLength = proxIn.ReadVInt32();
                        Debug.Assert(payloadLength >= 0);
                    }
                    Debug.Assert(payloadLength != -1);

                    payloadPending = true;
                    // Drop the flag bit (unsigned shift); what remains is the position delta.
                    positionCode = (int)((uint)positionCode >> 1);
                }
                position += positionCode;

                if (storeOffsets)
                {
                    int offsetCode = proxIn.ReadVInt32();
                    // Low bit set: a new offset length follows.
                    if ((offsetCode & 1) != 0)
                    {
                        offsetLength = proxIn.ReadVInt32();
                    }
                    startOffset += (int)((uint)offsetCode >> 1);
                }

                posPendingCount--;

                Debug.Assert(posPendingCount >= 0, "nextPosition() was called too many times (more than freq() times) posPendingCount=" + posPendingCount);

                return position;
            }
Пример #26
0
        /// <summary>
        /// Reads the stored fields of document <paramref name="docID"/> and passes each field to
        /// <paramref name="visitor"/>, which decides per field whether it is read, skipped, or
        /// whether iteration stops.
        /// </summary>
        /// <param name="docID">ID of the document to load.</param>
        /// <param name="visitor">Receives the fields of the document.</param>
        /// <exception cref="CorruptIndexException">
        /// The chunk header is inconsistent with <paramref name="docID"/> or <c>numDocs</c>, a
        /// bits-per-value header exceeds 31, or the document length and field count disagree.
        /// </exception>
        public override void VisitDocument(int docID, StoredFieldVisitor visitor)
        {
            fieldsStream.Seek(indexReader.GetStartPointer(docID));

            // Chunk header: first document of the chunk and number of documents in it.
            int docBase   = fieldsStream.ReadVInt32();
            int chunkDocs = fieldsStream.ReadVInt32();

            if (docID < docBase || docID >= docBase + chunkDocs || docBase + chunkDocs > numDocs)
            {
                throw new CorruptIndexException("Corrupted: docID=" + docID + ", docBase=" + docBase + ", chunkDocs=" + chunkDocs + ", numDocs=" + numDocs + " (resource=" + fieldsStream + ")");
            }

            int numStoredFields, offset, length, totalLength;

            if (chunkDocs == 1)
            {
                // Single-document chunk: field count and length are stored directly.
                numStoredFields = fieldsStream.ReadVInt32();
                offset          = 0;
                length          = fieldsStream.ReadVInt32();
                totalLength     = length;
            }
            else
            {
                // Field counts: 0 bits means every document in the chunk has the same count.
                int bitsPerStoredFields = fieldsStream.ReadVInt32();
                if (bitsPerStoredFields == 0)
                {
                    numStoredFields = fieldsStream.ReadVInt32();
                }
                else if (bitsPerStoredFields > 31)
                {
                    throw new CorruptIndexException("bitsPerStoredFields=" + bitsPerStoredFields + " (resource=" + fieldsStream + ")");
                }
                else
                {
                    long filePointer           = fieldsStream.GetFilePointer();
                    PackedInt32s.Reader reader = PackedInt32s.GetDirectReaderNoHeader(fieldsStream, PackedInt32s.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerStoredFields);
                    numStoredFields = (int)(reader.Get(docID - docBase));
                    // Move past the whole packed block regardless of where the direct read left the stream.
                    fieldsStream.Seek(filePointer + PackedInt32s.Format.PACKED.ByteCount(packedIntsVersion, chunkDocs, bitsPerStoredFields));
                }

                // Document lengths: 0 bits means every document in the chunk has the same length.
                int bitsPerLength = fieldsStream.ReadVInt32();
                if (bitsPerLength == 0)
                {
                    length      = fieldsStream.ReadVInt32();
                    offset      = (docID - docBase) * length;
                    totalLength = chunkDocs * length;
                }
                // Validate bitsPerLength here. The previous code re-tested bitsPerStoredFields, which
                // can never exceed 31 at this point, so a corrupt bitsPerLength was never detected.
                else if (bitsPerLength > 31)
                {
                    throw new CorruptIndexException("bitsPerLength=" + bitsPerLength + " (resource=" + fieldsStream + ")");
                }
                else
                {
                    PackedInt32s.IReaderIterator it = PackedInt32s.GetReaderIteratorNoHeader(fieldsStream, PackedInt32s.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerLength, 1);
                    // Sum the lengths of the preceding documents to get this document's offset ...
                    int off = 0;
                    for (int i = 0; i < docID - docBase; ++i)
                    {
                        off += (int)it.Next();
                    }
                    offset = off;
                    length = (int)it.Next();
                    off   += length;
                    // ... and keep summing the remaining lengths to get the total chunk length.
                    for (int i = docID - docBase + 1; i < chunkDocs; ++i)
                    {
                        off += (int)it.Next();
                    }
                    totalLength = off;
                }
            }

            // A document has zero length if and only if it has no stored fields.
            if ((length == 0) != (numStoredFields == 0))
            {
                throw new CorruptIndexException("length=" + length + ", numStoredFields=" + numStoredFields + " (resource=" + fieldsStream + ")");
            }
            if (numStoredFields == 0)
            {
                // nothing to do
                return;
            }

            DataInput documentInput;

            if (version >= CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS && totalLength >= 2 * chunkSize)
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(chunkSize > 0);
                    Debugging.Assert(offset < chunkSize);
                }

                // Big chunk: decompress only the first slice here (into the 'bytes' member) and
                // hand the rest of the work to the DataInputAnonymousClass wrapper.
                decompressor.Decompress(fieldsStream, chunkSize, offset, Math.Min(length, chunkSize - offset), bytes);
                documentInput = new DataInputAnonymousClass(this, length);
            }
            else
            {
                // Reuse the shared buffer for small chunks; allocate a fresh one otherwise.
                BytesRef bytes = totalLength <= BUFFER_REUSE_THRESHOLD ? this.bytes : new BytesRef();
                decompressor.Decompress(fieldsStream, totalLength, offset, length, bytes);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(bytes.Length == length);
                }
                documentInput = new ByteArrayDataInput(bytes.Bytes, bytes.Offset, bytes.Length);
            }

            for (int fieldIDX = 0; fieldIDX < numStoredFields; fieldIDX++)
            {
                // Each field starts with a VInt64 holding the field number (high bits) and the type bits (low bits).
                long      infoAndBits = documentInput.ReadVInt64();
                int       fieldNumber = (int)infoAndBits.TripleShift(CompressingStoredFieldsWriter.TYPE_BITS);
                FieldInfo fieldInfo   = fieldInfos.FieldInfo(fieldNumber);

                int bits = (int)(infoAndBits & CompressingStoredFieldsWriter.TYPE_MASK);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(bits <= CompressingStoredFieldsWriter.NUMERIC_DOUBLE, "bits={0:x}", bits);
                }

                switch (visitor.NeedsField(fieldInfo))
                {
                case StoredFieldVisitor.Status.YES:
                    ReadField(documentInput, visitor, fieldInfo, bits);
                    break;

                case StoredFieldVisitor.Status.NO:
                    SkipField(documentInput, bits);
                    break;

                case StoredFieldVisitor.Status.STOP:
                    return;
                }
            }
        }
Пример #27
0
 /// <summary>
 /// Seeks to the footer at the end of <paramref name="in"/>, validates the footer and
 /// returns the checksum value stored in it. The stored value is only read; it is not
 /// compared against the actual file contents.
 /// </summary>
 /// <param name="in">Input whose footer is read; it is repositioned by this call.</param>
 /// <returns> The checksum value stored in the footer. </returns>
 /// <exception cref="IOException"> If the footer is invalid. </exception>
 public static long RetrieveChecksum(IndexInput @in)
 {
     // The footer occupies the last FooterLength() bytes of the input.
     long footerStart = @in.Length - FooterLength();
     @in.Seek(footerStart);

     ValidateFooter(@in);

     long storedChecksum = @in.ReadInt64();
     return storedChecksum;
 }
Пример #28
0
        // LUCENENET NOTE: Changed from public to internal because the class had to be made public, but is not for public use.
        /// <summary>
        /// Opens the doc values file of the segment described by <paramref name="state"/> and scans the
        /// per-field headers it contains. For every field the type-specific header values and the
        /// file position where the field's data begins are recorded; the data itself is skipped
        /// by seeking past it, using the widths announced in the header.
        /// </summary>
        /// <param name="state"> Supplies the directory, segment name and suffix, IO context and document count. </param>
        /// <param name="ext"> File extension used to build the segment file name. </param>
        internal SimpleTextDocValuesReader(SegmentReadState state, string ext)
        {
            data = state.Directory.OpenInput(
                IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, ext), state.Context);
            maxDoc = state.SegmentInfo.DocCount;

            while (true)
            {
                ReadLine();
                if (scratch.Equals(SimpleTextDocValuesWriter.END))
                {
                    break;
                }
                // LUCENENET specific - use wrapper BytesRefFormatter struct to defer building the string unless string.Format() is called
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.FIELD), "{0}", new BytesRefFormatter(scratch, BytesRefFormat.UTF8));
                }
                var fieldName = StripPrefix(SimpleTextDocValuesWriter.FIELD);
                var field     = new OneField();

                fields[fieldName] = field;

                ReadLine();
                // LUCENENET specific - use wrapper BytesRefFormatter struct to defer building the string unless string.Format() is called
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.TYPE), "{0}", new BytesRefFormatter(scratch, BytesRefFormat.UTF8));
                }

                var dvType = (DocValuesType)Enum.Parse(typeof(DocValuesType), StripPrefix(SimpleTextDocValuesWriter.TYPE));
                // if (Debugging.AssertsEnabled) Debugging.Assert(dvType != null); // LUCENENET: Not possible for an enum to be null in .NET
                if (dvType == DocValuesType.NUMERIC)
                {
                    ReadLine();
                    // LUCENENET specific - use wrapper BytesRefFormatter struct to defer building the string unless string.Format() is called
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.MINVALUE), "got {0} field={1} ext={2}", new BytesRefFormatter(scratch, BytesRefFormat.UTF8), fieldName, ext);
                    }
                    field.MinValue = Convert.ToInt64(StripPrefix(SimpleTextDocValuesWriter.MINVALUE), CultureInfo.InvariantCulture);
                    ReadLine();
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.PATTERN));
                    }
                    field.Pattern = StripPrefix(SimpleTextDocValuesWriter.PATTERN);
                    field.DataStartFilePointer = data.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                    // The per-document width is widened to long BEFORE multiplying by maxDoc;
                    // a 32-bit product would silently wrap on large segments and yield a bogus seek target.
                    data.Seek(data.Position + (1L + field.Pattern.Length + 2) * maxDoc);
                }
                else if (dvType == DocValuesType.BINARY)
                {
                    ReadLine();
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.MAXLENGTH));
                    }
                    field.MaxLength = Convert.ToInt32(StripPrefix(SimpleTextDocValuesWriter.MAXLENGTH), CultureInfo.InvariantCulture);
                    ReadLine();
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.PATTERN));
                    }
                    field.Pattern = StripPrefix(SimpleTextDocValuesWriter.PATTERN);
                    field.DataStartFilePointer = data.Position;
                    // 64-bit multiplication, see the NUMERIC branch.
                    data.Seek(data.Position + (9L + field.Pattern.Length + field.MaxLength + 2) * maxDoc);
                }
                else if (dvType == DocValuesType.SORTED || dvType == DocValuesType.SORTED_SET)
                {
                    ReadLine();
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.NUMVALUES));
                    }
                    field.NumValues = Convert.ToInt64(StripPrefix(SimpleTextDocValuesWriter.NUMVALUES), CultureInfo.InvariantCulture);
                    ReadLine();
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.MAXLENGTH));
                    }
                    field.MaxLength = Convert.ToInt32(StripPrefix(SimpleTextDocValuesWriter.MAXLENGTH), CultureInfo.InvariantCulture);
                    ReadLine();
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.PATTERN));
                    }
                    field.Pattern = StripPrefix(SimpleTextDocValuesWriter.PATTERN);
                    ReadLine();
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.ORDPATTERN));
                    }
                    field.OrdPattern           = StripPrefix(SimpleTextDocValuesWriter.ORDPATTERN);
                    field.DataStartFilePointer = data.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                    // Both products are evaluated in 64-bit arithmetic: the value table
                    // (one entry per distinct value) followed by the ord table (one entry per document).
                    data.Seek(data.Position + (9L + field.Pattern.Length + field.MaxLength) * field.NumValues +
                              (1L + field.OrdPattern.Length) * maxDoc);
                }
                else
                {
                    // Any other doc values type is not expected in this file.
                    throw AssertionError.Create();
                }
            }

            // We should only be called from above if at least one
            // field has DVs:
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(fields.Count > 0);
            }
        }
        /// <summary>
        /// Decodes the term vectors of document <paramref name="doc"/> from the chunk that contains it
        /// and returns them as a <c>TVFields</c> instance, or <c>null</c> when the document has
        /// no term vector fields.
        /// </summary>
        /// <param name="doc"> Document ID whose term vectors are requested. </param>
        /// <returns> The decoded fields, or <c>null</c> if the document's field count is zero. </returns>
        /// <exception cref="CorruptIndexException"> If <paramref name="doc"/> is outside the document range
        /// announced by the chunk header, or that range extends past <c>numDocs</c>. </exception>
        public override Fields Get(int doc)
        {
            EnsureOpen();

            // seek to the right place
            {
                long startPointer = indexReader.GetStartPointer(doc);
                vectorsStream.Seek(startPointer);
            }

            // decode
            // - docBase: first doc ID of the chunk
            // - chunkDocs: number of docs of the chunk
            int docBase   = vectorsStream.ReadVInt32();
            int chunkDocs = vectorsStream.ReadVInt32();

            if (doc < docBase || doc >= docBase + chunkDocs || docBase + chunkDocs > numDocs)
            {
                throw new CorruptIndexException("docBase=" + docBase + ",chunkDocs=" + chunkDocs + ",doc=" + doc + " (resource=" + vectorsStream + ")");
            }

            int skip;        // number of fields to skip
            int numFields;   // number of fields of the document we're looking for
            int totalFields; // total number of fields of the chunk (sum for all docs)

            if (chunkDocs == 1)
            {
                // single-document chunk: the field count is stored directly as a VInt
                skip      = 0;
                numFields = totalFields = vectorsStream.ReadVInt32();
            }
            else
            {
                // multi-document chunk: one field count per document is read through `reader`;
                // the counts of the documents preceding `doc` add up to `skip`
                reader.Reset(vectorsStream, chunkDocs);
                int sum = 0;
                for (int i = docBase; i < doc; ++i)
                {
                    sum += (int)reader.Next();
                }
                skip      = sum;
                numFields = (int)reader.Next();
                sum      += numFields;
                for (int i = doc + 1; i < docBase + chunkDocs; ++i)
                {
                    sum += (int)reader.Next();
                }
                totalFields = sum;
            }

            if (numFields == 0)
            {
                // no vectors
                return(null);
            }

            // read field numbers that have term vectors
            int[] fieldNums;
            {
                // low 5 bits of the token: bits per field number; high 3 bits: distinct field count
                int token = vectorsStream.ReadByte() & 0xFF;
                Debug.Assert(token != 0); // means no term vectors, cannot happen since we checked for numFields == 0
                int bitsPerFieldNum     = token & 0x1F;
                int totalDistinctFields = (int)((uint)token >> 5);
                if (totalDistinctFields == 0x07)
                {
                    // all three bits set: the remainder of the count follows as a VInt
                    totalDistinctFields += vectorsStream.ReadVInt32();
                }
                // the decoded value is incremented by one to obtain the count
                ++totalDistinctFields;
                PackedInt32s.IReaderIterator it = PackedInt32s.GetReaderIteratorNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalDistinctFields, bitsPerFieldNum, 1);
                fieldNums = new int[totalDistinctFields];
                for (int i = 0; i < totalDistinctFields; ++i)
                {
                    fieldNums[i] = (int)it.Next();
                }
            }

            // read field numbers and flags
            int[] fieldNumOffs = new int[numFields];
            PackedInt32s.Reader flags;
            {
                int bitsPerOff = PackedInt32s.BitsRequired(fieldNums.Length - 1);
                PackedInt32s.Reader allFieldNumOffs = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, bitsPerOff);
                switch (vectorsStream.ReadVInt32())
                {
                case 0:
                    // flags are stored once per distinct field number; expand them to one entry per field of the chunk
                    PackedInt32s.Reader  fieldFlags = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, fieldNums.Length, CompressingTermVectorsWriter.FLAGS_BITS);
                    PackedInt32s.Mutable f          = PackedInt32s.GetMutable(totalFields, CompressingTermVectorsWriter.FLAGS_BITS, PackedInt32s.COMPACT);
                    for (int i = 0; i < totalFields; ++i)
                    {
                        int fieldNumOff = (int)allFieldNumOffs.Get(i);
                        Debug.Assert(fieldNumOff >= 0 && fieldNumOff < fieldNums.Length);
                        int fgs = (int)fieldFlags.Get(fieldNumOff);
                        f.Set(i, fgs);
                    }
                    flags = f;
                    break;

                case 1:
                    // flags are stored once per field of the chunk
                    flags = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, CompressingTermVectorsWriter.FLAGS_BITS);
                    break;

                default:
                    // NOTE(review): any other marker value ends up here; the bare Exception carries no message
                    // or specific type, so callers cannot tell what went wrong.
                    throw new Exception();
                }
                for (int i = 0; i < numFields; ++i)
                {
                    fieldNumOffs[i] = (int)allFieldNumOffs.Get(skip + i);
                }
            }

            // number of terms per field for all fields
            PackedInt32s.Reader numTerms;
            int totalTerms;
            {
                int bitsRequired = vectorsStream.ReadVInt32();
                numTerms = PackedInt32s.GetReaderNoHeader(vectorsStream, PackedInt32s.Format.PACKED, packedIntsVersion, totalFields, bitsRequired);
                int sum = 0;
                for (int i = 0; i < totalFields; ++i)
                {
                    sum += (int)numTerms.Get(i);
                }
                totalTerms = sum;
            }

            // term lengths
            // docOff: summed suffix lengths of the fields before this document
            // docLen: summed suffix lengths of this document's fields
            // totalLen: summed suffix lengths of all fields of the chunk
            int docOff = 0, docLen = 0, totalLen;

            int[]   fieldLengths  = new int[numFields];
            int[][] prefixLengths = new int[numFields][];
            int[][] suffixLengths = new int[numFields][];
            {
                reader.Reset(vectorsStream, totalTerms);
                // skip
                int toSkip = 0;
                for (int i = 0; i < skip; ++i)
                {
                    toSkip += (int)numTerms.Get(i);
                }
                reader.Skip(toSkip);
                // read prefix lengths
                for (int i = 0; i < numFields; ++i)
                {
                    int   termCount          = (int)numTerms.Get(skip + i);
                    int[] fieldPrefixLengths = new int[termCount];
                    prefixLengths[i] = fieldPrefixLengths;
                    for (int j = 0; j < termCount;)
                    {
                        // Next(count) may return fewer values than requested; j advances by what was returned
                        Int64sRef next = reader.Next(termCount - j);
                        for (int k = 0; k < next.Length; ++k)
                        {
                            fieldPrefixLengths[j++] = (int)next.Int64s[next.Offset + k];
                        }
                    }
                }
                // skip whatever is left of the totalTerms prefix lengths
                reader.Skip(totalTerms - reader.Ord);

                reader.Reset(vectorsStream, totalTerms);
                // skip
                // NOTE(review): toSkip is reset here but not read again in this method.
                toSkip = 0;
                for (int i = 0; i < skip; ++i)
                {
                    for (int j = 0; j < numTerms.Get(i); ++j)
                    {
                        docOff += (int)reader.Next();
                    }
                }
                // read suffix lengths
                for (int i = 0; i < numFields; ++i)
                {
                    int   termCount          = (int)numTerms.Get(skip + i);
                    int[] fieldSuffixLengths = new int[termCount];
                    suffixLengths[i] = fieldSuffixLengths;
                    for (int j = 0; j < termCount;)
                    {
                        Int64sRef next = reader.Next(termCount - j);
                        for (int k = 0; k < next.Length; ++k)
                        {
                            fieldSuffixLengths[j++] = (int)next.Int64s[next.Offset + k];
                        }
                    }
                    fieldLengths[i] = Sum(suffixLengths[i]);
                    docLen         += fieldLengths[i];
                }
                totalLen = docOff + docLen;
                for (int i = skip + numFields; i < totalFields; ++i)
                {
                    for (int j = 0; j < numTerms.Get(i); ++j)
                    {
                        totalLen += (int)reader.Next();
                    }
                }
            }

            // term freqs
            int[] termFreqs = new int[totalTerms];
            {
                reader.Reset(vectorsStream, totalTerms);
                for (int i = 0; i < totalTerms;)
                {
                    Int64sRef next = reader.Next(totalTerms - i);
                    for (int k = 0; k < next.Length; ++k)
                    {
                        // one is added to every stored value to obtain the frequency
                        termFreqs[i++] = 1 + (int)next.Int64s[next.Offset + k];
                    }
                }
            }

            // total number of positions, offsets and payloads
            int totalPositions = 0, totalOffsets = 0, totalPayloads = 0;

            for (int i = 0, termIndex = 0; i < totalFields; ++i)
            {
                int f         = (int)flags.Get(i);
                int termCount = (int)numTerms.Get(i);
                for (int j = 0; j < termCount; ++j)
                {
                    int freq = termFreqs[termIndex++];
                    if ((f & CompressingTermVectorsWriter.POSITIONS) != 0)
                    {
                        totalPositions += freq;
                    }
                    if ((f & CompressingTermVectorsWriter.OFFSETS) != 0)
                    {
                        totalOffsets += freq;
                    }
                    if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
                    {
                        totalPayloads += freq;
                    }
                }
                Debug.Assert(i != totalFields - 1 || termIndex == totalTerms, termIndex + " " + totalTerms);
            }

            int[][] positionIndex = PositionIndex(skip, numFields, numTerms, termFreqs);
            int[][] positions, startOffsets, lengths;
            if (totalPositions > 0)
            {
                positions = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.POSITIONS, totalPositions, positionIndex);
            }
            else
            {
                positions = new int[numFields][];
            }

            if (totalOffsets > 0)
            {
                // average number of chars per term
                float[] charsPerTerm = new float[fieldNums.Length];
                for (int i = 0; i < charsPerTerm.Length; ++i)
                {
                    charsPerTerm[i] = J2N.BitConversion.Int32BitsToSingle(vectorsStream.ReadInt32());
                }
                // start offsets and lengths are two consecutive streams, both read with the OFFSETS flag
                startOffsets = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.OFFSETS, totalOffsets, positionIndex);
                lengths      = ReadPositions(skip, numFields, flags, numTerms, termFreqs, CompressingTermVectorsWriter.OFFSETS, totalOffsets, positionIndex);

                for (int i = 0; i < numFields; ++i)
                {
                    int[] fStartOffsets = startOffsets[i];
                    int[] fPositions    = positions[i];
                    // patch offsets from positions
                    if (fStartOffsets != null && fPositions != null)
                    {
                        float fieldCharsPerTerm = charsPerTerm[fieldNumOffs[i]];
                        for (int j = 0; j < startOffsets[i].Length; ++j)
                        {
                            fStartOffsets[j] += (int)(fieldCharsPerTerm * fPositions[j]);
                        }
                    }
                    if (fStartOffsets != null)
                    {
                        int[] fPrefixLengths = prefixLengths[i];
                        int[] fSuffixLengths = suffixLengths[i];
                        int[] fLengths       = lengths[i];
                        for (int j = 0, end = (int)numTerms.Get(skip + i); j < end; ++j)
                        {
                            // delta-decode start offsets and  patch lengths using term lengths
                            int termLength = fPrefixLengths[j] + fSuffixLengths[j];
                            lengths[i][positionIndex[i][j]] += termLength;
                            for (int k = positionIndex[i][j] + 1; k < positionIndex[i][j + 1]; ++k)
                            {
                                fStartOffsets[k] += fStartOffsets[k - 1];
                                fLengths[k]      += termLength;
                            }
                        }
                    }
                }
            }
            else
            {
                // no offsets in this chunk: both names refer to the same array of null entries
                startOffsets = lengths = new int[numFields][];
            }
            if (totalPositions > 0)
            {
                // delta-decode positions
                // (done after the offsets block above, which reads the positions in their still-encoded form)
                for (int i = 0; i < numFields; ++i)
                {
                    int[] fPositions     = positions[i];
                    int[] fpositionIndex = positionIndex[i];
                    if (fPositions != null)
                    {
                        for (int j = 0, end = (int)numTerms.Get(skip + i); j < end; ++j)
                        {
                            // delta-decode start offsets
                            for (int k = fpositionIndex[j] + 1; k < fpositionIndex[j + 1]; ++k)
                            {
                                fPositions[k] += fPositions[k - 1];
                            }
                        }
                    }
                }
            }

            // payload lengths
            // payloadOff: summed payload lengths of the fields before this document
            // payloadLen: summed payload lengths of this document's fields
            int[][] payloadIndex       = new int[numFields][];
            int     totalPayloadLength = 0;
            int     payloadOff         = 0;
            int     payloadLen         = 0;

            if (totalPayloads > 0)
            {
                reader.Reset(vectorsStream, totalPayloads);
                // skip
                int termIndex = 0;
                for (int i = 0; i < skip; ++i)
                {
                    int f         = (int)flags.Get(i);
                    int termCount = (int)numTerms.Get(i);
                    if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
                    {
                        for (int j = 0; j < termCount; ++j)
                        {
                            int freq = termFreqs[termIndex + j];
                            for (int k = 0; k < freq; ++k)
                            {
                                int l = (int)reader.Next();
                                payloadOff += l;
                            }
                        }
                    }
                    termIndex += termCount;
                }
                totalPayloadLength = payloadOff;
                // read doc payload lengths
                for (int i = 0; i < numFields; ++i)
                {
                    int f         = (int)flags.Get(skip + i);
                    int termCount = (int)numTerms.Get(skip + i);
                    if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
                    {
                        // payloadIndex[i] holds running totals: entry p is where the payload of position p starts
                        int totalFreq = positionIndex[i][termCount];
                        payloadIndex[i] = new int[totalFreq + 1];
                        int posIdx = 0;
                        payloadIndex[i][posIdx] = payloadLen;
                        for (int j = 0; j < termCount; ++j)
                        {
                            int freq = termFreqs[termIndex + j];
                            for (int k = 0; k < freq; ++k)
                            {
                                int payloadLength = (int)reader.Next();
                                payloadLen += payloadLength;
                                payloadIndex[i][posIdx + 1] = payloadLen;
                                ++posIdx;
                            }
                        }
                        Debug.Assert(posIdx == totalFreq);
                    }
                    termIndex += termCount;
                }
                totalPayloadLength += payloadLen;
                // account for the payloads of the fields after this document
                for (int i = skip + numFields; i < totalFields; ++i)
                {
                    int f         = (int)flags.Get(i);
                    int termCount = (int)numTerms.Get(i);
                    if ((f & CompressingTermVectorsWriter.PAYLOADS) != 0)
                    {
                        for (int j = 0; j < termCount; ++j)
                        {
                            int freq = termFreqs[termIndex + j];
                            for (int k = 0; k < freq; ++k)
                            {
                                totalPayloadLength += (int)reader.Next();
                            }
                        }
                    }
                    termIndex += termCount;
                }
                Debug.Assert(termIndex == totalTerms, termIndex + " " + totalTerms);
            }

            // decompress data
            BytesRef suffixBytes = new BytesRef();

            decompressor.Decompress(vectorsStream, totalLen + totalPayloadLength, docOff + payloadOff, docLen + payloadLen, suffixBytes);
            // the first docLen bytes of the result are term suffixes; the payloadLen bytes after them are payloads
            suffixBytes.Length = docLen;
            BytesRef payloadBytes = new BytesRef(suffixBytes.Bytes, suffixBytes.Offset + docLen, payloadLen);

            // per-field views (restricted to this document) of the chunk-wide flags, term counts and term freqs
            int[] FieldFlags = new int[numFields];
            for (int i = 0; i < numFields; ++i)
            {
                FieldFlags[i] = (int)flags.Get(skip + i);
            }

            int[] fieldNumTerms = new int[numFields];
            for (int i = 0; i < numFields; ++i)
            {
                fieldNumTerms[i] = (int)numTerms.Get(skip + i);
            }

            int[][] fieldTermFreqs = new int[numFields][];
            {
                int termIdx = 0;
                for (int i = 0; i < skip; ++i)
                {
                    termIdx += (int)numTerms.Get(i);
                }
                for (int i = 0; i < numFields; ++i)
                {
                    int termCount = (int)numTerms.Get(skip + i);
                    fieldTermFreqs[i] = new int[termCount];
                    for (int j = 0; j < termCount; ++j)
                    {
                        fieldTermFreqs[i][j] = termFreqs[termIdx++];
                    }
                }
            }

            Debug.Assert(Sum(fieldLengths) == docLen, Sum(fieldLengths) + " != " + docLen);

            return(new TVFields(this, fieldNums, FieldFlags, fieldNumOffs, fieldNumTerms, fieldLengths, prefixLengths, suffixLengths, fieldTermFreqs, positionIndex, positions, startOffsets, lengths, payloadBytes, payloadIndex, suffixBytes));
        }
        /// <summary>
        /// Parses the term vectors of document <paramref name="doc"/> from the text input, starting at
        /// the offset recorded for that document, and returns them keyed by field name.
        /// </summary>
        /// <param name="doc"> Index into the per-document offset table. </param>
        /// <returns> The parsed fields, or <c>null</c> when the document declares zero fields. </returns>
        public override Fields Get(int doc)
        {
            // LUCENENET specific: Use StringComparer.Ordinal to get the same ordering as Java
            var fieldsByName = new JCG.SortedDictionary<string, SimpleTVTerms>(StringComparer.Ordinal);

            _input.Seek(_offsets[doc]);

            // document header: number of fields
            ReadLine();
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.NUMFIELDS));
            }
            int fieldCount = ParseInt32At(SimpleTextTermVectorsWriter.NUMFIELDS.Length);
            if (fieldCount == 0)
            {
                // no vectors for this doc
                return null;
            }

            for (int fieldIdx = 0; fieldIdx < fieldCount; fieldIdx++)
            {
                // field number line: parsed, but the value is not needed
                ReadLine();
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELD));
                }
                ParseInt32At(SimpleTextTermVectorsWriter.FIELD.Length);

                // field name
                ReadLine();
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDNAME));
                }
                var name = ReadString(SimpleTextTermVectorsWriter.FIELDNAME.Length, _scratch);

                // three boolean lines: positions, offsets, payloads
                ReadLine();
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDPOSITIONS));
                }
                bool hasPositions = Convert.ToBoolean(ReadString(SimpleTextTermVectorsWriter.FIELDPOSITIONS.Length, _scratch), CultureInfo.InvariantCulture);

                ReadLine();
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDOFFSETS));
                }
                bool hasOffsets = Convert.ToBoolean(ReadString(SimpleTextTermVectorsWriter.FIELDOFFSETS.Length, _scratch), CultureInfo.InvariantCulture);

                ReadLine();
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDPAYLOADS));
                }
                bool hasPayloads = Convert.ToBoolean(ReadString(SimpleTextTermVectorsWriter.FIELDPAYLOADS.Length, _scratch), CultureInfo.InvariantCulture);

                // number of terms of this field
                ReadLine();
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.FIELDTERMCOUNT));
                }
                int numTerms = ParseInt32At(SimpleTextTermVectorsWriter.FIELDTERMCOUNT.Length);

                var fieldTerms = new SimpleTVTerms(hasOffsets, hasPositions, hasPayloads);
                fieldsByName.Add(name, fieldTerms);

                for (int termIdx = 0; termIdx < numTerms; termIdx++)
                {
                    // term text: everything on the line after the TERMTEXT prefix
                    ReadLine();
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.TERMTEXT));
                    }
                    int textLength = _scratch.Length - SimpleTextTermVectorsWriter.TERMTEXT.Length;
                    var termBytes  = new BytesRef();
                    termBytes.Grow(textLength);
                    termBytes.Length = textLength;
                    Array.Copy(_scratch.Bytes, _scratch.Offset + SimpleTextTermVectorsWriter.TERMTEXT.Length, termBytes.Bytes, termBytes.Offset, textLength);

                    var termPostings = new SimpleTVPostings();
                    fieldTerms.terms.Add(termBytes, termPostings);

                    // term frequency
                    ReadLine();
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.TERMFREQ));
                    }
                    termPostings.freq = ParseInt32At(SimpleTextTermVectorsWriter.TERMFREQ.Length);

                    // per-occurrence lines exist only when positions or offsets were stored
                    if (hasPositions || hasOffsets)
                    {
                        if (hasPositions)
                        {
                            termPostings.positions = new int[termPostings.freq];
                            if (hasPayloads)
                            {
                                termPostings.payloads = new BytesRef[termPostings.freq];
                            }
                        }

                        if (hasOffsets)
                        {
                            termPostings.startOffsets = new int[termPostings.freq];
                            termPostings.endOffsets   = new int[termPostings.freq];
                        }

                        for (int occurrence = 0; occurrence < termPostings.freq; occurrence++)
                        {
                            if (hasPositions)
                            {
                                ReadLine();
                                if (Debugging.AssertsEnabled)
                                {
                                    Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.POSITION));
                                }
                                termPostings.positions[occurrence] = ParseInt32At(SimpleTextTermVectorsWriter.POSITION.Length);

                                if (hasPayloads)
                                {
                                    ReadLine();
                                    if (Debugging.AssertsEnabled)
                                    {
                                        Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.PAYLOAD));
                                    }

                                    // an empty remainder after the PAYLOAD prefix is stored as a null payload
                                    int payloadLength = _scratch.Length - SimpleTextTermVectorsWriter.PAYLOAD.Length;
                                    if (payloadLength == 0)
                                    {
                                        termPostings.payloads[occurrence] = null;
                                    }
                                    else
                                    {
                                        var payloadCopy = new byte[payloadLength];
                                        Array.Copy(_scratch.Bytes, _scratch.Offset + SimpleTextTermVectorsWriter.PAYLOAD.Length, payloadCopy, 0,
                                                   payloadCopy.Length);
                                        termPostings.payloads[occurrence] = new BytesRef(payloadCopy);
                                    }
                                }
                            }

                            if (hasOffsets)
                            {
                                ReadLine();
                                if (Debugging.AssertsEnabled)
                                {
                                    Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.STARTOFFSET));
                                }
                                termPostings.startOffsets[occurrence] = ParseInt32At(SimpleTextTermVectorsWriter.STARTOFFSET.Length);

                                ReadLine();
                                if (Debugging.AssertsEnabled)
                                {
                                    Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.ENDOFFSET));
                                }
                                termPostings.endOffsets[occurrence] = ParseInt32At(SimpleTextTermVectorsWriter.ENDOFFSET.Length);
                            }
                        }
                    }
                }
            }

            return new SimpleTVFields(this, fieldsByName);
        }
Пример #31
0
 /// <summary>
 /// Forwards the seek to <c>_indexInput</c>; does nothing when that field is <c>null</c>.
 /// </summary>
 /// <param name="pos"> Position passed through unchanged. </param>
 public override void Seek(long pos) => _indexInput?.Seek(pos);
Пример #32
0
 /// <summary>
 /// Positions <paramref name="input"/> at the directory offset.
 /// </summary>
 /// <param name="input"> Input to reposition. </param>
 /// <param name="dirOffset"> Offset used as-is when <c>Version</c> is below
 /// <c>VERSION_APPEND_ONLY</c>; for newer versions it is replaced by a value read
 /// from near the end of <paramref name="input"/>. </param>
 protected internal virtual void SeekDir(IndexInput input, long dirOffset)
 {
     // Distance from the end of the input to the stored offset.
     long trailerLength;
     if (Version >= BlockTreeTermsWriter.VERSION_CHECKSUM)
     {
         // the stored offset sits 8 bytes before the codec footer
         trailerLength = CodecUtil.FooterLength() + 8;
     }
     else if (Version >= BlockTreeTermsWriter.VERSION_APPEND_ONLY)
     {
         // the stored offset occupies the last 8 bytes
         trailerLength = 8;
     }
     else
     {
         // older versions: the caller-supplied offset is used directly
         input.Seek(dirOffset);
         return;
     }

     input.Seek(input.Length() - trailerLength);
     dirOffset = input.ReadLong();
     input.Seek(dirOffset);
 }
Пример #33
0
 /// <summary>
 /// Positions <c>IndexStream</c> at <c>HEADER_LENGTH_IDX + docID * 8</c>.
 /// </summary>
 /// <param name="docID">Document number; each increment advances the target position by 8.</param>
 private void SeekIndex(int docID)
 {
     // 8L forces 64-bit arithmetic so large docIDs do not overflow an int product.
     long entryOffset = Lucene40StoredFieldsWriter.HEADER_LENGTH_IDX + docID * 8L;
     IndexStream.Seek(entryOffset);
 }
Пример #34
0
        /// <summary>
        /// Opens two files from the compound file "f.comp", clones each input, and checks that
        /// an input and its clone report the same file pointer and read the same byte after
        /// identical seeks, and that seeking one pair leaves the other pair's position untouched.
        /// </summary>
        public virtual void TestRandomAccessClones()
        {
            // Seeks an input and its clone to the same position.
            void SeekPair(IndexInput source, IndexInput copy, long position)
            {
                source.Seek(position);
                copy.Seek(position);
            }

            // Checks both file pointers, then reads one byte from each and compares them.
            void AssertPairAt(IndexInput source, IndexInput copy, long expectedPosition)
            {
                Assert.AreEqual(expectedPosition, source.GetFilePointer());
                Assert.AreEqual(expectedPosition, copy.GetFilePointer());
                byte fromSource = source.ReadByte();
                byte fromCopy = copy.ReadByte();

                Assert.AreEqual(fromSource, fromCopy);
            }

            SetUp_2();
            CompoundFileDirectory compound = new CompoundFileDirectory(dir, "f.comp", NewIOContext(Random), false);

            // Open two files, then clone each of them
            IndexInput first = compound.OpenInput("f11", NewIOContext(Random));
            IndexInput second = compound.OpenInput("f3", NewIOContext(Random));

            IndexInput firstClone = (IndexInput)first.Clone();
            IndexInput secondClone = (IndexInput)second.Clone();

            // Seek the first pair
            SeekPair(first, firstClone, 100);
            AssertPairAt(first, firstClone, 100);

            // Now seek the second pair
            SeekPair(second, secondClone, 1027);
            AssertPairAt(second, secondClone, 1027);

            // The first pair must only have advanced by the single byte read above
            AssertPairAt(first, firstClone, 101);

            // Now move the first pair again, past the buffer length
            SeekPair(first, firstClone, 1910);
            AssertPairAt(first, firstClone, 1910);

            // The second pair must only have advanced by the single byte read above
            AssertPairAt(second, secondClone, 1028);

            // Move the second pair back, again crossing the buffer size
            SeekPair(second, secondClone, 17);
            AssertPairAt(second, secondClone, 17);

            // Finally, make sure the first pair didn't move
            AssertPairAt(first, firstClone, 1911);

            // Dispose the originals first, then the clones, then the compound directory
            foreach (IndexInput opened in new[] { first, second, firstClone, secondClone })
            {
                opened.Dispose();
            }
            compound.Dispose();
        }
        /// <summary>
        /// Seeks <paramref name="input"/> to the directory offset.
        /// <para/>
        /// For <c>_version</c> at or above <c>VERSION_CHECKSUM</c> the offset is read as a long
        /// located 8 positions before the codec footer. For <c>_version</c> at or above
        /// <c>VERSION_APPEND_ONLY</c> it is read 8 positions before the end of the input.
        /// Otherwise <paramref name="dirOffset"/> is used directly.
        /// </summary>
        /// <param name="input">The input to position.</param>
        /// <param name="dirOffset">Directory offset used only when neither version check applies.</param>
        private void SeekDir(IndexInput input, long dirOffset)
        {
            long pointerPosition;
            if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM)
            {
                pointerPosition = input.Length() - CodecUtil.FooterLength() - 8;
            }
            else if (_version >= FixedGapTermsIndexWriter.VERSION_APPEND_ONLY)
            {
                pointerPosition = input.Length() - 8;
            }
            else
            {
                // Older versions: the caller already knows the offset.
                input.Seek(dirOffset);
                return;
            }

            // Read the stored offset, then jump to it.
            input.Seek(pointerPosition);
            long storedOffset = input.ReadLong();
            input.Seek(storedOffset);
        }
Пример #36
0
            /// <summary>
            /// Seeks <paramref name="input"/> to <c>Bytes.Offset</c> and returns a new
            /// <c>TermsEnumAnonymousInnerClassHelper</c> built from this instance and that input.
            /// </summary>
            /// <param name="input">The input to position and hand to the returned enum.</param>
            /// <returns>A new terms enum constructed over <paramref name="input"/>.</returns>
            internal virtual TermsEnum GetTermsEnum(IndexInput input)
            {
                // Position the input before the enum is constructed, as the original order requires.
                input.Seek(Bytes.Offset);

                TermsEnum termsEnum = new TermsEnumAnonymousInnerClassHelper(this, input);
                return termsEnum;
            }