Seek() public abstract method

Seeks to "largest" indexed term that's less than or equal to term; returns file pointer index (into the main terms index file) for that term
public abstract Seek ( BytesRef term ) : long
term BytesRef
return long
示例#1
0
                /// <remarks>
                /// TODO: we may want an alternate mode here which is
                /// "if you are about to return NOT_FOUND I won't use
                /// the terms data from that"; eg FuzzyTermsEnum will
                /// (usually) just immediately call seek again if we
                /// return NOT_FOUND so it's a waste for us to fill in
                /// the term that was actually NOT_FOUND
                /// </remarks>
                public override SeekStatus SeekCeil(BytesRef target)
                {
                    if (indexEnum == null)
                    {
                        throw new InvalidOperationException("terms index was not loaded");
                    }

                    //System.out.println("BTR.seek seg=" + segment + " target=" + fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + term().utf8ToString() + " " + term() + " indexIsCurrent=" + indexIsCurrent + " didIndexNext=" + didIndexNext + " seekPending=" + seekPending + " divisor=" + indexReader.getDivisor() + " this="  + this);
                    if (didIndexNext)
                    {
                        if (nextIndexTerm == null)
                        {
                            //System.out.println("  nextIndexTerm=null");
                        }
                        else
                        {
                            //System.out.println("  nextIndexTerm=" + nextIndexTerm.utf8ToString());
                        }
                    }

                    bool doSeek = true;

                    // See if we can avoid seeking, because target term
                    // is after current term but before next index term:
                    if (indexIsCurrent)
                    {
                        int cmp = BytesRef.UTF8SortedAsUnicodeComparer.Compare(term, target);

                        if (cmp == 0)
                        {
                            // Already at the requested term
                            return(SeekStatus.FOUND);
                        }
                        else if (cmp < 0)
                        {
                            // Target term is after current term
                            if (!didIndexNext)
                            {
                                if (indexEnum.Next() == -1)
                                {
                                    nextIndexTerm = null;
                                }
                                else
                                {
                                    nextIndexTerm = indexEnum.Term;
                                }
                                //System.out.println("  now do index next() nextIndexTerm=" + (nextIndexTerm == null ? "null" : nextIndexTerm.utf8ToString()));
                                didIndexNext = true;
                            }

                            if (nextIndexTerm == null || BytesRef.UTF8SortedAsUnicodeComparer.Compare(target, nextIndexTerm) < 0)
                            {
                                // Optimization: requested term is within the
                                // same term block we are now in; skip seeking
                                // (but do scanning):
                                doSeek = false;
                                //System.out.println("  skip seek: nextIndexTerm=" + (nextIndexTerm == null ? "null" : nextIndexTerm.utf8ToString()));
                            }
                        }
                    }

                    if (doSeek)
                    {
                        //System.out.println("  seek");

                        // Ask terms index to find biggest indexed term (=
                        // first term in a block) that's <= our text:
                        input.Seek(indexEnum.Seek(target));
                        bool result = NextBlock();

                        // Block must exist since, at least, the indexed term
                        // is in the block:
                        Debug.Assert(result);

                        indexIsCurrent  = true;
                        didIndexNext    = false;
                        blocksSinceSeek = 0;

                        if (doOrd)
                        {
                            state.Ord = indexEnum.Ord - 1;
                        }

                        term.CopyBytes(indexEnum.Term);
                        //System.out.println("  seek: term=" + term.utf8ToString());
                    }
                    else
                    {
                        //System.out.println("  skip seek");
                        if (state.TermBlockOrd == blockTermCount && !NextBlock())
                        {
                            indexIsCurrent = false;
                            return(SeekStatus.END);
                        }
                    }

                    seekPending = false;

                    int common = 0;

                    // Scan within block.  We could do this by calling
                    // _next() and testing the resulting term, but this
                    // is wasteful.  Instead, we first confirm the
                    // target matches the common prefix of this block,
                    // and then we scan the term bytes directly from the
                    // termSuffixesreader's byte[], saving a copy into
                    // the BytesRef term per term.  Only when we return
                    // do we then copy the bytes into the term.

                    while (true)
                    {
                        // First, see if target term matches common prefix
                        // in this block:
                        if (common < termBlockPrefix)
                        {
                            int cmp = (term.Bytes[common] & 0xFF) - (target.Bytes[target.Offset + common] & 0xFF);
                            if (cmp < 0)
                            {
                                // TODO: maybe we should store common prefix
                                // in block header?  (instead of relying on
                                // last term of previous block)

                                // Target's prefix is after the common block
                                // prefix, so term cannot be in this block
                                // but it could be in next block.  We
                                // must scan to end-of-block to set common
                                // prefix for next block:
                                if (state.TermBlockOrd < blockTermCount)
                                {
                                    while (state.TermBlockOrd < blockTermCount - 1)
                                    {
                                        state.TermBlockOrd++;
                                        state.Ord++;
                                        termSuffixesReader.SkipBytes(termSuffixesReader.ReadVInt32());
                                    }
                                    int suffix = termSuffixesReader.ReadVInt32();
                                    term.Length = termBlockPrefix + suffix;
                                    if (term.Bytes.Length < term.Length)
                                    {
                                        term.Grow(term.Length);
                                    }
                                    termSuffixesReader.ReadBytes(term.Bytes, termBlockPrefix, suffix);
                                }
                                state.Ord++;

                                if (!NextBlock())
                                {
                                    indexIsCurrent = false;
                                    return(SeekStatus.END);
                                }
                                common = 0;
                            }
                            else if (cmp > 0)
                            {
                                // Target's prefix is before the common prefix
                                // of this block, so we position to start of
                                // block and return NOT_FOUND:
                                Debug.Assert(state.TermBlockOrd == 0);

                                int suffix = termSuffixesReader.ReadVInt32();
                                term.Length = termBlockPrefix + suffix;
                                if (term.Bytes.Length < term.Length)
                                {
                                    term.Grow(term.Length);
                                }
                                termSuffixesReader.ReadBytes(term.Bytes, termBlockPrefix, suffix);
                                return(SeekStatus.NOT_FOUND);
                            }
                            else
                            {
                                common++;
                            }

                            continue;
                        }

                        // Test every term in this block
                        while (true)
                        {
                            state.TermBlockOrd++;
                            state.Ord++;

                            int suffix = termSuffixesReader.ReadVInt32();

                            // We know the prefix matches, so just compare the new suffix:
                            int termLen = termBlockPrefix + suffix;
                            int bytePos = termSuffixesReader.Position;

                            bool next      = false;
                            int  limit     = target.Offset + (termLen < target.Length ? termLen : target.Length);
                            int  targetPos = target.Offset + termBlockPrefix;
                            while (targetPos < limit)
                            {
                                int cmp = (termSuffixes[bytePos++] & 0xFF) - (target.Bytes[targetPos++] & 0xFF);
                                if (cmp < 0)
                                {
                                    // Current term is still before the target;
                                    // keep scanning
                                    next = true;
                                    break;
                                }
                                else if (cmp > 0)
                                {
                                    // Done!  Current term is after target. Stop
                                    // here, fill in real term, return NOT_FOUND.
                                    term.Length = termBlockPrefix + suffix;
                                    if (term.Bytes.Length < term.Length)
                                    {
                                        term.Grow(term.Length);
                                    }
                                    termSuffixesReader.ReadBytes(term.Bytes, termBlockPrefix, suffix);
                                    //System.out.println("  NOT_FOUND");
                                    return(SeekStatus.NOT_FOUND);
                                }
                            }

                            if (!next && target.Length <= termLen)
                            {
                                term.Length = termBlockPrefix + suffix;
                                if (term.Bytes.Length < term.Length)
                                {
                                    term.Grow(term.Length);
                                }
                                termSuffixesReader.ReadBytes(term.Bytes, termBlockPrefix, suffix);

                                if (target.Length == termLen)
                                {
                                    // Done!  Exact match.  Stop here, fill in
                                    // real term, return FOUND.
                                    //System.out.println("  FOUND");
                                    return(SeekStatus.FOUND);
                                }
                                else
                                {
                                    //System.out.println("  NOT_FOUND");
                                    return(SeekStatus.NOT_FOUND);
                                }
                            }

                            if (state.TermBlockOrd == blockTermCount)
                            {
                                // Must pre-fill term for next block's common prefix
                                term.Length = termBlockPrefix + suffix;
                                if (term.Bytes.Length < term.Length)
                                {
                                    term.Grow(term.Length);
                                }
                                termSuffixesReader.ReadBytes(term.Bytes, termBlockPrefix, suffix);
                                break;
                            }
                            else
                            {
                                termSuffixesReader.SkipBytes(suffix);
                            }
                        }

                        // The purpose of the terms dict index is to seek
                        // the enum to the closest index term before the
                        // term we are looking for.  So, we should never
                        // cross another index term (besides the first
                        // one) while we are scanning:

                        Debug.Assert(indexIsCurrent);

                        if (!NextBlock())
                        {
                            //System.out.println("  END");
                            indexIsCurrent = false;
                            return(SeekStatus.END);
                        }
                        common = 0;
                    }
                }
示例#2
0
                /// <remarks>
                /// TODO: we may want an alternate mode here which is
                /// "if you are about to return NOT_FOUND I won't use
                /// the terms data from that"; eg FuzzyTermsEnum will
                /// (usually) just immediately call seek again if we
                /// return NOT_FOUND so it's a waste for us to fill in
                /// the term that was actually NOT_FOUND
                /// </remarks>
                public override SeekStatus SeekCeil(BytesRef target)
                {
                    if (_indexEnum == null)
                    {
                        throw new InvalidOperationException("terms index was not loaded");
                    }

                    var doSeek = true;

                    // See if we can avoid seeking, because target term
                    // is after current term but before next index term:
                    if (_indexIsCurrent)
                    {
                        var cmp = BytesRef.UTF8SortedAsUnicodeComparer.Compare(_term, target);

                        if (cmp == 0)
                        {
                            return(SeekStatus.FOUND);     // Already at the requested term
                        }
                        if (cmp < 0)
                        {
                            // Target term is after current term
                            if (!_didIndexNext)
                            {
                                _nextIndexTerm = _indexEnum.Next == -1 ? null : _indexEnum.Term;
                                _didIndexNext  = true;
                            }

                            if (_nextIndexTerm == null ||
                                BytesRef.UTF8SortedAsUnicodeComparer.Compare(target, _nextIndexTerm) < 0)
                            {
                                // Optimization: requested term is within the
                                // same term block we are now in; skip seeking
                                // (but do scanning):
                                doSeek = false;
                            }
                        }
                    }

                    if (doSeek)
                    {
                        //System.out.println("  seek");

                        // Ask terms index to find biggest indexed term (=
                        // first term in a block) that's <= our text:
                        _input.Seek(_indexEnum.Seek(target));
                        var result = NextBlock();

                        // Block must exist since, at least, the indexed term
                        // is in the block:
                        Debug.Assert(result);

                        _indexIsCurrent  = true;
                        _didIndexNext    = false;
                        _blocksSinceSeek = 0;

                        if (_doOrd)
                        {
                            _state.Ord = _indexEnum.Ord - 1;
                        }

                        _term.CopyBytes(_indexEnum.Term);
                    }
                    else
                    {
                        if (_state.TermBlockOrd == _blockTermCount && !NextBlock())
                        {
                            _indexIsCurrent = false;
                            return(SeekStatus.END);
                        }
                    }

                    _seekPending = false;

                    var common = 0;

                    // Scan within block.  We could do this by calling
                    // _next() and testing the resulting term, but this
                    // is wasteful.  Instead, we first confirm the
                    // target matches the common prefix of this block,
                    // and then we scan the term bytes directly from the
                    // termSuffixesreader's byte[], saving a copy into
                    // the BytesRef term per term.  Only when we return
                    // do we then copy the bytes into the term.

                    while (true)
                    {
                        // First, see if target term matches common prefix
                        // in this block:
                        if (common < _termBlockPrefix)
                        {
                            var cmp = (_term.Bytes[common] & 0xFF) - (target.Bytes[target.Offset + common] & 0xFF);
                            if (cmp < 0)
                            {
                                // TODO: maybe we should store common prefix
                                // in block header?  (instead of relying on
                                // last term of previous block)

                                // Target's prefix is after the common block
                                // prefix, so term cannot be in this block
                                // but it could be in next block.  We
                                // must scan to end-of-block to set common
                                // prefix for next block:
                                if (_state.TermBlockOrd < _blockTermCount)
                                {
                                    while (_state.TermBlockOrd < _blockTermCount - 1)
                                    {
                                        _state.TermBlockOrd++;
                                        _state.Ord++;
                                        _termSuffixesReader.SkipBytes(_termSuffixesReader.ReadVInt());
                                    }
                                    var suffix = _termSuffixesReader.ReadVInt();
                                    _term.Length = _termBlockPrefix + suffix;
                                    if (_term.Bytes.Length < _term.Length)
                                    {
                                        _term.Grow(_term.Length);
                                    }
                                    _termSuffixesReader.ReadBytes(_term.Bytes, _termBlockPrefix, suffix);
                                }
                                _state.Ord++;

                                if (!NextBlock())
                                {
                                    _indexIsCurrent = false;
                                    return(SeekStatus.END);
                                }
                                common = 0;
                            }
                            else if (cmp > 0)
                            {
                                // Target's prefix is before the common prefix
                                // of this block, so we position to start of
                                // block and return NOT_FOUND:
                                Debug.Assert(_state.TermBlockOrd == 0);

                                var suffix = _termSuffixesReader.ReadVInt();
                                _term.Length = _termBlockPrefix + suffix;
                                if (_term.Bytes.Length < _term.Length)
                                {
                                    _term.Grow(_term.Length);
                                }
                                _termSuffixesReader.ReadBytes(_term.Bytes, _termBlockPrefix, suffix);
                                return(SeekStatus.NOT_FOUND);
                            }
                            else
                            {
                                common++;
                            }

                            continue;
                        }

                        // Test every term in this block
                        while (true)
                        {
                            _state.TermBlockOrd++;
                            _state.Ord++;

                            var suffix = _termSuffixesReader.ReadVInt();

                            // We know the prefix matches, so just compare the new suffix:

                            var termLen = _termBlockPrefix + suffix;
                            var bytePos = _termSuffixesReader.Position;

                            var next = false;

                            var limit     = target.Offset + (termLen < target.Length ? termLen : target.Length);
                            var targetPos = target.Offset + _termBlockPrefix;
                            while (targetPos < limit)
                            {
                                var cmp = (_termSuffixes[bytePos++] & 0xFF) - (target.Bytes[targetPos++] & 0xFF);
                                if (cmp < 0)
                                {
                                    // Current term is still before the target;
                                    // keep scanning
                                    next = true;
                                    break;
                                }

                                if (cmp <= 0)
                                {
                                    continue;
                                }

                                // Done!  Current term is after target. Stop
                                // here, fill in real term, return NOT_FOUND.
                                _term.Length = _termBlockPrefix + suffix;
                                if (_term.Bytes.Length < _term.Length)
                                {
                                    _term.Grow(_term.Length);
                                }
                                _termSuffixesReader.ReadBytes(_term.Bytes, _termBlockPrefix, suffix);
                                return(SeekStatus.NOT_FOUND);
                            }

                            if (!next && target.Length <= termLen)
                            {
                                _term.Length = _termBlockPrefix + suffix;
                                if (_term.Bytes.Length < _term.Length)
                                {
                                    _term.Grow(_term.Length);
                                }
                                _termSuffixesReader.ReadBytes(_term.Bytes, _termBlockPrefix, suffix);

                                return(target.Length == termLen ? SeekStatus.FOUND : SeekStatus.NOT_FOUND);
                            }

                            if (_state.TermBlockOrd == _blockTermCount)
                            {
                                // Must pre-fill term for next block's common prefix
                                _term.Length = _termBlockPrefix + suffix;
                                if (_term.Bytes.Length < _term.Length)
                                {
                                    _term.Grow(_term.Length);
                                }
                                _termSuffixesReader.ReadBytes(_term.Bytes, _termBlockPrefix, suffix);
                                break;
                            }

                            _termSuffixesReader.SkipBytes(suffix);
                        }

                        // The purpose of the terms dict index is to seek
                        // the enum to the closest index term before the
                        // term we are looking for.  So, we should never
                        // cross another index term (besides the first
                        // one) while we are scanning:

                        Debug.Assert(_indexIsCurrent);

                        if (!NextBlock())
                        {
                            _indexIsCurrent = false;
                            return(SeekStatus.END);
                        }
                        common = 0;
                    }
                }