Ejemplo n.º 1
0
        /// <summary>
        /// decodes the payload at the current position
        /// </summary>
        protected internal virtual BytesRef DecodePayload(BytesRef scratch, ByteArrayDataInput tmpInput)
        {
            tmpInput.Reset(scratch.Bytes);
            tmpInput.SkipBytes(scratch.Length - 2);                 // skip to payload size
            ushort payloadLength = (ushort)tmpInput.ReadInt16();    // read payload size

            tmpInput.Position = scratch.Length - 2 - payloadLength; // setPosition to start of payload
            BytesRef payloadScratch = new BytesRef(payloadLength);

            tmpInput.ReadBytes(payloadScratch.Bytes, 0, payloadLength); // read payload
            payloadScratch.Length = payloadLength;
            scratch.Length       -= 2;                                  // payload length info (short)
            scratch.Length       -= payloadLength;                      // payload
            return(payloadScratch);
        }
            public override BytesRef Next()
            {
                if (ord == numTerms - 1)
                {
                    return(null);
                }
                else
                {
                    Debug.Assert(ord < numTerms);
                    ++ord;
                }

                // read term
                term.Offset = 0;
                term.Length = prefixLengths[ord] + suffixLengths[ord];
                if (term.Length > term.Bytes.Length)
                {
                    term.Bytes = ArrayUtil.Grow(term.Bytes, term.Length);
                }
                @in.ReadBytes(term.Bytes, prefixLengths[ord], suffixLengths[ord]);

                return(term);
            }
Ejemplo n.º 3
0
            public override BytesRef Next()
            {
                if (Ord_Renamed == NumTerms - 1)
                {
                    return(null);
                }
                else
                {
                    Debug.Assert(Ord_Renamed < NumTerms);
                    ++Ord_Renamed;
                }

                // read term
                Term_Renamed.Offset = 0;
                Term_Renamed.Length = PrefixLengths[Ord_Renamed] + SuffixLengths[Ord_Renamed];
                if (Term_Renamed.Length > Term_Renamed.Bytes.Length)
                {
                    Term_Renamed.Bytes = ArrayUtil.Grow(Term_Renamed.Bytes, Term_Renamed.Length);
                }
                @in.ReadBytes(Term_Renamed.Bytes, PrefixLengths[Ord_Renamed], SuffixLengths[Ord_Renamed]);

                return(Term_Renamed);
            }
Ejemplo n.º 4
0
        /// <summary>
        /// decodes the contexts at the current position </summary>
        protected internal virtual HashSet <BytesRef> DecodeContexts(BytesRef scratch, ByteArrayDataInput tmpInput)
        {
            tmpInput.Reset(scratch.Bytes);
            tmpInput.SkipBytes(scratch.Length - 2); //skip to context set size
            short ctxSetSize = tmpInput.ReadShort();

            scratch.Length -= 2;

            var contextSet = new HashSet <BytesRef>();

            for (short i = 0; i < ctxSetSize; i++)
            {
                tmpInput.Position = scratch.Length - 2;
                short curContextLength = tmpInput.ReadShort();
                scratch.Length   -= 2;
                tmpInput.Position = scratch.Length - curContextLength;
                BytesRef contextSpare = new BytesRef(curContextLength);
                tmpInput.ReadBytes(contextSpare.Bytes, 0, curContextLength);
                contextSpare.Length = curContextLength;
                contextSet.Add(contextSpare);
                scratch.Length -= curContextLength;
            }
            return(contextSet);
        }
Ejemplo n.º 5
0
                /// <remarks>
                /// TODO: we may want an alternate mode here which is
                /// "if you are about to return NOT_FOUND I won't use
                /// the terms data from that"; eg FuzzyTermsEnum will
                /// (usually) just immediately call seek again if we
                /// return NOT_FOUND so it's a waste for us to fill in
                /// the term that was actually NOT_FOUND
                /// </remarks>
                public override SeekStatus SeekCeil(BytesRef target)
                {
                    if (indexEnum == null)
                    {
                        throw new InvalidOperationException("terms index was not loaded");
                    }

                    //System.out.println("BTR.seek seg=" + segment + " target=" + fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + term().utf8ToString() + " " + term() + " indexIsCurrent=" + indexIsCurrent + " didIndexNext=" + didIndexNext + " seekPending=" + seekPending + " divisor=" + indexReader.getDivisor() + " this="  + this);
                    if (didIndexNext)
                    {
                        if (nextIndexTerm == null)
                        {
                            //System.out.println("  nextIndexTerm=null");
                        }
                        else
                        {
                            //System.out.println("  nextIndexTerm=" + nextIndexTerm.utf8ToString());
                        }
                    }

                    bool doSeek = true;

                    // See if we can avoid seeking, because target term
                    // is after current term but before next index term:
                    if (indexIsCurrent)
                    {
                        int cmp = BytesRef.UTF8SortedAsUnicodeComparer.Compare(term, target);

                        if (cmp == 0)
                        {
                            // Already at the requested term
                            return(SeekStatus.FOUND);
                        }
                        else if (cmp < 0)
                        {
                            // Target term is after current term
                            if (!didIndexNext)
                            {
                                if (indexEnum.Next() == -1)
                                {
                                    nextIndexTerm = null;
                                }
                                else
                                {
                                    nextIndexTerm = indexEnum.Term;
                                }
                                //System.out.println("  now do index next() nextIndexTerm=" + (nextIndexTerm == null ? "null" : nextIndexTerm.utf8ToString()));
                                didIndexNext = true;
                            }

                            if (nextIndexTerm == null || BytesRef.UTF8SortedAsUnicodeComparer.Compare(target, nextIndexTerm) < 0)
                            {
                                // Optimization: requested term is within the
                                // same term block we are now in; skip seeking
                                // (but do scanning):
                                doSeek = false;
                                //System.out.println("  skip seek: nextIndexTerm=" + (nextIndexTerm == null ? "null" : nextIndexTerm.utf8ToString()));
                            }
                        }
                    }

                    if (doSeek)
                    {
                        //System.out.println("  seek");

                        // Ask terms index to find biggest indexed term (=
                        // first term in a block) that's <= our text:
                        input.Seek(indexEnum.Seek(target));
                        bool result = NextBlock();

                        // Block must exist since, at least, the indexed term
                        // is in the block:
                        Debug.Assert(result);

                        indexIsCurrent  = true;
                        didIndexNext    = false;
                        blocksSinceSeek = 0;

                        if (doOrd)
                        {
                            state.Ord = indexEnum.Ord - 1;
                        }

                        term.CopyBytes(indexEnum.Term);
                        //System.out.println("  seek: term=" + term.utf8ToString());
                    }
                    else
                    {
                        //System.out.println("  skip seek");
                        if (state.TermBlockOrd == blockTermCount && !NextBlock())
                        {
                            indexIsCurrent = false;
                            return(SeekStatus.END);
                        }
                    }

                    seekPending = false;

                    int common = 0;

                    // Scan within block.  We could do this by calling
                    // _next() and testing the resulting term, but this
                    // is wasteful.  Instead, we first confirm the
                    // target matches the common prefix of this block,
                    // and then we scan the term bytes directly from the
                    // termSuffixesreader's byte[], saving a copy into
                    // the BytesRef term per term.  Only when we return
                    // do we then copy the bytes into the term.

                    while (true)
                    {
                        // First, see if target term matches common prefix
                        // in this block:
                        if (common < termBlockPrefix)
                        {
                            int cmp = (term.Bytes[common] & 0xFF) - (target.Bytes[target.Offset + common] & 0xFF);
                            if (cmp < 0)
                            {
                                // TODO: maybe we should store common prefix
                                // in block header?  (instead of relying on
                                // last term of previous block)

                                // Target's prefix is after the common block
                                // prefix, so term cannot be in this block
                                // but it could be in next block.  We
                                // must scan to end-of-block to set common
                                // prefix for next block:
                                if (state.TermBlockOrd < blockTermCount)
                                {
                                    while (state.TermBlockOrd < blockTermCount - 1)
                                    {
                                        state.TermBlockOrd++;
                                        state.Ord++;
                                        termSuffixesReader.SkipBytes(termSuffixesReader.ReadVInt32());
                                    }
                                    int suffix = termSuffixesReader.ReadVInt32();
                                    term.Length = termBlockPrefix + suffix;
                                    if (term.Bytes.Length < term.Length)
                                    {
                                        term.Grow(term.Length);
                                    }
                                    termSuffixesReader.ReadBytes(term.Bytes, termBlockPrefix, suffix);
                                }
                                state.Ord++;

                                if (!NextBlock())
                                {
                                    indexIsCurrent = false;
                                    return(SeekStatus.END);
                                }
                                common = 0;
                            }
                            else if (cmp > 0)
                            {
                                // Target's prefix is before the common prefix
                                // of this block, so we position to start of
                                // block and return NOT_FOUND:
                                Debug.Assert(state.TermBlockOrd == 0);

                                int suffix = termSuffixesReader.ReadVInt32();
                                term.Length = termBlockPrefix + suffix;
                                if (term.Bytes.Length < term.Length)
                                {
                                    term.Grow(term.Length);
                                }
                                termSuffixesReader.ReadBytes(term.Bytes, termBlockPrefix, suffix);
                                return(SeekStatus.NOT_FOUND);
                            }
                            else
                            {
                                common++;
                            }

                            continue;
                        }

                        // Test every term in this block
                        while (true)
                        {
                            state.TermBlockOrd++;
                            state.Ord++;

                            int suffix = termSuffixesReader.ReadVInt32();

                            // We know the prefix matches, so just compare the new suffix:
                            int termLen = termBlockPrefix + suffix;
                            int bytePos = termSuffixesReader.Position;

                            bool next      = false;
                            int  limit     = target.Offset + (termLen < target.Length ? termLen : target.Length);
                            int  targetPos = target.Offset + termBlockPrefix;
                            while (targetPos < limit)
                            {
                                int cmp = (termSuffixes[bytePos++] & 0xFF) - (target.Bytes[targetPos++] & 0xFF);
                                if (cmp < 0)
                                {
                                    // Current term is still before the target;
                                    // keep scanning
                                    next = true;
                                    break;
                                }
                                else if (cmp > 0)
                                {
                                    // Done!  Current term is after target. Stop
                                    // here, fill in real term, return NOT_FOUND.
                                    term.Length = termBlockPrefix + suffix;
                                    if (term.Bytes.Length < term.Length)
                                    {
                                        term.Grow(term.Length);
                                    }
                                    termSuffixesReader.ReadBytes(term.Bytes, termBlockPrefix, suffix);
                                    //System.out.println("  NOT_FOUND");
                                    return(SeekStatus.NOT_FOUND);
                                }
                            }

                            if (!next && target.Length <= termLen)
                            {
                                term.Length = termBlockPrefix + suffix;
                                if (term.Bytes.Length < term.Length)
                                {
                                    term.Grow(term.Length);
                                }
                                termSuffixesReader.ReadBytes(term.Bytes, termBlockPrefix, suffix);

                                if (target.Length == termLen)
                                {
                                    // Done!  Exact match.  Stop here, fill in
                                    // real term, return FOUND.
                                    //System.out.println("  FOUND");
                                    return(SeekStatus.FOUND);
                                }
                                else
                                {
                                    //System.out.println("  NOT_FOUND");
                                    return(SeekStatus.NOT_FOUND);
                                }
                            }

                            if (state.TermBlockOrd == blockTermCount)
                            {
                                // Must pre-fill term for next block's common prefix
                                term.Length = termBlockPrefix + suffix;
                                if (term.Bytes.Length < term.Length)
                                {
                                    term.Grow(term.Length);
                                }
                                termSuffixesReader.ReadBytes(term.Bytes, termBlockPrefix, suffix);
                                break;
                            }
                            else
                            {
                                termSuffixesReader.SkipBytes(suffix);
                            }
                        }

                        // The purpose of the terms dict index is to seek
                        // the enum to the closest index term before the
                        // term we are looking for.  So, we should never
                        // cross another index term (besides the first
                        // one) while we are scanning:

                        Debug.Assert(indexIsCurrent);

                        if (!NextBlock())
                        {
                            //System.out.println("  END");
                            indexIsCurrent = false;
                            return(SeekStatus.END);
                        }
                        common = 0;
                    }
                }
Ejemplo n.º 6
0
        public override void Build(IInputEnumerator enumerator)
        {
            if (enumerator.HasContexts)
            {
                throw new ArgumentException("this suggester doesn't support contexts");
            }
            string prefix     = this.GetType().Name;
            var    directory  = OfflineSorter.DefaultTempDir();
            var    tempInput  = FileSupport.CreateTempFile(prefix, ".input", directory);
            var    tempSorted = FileSupport.CreateTempFile(prefix, ".sorted", directory);

            hasPayloads = enumerator.HasPayloads;

            var writer = new OfflineSorter.ByteSequencesWriter(tempInput);

            OfflineSorter.ByteSequencesReader reader = null;
            var scratch = new BytesRef();

            TokenStreamToAutomaton ts2a = GetTokenStreamToAutomaton();

            bool success = false;

            count = 0;
            byte[] buffer = new byte[8];
            try
            {
                var      output = new ByteArrayDataOutput(buffer);
                BytesRef surfaceForm;

                while (enumerator.MoveNext())
                {
                    surfaceForm = enumerator.Current;
                    ISet <Int32sRef> paths = ToFiniteStrings(surfaceForm, ts2a);

                    maxAnalyzedPathsForOneInput = Math.Max(maxAnalyzedPathsForOneInput, paths.Count);

                    foreach (Int32sRef path in paths)
                    {
                        Util.Fst.Util.ToBytesRef(path, scratch);

                        // length of the analyzed text (FST input)
                        if (scratch.Length > ushort.MaxValue - 2)
                        {
                            throw new ArgumentException("cannot handle analyzed forms > " + (ushort.MaxValue - 2) +
                                                        " in length (got " + scratch.Length + ")");
                        }
                        ushort analyzedLength = (ushort)scratch.Length;

                        // compute the required length:
                        // analyzed sequence + weight (4) + surface + analyzedLength (short)
                        int requiredLength = analyzedLength + 4 + surfaceForm.Length + 2;

                        BytesRef payload;

                        if (hasPayloads)
                        {
                            if (surfaceForm.Length > (ushort.MaxValue - 2))
                            {
                                throw new ArgumentException("cannot handle surface form > " + (ushort.MaxValue - 2) +
                                                            " in length (got " + surfaceForm.Length + ")");
                            }
                            payload = enumerator.Payload;
                            // payload + surfaceLength (short)
                            requiredLength += payload.Length + 2;
                        }
                        else
                        {
                            payload = null;
                        }

                        buffer = ArrayUtil.Grow(buffer, requiredLength);

                        output.Reset(buffer);

                        output.WriteInt16((short)analyzedLength);

                        output.WriteBytes(scratch.Bytes, scratch.Offset, scratch.Length);

                        output.WriteInt32(EncodeWeight(enumerator.Weight));

                        if (hasPayloads)
                        {
                            for (int i = 0; i < surfaceForm.Length; i++)
                            {
                                if (surfaceForm.Bytes[i] == PAYLOAD_SEP)
                                {
                                    throw new ArgumentException(
                                              "surface form cannot contain unit separator character U+001F; this character is reserved");
                                }
                            }
                            output.WriteInt16((short)surfaceForm.Length);
                            output.WriteBytes(surfaceForm.Bytes, surfaceForm.Offset, surfaceForm.Length);
                            output.WriteBytes(payload.Bytes, payload.Offset, payload.Length);
                        }
                        else
                        {
                            output.WriteBytes(surfaceForm.Bytes, surfaceForm.Offset, surfaceForm.Length);
                        }

                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(output.Position == requiredLength, () => output.Position + " vs " + requiredLength);
                        }

                        writer.Write(buffer, 0, output.Position);
                    }
                    count++;
                }
                writer.Dispose();

                // Sort all input/output pairs (required by FST.Builder):
                (new OfflineSorter(new AnalyzingComparer(hasPayloads))).Sort(tempInput, tempSorted);

                // Free disk space:
                tempInput.Delete();

                reader = new OfflineSorter.ByteSequencesReader(tempSorted);

                var outputs = new PairOutputs <long?, BytesRef>(PositiveInt32Outputs.Singleton,
                                                                ByteSequenceOutputs.Singleton);
                var builder = new Builder <PairOutputs <long?, BytesRef> .Pair>(FST.INPUT_TYPE.BYTE1, outputs);

                // Build FST:
                BytesRef  previousAnalyzed = null;
                BytesRef  analyzed         = new BytesRef();
                BytesRef  surface          = new BytesRef();
                Int32sRef scratchInts      = new Int32sRef();
                var       input            = new ByteArrayDataInput();

                // Used to remove duplicate surface forms (but we
                // still index the hightest-weight one).  We clear
                // this when we see a new analyzed form, so it cannot
                // grow unbounded (at most 256 entries):
                var seenSurfaceForms = new JCG.HashSet <BytesRef>();

                var dedup = 0;
                while (reader.Read(scratch))
                {
                    input.Reset(scratch.Bytes, scratch.Offset, scratch.Length);
                    ushort analyzedLength = (ushort)input.ReadInt16();
                    analyzed.Grow(analyzedLength + 2);
                    input.ReadBytes(analyzed.Bytes, 0, analyzedLength);
                    analyzed.Length = analyzedLength;

                    long cost = input.ReadInt32();

                    surface.Bytes = scratch.Bytes;
                    if (hasPayloads)
                    {
                        surface.Length = (ushort)input.ReadInt16();
                        surface.Offset = input.Position;
                    }
                    else
                    {
                        surface.Offset = input.Position;
                        surface.Length = scratch.Length - surface.Offset;
                    }

                    if (previousAnalyzed == null)
                    {
                        previousAnalyzed = new BytesRef();
                        previousAnalyzed.CopyBytes(analyzed);
                        seenSurfaceForms.Add(BytesRef.DeepCopyOf(surface));
                    }
                    else if (analyzed.Equals(previousAnalyzed))
                    {
                        dedup++;
                        if (dedup >= maxSurfaceFormsPerAnalyzedForm)
                        {
                            // More than maxSurfaceFormsPerAnalyzedForm
                            // dups: skip the rest:
                            continue;
                        }
                        if (seenSurfaceForms.Contains(surface))
                        {
                            continue;
                        }
                        seenSurfaceForms.Add(BytesRef.DeepCopyOf(surface));
                    }
                    else
                    {
                        dedup = 0;
                        previousAnalyzed.CopyBytes(analyzed);
                        seenSurfaceForms.Clear();
                        seenSurfaceForms.Add(BytesRef.DeepCopyOf(surface));
                    }

                    // TODO: I think we can avoid the extra 2 bytes when
                    // there is no dup (dedup==0), but we'd have to fix
                    // the exactFirst logic ... which would be sort of
                    // hairy because we'd need to special case the two
                    // (dup/not dup)...

                    // NOTE: must be byte 0 so we sort before whatever
                    // is next
                    analyzed.Bytes[analyzed.Offset + analyzed.Length]     = 0;
                    analyzed.Bytes[analyzed.Offset + analyzed.Length + 1] = (byte)dedup;
                    analyzed.Length += 2;

                    Util.Fst.Util.ToInt32sRef(analyzed, scratchInts);
                    //System.out.println("ADD: " + scratchInts + " -> " + cost + ": " + surface.utf8ToString());
                    if (!hasPayloads)
                    {
                        builder.Add(scratchInts, outputs.NewPair(cost, BytesRef.DeepCopyOf(surface)));
                    }
                    else
                    {
                        int      payloadOffset = input.Position + surface.Length;
                        int      payloadLength = scratch.Length - payloadOffset;
                        BytesRef br            = new BytesRef(surface.Length + 1 + payloadLength);
                        Array.Copy(surface.Bytes, surface.Offset, br.Bytes, 0, surface.Length);
                        br.Bytes[surface.Length] = PAYLOAD_SEP;
                        Array.Copy(scratch.Bytes, payloadOffset, br.Bytes, surface.Length + 1, payloadLength);
                        br.Length = br.Bytes.Length;
                        builder.Add(scratchInts, outputs.NewPair(cost, br));
                    }
                }
                fst = builder.Finish();

                //Util.dotToFile(fst, "/tmp/suggest.dot");

                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Dispose(reader, writer);
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(reader, writer);
                }

                tempInput.Delete();
                tempSorted.Delete();
            }
        }
Ejemplo n.º 7
0
                /// <remarks>
                /// TODO: we may want an alternate mode here which is
                /// "if you are about to return NOT_FOUND I won't use
                /// the terms data from that"; eg FuzzyTermsEnum will
                /// (usually) just immediately call seek again if we
                /// return NOT_FOUND so it's a waste for us to fill in
                /// the term that was actually NOT_FOUND
                /// </remarks>
                public override SeekStatus SeekCeil(BytesRef target)
                {
                    if (_indexEnum == null)
                    {
                        throw new InvalidOperationException("terms index was not loaded");
                    }

                    var doSeek = true;

                    // See if we can avoid seeking, because target term
                    // is after current term but before next index term:
                    if (_indexIsCurrent)
                    {
                        var cmp = BytesRef.UTF8SortedAsUnicodeComparer.Compare(_term, target);

                        if (cmp == 0)
                        {
                            return(SeekStatus.FOUND);     // Already at the requested term
                        }
                        if (cmp < 0)
                        {
                            // Target term is after current term
                            if (!_didIndexNext)
                            {
                                _nextIndexTerm = _indexEnum.Next == -1 ? null : _indexEnum.Term;
                                _didIndexNext  = true;
                            }

                            if (_nextIndexTerm == null ||
                                BytesRef.UTF8SortedAsUnicodeComparer.Compare(target, _nextIndexTerm) < 0)
                            {
                                // Optimization: requested term is within the
                                // same term block we are now in; skip seeking
                                // (but do scanning):
                                doSeek = false;
                            }
                        }
                    }

                    if (doSeek)
                    {
                        //System.out.println("  seek");

                        // Ask terms index to find biggest indexed term (=
                        // first term in a block) that's <= our text:
                        _input.Seek(_indexEnum.Seek(target));
                        var result = NextBlock();

                        // Block must exist since, at least, the indexed term
                        // is in the block:
                        Debug.Assert(result);

                        _indexIsCurrent  = true;
                        _didIndexNext    = false;
                        _blocksSinceSeek = 0;

                        if (_doOrd)
                        {
                            _state.Ord = _indexEnum.Ord - 1;
                        }

                        _term.CopyBytes(_indexEnum.Term);
                    }
                    else
                    {
                        if (_state.TermBlockOrd == _blockTermCount && !NextBlock())
                        {
                            _indexIsCurrent = false;
                            return(SeekStatus.END);
                        }
                    }

                    _seekPending = false;

                    var common = 0;

                    // Scan within block.  We could do this by calling
                    // _next() and testing the resulting term, but this
                    // is wasteful.  Instead, we first confirm the
                    // target matches the common prefix of this block,
                    // and then we scan the term bytes directly from the
                    // termSuffixesreader's byte[], saving a copy into
                    // the BytesRef term per term.  Only when we return
                    // do we then copy the bytes into the term.

                    while (true)
                    {
                        // First, see if target term matches common prefix
                        // in this block:
                        if (common < _termBlockPrefix)
                        {
                            var cmp = (_term.Bytes[common] & 0xFF) - (target.Bytes[target.Offset + common] & 0xFF);
                            if (cmp < 0)
                            {
                                // TODO: maybe we should store common prefix
                                // in block header?  (instead of relying on
                                // last term of previous block)

                                // Target's prefix is after the common block
                                // prefix, so term cannot be in this block
                                // but it could be in next block.  We
                                // must scan to end-of-block to set common
                                // prefix for next block:
                                if (_state.TermBlockOrd < _blockTermCount)
                                {
                                    while (_state.TermBlockOrd < _blockTermCount - 1)
                                    {
                                        _state.TermBlockOrd++;
                                        _state.Ord++;
                                        _termSuffixesReader.SkipBytes(_termSuffixesReader.ReadVInt());
                                    }
                                    var suffix = _termSuffixesReader.ReadVInt();
                                    _term.Length = _termBlockPrefix + suffix;
                                    if (_term.Bytes.Length < _term.Length)
                                    {
                                        _term.Grow(_term.Length);
                                    }
                                    _termSuffixesReader.ReadBytes(_term.Bytes, _termBlockPrefix, suffix);
                                }
                                _state.Ord++;

                                if (!NextBlock())
                                {
                                    _indexIsCurrent = false;
                                    return(SeekStatus.END);
                                }
                                common = 0;
                            }
                            else if (cmp > 0)
                            {
                                // Target's prefix is before the common prefix
                                // of this block, so we position to start of
                                // block and return NOT_FOUND:
                                Debug.Assert(_state.TermBlockOrd == 0);

                                var suffix = _termSuffixesReader.ReadVInt();
                                _term.Length = _termBlockPrefix + suffix;
                                if (_term.Bytes.Length < _term.Length)
                                {
                                    _term.Grow(_term.Length);
                                }
                                _termSuffixesReader.ReadBytes(_term.Bytes, _termBlockPrefix, suffix);
                                return(SeekStatus.NOT_FOUND);
                            }
                            else
                            {
                                common++;
                            }

                            continue;
                        }

                        // Test every term in this block
                        while (true)
                        {
                            _state.TermBlockOrd++;
                            _state.Ord++;

                            var suffix = _termSuffixesReader.ReadVInt();

                            // We know the prefix matches, so just compare the new suffix:

                            var termLen = _termBlockPrefix + suffix;
                            var bytePos = _termSuffixesReader.Position;

                            var next = false;

                            var limit     = target.Offset + (termLen < target.Length ? termLen : target.Length);
                            var targetPos = target.Offset + _termBlockPrefix;
                            while (targetPos < limit)
                            {
                                var cmp = (_termSuffixes[bytePos++] & 0xFF) - (target.Bytes[targetPos++] & 0xFF);
                                if (cmp < 0)
                                {
                                    // Current term is still before the target;
                                    // keep scanning
                                    next = true;
                                    break;
                                }

                                if (cmp <= 0)
                                {
                                    continue;
                                }

                                // Done!  Current term is after target. Stop
                                // here, fill in real term, return NOT_FOUND.
                                _term.Length = _termBlockPrefix + suffix;
                                if (_term.Bytes.Length < _term.Length)
                                {
                                    _term.Grow(_term.Length);
                                }
                                _termSuffixesReader.ReadBytes(_term.Bytes, _termBlockPrefix, suffix);
                                return(SeekStatus.NOT_FOUND);
                            }

                            if (!next && target.Length <= termLen)
                            {
                                _term.Length = _termBlockPrefix + suffix;
                                if (_term.Bytes.Length < _term.Length)
                                {
                                    _term.Grow(_term.Length);
                                }
                                _termSuffixesReader.ReadBytes(_term.Bytes, _termBlockPrefix, suffix);

                                return(target.Length == termLen ? SeekStatus.FOUND : SeekStatus.NOT_FOUND);
                            }

                            if (_state.TermBlockOrd == _blockTermCount)
                            {
                                // Must pre-fill term for next block's common prefix
                                _term.Length = _termBlockPrefix + suffix;
                                if (_term.Bytes.Length < _term.Length)
                                {
                                    _term.Grow(_term.Length);
                                }
                                _termSuffixesReader.ReadBytes(_term.Bytes, _termBlockPrefix, suffix);
                                break;
                            }

                            _termSuffixesReader.SkipBytes(suffix);
                        }

                        // The purpose of the terms dict index is to seek
                        // the enum to the closest index term before the
                        // term we are looking for.  So, we should never
                        // cross another index term (besides the first
                        // one) while we are scanning:

                        Debug.Assert(_indexIsCurrent);

                        if (!NextBlock())
                        {
                            _indexIsCurrent = false;
                            return(SeekStatus.END);
                        }
                        common = 0;
                    }
                }