コード例 #1
0
ファイル: FSTOrdTermsReader.cs プロジェクト: ywscr/lucenenet
                /// <summary>
                /// Builds an intersect enum over the outer reader's FST index, driven by the
                /// run automaton of <paramref name="compiled"/>.
                /// </summary>
                /// <param name="outerInstance">Reader whose <c>index</c> FST is traversed.</param>
                /// <param name="compiled">Compiled automaton; only its <c>RunAutomaton</c> is read here.</param>
                /// <param name="startTerm">
                /// Optional term to seek to first. When <c>null</c>, <c>pending</c> simply reflects
                /// whether the top frame is accepted.
                /// </param>
                internal IntersectTermsEnum(TermsReader outerInstance, CompiledAutomaton compiled, BytesRef startTerm) : base(outerInstance)
                {
                    this.fst = outerInstance.index;
                    this.fstReader = fst.GetBytesReader();
                    this.fstOutputs = outerInstance.index.Outputs;
                    this.fsa = compiled.RunAutomaton;
                    this.level = -1;

                    // Pre-allocate a fixed number of frames so the stack never holds null slots.
                    this.stack = new Frame[16];
                    for (int slot = 0; slot < stack.Length; slot++)
                    {
                        this.stack[slot] = new Frame();
                    }

                    // The virtual frame is loaded only for its side effects; its result is not kept.
                    LoadVirtualFrame(NewFrame());
                    this.level++;
                    PushFrame(LoadFirstFrame(NewFrame()));

                    this.decoded = false;
                    this.pending = false;

                    if (startTerm != null)
                    {
                        // After seeking, the current term is pending only if it differs from the
                        // start term and the top frame is both valid and accepted.
                        DoSeekCeil(startTerm);
                        pending = !startTerm.Equals(term) && IsValid(TopFrame()) && IsAccept(TopFrame());
                    }
                    else
                    {
                        pending = IsAccept(TopFrame());
                    }
                }
コード例 #2
0
ファイル: Term.cs プロジェクト: freemsly/lucenenet
        /// <summary>
        /// Determines whether <paramref name="other"/> has the same runtime type, the same
        /// field name (ordinal comparison) and equal bytes as this term.
        /// </summary>
        /// <param name="other">The term to compare against; may be <c>null</c>.</param>
        /// <returns><c>true</c> when both terms are considered equal.</returns>
        public bool Equals(Term other)
        {
            if (object.ReferenceEquals(other, null))
            {
                // Equal to null only if this instance is itself null.
                return object.ReferenceEquals(this, null);
            }
            if (object.ReferenceEquals(other, this))
            {
                return true;
            }

            bool sameTypeAndField =
                this.GetType() == other.GetType() &&
                string.Compare(this.Field_Renamed, other.Field_Renamed, StringComparison.Ordinal) == 0;
            if (!sameTypeAndField)
            {
                return false;
            }

            // A missing byte payload only matches another missing payload.
            return Bytes_Renamed == null
                ? other.Bytes_Renamed == null
                : Bytes_Renamed.Equals(other.Bytes_Renamed);
        }
コード例 #3
0
        /// <summary>
        /// Reads <paramref name="in"/> line by line until the END marker and, for every line that
        /// starts with the FIELD prefix, maps the field name (the UTF-8 text following the prefix)
        /// to the file pointer reported right after that line was read.
        /// </summary>
        /// <param name="in">Input to scan; it is wrapped in a checksumming input.</param>
        /// <returns>Field names mapped to file pointers, ordered by ordinal string comparison.</returns>
        private IDictionary <string, long?> ReadFields(IndexInput @in)
        {
            ChecksumIndexInput checksumInput = new BufferedChecksumIndexInput(@in);
            var line = new BytesRef(10);

            // LUCENENET specific: Use StringComparer.Ordinal to get the same ordering as Java
            var pointersByField = new JCG.SortedDictionary <string, long?>(StringComparer.Ordinal);

            for (;;)
            {
                SimpleTextUtil.ReadLine(checksumInput, line);

                if (line.Equals(SimpleTextFieldsWriter.END))
                {
                    break;
                }

                if (!StringHelper.StartsWith(line, SimpleTextFieldsWriter.FIELD))
                {
                    continue;
                }

                // Strip the FIELD prefix; the remainder of the line is the field name.
                int nameStart  = line.Offset + SimpleTextFieldsWriter.FIELD.Length;
                int nameLength = line.Length - SimpleTextFieldsWriter.FIELD.Length;
                string fieldName = Encoding.UTF8.GetString(line.Bytes, nameStart, nameLength);

                pointersByField[fieldName] = checksumInput.GetFilePointer();
            }

            SimpleTextUtil.CheckFooter(checksumInput);
            return pointersByField;
        }
コード例 #4
0
            /// <summary>
            /// Seeks the underlying FST enum to <paramref name="text"/> or the next entry after it
            /// and stores the result in <c>current</c>.
            /// </summary>
            /// <param name="text">The term to seek to.</param>
            /// <returns>
            /// <c>END</c> when the FST enum returns nothing, <c>FOUND</c> when the returned input
            /// equals <paramref name="text"/>, otherwise <c>NOT_FOUND</c>.
            /// </returns>
            public override SeekStatus SeekCeil(BytesRef text)
            {
                current = fstEnum.SeekCeil(text);
                if (current == null)
                {
                    return SeekStatus.END;
                }

                // A new entry was positioned, so any previously decoded state is stale.
                didDecode = false;

                return text.Equals(current.Input)
                    ? SeekStatus.FOUND
                    : SeekStatus.NOT_FOUND;
            }
コード例 #5
0
 /// <summary>
 /// Adopts <paramref name="otherState"/> for <paramref name="target"/> and marks a seek as
 /// pending, unless the enum's current term already equals <paramref name="target"/>.
 /// </summary>
 public override void seekExact(BytesRef target, TermState otherState)
 {
     if (target.Equals(term_Renamed))
     {
         // Already on this term: nothing to do.
         return;
     }

     state.copyFrom(otherState);
     // Keep a private copy so later changes to the caller's BytesRef do not affect us.
     term_Renamed = BytesRef.deepCopyOf(target);
     seekPending = true;
 }
コード例 #6
0
 /// <summary>
 /// Adopts <paramref name="otherState"/> for <paramref name="target"/> and marks a seek as
 /// pending, unless the enum's current term already equals <paramref name="target"/>.
 /// </summary>
 public override void SeekExact(BytesRef target, TermState otherState)
 {
     if (target.Equals(term))
     {
         // Already on this term: nothing to do.
         return;
     }

     state.CopyFrom(otherState);
     // Keep a private copy so later changes to the caller's BytesRef do not affect us.
     term = BytesRef.DeepCopyOf(target);
     seekPending = true;
 }
コード例 #7
0
        /// <summary>
        /// Reads the live-docs file of a segment: a SIZE line, then one DOC line per set bit
        /// until the END marker, followed by a footer check.
        /// </summary>
        /// <param name="dir">Directory the live-docs file is opened from.</param>
        /// <param name="info">Segment commit info; supplies the segment name and deletion generation.</param>
        /// <param name="context">I/O context passed through when opening the file.</param>
        /// <returns>Bits wrapping the parsed bit set and its declared size.</returns>
        public override IBits ReadLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context)
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(info.HasDeletions);
            }

            var line  = new BytesRef();
            var utf16 = new CharsRef();
            var liveDocsFile = IndexFileNames.FileNameFromGeneration(info.Info.Name, LIVEDOCS_EXTENSION, info.DelGen);

            ChecksumIndexInput input = null;
            var completed = false;
            try
            {
                input = dir.OpenChecksumInput(liveDocsFile, context);

                // First line carries the total number of bits.
                SimpleTextUtil.ReadLine(input, line);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(StringHelper.StartsWith(line, SIZE));
                }
                var size = ParseInt32At(line, SIZE.Length, utf16);
                var bits = new BitSet(size);

                // Every following line up to END names one document id whose bit is set.
                for (SimpleTextUtil.ReadLine(input, line); !line.Equals(END); SimpleTextUtil.ReadLine(input, line))
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(StringHelper.StartsWith(line, DOC));
                    }
                    bits.Set(ParseInt32At(line, DOC.Length, utf16));
                }

                SimpleTextUtil.CheckFooter(input);

                completed = true;
                return new SimpleTextBits(bits, size);
            }
            finally
            {
                // On failure, dispose through the exception-handling variant instead of plain Dispose.
                if (completed)
                {
                    IOUtils.Dispose(input);
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(input);
                }
            }
        }
コード例 #8
0
ファイル: FSTCompletion.cs プロジェクト: ywscr/lucenenet
 /// <summary>
 /// Checks if the list of <see cref="Completion"/>s already contains an entry whose
 /// <c>Utf8</c> bytes equal <paramref name="key"/>. If so, moves that entry to the
 /// first position, leaving the relative order of all other entries unchanged.
 /// </summary>
 /// <param name="list">The completions collected so far; may be reordered in place.</param>
 /// <param name="key">The exact-match key to look for.</param>
 /// <returns>
 /// Returns <c>true</c> if and only if <paramref name="list"/> contained
 /// <paramref name="key"/>.
 /// </returns>
 private bool CheckExistingAndReorder(IList <Completion> list, BytesRef key)
 {
     // We assume list does not have duplicates (because of how the FST is created).
     for (int i = list.Count; --i >= 0;)
     {
         if (!key.Equals(list[i].Utf8))
         {
             continue;
         }

         // Key found. Unless already at i == 0, remove it and push it up front so that
         // the ordering remains identical with the exception of the exact match.
         if (i != 0)
         {
             var element = list[i];
             // Remove by index: removing by value would search the list again and could
             // drop a different entry that merely compares equal.
             list.RemoveAt(i);
             list.Insert(0, element);
         }
         return true;
     }
     return false;
 }
コード例 #9
0
        /// <summary>
        /// Returns grouped facet results that were computed over zero or more segments.
        /// Grouped facet counts are merged from zero or more segment results.
        /// <para>
        /// If counts for a current segment are still held in <c>m_segmentFacetCounts</c>, they are
        /// first converted into a segment result and the field is reset to <c>null</c>.
        /// </para>
        /// </summary>
        /// <param name="size">The total number of facets to include. This is typically offset + limit</param>
        /// <param name="minCount">The minimum count a facet entry should have to be included in the grouped facet result</param>
        /// <param name="orderByCount">
        /// Whether to sort the facet entries by facet entry count. If <c>false</c> then the facets
        /// are sorted lexicographically in ascending order.
        /// </param>
        /// <returns>grouped facet results</returns>
        /// <exception cref="System.IO.IOException">If I/O related errors occur during merging segment grouped facet counts.</exception>
        public virtual GroupedFacetResult MergeSegmentResults(int size, int minCount, bool orderByCount)
        {
            // Flush the in-progress segment (if any) into the list of finished segment results.
            if (m_segmentFacetCounts != null)
            {
                m_segmentResults.Add(CreateSegmentResult());
                m_segmentFacetCounts = null; // reset
            }

            int totalCount   = 0;
            int missingCount = 0;
            SegmentResultPriorityQueue segments = new SegmentResultPriorityQueue(m_segmentResults.Count);

            foreach (AbstractSegmentResult segmentResult in m_segmentResults)
            {
                // Missing counts are summed for every segment, including those skipped below.
                missingCount += segmentResult.m_missing;
                // A segment whose merge position is already at/after its last term has nothing to merge.
                if (segmentResult.m_mergePos >= segmentResult.m_maxTermPos)
                {
                    continue;
                }
                totalCount += segmentResult.m_total;
                segments.Add(segmentResult);
            }

            GroupedFacetResult facetResult = new GroupedFacetResult(size, minCount, orderByCount, totalCount, missingCount);

            // Each outer iteration emits one facet value: the merge term of the queue's top segment.
            // NOTE(review): presumably the queue orders segments by m_mergeTerm - confirm in SegmentResultPriorityQueue.
            while (segments.Count > 0)
            {
                AbstractSegmentResult segmentResult = segments.Top;
                // Copy the term because NextTerm() below is called on the same segment result.
                BytesRef currentFacetValue          = BytesRef.DeepCopyOf(segmentResult.m_mergeTerm);
                int      count = 0;

                // Sum the counts of every segment whose current merge term equals currentFacetValue.
                do
                {
                    count += segmentResult.m_counts[segmentResult.m_mergePos++];
                    if (segmentResult.m_mergePos < segmentResult.m_maxTermPos)
                    {
                        // Segment has more terms: advance it and re-read the queue's top.
                        segmentResult.NextTerm();
                        segmentResult = segments.UpdateTop();
                    }
                    else
                    {
                        // Segment is exhausted: drop it from the queue.
                        segments.Pop();
                        segmentResult = segments.Top;
                        if (segmentResult == null)
                        {
                            // Queue is empty; the count gathered so far is still added below.
                            break;
                        }
                    }
                } while (currentFacetValue.Equals(segmentResult.m_mergeTerm));
                facetResult.AddFacetCount(currentFacetValue, count);
            }
            return(facetResult);
        }
コード例 #10
0
ファイル: TestBytesRef.cs プロジェクト: joyanta/lucene.net
        /// <summary>
        /// Verifies that a <c>BytesRef</c> built from an array exposes that array with offset 0 and
        /// full length, that an offset/length slice decodes to the expected string, and that
        /// the whole-array and sliced references are not equal.
        /// </summary>
        public virtual void TestFromBytes()
        {
            sbyte[] raw = { (sbyte)'a', (sbyte)'b', (sbyte)'c', (sbyte)'d' };

            // Whole-array reference.
            BytesRef whole = new BytesRef(raw);
            Assert.AreEqual(raw, whole.Bytes);
            Assert.AreEqual(0, whole.Offset);
            Assert.AreEqual(4, whole.Length);

            // Slice covering the last three bytes.
            BytesRef tail = new BytesRef(raw, 1, 3);
            Assert.AreEqual("bcd", tail.Utf8ToString());

            Assert.IsFalse(whole.Equals(tail));
        }
コード例 #11
0
        /// <summary>
        /// Two range queries are equal when the base class considers them equal, they share the
        /// same runtime type and inclusion flags, and their lower and upper terms match
        /// (a missing term only matches another missing term).
        /// </summary>
        /// <param name="obj">The object to compare with.</param>
        /// <returns><c>true</c> if <paramref name="obj"/> is an equal query.</returns>
        public override bool Equals(object obj)
        {
            if (object.ReferenceEquals(this, obj))
            {
                return true;
            }
            if (!base.Equals(obj) || this.GetType() != obj.GetType())
            {
                return false;
            }

            var that = (TermRangeQuery)obj;

            if (IncludeLower != that.IncludeLower || IncludeUpper != that.IncludeUpper)
            {
                return false;
            }

            bool lowerMatches = LowerTerm_Renamed == null
                ? that.LowerTerm_Renamed == null
                : LowerTerm_Renamed.Equals(that.LowerTerm_Renamed);
            if (!lowerMatches)
            {
                return false;
            }

            return UpperTerm_Renamed == null
                ? that.UpperTerm_Renamed == null
                : UpperTerm_Renamed.Equals(that.UpperTerm_Renamed);
        }
コード例 #12
0
ファイル: TermRangeQuery.cs プロジェクト: wow64bb/YAFNET
        /// <summary>
        /// Two range queries are equal when the base class considers them equal, they share the
        /// same runtime type and inclusion flags, and their lower and upper terms match
        /// (a missing term only matches another missing term).
        /// </summary>
        /// <param name="obj">The object to compare with.</param>
        /// <returns><c>true</c> if <paramref name="obj"/> is an equal query.</returns>
        public override bool Equals(object obj)
        {
            if (object.ReferenceEquals(this, obj))
            {
                return true;
            }
            if (!base.Equals(obj) || this.GetType() != obj.GetType())
            {
                return false;
            }

            var that = (TermRangeQuery)obj;

            if (includeLower != that.includeLower || includeUpper != that.includeUpper)
            {
                return false;
            }

            bool sameLower = lowerTerm == null
                ? that.lowerTerm == null
                : lowerTerm.Equals(that.lowerTerm);
            if (!sameLower)
            {
                return false;
            }

            return upperTerm == null
                ? that.upperTerm == null
                : upperTerm.Equals(that.upperTerm);
        }
コード例 #13
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public void checkIntegrity() throws java.io.IOException
        public override void checkIntegrity()
        {
            BytesRef   line = new BytesRef();
            IndexInput copy = data.clone();

            // Work on a clone rewound to the start so the shared input is left alone.
            copy.seek(0);
            ChecksumIndexInput checksummed = new BufferedChecksumIndexInput(copy);

            // Consume every line up to and including the END marker ...
            do
            {
                SimpleTextUtil.ReadLine(checksummed, line);
            } while (!line.Equals(END));

            // ... then validate the footer that follows it.
            SimpleTextUtil.CheckFooter(checksummed);
        }
コード例 #14
0
        /// <summary>
        /// Scans the stored fields input up to its END marker and records, for every DOC line,
        /// the file pointer reported right after that line was read.
        /// </summary>
        /// <remarks>
        /// No separate .fdx-like index is written; the whole stored fields file is read
        /// up-front and the offsets are kept so documents can be sought later.
        /// </remarks>
        /// <param name="size">Number of offset slots to allocate.</param>
        private void ReadIndex(int size)
        {
            ChecksumIndexInput input = new BufferedChecksumIndexInput(_input);

            _offsets = new long[size];
            int docsSeen = 0;

            // The END check runs before each read, against whatever _scratch currently holds.
            while (true)
            {
                if (_scratch.Equals(SimpleTextStoredFieldsWriter.END))
                {
                    break;
                }

                SimpleTextUtil.ReadLine(input, _scratch);
                if (!StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.DOC))
                {
                    continue;
                }

                _offsets[docsSeen] = input.GetFilePointer();
                docsSeen++;
            }

            SimpleTextUtil.CheckFooter(input);
            Debug.Assert(docsSeen == _offsets.Length);
        }
コード例 #15
0
        // we don't actually write a .fdx-like index, instead we read the
        // stored fields file in entirety up-front and save the offsets
        // so we can seek to the documents later.
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: private void readIndex(int size) throws java.io.IOException
        private void readIndex(int size)
        {
            ChecksumIndexInput input = new BufferedChecksumIndexInput(@in);

            offsets = new long[size];
            int docsSeen = 0;

            // The END check runs before each read, against whatever scratch currently holds.
            while (true)
            {
                if (scratch.Equals(END))
                {
                    break;
                }

                SimpleTextUtil.ReadLine(input, scratch);
                if (!StringHelper.StartsWith(scratch, DOC))
                {
                    continue;
                }

                // Remember where the input stands right after each DOC line.
                offsets[docsSeen] = input.FilePointer;
                docsSeen++;
            }

            SimpleTextUtil.CheckFooter(input);
            Debug.Assert(docsSeen == offsets.Length);
        }
コード例 #16
0
 /// <summary>
 /// Checks the statistics reported for the finished term against what was tracked while
 /// the term was being written, accumulates the per-field sums, and forwards the call to
 /// the wrapped consumer.
 /// </summary>
 /// <param name="text">The term being finished; asserted to equal the last started term.</param>
 /// <param name="stats">Document and total term frequency of the term.</param>
 public override void FinishTerm(BytesRef text, TermStats stats)
 {
     Debug.Assert(state == TermsConsumerState.START);
     state = TermsConsumerState.INITIAL;

     Debug.Assert(text.Equals(lastTerm));
     Debug.Assert(stats.DocFreq > 0); // otherwise, this method should not be called.
     Debug.Assert(stats.DocFreq == lastPostingsConsumer.docFreq);
     sumDocFreq += stats.DocFreq;

     if (fieldInfo.IndexOptions != IndexOptions.DOCS_ONLY)
     {
         Debug.Assert(stats.TotalTermFreq == lastPostingsConsumer.totalTermFreq);
         sumTotalTermFreq += stats.TotalTermFreq;
     }
     else
     {
         // Docs-only fields carry no frequencies, so -1 is the expected value.
         Debug.Assert(stats.TotalTermFreq == -1);
     }

     @in.FinishTerm(text, stats);
 }
コード例 #17
0
        /// <summary>
        /// Re-reads a clone of the data input from position 0 through a checksumming wrapper,
        /// consuming lines up to the END marker and then checking the footer.
        /// </summary>
        public override void CheckIntegrity()
        {
            var line = new BytesRef();
            var copy = (IndexInput)data.Clone();

            copy.Seek(0);
            ChecksumIndexInput checksummed = new BufferedChecksumIndexInput(copy);

            // Consume every line up to and including the END marker ...
            do
            {
                SimpleTextUtil.ReadLine(checksummed, line);
            } while (!line.Equals(SimpleTextDocValuesWriter.END));

            // ... then validate the footer that follows it.
            SimpleTextUtil.CheckFooter(checksummed);
        }
コード例 #18
0
        /// <summary>
        /// Reads <paramref name="in"/> line by line until the END marker and, for every line that
        /// starts with the FIELD prefix, maps the field name (the UTF-8 text following the prefix)
        /// to the file pointer reported right after that line was read.
        /// </summary>
        /// <param name="in">Input to scan; it is wrapped in a checksumming input.</param>
        /// <returns>Field names mapped to file pointers, ordered by ordinal string comparison.</returns>
        private SortedDictionary <string, long?> ReadFields(IndexInput @in)
        {
            ChecksumIndexInput input   = new BufferedChecksumIndexInput(@in);
            BytesRef           scratch = new BytesRef(10);

            // Ordinal ordering matches Java's TreeMap<String, ...>; the default .NET string
            // comparer is culture-sensitive and can order field names differently.
            SortedDictionary <string, long?> fields = new SortedDictionary <string, long?>(System.StringComparer.Ordinal);

            while (true)
            {
                SimpleTextUtil.ReadLine(input, scratch);
                if (scratch.Equals(END))
                {
                    SimpleTextUtil.CheckFooter(input);
                    return fields;
                }

                if (StringHelper.StartsWith(scratch, FIELD))
                {
                    // Decode the bytes after the FIELD prefix as UTF-8 (replaces the Java
                    // `new String(bytes, off, len, UTF_8)` leftover, which is not valid C#).
                    // NOTE(review): assumes scratch.Bytes is byte[] - confirm against BytesRef.
                    string fieldName = System.Text.Encoding.UTF8.GetString(
                        scratch.Bytes,
                        scratch.Offset + FIELD.Length,
                        scratch.Length - FIELD.Length);
                    fields[fieldName] = input.FilePointer;
                }
            }
        }
コード例 #19
0
        /// <summary>
        /// Scans the stored fields input up to its END marker and records, for every DOC line,
        /// the input position reported right after that line was read.
        /// </summary>
        /// <remarks>
        /// No separate .fdx-like index is written; the whole stored fields file is read
        /// up-front and the offsets are kept so documents can be sought later.
        /// </remarks>
        /// <param name="size">Number of offset slots to allocate.</param>
        private void ReadIndex(int size)
        {
            ChecksumIndexInput input = new BufferedChecksumIndexInput(_input);

            _offsets = new long[size];
            int docsSeen = 0;

            // The END check runs before each read, against whatever _scratch currently holds.
            while (true)
            {
                if (_scratch.Equals(SimpleTextStoredFieldsWriter.END))
                {
                    break;
                }

                SimpleTextUtil.ReadLine(input, _scratch);
                if (!StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.DOC))
                {
                    continue;
                }

                _offsets[docsSeen] = input.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                docsSeen++;
            }

            SimpleTextUtil.CheckFooter(input);
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(docsSeen == _offsets.Length);
            }
        }
コード例 #20
0
        // No separate .tvx-like index is written. Instead the whole vectors file is
        // scanned up-front and the file pointer after each DOC line is remembered,
        // so the data can be sought later.
        private void ReadIndex(int maxDoc)
        {
            ChecksumIndexInput input = new BufferedChecksumIndexInput(_input);

            _offsets = new long[maxDoc];
            int docsSeen = 0;

            // The END check runs before each read, against whatever _scratch currently holds.
            while (true)
            {
                if (_scratch.Equals(SimpleTextTermVectorsWriter.END))
                {
                    break;
                }

                SimpleTextUtil.ReadLine(input, _scratch);
                if (!StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.DOC))
                {
                    continue;
                }

                _offsets[docsSeen] = input.GetFilePointer();
                docsSeen++;
            }

            SimpleTextUtil.CheckFooter(input);
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(docsSeen == _offsets.Length);
            }
        }
コード例 #21
0
        /// <summary>
        /// Builds the FST from the entries of <paramref name="iterator"/>, mapping each distinct
        /// suggestion to the weight reported by the wrapping <c>WFSTInputIterator</c>.
        /// Consecutive duplicate suggestions are added only once.
        /// </summary>
        /// <param name="iterator">Source of suggestions; must not carry payloads or contexts.</param>
        /// <exception cref="ArgumentException">
        /// If <paramref name="iterator"/> reports payloads or contexts.
        /// </exception>
        public override void Build(IInputIterator iterator)
        {
            if (iterator.HasPayloads)
            {
                throw new ArgumentException("this suggester doesn't support payloads");
            }
            if (iterator.HasContexts)
            {
                throw new ArgumentException("this suggester doesn't support contexts");
            }

            count = 0;
            BytesRef       scratch;     // assigned by iter.Next() before first use
            IInputIterator iter        = new WFSTInputIterator(this, iterator);
            var            scratchInts = new Int32sRef();
            BytesRef       previous    = null;
            var            outputs     = PositiveInt32Outputs.Singleton;
            var            builder     = new Builder <long?>(FST.INPUT_TYPE.BYTE1, outputs);

            while ((scratch = iter.Next()) != null)
            {
                long cost = iter.Weight;

                if (previous == null)
                {
                    // First entry: create the buffer that tracks the last added suggestion.
                    previous = new BytesRef();
                }
                else if (scratch.Equals(previous))
                {
                    continue; // for duplicate suggestions, the best weight is actually
                    // added
                }

                Lucene.Net.Util.Fst.Util.ToInt32sRef(scratch, scratchInts);
                builder.Add(scratchInts, cost);
                previous.CopyBytes(scratch);
                count++;
            }

            fst = builder.Finish();
        }
コード例 #22
0
        /// <summary>
        /// Two compiled automata are equal when they have the same runtime type and automaton
        /// type and, depending on that type, equal terms (SINGLE, PREFIX) or equal run
        /// automata (NORMAL). Any other automaton type needs no further comparison.
        /// </summary>
        /// <param name="obj">The object to compare with.</param>
        /// <returns><c>true</c> if <paramref name="obj"/> is an equal automaton.</returns>
        public override bool Equals(object obj)
        {
            if (object.ReferenceEquals(this, obj))
            {
                return true;
            }
            if (obj == null || this.GetType() != obj.GetType())
            {
                return false;
            }

            var that = (CompiledAutomaton)obj;
            if (Type != that.Type)
            {
                return false;
            }

            if (Type == AUTOMATON_TYPE.SINGLE || Type == AUTOMATON_TYPE.PREFIX)
            {
                return Term.Equals(that.Term);
            }
            if (Type == AUTOMATON_TYPE.NORMAL)
            {
                return RunAutomaton.Equals(that.RunAutomaton);
            }

            return true;
        }
コード例 #23
0
                /// <summary>
                /// Builds an intersect enum over the outer reader's FST dictionary, driven by the
                /// run automaton of <paramref name="compiled"/>.
                /// </summary>
                /// <param name="outerInstance">Reader whose <c>dict</c> FST is traversed.</param>
                /// <param name="compiled">Compiled automaton; only its <c>RunAutomaton</c> is read here.</param>
                /// <param name="startTerm">
                /// Optional term to seek to first. When <c>null</c>, <c>pending</c> simply reflects
                /// whether the top frame is accepted.
                /// </param>
                internal IntersectTermsEnum(FSTTermsReader.TermsReader outerInstance, CompiledAutomaton compiled, BytesRef startTerm) : base(outerInstance)
                {
                    this.outerInstance = outerInstance;
                    //if (TEST) System.out.println("Enum init, startTerm=" + startTerm);
                    this.fst        = outerInstance.dict;
                    this.fstReader  = fst.GetBytesReader();
                    this.fstOutputs = outerInstance.dict.Outputs;
                    this.fsa        = compiled.RunAutomaton;
                    this.level      = -1;

                    // Pre-allocate a fixed number of frames so the stack never holds null slots.
                    this.stack      = new Frame[16];
                    for (int i = 0; i < stack.Length; i++)
                    {
                        this.stack[i] = new Frame(this);
                    }

                    // The virtual frame is loaded only for its side effects; its return value was
                    // previously stored and immediately overwritten (IDE0059), so it is not kept.
                    LoadVirtualFrame(NewFrame());
                    this.level++;
                    Frame frame = LoadFirstFrame(NewFrame());
                    PushFrame(frame);

                    this.meta     = null;
                    this.metaUpto = 1;
                    this.decoded  = false;
                    this.pending  = false;

                    if (startTerm == null)
                    {
                        pending = IsAccept(TopFrame());
                    }
                    else
                    {
                        // After seeking, the current term is pending only if it differs from the
                        // start term and the top frame is both valid and accepted.
                        DoSeekCeil(startTerm);
                        pending = !startTerm.Equals(term) && IsValid(TopFrame()) && IsAccept(TopFrame());
                    }
                }
コード例 #24
0
        /// <summary>
        /// Decides whether <paramref name="term"/> lies inside the configured range.
        /// </summary>
        /// <param name="term">The candidate term.</param>
        /// <returns>
        /// <c>NO</c> when the lower bound is exclusive and <paramref name="term"/> equals it;
        /// <c>END</c> when an upper bound exists and the term is beyond it, or equal to it while
        /// the bound is exclusive; otherwise <c>YES</c>.
        /// </returns>
        protected override AcceptStatus Accept(BytesRef term)
        {
            bool onExcludedLowerBound = !this.includeLower && term.Equals(lowerBytesRef);
            if (onExcludedLowerBound)
            {
                return AcceptStatus.NO;
            }

            if (upperBytesRef == null)
            {
                // No upper bound: everything from here on is accepted.
                return AcceptStatus.YES;
            }

            // Use this field's default sort ordering
            int cmp = termComp.Compare(upperBytesRef, term);
            bool pastUpperBound = cmp < 0 || (cmp == 0 && !includeUpper);

            return pastUpperBound ? AcceptStatus.END : AcceptStatus.YES;
        }
コード例 #25
0
            /// <summary>
            /// Two facet entries are equal when they have the same runtime type, the same
            /// count and equal values.
            /// </summary>
            /// <param name="o">The object to compare with.</param>
            /// <returns><c>true</c> if <paramref name="o"/> is an equal facet entry.</returns>
            public override bool Equals(object o)
            {
                if (object.ReferenceEquals(this, o))
                {
                    return true;
                }
                if (o == null)
                {
                    return false;
                }
                if (GetType() != o.GetType())
                {
                    return false;
                }

                var that = (FacetEntry)o;
                return count == that.count && value.Equals(that.value);
            }
コード例 #26
0
 /// <summary>
 /// Checks the statistics reported for the finished term against what was tracked while
 /// the term was being written, accumulates the per-field sums, and forwards the call to
 /// the wrapped consumer.
 /// </summary>
 /// <param name="text">The term being finished; asserted to equal the last started term.</param>
 /// <param name="stats">Document and total term frequency of the term.</param>
 public override void FinishTerm(BytesRef text, TermStats stats)
 {
     Debug.Assert(State == TermsConsumerState.START);
     State = TermsConsumerState.INITIAL;

     Debug.Assert(text.Equals(LastTerm));
     Debug.Assert(stats.DocFreq > 0); // otherwise, this method should not be called.
     Debug.Assert(stats.DocFreq == LastPostingsConsumer.DocFreq);
     SumDocFreq += stats.DocFreq;

     if (fieldInfo.FieldIndexOptions != FieldInfo.IndexOptions.DOCS_ONLY)
     {
         Debug.Assert(stats.TotalTermFreq == LastPostingsConsumer.TotalTermFreq);
         SumTotalTermFreq += stats.TotalTermFreq;
     }
     else
     {
         // Docs-only fields carry no frequencies, so -1 is the expected value.
         Debug.Assert(stats.TotalTermFreq == -1);
     }

     @in.FinishTerm(text, stats);
 }
コード例 #27
0
        public override void Build(IInputEnumerator enumerator)
        {
            if (enumerator.HasContexts)
            {
                throw new ArgumentException("this suggester doesn't support contexts");
            }
            string prefix     = this.GetType().Name;
            var    directory  = OfflineSorter.DefaultTempDir();
            var    tempInput  = FileSupport.CreateTempFile(prefix, ".input", directory);
            var    tempSorted = FileSupport.CreateTempFile(prefix, ".sorted", directory);

            hasPayloads = enumerator.HasPayloads;

            var writer = new OfflineSorter.ByteSequencesWriter(tempInput);

            OfflineSorter.ByteSequencesReader reader = null;
            var scratch = new BytesRef();

            TokenStreamToAutomaton ts2a = GetTokenStreamToAutomaton();

            bool success = false;

            count = 0;
            byte[] buffer = new byte[8];
            try
            {
                var      output = new ByteArrayDataOutput(buffer);
                BytesRef surfaceForm;

                while (enumerator.MoveNext())
                {
                    surfaceForm = enumerator.Current;
                    ISet <Int32sRef> paths = ToFiniteStrings(surfaceForm, ts2a);

                    maxAnalyzedPathsForOneInput = Math.Max(maxAnalyzedPathsForOneInput, paths.Count);

                    foreach (Int32sRef path in paths)
                    {
                        Util.Fst.Util.ToBytesRef(path, scratch);

                        // length of the analyzed text (FST input)
                        if (scratch.Length > ushort.MaxValue - 2)
                        {
                            throw new ArgumentException("cannot handle analyzed forms > " + (ushort.MaxValue - 2) +
                                                        " in length (got " + scratch.Length + ")");
                        }
                        ushort analyzedLength = (ushort)scratch.Length;

                        // compute the required length:
                        // analyzed sequence + weight (4) + surface + analyzedLength (short)
                        int requiredLength = analyzedLength + 4 + surfaceForm.Length + 2;

                        BytesRef payload;

                        if (hasPayloads)
                        {
                            if (surfaceForm.Length > (ushort.MaxValue - 2))
                            {
                                throw new ArgumentException("cannot handle surface form > " + (ushort.MaxValue - 2) +
                                                            " in length (got " + surfaceForm.Length + ")");
                            }
                            payload = enumerator.Payload;
                            // payload + surfaceLength (short)
                            requiredLength += payload.Length + 2;
                        }
                        else
                        {
                            payload = null;
                        }

                        buffer = ArrayUtil.Grow(buffer, requiredLength);

                        output.Reset(buffer);

                        output.WriteInt16((short)analyzedLength);

                        output.WriteBytes(scratch.Bytes, scratch.Offset, scratch.Length);

                        output.WriteInt32(EncodeWeight(enumerator.Weight));

                        if (hasPayloads)
                        {
                            for (int i = 0; i < surfaceForm.Length; i++)
                            {
                                if (surfaceForm.Bytes[i] == PAYLOAD_SEP)
                                {
                                    throw new ArgumentException(
                                              "surface form cannot contain unit separator character U+001F; this character is reserved");
                                }
                            }
                            output.WriteInt16((short)surfaceForm.Length);
                            output.WriteBytes(surfaceForm.Bytes, surfaceForm.Offset, surfaceForm.Length);
                            output.WriteBytes(payload.Bytes, payload.Offset, payload.Length);
                        }
                        else
                        {
                            output.WriteBytes(surfaceForm.Bytes, surfaceForm.Offset, surfaceForm.Length);
                        }

                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(output.Position == requiredLength, () => output.Position + " vs " + requiredLength);
                        }

                        writer.Write(buffer, 0, output.Position);
                    }
                    count++;
                }
                writer.Dispose();

                // Sort all input/output pairs (required by FST.Builder):
                (new OfflineSorter(new AnalyzingComparer(hasPayloads))).Sort(tempInput, tempSorted);

                // Free disk space:
                tempInput.Delete();

                reader = new OfflineSorter.ByteSequencesReader(tempSorted);

                var outputs = new PairOutputs <long?, BytesRef>(PositiveInt32Outputs.Singleton,
                                                                ByteSequenceOutputs.Singleton);
                var builder = new Builder <PairOutputs <long?, BytesRef> .Pair>(FST.INPUT_TYPE.BYTE1, outputs);

                // Build FST:
                BytesRef  previousAnalyzed = null;
                BytesRef  analyzed         = new BytesRef();
                BytesRef  surface          = new BytesRef();
                Int32sRef scratchInts      = new Int32sRef();
                var       input            = new ByteArrayDataInput();

                // Used to remove duplicate surface forms (but we
                // still index the hightest-weight one).  We clear
                // this when we see a new analyzed form, so it cannot
                // grow unbounded (at most 256 entries):
                var seenSurfaceForms = new JCG.HashSet <BytesRef>();

                var dedup = 0;
                while (reader.Read(scratch))
                {
                    input.Reset(scratch.Bytes, scratch.Offset, scratch.Length);
                    ushort analyzedLength = (ushort)input.ReadInt16();
                    analyzed.Grow(analyzedLength + 2);
                    input.ReadBytes(analyzed.Bytes, 0, analyzedLength);
                    analyzed.Length = analyzedLength;

                    long cost = input.ReadInt32();

                    surface.Bytes = scratch.Bytes;
                    if (hasPayloads)
                    {
                        surface.Length = (ushort)input.ReadInt16();
                        surface.Offset = input.Position;
                    }
                    else
                    {
                        surface.Offset = input.Position;
                        surface.Length = scratch.Length - surface.Offset;
                    }

                    if (previousAnalyzed == null)
                    {
                        previousAnalyzed = new BytesRef();
                        previousAnalyzed.CopyBytes(analyzed);
                        seenSurfaceForms.Add(BytesRef.DeepCopyOf(surface));
                    }
                    else if (analyzed.Equals(previousAnalyzed))
                    {
                        dedup++;
                        if (dedup >= maxSurfaceFormsPerAnalyzedForm)
                        {
                            // More than maxSurfaceFormsPerAnalyzedForm
                            // dups: skip the rest:
                            continue;
                        }
                        if (seenSurfaceForms.Contains(surface))
                        {
                            continue;
                        }
                        seenSurfaceForms.Add(BytesRef.DeepCopyOf(surface));
                    }
                    else
                    {
                        dedup = 0;
                        previousAnalyzed.CopyBytes(analyzed);
                        seenSurfaceForms.Clear();
                        seenSurfaceForms.Add(BytesRef.DeepCopyOf(surface));
                    }

                    // TODO: I think we can avoid the extra 2 bytes when
                    // there is no dup (dedup==0), but we'd have to fix
                    // the exactFirst logic ... which would be sort of
                    // hairy because we'd need to special case the two
                    // (dup/not dup)...

                    // NOTE: must be byte 0 so we sort before whatever
                    // is next
                    analyzed.Bytes[analyzed.Offset + analyzed.Length]     = 0;
                    analyzed.Bytes[analyzed.Offset + analyzed.Length + 1] = (byte)dedup;
                    analyzed.Length += 2;

                    Util.Fst.Util.ToInt32sRef(analyzed, scratchInts);
                    //System.out.println("ADD: " + scratchInts + " -> " + cost + ": " + surface.utf8ToString());
                    if (!hasPayloads)
                    {
                        builder.Add(scratchInts, outputs.NewPair(cost, BytesRef.DeepCopyOf(surface)));
                    }
                    else
                    {
                        int      payloadOffset = input.Position + surface.Length;
                        int      payloadLength = scratch.Length - payloadOffset;
                        BytesRef br            = new BytesRef(surface.Length + 1 + payloadLength);
                        Array.Copy(surface.Bytes, surface.Offset, br.Bytes, 0, surface.Length);
                        br.Bytes[surface.Length] = PAYLOAD_SEP;
                        Array.Copy(scratch.Bytes, payloadOffset, br.Bytes, surface.Length + 1, payloadLength);
                        br.Length = br.Bytes.Length;
                        builder.Add(scratchInts, outputs.NewPair(cost, br));
                    }
                }
                fst = builder.Finish();

                //Util.dotToFile(fst, "/tmp/suggest.dot");

                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Dispose(reader, writer);
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(reader, writer);
                }

                tempInput.Delete();
                tempSorted.Delete();
            }
        }
コード例 #28
0
        /// <summary>
        /// Tries to position every sub-enumerator exactly on <paramref name="term"/> and records
        /// the ones that succeed in <c>top</c>.
        /// </summary>
        /// <param name="term">The term to look up.</param>
        /// <returns><c>true</c> if at least one sub-enumerator matched <paramref name="term"/> exactly.</returns>
        public override bool SeekExact(BytesRef term)
        {
            queue.Clear();
            numTop = 0;

            // Must be evaluated against the previous seek target before it is cleared below.
            bool movingForward = lastSeek != null && termComp.Compare(lastSeek, term) <= 0;

            lastSeek      = null;
            lastSeekExact = true;

            for (int subIdx = 0; subIdx < numSubs; subIdx++)
            {
                bool matched;

                if (!movingForward)
                {
                    matched = currentSubs[subIdx].Terms.SeekExact(term);
                }
                else
                {
                    // LUCENE-2130: the new target is at or after the previous seek target, so a
                    // sub whose current term is already at or past the new target does not need
                    // to be re-seeked; it would simply land on the same spot.
                    BytesRef subTerm = currentSubs[subIdx].Current;
                    if (subTerm == null)
                    {
                        matched = false;
                    }
                    else
                    {
                        int order = termComp.Compare(term, subTerm);
                        // == 0: already on the term; < 0: sub is beyond it; > 0: a real seek is needed.
                        matched = order == 0 || (order > 0 && currentSubs[subIdx].Terms.SeekExact(term));
                    }
                }

                if (!matched)
                {
                    continue;
                }

                top[numTop++] = currentSubs[subIdx];
                current       = currentSubs[subIdx].Current = currentSubs[subIdx].Terms.Term;
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(term.Equals(currentSubs[subIdx].Current));
                }
            }

            // An exact match in any sub counts as a match overall.
            return numTop > 0;
        }
コード例 #29
0
 /// <summary>
 /// Tests whether <paramref name="term"/> is within <paramref name="k"/> edits of the query term.
 /// </summary>
 /// <param name="term">Candidate term.</param>
 /// <param name="k">Edit distance to test; 0 means an exact comparison against <c>TermRef</c>.</param>
 /// <returns><c>true</c> if the term matches at the given distance.</returns>
 internal bool Matches(BytesRef term, int k)
 {
     if (k == 0)
     {
         // Distance zero is plain equality; no matcher is consulted.
         return term.Equals(TermRef);
     }

     return Matchers[k].Run(term.Bytes, term.Offset, term.Length);
 }
コード例 #30
0
// NOTE: the Java original declares 'throws java.io.IOException'; .NET has no equivalent clause.
		/// <summary>
		/// Moves the underlying FST enumerator to the first entry at or after <paramref name="text"/>.
		/// </summary>
		/// <param name="text">The term to seek to.</param>
		/// <returns>
		/// <c>END</c> when the FST enumerator returns no entry, <c>FOUND</c> when the entry's input
		/// equals <paramref name="text"/>, otherwise <c>NOT_FOUND</c>.
		/// </returns>
		public override SeekStatus seekCeil(BytesRef text)
		{
			current = fstEnum.seekCeil(text);
			if (current == null)
			{
				return SeekStatus.END;
			}

			// The enumerator moved, so clear the decode flag for the new entry.
			didDecode = false;

			return text.Equals(current.input) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
		}
コード例 #31
0
                /// <summary>
                /// Creates an enumerator over the terms of <paramref name="outerInstance"/> driven by
                /// <paramref name="compiled"/>, optionally pre-positioned relative to
                /// <paramref name="startTerm"/>.
                /// </summary>
                /// <param name="outerInstance">Field whose term bytes, term offsets and skip arrays are walked.</param>
                /// <param name="compiled">Compiled automaton supplying the run automaton and its sorted transitions.</param>
                /// <param name="startTerm">
                /// Term to position on, or <c>null</c> to leave <c>termOrd</c> at -1. The automaton is
                /// required to accept this term (asserted while stepping through its bytes).
                /// </param>
                public DirectIntersectTermsEnum(DirectPostingsFormat.DirectField outerInstance,
                    CompiledAutomaton compiled, BytesRef startTerm)
                {
                    this.outerInstance = outerInstance;
                    runAutomaton = compiled.RunAutomaton;
                    compiledAutomaton = compiled;
                    termOrd = -1;

                    // Root frame of the state stack: starts in the automaton's initial state and is
                    // valid up to (but excluding) the ord one past the last term.
                    states = new State[1];
                    states[0] = new State(this);
                    states[0].changeOrd = outerInstance.terms.Length;
                    states[0].state = runAutomaton.InitialState;
                    states[0].transitions = compiledAutomaton.SortedTransitions[states[0].state];
                    states[0].transitionUpto = -1;
                    states[0].transitionMax = -1;

                    if (startTerm != null)
                    {
                        int skipUpto = 0;
                        if (startTerm.Length == 0)
                        {
                            // An empty start term can only match a zero-length first term.
                            if (outerInstance.terms.Length > 0 && outerInstance.termOffsets[1] == 0)
                            {
                                termOrd = 0;
                            }
                        }
                        else
                        {
                            termOrd++;

                            // Consume startTerm one byte (label) at a time.
                            for (int i = 0; i < startTerm.Length; i++)
                            {
                                int label = startTerm.Bytes[startTerm.Offset + i] & 0xFF;

                                // Advance this frame's transition cursor until its range covers the label.
                                while (label > states[i].transitionMax)
                                {
                                    states[i].transitionUpto++;
                                    Debug.Assert(states[i].transitionUpto < states[i].transitions.Length);
                                    states[i].transitionMin = states[i].transitions[states[i].transitionUpto].Min;
                                    states[i].transitionMax = states[i].transitions[states[i].transitionUpto].Max;
                                    Debug.Assert(states[i].transitionMin >= 0);
                                    Debug.Assert(states[i].transitionMin <= 255);
                                    Debug.Assert(states[i].transitionMax >= 0);
                                    Debug.Assert(states[i].transitionMax <= 255);
                                }

                                // Skip forwards until we find a term matching
                                // the label at this position:
                                while (termOrd < outerInstance.terms.Length)
                                {
                                    int skipOffset = outerInstance.skipOffsets[termOrd];
                                    int numSkips = outerInstance.skipOffsets[termOrd + 1] - skipOffset;
                                    int termOffset_i = outerInstance.termOffsets[termOrd];
                                    int termLength = outerInstance.termOffsets[1 + termOrd] - termOffset_i;

                                    if (termOrd == states[stateUpto].changeOrd)
                                    {
                                        // Reached the ord at which the top frame stops applying:
                                        // pop it and step back one term.
                                        stateUpto--;
                                        termOrd--;
                                        return;
                                    }

                                    if (termLength == i)
                                    {
                                        // Term too short to have a byte at position i; try the next term.
                                        termOrd++;
                                        skipUpto = 0;
                                    }
                                    else if (label < (outerInstance.termBytes[termOffset_i + i] & 0xFF))
                                    {
                                        // No match and this term is already beyond startTerm.
                                        termOrd--;
                                        stateUpto -= skipUpto;
                                        Debug.Assert(stateUpto >= 0);
                                        return;
                                    }
                                    else if (label == (outerInstance.termBytes[termOffset_i + i] & 0xFF))
                                    {
                                        // Byte at position i matches.
                                        if (skipUpto < numSkips)
                                        {
                                            Grow();

                                            int nextState = runAutomaton.Step(states[stateUpto].state, label);

                                            // Automaton is required to accept startTerm:
                                            Debug.Assert(nextState != -1);

                                            // Push a frame for the next byte position, bounded by the skip entry.
                                            stateUpto++;
                                            states[stateUpto].changeOrd = outerInstance.skips[skipOffset + skipUpto++];
                                            states[stateUpto].state = nextState;
                                            states[stateUpto].transitions =
                                                compiledAutomaton.SortedTransitions[nextState];
                                            states[stateUpto].transitionUpto = -1;
                                            states[stateUpto].transitionMax = -1;

                                            // Match next label at this same term:
                                            goto nextLabelContinue;
                                        }
                                        else
                                        {
                                            // Index exhausted: just scan now (the
                                            // number of scans required will be less
                                            // than the minSkipCount):

                                            int startTermOrd = termOrd;
                                            while (termOrd < outerInstance.terms.Length &&
                                                   outerInstance.Compare(termOrd, startTerm) <= 0)
                                            {
                                                Debug.Assert(termOrd == startTermOrd ||
                                                             outerInstance.skipOffsets[termOrd] ==
                                                             outerInstance.skipOffsets[termOrd + 1]);
                                                termOrd++;
                                            }
                                            Debug.Assert(termOrd - startTermOrd < outerInstance.minSkipCount);
                                            termOrd--;
                                            stateUpto -= skipUpto;
                                            return;
                                        }
                                    }
                                    else
                                    {
                                        // Term's byte is smaller than the label: jump via the skip
                                        // entry when one is available, otherwise move to the next term.
                                        if (skipUpto < numSkips)
                                        {
                                            termOrd = outerInstance.skips[skipOffset + skipUpto];
                                        }
                                        else
                                        {
                                            termOrd++;
                                        }
                                        skipUpto = 0;
                                    }
                                }

                                // startTerm is >= last term so enum will not
                                // return any terms:
                                termOrd--;
                                return;
                                nextLabelContinue:
                                ;
                            }
                        }

                        // termOrd is still -1 when startTerm is empty and the field has no empty
                        // term; the offsets must only be read for a valid ord, otherwise
                        // termOffsets[-1] throws.
                        if (termOrd >= 0)
                        {
                            int termOffset = outerInstance.termOffsets[termOrd];
                            int termLen = outerInstance.termOffsets[1 + termOrd] - termOffset;

                            if (!startTerm.Equals(new BytesRef(outerInstance.termBytes, termOffset, termLen)))
                            {
                                // Positioned on a term that is not exactly startTerm: undo the
                                // frames pushed for it and step back one term.
                                stateUpto -= skipUpto;
                                termOrd--;
                            }
                        }
                    }
                }
コード例 #32
0
            public override SeekStatus SeekCeil(BytesRef target)
            {
                // already here
                if (term != null && term.Equals(target))
                {
                    return(SeekStatus.FOUND);
                }

                int startIdx = Array.BinarySearch(outerInstance.m_indexedTermsArray, target);

                if (startIdx >= 0)
                {
                    // we hit the term exactly... lucky us!
                    TermsEnum.SeekStatus seekStatus = termsEnum.SeekCeil(target);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(seekStatus == TermsEnum.SeekStatus.FOUND);
                    }
                    ord = startIdx << outerInstance.indexIntervalBits;
                    SetTerm();
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(term != null);
                    }
                    return(SeekStatus.FOUND);
                }

                // we didn't hit the term exactly
                startIdx = -startIdx - 1;

                if (startIdx == 0)
                {
                    // our target occurs *before* the first term
                    TermsEnum.SeekStatus seekStatus = termsEnum.SeekCeil(target);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(seekStatus == TermsEnum.SeekStatus.NOT_FOUND);
                    }
                    ord = 0;
                    SetTerm();
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(term != null);
                    }
                    return(SeekStatus.NOT_FOUND);
                }

                // back up to the start of the block
                startIdx--;

                if ((ord >> outerInstance.indexIntervalBits) == startIdx && term != null && term.CompareTo(target) <= 0)
                {
                    // we are already in the right block and the current term is before the term we want,
                    // so we don't need to seek.
                }
                else
                {
                    // seek to the right block
                    TermsEnum.SeekStatus seekStatus = termsEnum.SeekCeil(outerInstance.m_indexedTermsArray[startIdx]);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(seekStatus == TermsEnum.SeekStatus.FOUND);
                    }
                    ord = startIdx << outerInstance.indexIntervalBits;
                    SetTerm();
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(term != null);                           // should be non-null since it's in the index
                    }
                }

                while (term != null && term.CompareTo(target) < 0)
                {
                    Next();
                }

                if (term == null)
                {
                    return(SeekStatus.END);
                }
                else if (term.CompareTo(target) == 0)
                {
                    return(SeekStatus.FOUND);
                }
                else
                {
                    return(SeekStatus.NOT_FOUND);
                }
            }
コード例 #33
0
            /// <summary>
            /// Positions this enumerator on the first term at or after <paramref name="target"/>,
            /// using the indexed-terms array to pick the block to scan.
            /// </summary>
            /// <param name="target">The term to seek to.</param>
            /// <returns>
            /// <c>FOUND</c> on an exact match, <c>NOT_FOUND</c> when positioned on a later term,
            /// <c>END</c> when the scan runs out of terms.
            /// </returns>
            public override SeekStatus SeekCeil(BytesRef target)
            {
                // already here
                if (Term_Renamed != null && Term_Renamed.Equals(target))
                {
                    return(SeekStatus.FOUND);
                }

                // Search the indexed terms in place. Copying them into a List on every seek
                // (ToList().BinarySearch) cost an O(n) allocation per call for the same result.
                // NOTE(review): assumes IndexedTermsArray is a BytesRef[] - confirm against the declaration.
                int startIdx = System.Array.BinarySearch(OuterInstance.IndexedTermsArray, target);

                if (startIdx >= 0)
                {
                    // we hit the term exactly... lucky us!
                    TermsEnum.SeekStatus seekStatus = TermsEnum.SeekCeil(target);
                    Debug.Assert(seekStatus == TermsEnum.SeekStatus.FOUND);
                    Ord_Renamed = startIdx << OuterInstance.IndexIntervalBits;
                    SetTerm();
                    Debug.Assert(Term_Renamed != null);
                    return(SeekStatus.FOUND);
                }

                // we didn't hit the term exactly: convert the negative result into the insertion point
                startIdx = -startIdx - 1;

                if (startIdx == 0)
                {
                    // our target occurs *before* the first term
                    TermsEnum.SeekStatus seekStatus = TermsEnum.SeekCeil(target);
                    Debug.Assert(seekStatus == TermsEnum.SeekStatus.NOT_FOUND);
                    Ord_Renamed = 0;
                    SetTerm();
                    Debug.Assert(Term_Renamed != null);
                    return(SeekStatus.NOT_FOUND);
                }

                // back up to the start of the block
                startIdx--;

                if ((Ord_Renamed >> OuterInstance.IndexIntervalBits) == startIdx && Term_Renamed != null && Term_Renamed.CompareTo(target) <= 0)
                {
                    // we are already in the right block and the current term is before the term we want,
                    // so we don't need to seek.
                }
                else
                {
                    // seek to the right block
                    TermsEnum.SeekStatus seekStatus = TermsEnum.SeekCeil(OuterInstance.IndexedTermsArray[startIdx]);
                    Debug.Assert(seekStatus == TermsEnum.SeekStatus.FOUND);
                    Ord_Renamed = startIdx << OuterInstance.IndexIntervalBits;
                    SetTerm();
                    Debug.Assert(Term_Renamed != null); // should be non-null since it's in the index
                }

                // scan forward within the block until we reach or pass the target
                while (Term_Renamed != null && Term_Renamed.CompareTo(target) < 0)
                {
                    Next();
                }

                if (Term_Renamed == null)
                {
                    return(SeekStatus.END);
                }
                else if (Term_Renamed.CompareTo(target) == 0)
                {
                    return(SeekStatus.FOUND);
                }
                else
                {
                    return(SeekStatus.NOT_FOUND);
                }
            }
コード例 #34
0
 /// <summary>
 /// Repositions the enumerator from a previously captured term state instead of searching.
 /// </summary>
 /// <param name="term">The term the state was captured for; only checked by a debug assertion.</param>
 /// <param name="state">Must be an <c>OrdTermState</c>; its <c>Ord</c> becomes the current term ordinal.</param>
 public override void SeekExact(BytesRef term, TermState state)
 {
     var ordState = (OrdTermState) state;
     termOrd = (int) ordState.Ord;
     SetTerm();

     // After SetTerm(), scratch is expected to equal the requested term.
     Debug.Assert(term.Equals(scratch));
 }
コード例 #35
0
 /// <summary>
 /// Tests whether <paramref name="term"/> is within <paramref name="k"/> edits of the query term.
 /// </summary>
 /// <param name="term">Candidate term.</param>
 /// <param name="k">Edit distance to test; 0 means an exact comparison against <c>termRef</c>.</param>
 /// <returns><c>true</c> if the term matches at the given distance.</returns>
 internal bool Matches(BytesRef term, int k)
 {
     if (k == 0)
     {
         // Distance zero is plain equality; no matcher is consulted.
         return(term.Equals(termRef));
     }

     return(matchers[k].Run(term.Bytes, term.Offset, term.Length));
 }
コード例 #36
0
            /// <summary>
            /// Reads this field's lines from a clone of the input, starting at <c>_termsStart</c>,
            /// and builds the term FST.
            /// </summary>
            /// <remarks>
            /// Each term maps to the pair (file pointer recorded after its TERM line,
            /// (docFreq, totalTermFreq)). Reading stops at the END marker or at the next FIELD line.
            /// Along the way this updates <c>_sumDocFreq</c>, <c>_sumTotalTermFreq</c>,
            /// <c>_termCount</c> and, at the end, <c>_docCount</c> and <c>_fst</c>.
            /// </remarks>
            private void LoadTerms()
            {
                var posIntOutputs = PositiveInt32Outputs.Singleton;
                var outputsInner  = new PairOutputs <long?, long?>(posIntOutputs, posIntOutputs);
                var outputs       = new PairOutputs <long?, PairOutputs <long?, long?> .Pair>(posIntOutputs, outputsInner);

                // Output type is a nested pair: (docsStart, (docFreq, totalTermFreq)).
                var b     = new Builder <PairOutputs <long?, PairOutputs <long?, long?> .Pair> .Pair>(FST.INPUT_TYPE.BYTE1, outputs);
                var input = (IndexInput)_outerInstance._input.Clone();

                input.Seek(_termsStart);

                var  lastTerm      = new BytesRef(10);
                long lastDocsStart = -1; // -1 until the first TERM line has been seen
                int  docFreq       = 0;
                long totalTermFreq = 0;
                var  visitedDocs   = new FixedBitSet(_maxDoc);

                var scratchIntsRef = new Int32sRef();

                while (true)
                {
                    SimpleTextUtil.ReadLine(input, _scratch);
                    if (_scratch.Equals(SimpleTextFieldsWriter.END) || StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD))
                    {
                        // End of this field: flush the pending term, if any.
                        if (lastDocsStart != -1)
                        {
                            b.Add(Util.ToInt32sRef(lastTerm, scratchIntsRef),
                                  outputs.NewPair(lastDocsStart, outputsInner.NewPair(docFreq, totalTermFreq)));
                            _sumTotalTermFreq += totalTermFreq;
                        }
                        break;
                    }

                    if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC))
                    {
                        docFreq++;
                        _sumDocFreq++;
                        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length, _scratch.Length - SimpleTextFieldsWriter.DOC.Length,
                                                _scratchUtf16);
                        int docId = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                        visitedDocs.Set(docId);
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
                    {
                        UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length,
                                                _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16);
                        totalTermFreq += ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
                    }
                    else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM))
                    {
                        // A new term starts: flush the previous one first.
                        if (lastDocsStart != -1)
                        {
                            b.Add(Util.ToInt32sRef(lastTerm, scratchIntsRef),
                                  outputs.NewPair(lastDocsStart, outputsInner.NewPair(docFreq, totalTermFreq)));
                        }
                        lastDocsStart = input.GetFilePointer();
                        int len = _scratch.Length - SimpleTextFieldsWriter.TERM.Length;
                        if (len > lastTerm.Length)
                        {
                            lastTerm.Grow(len);
                        }
                        // Honor _scratch.Offset, as the DOC and FREQ branches do, so the term bytes
                        // are read from the right place even if the line does not start at index 0.
                        Array.Copy(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.TERM.Length, lastTerm.Bytes, 0, len);
                        lastTerm.Length    = len;
                        docFreq            = 0;
                        _sumTotalTermFreq += totalTermFreq;
                        totalTermFreq      = 0;
                        _termCount++;
                    }
                }
                _docCount = visitedDocs.Cardinality();
                _fst      = b.Finish();
            }