Ejemplo n.º 1
0
 /// <summary>
 /// Checks the finished term against the state tracked by this consumer, adds its
 /// statistics to the running totals, and then forwards the call to the wrapped consumer.
 /// </summary>
 /// <param name="text">The term being finished; asserted to equal <c>lastTerm</c>.</param>
 /// <param name="stats">Statistics for the term; asserted against <c>lastPostingsConsumer</c>.</param>
 public override void FinishTerm(BytesRef text, TermStats stats)
 {
     if (Debugging.AssertsEnabled)
     {
         Debugging.Assert(state == TermsConsumerState.START);
     }
     state = TermsConsumerState.INITIAL;

     if (Debugging.AssertsEnabled)
     {
         Debugging.Assert(text.Equals(lastTerm));
         // A term without documents should never reach this method.
         Debugging.Assert(stats.DocFreq > 0);
         Debugging.Assert(stats.DocFreq == lastPostingsConsumer.docFreq);
     }
     sumDocFreq += stats.DocFreq;

     if (fieldInfo.IndexOptions != IndexOptions.DOCS_ONLY)
     {
         if (Debugging.AssertsEnabled)
         {
             Debugging.Assert(stats.TotalTermFreq == lastPostingsConsumer.totalTermFreq);
         }
         sumTotalTermFreq += stats.TotalTermFreq;
     }
     else if (Debugging.AssertsEnabled)
     {
         // DOCS_ONLY fields are expected to report -1 as their total term frequency.
         Debugging.Assert(stats.TotalTermFreq == -1);
     }

     @in.FinishTerm(text, stats);
 }
Ejemplo n.º 2
0
            /// <summary>
            /// Assembles the term's statistics header followed by the contents of
            /// <c>postingsWriter.buffer</c> into one byte sequence and adds a copy of it
            /// to <c>builder</c>, keyed by the term.
            /// </summary>
            /// <param name="text">The term being finished.</param>
            /// <param name="stats">Statistics for the term; <c>DocFreq</c> is asserted to equal <c>postingsWriter.docCount</c>.</param>
            public override void FinishTerm(BytesRef text, TermStats stats)
            {
                Debug.Assert(postingsWriter.docCount == stats.DocFreq);
                Debug.Assert(buffer2.FilePointer == 0);

                // Header: docFreq, then (unless the field is DOCS_ONLY) the amount by which
                // totalTermFreq exceeds docFreq.
                buffer2.WriteVInt(stats.DocFreq);
                if (field.FieldIndexOptions != IndexOptions.DOCS_ONLY)
                {
                    buffer2.WriteVLong(stats.TotalTermFreq - stats.DocFreq);
                }

                // Capture the header size before the scratch buffer is reset.
                int headerLength = (int)buffer2.FilePointer;
                buffer2.WriteTo(finalBuffer, 0);
                buffer2.Reset();

                int combinedLength = headerLength + (int)postingsWriter.buffer.FilePointer;
                if (finalBuffer.Length < combinedLength)
                {
                    finalBuffer = ArrayUtil.Grow(finalBuffer, combinedLength);
                }

                // The postings bytes are placed directly after the header.
                postingsWriter.buffer.WriteTo(finalBuffer, headerLength);
                postingsWriter.buffer.Reset();

                // spare only points at the shared buffer; a deep copy is what gets stored.
                spare.Bytes = finalBuffer;
                spare.Length = combinedLength;

                builder.Add(Util.ToIntsRef(text, scratchIntsRef), BytesRef.DeepCopyOf(spare));
                termCount++;
            }
            /// <summary>
            /// Adds a truncated form of <paramref name="text"/> to <c>fstBuilder</c>, mapped to
            /// <paramref name="termsFilePointer"/>, and then stores the full term in <c>lastTerm</c>.
            /// </summary>
            /// <param name="text">The term to add; its length is shortened temporarily and restored before returning.</param>
            /// <param name="stats">Statistics for the term (not read by this method).</param>
            /// <param name="termsFilePointer">The value stored in the FST for this term.</param>
            public override void Add(BytesRef text, TermStats stats, long termsFilePointer)
            {
                if (text.Length == 0)
                {
                    // The empty term was already added by the constructor; only verify the pointer.
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(termsFilePointer == startTermsFilePointer);
                    }
                    return;
                }

                int fullLength = text.Length;

                // Shorten the term in place to the prefix length computed against the previous term.
                text.Length = outerInstance.IndexedTermPrefixLength(lastTerm, text);
                try
                {
                    fstBuilder.Add(Util.Fst.Util.ToInt32sRef(text, scratchIntsRef), termsFilePointer);
                }
                finally
                {
                    // Always hand the caller's term back with its original length.
                    text.Length = fullLength;
                }

                lastTerm.CopyBytes(text);
            }
Ejemplo n.º 4
0
 /// <summary>
 /// Implemented by subclasses to record <paramref name="text"/> together with
 /// <paramref name="termsFilePointer"/>.
 /// </summary>
 /// <param name="text">The term to record.</param>
 /// <param name="stats">Statistics for <paramref name="text"/>.</param>
 /// <param name="termsFilePointer">A position associated with the term.
 /// NOTE(review): presumably the terms-file offset at which the term's block starts - confirm against callers.</param>
 public abstract void Add(BytesRef text, TermStats stats, long termsFilePointer);
 /// <summary>
 /// Adds the term to the bloom filter when it has at least one document, then
 /// forwards the call to the delegate terms consumer.
 /// </summary>
 /// <param name="text">The term being finished.</param>
 /// <param name="stats">Statistics for the term; only <c>DocFreq</c> is read here.</param>
 public override void FinishTerm(BytesRef text, TermStats stats)
 {
     bool termHasDocs = stats.DocFreq > 0;
     if (termHasDocs)
     {
         _bloomFilter.AddValue(text);
     }

     // The delegate is called regardless of whether the filter was updated.
     _delegateTermsConsumer.FinishTerm(text, stats);
 }
Ejemplo n.º 6
0
 /// <summary>
 /// Implemented by subclasses to decide whether <paramref name="text"/> is an index term.
 /// </summary>
 /// <param name="text">The term under consideration.</param>
 /// <param name="stats">Statistics for <paramref name="text"/>.</param>
 /// <returns><c>true</c> if the term is an index term; otherwise <c>false</c>.
 /// NOTE(review): presumably the caller passes the term to <c>Add</c> only when this returns <c>true</c> - confirm against callers.</returns>
 public abstract bool CheckIndexTerm(BytesRef text, TermStats stats);
 /// <summary>
 /// Adds a truncated form of <paramref name="text"/> to <c>_fstBuilder</c>, mapped to
 /// <paramref name="termsFilePointer"/>, and then stores the full term in <c>_lastTerm</c>.
 /// </summary>
 /// <param name="text">The term to add; its length is shortened temporarily and restored before returning.</param>
 /// <param name="stats">Statistics for the term (not read by this method).</param>
 /// <param name="termsFilePointer">The value stored in the FST for this term.</param>
 public override void Add(BytesRef text, TermStats stats, long termsFilePointer)
 {
     if (text.Length == 0)
     {
         // The empty term was already added by the constructor; only verify the pointer.
         Debug.Assert(termsFilePointer == _startTermsFilePointer);
         return;
     }

     int fullLength = text.Length;

     // Shorten the term in place to the prefix length computed against the previous term.
     text.Length = _vgtiw.IndexedTermPrefixLength(_lastTerm, text);
     try
     {
         _fstBuilder.Add(Util.ToIntsRef(text, _scratchIntsRef), termsFilePointer);
     }
     finally
     {
         // Always hand the caller's term back with its original length.
         text.Length = fullLength;
     }

     _lastTerm.CopyBytes(text);
 }
            /// <summary>
            /// Asks the policy whether <paramref name="text"/> is an index term, forcing the
            /// answer to <c>true</c> while <c>_first</c> is still set.
            /// </summary>
            /// <param name="text">The term under consideration.</param>
            /// <param name="stats">Statistics for the term, passed through to the policy.</param>
            /// <returns><c>true</c> if the term is an index term; otherwise <c>false</c>, after saving the term in <c>_lastTerm</c>.</returns>
            public override bool CheckIndexTerm(BytesRef text, TermStats stats)
            {
                // The policy is consulted on every call, before _first is looked at.
                bool chosenByPolicy = _vgtiw._policy.IsIndexTerm(text, stats);

                if (!chosenByPolicy && !_first)
                {
                    _lastTerm.CopyBytes(text);
                    return false;
                }

                // NOTE: the first term per field must be indexed even if the policy declines it.
                _first = false;
                return true;
            }
Ejemplo n.º 9
0
 /// <summary>
 /// Checks the finished term against the state tracked by this consumer, adds its
 /// statistics to the running totals, and then forwards the call to the wrapped consumer.
 /// </summary>
 /// <param name="text">The term being finished; asserted to equal <c>LastTerm</c>.</param>
 /// <param name="stats">Statistics for the term; asserted against <c>LastPostingsConsumer</c>.</param>
 public override void FinishTerm(BytesRef text, TermStats stats)
 {
     Debug.Assert(State == TermsConsumerState.START);
     State = TermsConsumerState.INITIAL;

     Debug.Assert(text.Equals(LastTerm));
     // A term without documents should never reach this method.
     Debug.Assert(stats.DocFreq > 0);
     Debug.Assert(stats.DocFreq == LastPostingsConsumer.DocFreq);
     SumDocFreq += stats.DocFreq;

     if (fieldInfo.FieldIndexOptions != FieldInfo.IndexOptions.DOCS_ONLY)
     {
         Debug.Assert(stats.TotalTermFreq == LastPostingsConsumer.TotalTermFreq);
         SumTotalTermFreq += stats.TotalTermFreq;
     }
     else
     {
         // DOCS_ONLY fields are expected to report -1 as their total term frequency.
         Debug.Assert(stats.TotalTermFreq == -1);
     }

     @in.FinishTerm(text, stats);
 }
Ejemplo n.º 10
0
            /// <summary>
            /// Writes the indexed prefix of <paramref name="text"/> to the output and records
            /// its length and terms-pointer delta in the per-term arrays.
            /// </summary>
            /// <param name="text">The term to add; afterwards it is stored in <c>_lastTerm</c>.</param>
            /// <param name="stats">Statistics for the term (not read by this method).</param>
            /// <param name="termsFilePointer">The terms pointer for this term; stored as a delta from the previous one.</param>
            public override void Add(BytesRef text, TermStats stats, long termsFilePointer)
            {
                int prefixLength = _fgtiw.IndexedTermPrefixLength(_lastTerm, text);

                // Only the shortest prefix that differs from the prior term is written.
                _fgtiw.Output.WriteBytes(text.Bytes, text.Offset, prefixLength);

                // Make room for one more entry in each of the two arrays.
                if (NumIndexTerms == _termLengths.Length)
                {
                    _termLengths = ArrayUtil.Grow(_termLengths);
                }
                if (NumIndexTerms == _termsPointerDeltas.Length)
                {
                    _termsPointerDeltas = ArrayUtil.Grow(_termsPointerDeltas);
                }

                // The terms pointer is kept as the distance from the previous index term's pointer.
                var pointerDelta = termsFilePointer - _lastTermsPointer;
                _termsPointerDeltas[NumIndexTerms] = (int)pointerDelta;
                _lastTermsPointer = termsFilePointer;

                // The prefix length (in bytes) is stored as a short, so it must fit in one.
                Debug.Assert(prefixLength <= short.MaxValue);
                _termLengths[NumIndexTerms] = (short)prefixLength;
                _totTermLength += prefixLength;

                _lastTerm.CopyBytes(text);
                NumIndexTerms++;
            }
Ejemplo n.º 11
0
    public void finishTerm(BytesRef text, TermStats stats)  {

      Debug.Assert( stats.docFreq > 0;
      //System.out.println("BTW: finishTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " seg=" + segment + " df=" + stats.docFreq);

      final bool isIndexTerm = fieldIndexWriter.checkIndexTerm(text, stats);

      if (isIndexTerm) {
        if (pendingCount > 0) {
          // Instead of writing each term, live, we gather terms
          // in RAM in a pending buffer, and then write the
          // entire block in between index terms:
          flushBlock();
        }
        fieldIndexWriter.add(text, stats, out.getFilePointer());
        //System.out.println("  index term!");
      }

      if (pendingTerms.length == pendingCount) {
        final TermEntry[] newArray = new TermEntry[ArrayUtil.oversize(pendingCount+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
Ejemplo n.º 12
0
            /// <summary>
            /// Builds the metadata record for the finished term and adds it to <c>_builder</c>,
            /// keyed by the term.
            /// </summary>
            /// <param name="text">The term being finished.</param>
            /// <param name="stats">Statistics for the term; copied into both the term state and the metadata.</param>
            public override void FinishTerm(BytesRef text, TermStats stats)
            {
                var postings = _outerInstance._postingsWriter;
                var termState = postings.NewTermState();

                // The frequencies are written to the term state and the metadata record together.
                var termData = new FSTTermOutputs.TermData
                {
                    LONGS = new long[_longsSize],
                    BYTES = null,
                    DOC_FREQ = termState.DocFreq = stats.DocFreq,
                    TOTAL_TERM_FREQ = termState.TotalTermFreq = stats.TotalTermFreq
                };

                postings.FinishTerm(termState);
                postings.EncodeTerm(termData.LONGS, _metaWriter, _fieldInfo, termState, true);

                // Copy out whatever EncodeTerm wrote to the meta writer, then clear it for the next term.
                var encodedLength = (int) _metaWriter.FilePointer;
                if (encodedLength > 0)
                {
                    termData.BYTES = new byte[encodedLength];
                    _metaWriter.WriteTo(termData.BYTES, 0);
                    _metaWriter.Reset();
                }

                _builder.Add(Util.ToIntsRef(text, _scratchTerm), termData);
                _numTerms++;
            }
Ejemplo n.º 13
0
 /// <summary>
 /// Implemented by subclasses to decide whether <paramref name="text"/> is an index term.
 /// </summary>
 /// <param name="text">The term under consideration.</param>
 /// <param name="stats">Statistics for <paramref name="text"/>.</param>
 /// <returns><c>true</c> if the term is an index term; otherwise <c>false</c>.
 /// NOTE(review): presumably the caller passes the term to <c>Add</c> only when this returns <c>true</c> - confirm against callers.</returns>
 public abstract bool CheckIndexTerm(BytesRef text, TermStats stats);
Ejemplo n.º 14
0
 /// <summary>
 /// For a term that has at least one document, writes its skip data, fills in
 /// <c>TermInfo</c>, and adds the term to the terms output. Does nothing otherwise.
 /// </summary>
 /// <param name="text">The term being finished.</param>
 /// <param name="stats">Statistics for the term; only <c>DocFreq</c> is read here.</param>
 public override void FinishTerm(BytesRef text, TermStats stats)
 {
     if (stats.DocFreq <= 0)
     {
         return;
     }

     long skipPointer = OuterInstance.SkipListWriter.WriteSkip(OuterInstance.FreqOut);

     TermInfo.DocFreq = stats.DocFreq;
     // The skip offset is stored relative to the term's frequency pointer.
     TermInfo.SkipOffset = (int)(skipPointer - TermInfo.FreqPointer);

     OuterInstance.TermsOut.Add(FieldInfo.Number, text, TermInfo);
 }
Ejemplo n.º 15
0
 /// <summary>
 /// Implemented by subclasses to record <paramref name="text"/> together with
 /// <paramref name="termsFilePointer"/>.
 /// </summary>
 /// <param name="text">The term to record.</param>
 /// <param name="stats">Statistics for <paramref name="text"/>.</param>
 /// <param name="termsFilePointer">A position associated with the term.
 /// NOTE(review): presumably the terms-file offset at which the term's block starts - confirm against callers.</param>
 public abstract void Add(BytesRef text, TermStats stats, long termsFilePointer);
Ejemplo n.º 16
0
// The Java original declared "throws java.io.IOException"; .NET has no 'throws' clauses.
		/// <summary>
		/// Assembles the term's statistics header followed by the contents of
		/// <c>postingsWriter.buffer</c> into one byte sequence and adds a copy of it
		/// to <c>builder</c>, keyed by the term.
		/// </summary>
		/// <param name="text">The term being finished.</param>
		/// <param name="stats">Statistics for the term; <c>docFreq</c> is asserted to equal <c>postingsWriter.docCount</c>.</param>
		public override void finishTerm(BytesRef text, TermStats stats)
		{
		  Debug.Assert(postingsWriter.docCount == stats.docFreq);
		  Debug.Assert(buffer2.FilePointer == 0);

		  // Header: docFreq, then (unless the field is DOCS_ONLY) the amount by which
		  // totalTermFreq exceeds docFreq.
		  buffer2.WriteVInt(stats.docFreq);
		  if (field.IndexOptions != IndexOptions.DOCS_ONLY)
		  {
			buffer2.WriteVLong(stats.totalTermFreq - stats.docFreq);
		  }

		  // Capture the header size before the scratch buffer is reset.
		  int headerLength = (int) buffer2.FilePointer;
		  buffer2.WriteTo(finalBuffer, 0);
		  buffer2.reset();

		  int combinedLength = headerLength + (int) postingsWriter.buffer.FilePointer;
		  if (finalBuffer.Length < combinedLength)
		  {
			finalBuffer = ArrayUtil.grow(finalBuffer, combinedLength);
		  }

		  // The postings bytes are placed directly after the header.
		  postingsWriter.buffer.WriteTo(finalBuffer, headerLength);
		  postingsWriter.buffer.reset();

		  // spare only points at the shared buffer; a deep copy is what gets stored.
		  spare.bytes = finalBuffer;
		  spare.length = combinedLength;

		  builder.add(Util.toIntsRef(text, scratchIntsRef), BytesRef.deepCopyOf(spare));
		  termCount++;
		}
Ejemplo n.º 17
0
            /// <summary>
            /// Counts the term and reports whether its position (before counting) is a
            /// multiple of the term index interval.
            /// </summary>
            /// <param name="text">The term under consideration; saved in <c>_lastTerm</c> when the next term will be an index term.</param>
            /// <param name="stats">Statistics for the term (not read by this method).</param>
            /// <returns><c>true</c> if the term is an index term; otherwise <c>false</c>.</returns>
            public override bool CheckIndexTerm(BytesRef text, TermStats stats)
            {
                var interval = _fgtiw._termIndexInterval;

                // The term is counted on every call, whether or not it becomes an index term.
                var ordinal = _numTerms++;
                if (ordinal % interval == 0)
                {
                    return true;
                }

                // Save the last term just before the next index term so the
                // wasted suffix can be computed.
                if (_numTerms % interval == 0)
                {
                    _lastTerm.CopyBytes(text);
                }

                return false;
            }
Ejemplo n.º 18
0
            /// <summary>
            /// Writes the finished term's statistics and metadata to the stats and meta
            /// outputs and adds the term to <c>_builder</c> with the current term count as its value.
            /// </summary>
            /// <param name="text">The term being finished.</param>
            /// <param name="stats">Statistics for the term.</param>
            public override void FinishTerm(BytesRef text, TermStats stats)
            {
                // Buffer skip data at every SKIP_INTERVAL-th term, except the very first.
                if (_numTerms > 0 && _numTerms % SKIP_INTERVAL == 0)
                {
                    BufferSkip();
                }

                var encodedLongs = new long[_longsSize];

                long delta = stats.TotalTermFreq - stats.DocFreq;
                if (stats.TotalTermFreq <= 0)
                {
                    // No positive total term frequency: docFreq is written as-is.
                    _statsOut.WriteVInt(stats.DocFreq);
                }
                else if (delta == 0)
                {
                    // Low bit set: totalTermFreq equals docFreq, so nothing follows.
                    _statsOut.WriteVInt((stats.DocFreq << 1) | 1);
                }
                else
                {
                    // Low bit clear: the difference follows as a VLong.
                    _statsOut.WriteVInt(stats.DocFreq << 1);
                    _statsOut.WriteVLong(delta);
                }

                var postings = _outerInstance.postingsWriter;
                var termState = postings.NewTermState();
                termState.DocFreq = stats.DocFreq;
                termState.TotalTermFreq = stats.TotalTermFreq;
                postings.FinishTerm(termState);
                postings.EncodeTerm(encodedLongs, _metaBytesOut, _fieldInfo, termState, true);

                // Each long is written as a delta from the value written for the previous term.
                for (var slot = 0; slot < _longsSize; slot++)
                {
                    var current = encodedLongs[slot];
                    _metaLongsOut.WriteVLong(current - _lastLongs[slot]);
                    _lastLongs[slot] = current;
                }
                _metaLongsOut.WriteVLong(_metaBytesOut.FilePointer - _lastMetaBytesFp);

                _builder.Add(Util.ToIntsRef(text, _scratchTerm), _numTerms);
                _numTerms++;

                _lastMetaBytesFp = _metaBytesOut.FilePointer;
            }
 /// <summary>
 /// Called sequentially on every term being written,
 /// returning <c>true</c> if this term should be indexed.
 /// </summary>
 /// <param name="term">The term under consideration.</param>
 /// <param name="stats">Statistics for <paramref name="term"/>.</param>
 /// <returns><c>true</c> if the term should be indexed; otherwise <c>false</c>.</returns>
 public abstract bool IsIndexTerm(BytesRef term, TermStats stats);
Ejemplo n.º 20
0
 /// <summary>
 /// No-op: this implementation does nothing when a term is finished.
 /// </summary>
 /// <param name="term">The term being finished (ignored).</param>
 /// <param name="stats">Statistics for the term (ignored).</param>
 public override void FinishTerm(BytesRef term, TermStats stats)
 {
 }
Ejemplo n.º 21
0
 /// <summary>
 /// No-op: this implementation does nothing when a term is finished.
 /// </summary>
 /// <param name="term">The term being finished (ignored).</param>
 /// <param name="stats">Statistics for the term (ignored).</param>
 public override void FinishTerm(BytesRef term, TermStats stats)
 {
 }
            /// <summary>
            /// Selects a term when its document frequency reaches <c>_docFreqThresh</c> or when
            /// <c>_count</c> has reached <c>_interval</c>.
            /// </summary>
            /// <param name="term">The term under consideration (not read by this method).</param>
            /// <param name="stats">Statistics for the term; only <c>DocFreq</c> is read here.</param>
            /// <returns><c>true</c> if the term is selected; otherwise <c>false</c>.</returns>
            public override bool IsIndexTerm(BytesRef term, TermStats stats)
            {
                if (stats.DocFreq < _docFreqThresh && _count < _interval)
                {
                    // Not selected: count this term.
                    _count++;
                    return false;
                }

                // Selected: the counter restarts at 1.
                _count = 1;
                return true;
            }
Ejemplo n.º 23
0
            /// <summary>
            /// Appends the finished term to the pending-terms buffer, first flushing the
            /// buffered block and registering the term with the index writer when the term
            /// is an index term.
            /// </summary>
            /// <param name="text">The term being finished.</param>
            /// <param name="stats">Statistics for the term; <c>DocFreq</c> is asserted to be positive.</param>
            public override void FinishTerm(BytesRef text, TermStats stats)
            {
                Debug.Assert(stats.DocFreq > 0);

                if (_fieldIndexWriter.CheckIndexTerm(text, stats))
                {
                    // Terms are gathered in RAM in a pending buffer instead of being written
                    // live; the whole block is written out in between index terms.
                    if (_pendingCount > 0)
                    {
                        FlushBlock();
                    }
                    _fieldIndexWriter.Add(text, stats, _btw._output.FilePointer);
                }

                // Grow the pending buffer when it is full, pre-populating the new slots.
                if (_pendingCount == _pendingTerms.Length)
                {
                    var grownSize = ArrayUtil.Oversize(_pendingCount + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
                    var grown = new TermEntry[grownSize];
                    Array.Copy(_pendingTerms, 0, grown, 0, _pendingCount);
                    for (var slot = _pendingCount; slot < grown.Length; slot++)
                    {
                        grown[slot] = new TermEntry();
                    }
                    _pendingTerms = grown;
                }

                var entry = _pendingTerms[_pendingCount];
                entry.Term.CopyBytes(text);

                var termState = _postingsWriter.NewTermState();
                entry.State = termState;
                termState.DocFreq = stats.DocFreq;
                termState.TotalTermFreq = stats.TotalTermFreq;
                _postingsWriter.FinishTerm(termState);

                _pendingCount++;
                _numTerms++;
            }
            /// <summary>
            /// Selects a term once <c>_count</c> has reached <c>_interval</c>.
            /// </summary>
            /// <param name="term">The term under consideration (not read by this method).</param>
            /// <param name="stats">Statistics for the term (not read by this method).</param>
            /// <returns><c>true</c> if the term is selected; otherwise <c>false</c>.</returns>
            public override bool IsIndexTerm(BytesRef term, TermStats stats)
            {
                if (_count < _interval)
                {
                    // Not selected: count this term.
                    _count++;
                    return false;
                }

                // Selected: the counter restarts at 1.
                _count = 1;
                return true;
            }
 /// <summary>
 /// Called sequentially on every term being written,
 /// returning <c>true</c> if this term should be indexed.
 /// </summary>
 /// <param name="term">The term under consideration.</param>
 /// <param name="stats">Statistics for <paramref name="term"/>.</param>
 /// <returns><c>true</c> if the term should be indexed; otherwise <c>false</c>.</returns>
 public abstract bool IsIndexTerm(BytesRef term, TermStats stats);
Ejemplo n.º 26
0
 /// <summary>
 /// Selects a term at random: the term is selected when a value drawn with
 /// <c>rand.Next(gap)</c> equals <c>gap / 2</c>.
 /// </summary>
 /// <param name="term">The term under consideration (not read by this method).</param>
 /// <param name="stats">Statistics for the term (not read by this method).</param>
 /// <returns><c>true</c> if the term is selected; otherwise <c>false</c>.</returns>
 public override bool IsIndexTerm(BytesRef term, TermStats stats)
 {
     var draw = rand.Next(gap);
     return draw == gap / 2;
 }
Ejemplo n.º 27
0
            /// <summary>
            /// Assembles the term's statistics header followed by the contents of
            /// <c>postingsWriter.buffer</c> into one byte sequence and adds a copy of it
            /// to <c>builder</c>, keyed by the term.
            /// </summary>
            /// <param name="text">The term being finished.</param>
            /// <param name="stats">Statistics for the term; <c>DocFreq</c> is asserted to equal <c>postingsWriter.docCount</c>.</param>
            public override void FinishTerm(BytesRef text, TermStats stats)
            {
                Debug.Assert(postingsWriter.docCount == stats.DocFreq);
                Debug.Assert(buffer2.FilePointer == 0);

                // Header: docFreq, then (unless the field is DOCS_ONLY) the amount by which
                // totalTermFreq exceeds docFreq.
                buffer2.WriteVInt(stats.DocFreq);
                if (field.FieldIndexOptions != IndexOptions.DOCS_ONLY)
                {
                    buffer2.WriteVLong(stats.TotalTermFreq - stats.DocFreq);
                }

                // Capture the header size before the scratch buffer is reset.
                int headerLength = (int)buffer2.FilePointer;
                buffer2.WriteTo(finalBuffer, 0);
                buffer2.Reset();

                int combinedLength = headerLength + (int)postingsWriter.buffer.FilePointer;
                if (finalBuffer.Length < combinedLength)
                {
                    finalBuffer = ArrayUtil.Grow(finalBuffer, combinedLength);
                }

                // The postings bytes are placed directly after the header.
                postingsWriter.buffer.WriteTo(finalBuffer, headerLength);
                postingsWriter.buffer.Reset();

                // spare only points at the shared buffer; a deep copy is what gets stored.
                spare.Bytes = finalBuffer;
                spare.Length = combinedLength;

                builder.Add(Util.ToIntsRef(text, scratchIntsRef), BytesRef.DeepCopyOf(spare));
                termCount++;
            }