/// <summary>
/// Checks the statistics reported for the term that was just written, folds them
/// into the running totals and then forwards the call to the wrapped consumer.
/// </summary>
/// <param name="text">The finished term; asserted to equal <c>lastTerm</c>.</param>
/// <param name="stats">Statistics reported for <paramref name="text"/>.</param>
public override void FinishTerm(BytesRef text, TermStats stats)
{
    // The consumer must be in the START state; it is put back to INITIAL here.
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(state == TermsConsumerState.START);
    }
    state = TermsConsumerState.INITIAL;

    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(text.Equals(lastTerm));
        // A doc freq of zero means this method should not have been called at all.
        Debugging.Assert(stats.DocFreq > 0);
        Debugging.Assert(stats.DocFreq == lastPostingsConsumer.docFreq);
    }
    sumDocFreq += stats.DocFreq;

    bool tracksFreqs = fieldInfo.IndexOptions != IndexOptions.DOCS_ONLY;
    if (tracksFreqs)
    {
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(stats.TotalTermFreq == lastPostingsConsumer.totalTermFreq);
        }
        sumTotalTermFreq += stats.TotalTermFreq;
    }
    else if (Debugging.AssertsEnabled)
    {
        // DOCS_ONLY fields are expected to report -1 as total term frequency.
        Debugging.Assert(stats.TotalTermFreq == -1);
    }

    @in.FinishTerm(text, stats);
}
/// <summary>
/// Writes the term's statistics followed by the buffered postings into
/// <c>finalBuffer</c> and adds a deep copy of those bytes to <c>builder</c>,
/// keyed by the term.
/// </summary>
/// <param name="text">The term being finished.</param>
/// <param name="stats">Statistics of the term; <c>DocFreq</c> must match the postings writer's doc count.</param>
public override void FinishTerm(BytesRef text, TermStats stats)
{
    Debug.Assert(postingsWriter.docCount == stats.DocFreq);
    Debug.Assert(buffer2.FilePointer == 0);

    // Stats go first: doc freq, then (unless DOCS_ONLY) totalTermFreq - docFreq.
    buffer2.WriteVInt(stats.DocFreq);
    bool hasFreqs = field.FieldIndexOptions != IndexOptions.DOCS_ONLY;
    if (hasFreqs)
    {
        buffer2.WriteVLong(stats.TotalTermFreq - stats.DocFreq);
    }

    int statsLength = (int)buffer2.FilePointer;
    buffer2.WriteTo(finalBuffer, 0);
    buffer2.Reset();

    // Make room for the postings bytes that follow the stats.
    int postingsLength = (int)postingsWriter.buffer.FilePointer;
    int totalBytes = statsLength + postingsLength;
    if (finalBuffer.Length < totalBytes)
    {
        finalBuffer = ArrayUtil.Grow(finalBuffer, totalBytes);
    }
    postingsWriter.buffer.WriteTo(finalBuffer, statsLength);
    postingsWriter.buffer.Reset();

    // spare only points at finalBuffer, so the builder is handed a deep copy.
    spare.Bytes = finalBuffer;
    spare.Length = totalBytes;
    var termInts = Util.ToIntsRef(text, scratchIntsRef);
    builder.Add(termInts, BytesRef.DeepCopyOf(spare));

    termCount++;
}
/// <summary>
/// Adds the indexed prefix of <paramref name="text"/> to <c>fstBuilder</c>, mapped to
/// <paramref name="termsFilePointer"/>, and remembers the full term as <c>lastTerm</c>.
/// </summary>
/// <param name="text">The term to add; its <c>Length</c> is shortened temporarily and then restored.</param>
/// <param name="stats">Not used by this implementation.</param>
/// <param name="termsFilePointer">The value stored in the FST for this term.</param>
public override void Add(BytesRef text, TermStats stats, long termsFilePointer)
{
    bool isEmptyTerm = text.Length == 0;
    if (isEmptyTerm)
    {
        // The empty string was already added by the constructor.
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(termsFilePointer == startTermsFilePointer);
        }
        return;
    }

    int fullLength = text.Length;
    // Trim the term to the prefix length computed against the previous term.
    text.Length = outerInstance.IndexedTermPrefixLength(lastTerm, text);
    try
    {
        var prefixInts = Util.Fst.Util.ToInt32sRef(text, scratchIntsRef);
        fstBuilder.Add(prefixInts, termsFilePointer);
    }
    finally
    {
        // Always hand the caller back the untrimmed term.
        text.Length = fullLength;
    }

    lastTerm.CopyBytes(text);
}
/// <summary>
/// Adds an entry for <paramref name="text"/> to this index writer.
/// NOTE(review): presumably called only for terms that were selected as index
/// terms; confirm against the callers.
/// </summary>
/// <param name="text">The term to add.</param>
/// <param name="stats">Statistics supplied for <paramref name="text"/>.</param>
/// <param name="termsFilePointer">
/// File pointer associated with the term. NOTE(review): presumably the position
/// in the terms file at the time the term is added; confirm.
/// </param>
public abstract void Add(BytesRef text, TermStats stats, long termsFilePointer);
/// <summary>
/// Adds the term to the bloom filter when it has at least one document, then
/// forwards the call to the delegate terms consumer.
/// </summary>
/// <param name="text">The term being finished.</param>
/// <param name="stats">Statistics of the term; only <c>DocFreq</c> is read here.</param>
public override void FinishTerm(BytesRef text, TermStats stats)
{
    bool termHasDocs = stats.DocFreq > 0;
    if (termHasDocs)
    {
        _bloomFilter.AddValue(text);
    }

    // The delegate is called whether or not the term went into the filter.
    _delegateTermsConsumer.FinishTerm(text, stats);
}
/// <summary>
/// Decides whether <paramref name="text"/> is an index term.
/// NOTE(review): presumably called once per term, in term order, since
/// implementations may keep per-call state; confirm against the callers.
/// </summary>
/// <param name="text">The term under consideration.</param>
/// <param name="stats">Statistics supplied for <paramref name="text"/>.</param>
/// <returns><c>true</c> if the term is an index term; otherwise <c>false</c>.</returns>
public abstract bool CheckIndexTerm(BytesRef text, TermStats stats);
/// <summary>
/// Adds the indexed prefix of <paramref name="text"/> to <c>_fstBuilder</c>, mapped to
/// <paramref name="termsFilePointer"/>, and remembers the full term as <c>_lastTerm</c>.
/// </summary>
/// <param name="text">The term to add; its <c>Length</c> is shortened temporarily and then restored.</param>
/// <param name="stats">Not used by this implementation.</param>
/// <param name="termsFilePointer">The value stored in the FST for this term.</param>
public override void Add(BytesRef text, TermStats stats, long termsFilePointer)
{
    bool isEmptyTerm = text.Length == 0;
    if (isEmptyTerm)
    {
        // The empty string was already added by the constructor.
        Debug.Assert(termsFilePointer == _startTermsFilePointer);
        return;
    }

    int fullLength = text.Length;
    // Trim the term to the prefix length computed against the previous term.
    text.Length = _vgtiw.IndexedTermPrefixLength(_lastTerm, text);
    try
    {
        var prefixInts = Util.ToIntsRef(text, _scratchIntsRef);
        _fstBuilder.Add(prefixInts, termsFilePointer);
    }
    finally
    {
        // Always hand the caller back the untrimmed term.
        text.Length = fullLength;
    }

    _lastTerm.CopyBytes(text);
}
/// <summary>
/// Asks the policy whether <paramref name="text"/> is an index term. The first term
/// seen is always reported as an index term, even if the policy declines it.
/// </summary>
/// <param name="text">The term under consideration.</param>
/// <param name="stats">Statistics passed through to the policy.</param>
/// <returns><c>true</c> if the term is an index term; otherwise <c>false</c>.</returns>
public override bool CheckIndexTerm(BytesRef text, TermStats stats)
{
    // The policy is consulted first on every call; _first is only read when it declines.
    bool indexThisTerm = _vgtiw._policy.IsIndexTerm(text, stats) || _first;

    if (!indexThisTerm)
    {
        // Not indexed: keep it as the most recent term.
        _lastTerm.CopyBytes(text);
        return false;
    }

    _first = false;
    return true;
}
/// <summary>
/// Checks the statistics reported for the term that was just written, folds them
/// into the running totals and then forwards the call to the wrapped consumer.
/// </summary>
/// <param name="text">The finished term; asserted to equal <c>LastTerm</c>.</param>
/// <param name="stats">Statistics reported for <paramref name="text"/>.</param>
public override void FinishTerm(BytesRef text, TermStats stats)
{
    // The consumer must be in the START state; it is put back to INITIAL here.
    Debug.Assert(State == TermsConsumerState.START);
    State = TermsConsumerState.INITIAL;

    Debug.Assert(text.Equals(LastTerm));
    // A doc freq of zero means this method should not have been called at all.
    Debug.Assert(stats.DocFreq > 0);
    Debug.Assert(stats.DocFreq == LastPostingsConsumer.DocFreq);
    SumDocFreq += stats.DocFreq;

    bool tracksFreqs = fieldInfo.FieldIndexOptions != FieldInfo.IndexOptions.DOCS_ONLY;
    if (tracksFreqs)
    {
        Debug.Assert(stats.TotalTermFreq == LastPostingsConsumer.TotalTermFreq);
        SumTotalTermFreq += stats.TotalTermFreq;
    }
    else
    {
        // DOCS_ONLY fields are expected to report -1 as total term frequency.
        Debug.Assert(stats.TotalTermFreq == -1);
    }

    @in.FinishTerm(text, stats);
}
/// <summary>
/// Writes the indexed prefix of <paramref name="text"/> to the output and records,
/// for this index term, the prefix length and the terms-file pointer delta from
/// the previous index term.
/// </summary>
/// <param name="text">The index term.</param>
/// <param name="stats">Not used by this implementation.</param>
/// <param name="termsFilePointer">Terms-file pointer for this term; stored as a delta from the previous one.</param>
public override void Add(BytesRef text, TermStats stats, long termsFilePointer)
{
    // Only the minimal prefix that differs from the prior term is written.
    int prefixLength = _fgtiw.IndexedTermPrefixLength(_lastTerm, text);
    _fgtiw.Output.WriteBytes(text.Bytes, text.Offset, prefixLength);

    var slot = NumIndexTerms;

    // Grow both parallel arrays when the next slot would be out of range.
    if (_termLengths.Length == slot)
    {
        _termLengths = ArrayUtil.Grow(_termLengths);
    }
    if (_termsPointerDeltas.Length == slot)
    {
        _termsPointerDeltas = ArrayUtil.Grow(_termsPointerDeltas);
    }

    // Pointer delta relative to the previous index term.
    var pointerDelta = termsFilePointer - _lastTermsPointer;
    _termsPointerDeltas[slot] = (int)pointerDelta;
    _lastTermsPointer = termsFilePointer;

    // Prefix length in bytes; it has to fit into a short.
    Debug.Assert(prefixLength <= short.MaxValue);
    _termLengths[slot] = (short)prefixLength;
    _totTermLength += prefixLength;

    _lastTerm.CopyBytes(text);
    NumIndexTerms++;
}
public void finishTerm(BytesRef text, TermStats stats) { Debug.Assert( stats.docFreq > 0; //System.out.println("BTW: finishTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " seg=" + segment + " df=" + stats.docFreq); final bool isIndexTerm = fieldIndexWriter.checkIndexTerm(text, stats); if (isIndexTerm) { if (pendingCount > 0) { // Instead of writing each term, live, we gather terms // in RAM in a pending buffer, and then write the // entire block in between index terms: flushBlock(); } fieldIndexWriter.add(text, stats, out.getFilePointer()); //System.out.println(" index term!"); } if (pendingTerms.length == pendingCount) { final TermEntry[] newArray = new TermEntry[ArrayUtil.oversize(pendingCount+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
/// <summary>
/// Builds the metadata record for the finished term (longs, optional bytes, doc
/// frequency and total term frequency) and adds it to <c>_builder</c> keyed by the term.
/// </summary>
/// <param name="text">The term being finished.</param>
/// <param name="stats">Statistics copied into both the term state and the metadata record.</param>
public override void FinishTerm(BytesRef text, TermStats stats)
{
    var writer = _outerInstance._postingsWriter;

    // Fresh term state carrying the reported statistics.
    var state = writer.NewTermState();
    state.DocFreq = stats.DocFreq;
    state.TotalTermFreq = stats.TotalTermFreq;

    var meta = new FSTTermOutputs.TermData
    {
        LONGS = new long[_longsSize],
        BYTES = null,
        DOC_FREQ = stats.DocFreq,
        TOTAL_TERM_FREQ = stats.TotalTermFreq
    };

    writer.FinishTerm(state);
    writer.EncodeTerm(meta.LONGS, _metaWriter, _fieldInfo, state, true);

    // BYTES stays null when the encoder wrote nothing into _metaWriter.
    int encodedLength = (int)_metaWriter.FilePointer;
    if (encodedLength > 0)
    {
        meta.BYTES = new byte[encodedLength];
        _metaWriter.WriteTo(meta.BYTES, 0);
        _metaWriter.Reset();
    }

    var termInts = Util.ToIntsRef(text, _scratchTerm);
    _builder.Add(termInts, meta);
    _numTerms++;
}
/// <summary>
/// Writes the skip data for the finished term, completes <c>TermInfo</c> and adds
/// the term to the terms output. Terms whose doc frequency is not positive are ignored.
/// </summary>
/// <param name="text">The term being finished.</param>
/// <param name="stats">Statistics of the term; only <c>DocFreq</c> is read here.</param>
public override void FinishTerm(BytesRef text, TermStats stats)
{
    if (stats.DocFreq <= 0)
    {
        return;
    }

    var outer = OuterInstance;
    long skipPointer = outer.SkipListWriter.WriteSkip(outer.FreqOut);

    TermInfo.DocFreq = stats.DocFreq;
    // The skip offset is stored relative to the term's freq pointer.
    TermInfo.SkipOffset = (int)(skipPointer - TermInfo.FreqPointer);

    outer.TermsOut.Add(FieldInfo.Number, text, TermInfo);
}
/// <summary>
/// Writes the term's statistics followed by the buffered postings into
/// <c>finalBuffer</c> and adds a deep copy of those bytes to <c>builder</c>,
/// keyed by the term.
/// </summary>
/// <remarks>
/// NOTE(review): member casing is mixed (e.g. <c>reset</c>/<c>grow</c> next to
/// <c>WriteTo</c>), apparently left over from an automated Java conversion. The
/// names are kept exactly as they were; confirm that they resolve.
/// </remarks>
/// <param name="text">The term being finished.</param>
/// <param name="stats">Statistics of the term; <c>docFreq</c> must match the postings writer's doc count.</param>
public override void finishTerm(BytesRef text, TermStats stats)
{
    Debug.Assert(postingsWriter.docCount == stats.docFreq);
    Debug.Assert(buffer2.FilePointer == 0);

    // Stats go first: doc freq, then (unless DOCS_ONLY) totalTermFreq - docFreq.
    buffer2.WriteVInt(stats.docFreq);
    bool hasFreqs = field.IndexOptions != IndexOptions.DOCS_ONLY;
    if (hasFreqs)
    {
        buffer2.WriteVLong(stats.totalTermFreq - stats.docFreq);
    }

    int statsLength = (int) buffer2.FilePointer;
    buffer2.WriteTo(finalBuffer, 0);
    buffer2.reset();

    // Make room for the postings bytes that follow the stats.
    int postingsLength = (int) postingsWriter.buffer.FilePointer;
    int totalBytes = statsLength + postingsLength;
    if (finalBuffer.Length < totalBytes)
    {
        finalBuffer = ArrayUtil.grow(finalBuffer, totalBytes);
    }
    postingsWriter.buffer.WriteTo(finalBuffer, statsLength);
    postingsWriter.buffer.reset();

    // spare only points at finalBuffer, so the builder is handed a deep copy.
    spare.bytes = finalBuffer;
    spare.length = totalBytes;
    var termInts = Util.toIntsRef(text, scratchIntsRef);
    builder.add(termInts, BytesRef.deepCopyOf(spare));

    termCount++;
}
/// <summary>
/// Reports every <c>_termIndexInterval</c>-th term, starting with the first, as an
/// index term. The term immediately preceding the next index term is copied into
/// <c>_lastTerm</c>.
/// </summary>
/// <param name="text">The term under consideration.</param>
/// <param name="stats">Not used by this implementation.</param>
/// <returns><c>true</c> if the term is an index term; otherwise <c>false</c>.</returns>
public override bool CheckIndexTerm(BytesRef text, TermStats stats)
{
    var interval = _fgtiw._termIndexInterval;

    // Ordinal of this term before the counter moves on; ordinal 0 is the first term.
    var ordinal = _numTerms++;
    if (ordinal % interval == 0)
    {
        return true;
    }

    // _numTerms is now the next term's ordinal. If that one will be an index term,
    // keep the current term so the wasted suffix can be computed.
    if (_numTerms % interval == 0)
    {
        _lastTerm.CopyBytes(text);
    }
    return false;
}
/// <summary>
/// Writes the finished term's statistics and metadata to the stats / meta outputs
/// and adds the term to <c>_builder</c>, mapped to its ordinal (<c>_numTerms</c>).
/// </summary>
/// <param name="text">The term being finished.</param>
/// <param name="stats">Statistics written to <c>_statsOut</c> and copied into the term state.</param>
public override void FinishTerm(BytesRef text, TermStats stats)
{
    // A skip entry is buffered before every SKIP_INTERVAL-th term, except the very first.
    bool atSkipBoundary = _numTerms > 0 && _numTerms % SKIP_INTERVAL == 0;
    if (atSkipBoundary)
    {
        BufferSkip();
    }

    var longs = new long[_longsSize];
    long delta = stats.TotalTermFreq - stats.DocFreq;

    if (stats.TotalTermFreq <= 0)
    {
        // No positive total term freq: doc freq is written as-is.
        _statsOut.WriteVInt(stats.DocFreq);
    }
    else if (delta == 0)
    {
        // Low bit set: total term freq equals doc freq, nothing else follows.
        _statsOut.WriteVInt((stats.DocFreq << 1) | 1);
    }
    else
    {
        // Low bit clear: the difference follows as a VLong.
        _statsOut.WriteVInt(stats.DocFreq << 1);
        _statsOut.WriteVLong(delta);
    }

    var writer = _outerInstance.postingsWriter;
    var state = writer.NewTermState();
    state.DocFreq = stats.DocFreq;
    state.TotalTermFreq = stats.TotalTermFreq;
    writer.FinishTerm(state);
    writer.EncodeTerm(longs, _metaBytesOut, _fieldInfo, state, true);

    // Each long is stored as a delta against the previous term's value.
    for (var slot = 0; slot < _longsSize; slot++)
    {
        long current = longs[slot];
        _metaLongsOut.WriteVLong(current - _lastLongs[slot]);
        _lastLongs[slot] = current;
    }

    // Meta-bytes file pointer, also stored as a delta.
    var metaBytesFp = _metaBytesOut.FilePointer;
    _metaLongsOut.WriteVLong(metaBytesFp - _lastMetaBytesFp);

    _builder.Add(Util.ToIntsRef(text, _scratchTerm), _numTerms);
    _numTerms++;
    _lastMetaBytesFp = metaBytesFp;
}
/// <summary>
/// Called sequentially on every term being written,
/// returning <c>true</c> if this term should be indexed.
/// </summary>
/// <param name="term">The term being written.</param>
/// <param name="stats">Statistics supplied for <paramref name="term"/>.</param>
/// <returns><c>true</c> if the term should be indexed; otherwise <c>false</c>.</returns>
public abstract bool IsIndexTerm(BytesRef term, TermStats stats);
/// <summary>
/// This override does nothing; both arguments are ignored.
/// </summary>
/// <param name="term">The term being finished (unused).</param>
/// <param name="stats">Statistics of the term (unused).</param>
public override void FinishTerm(BytesRef term, TermStats stats) { }
/// <summary>
/// Selects a term for indexing when its doc frequency reaches <c>_docFreqThresh</c>
/// or when <c>_count</c> has reached <c>_interval</c>. The counter restarts at 1
/// whenever a term is selected.
/// </summary>
/// <param name="term">The term being written (not inspected here).</param>
/// <param name="stats">Statistics of the term; only <c>DocFreq</c> is read.</param>
/// <returns><c>true</c> if the term should be indexed; otherwise <c>false</c>.</returns>
public override bool IsIndexTerm(BytesRef term, TermStats stats)
{
    bool skipTerm = stats.DocFreq < _docFreqThresh && _count < _interval;
    if (skipTerm)
    {
        _count++;
        return false;
    }

    _count = 1;
    return true;
}
/// <summary>
/// Buffers the finished term in the pending array. When the term is an index term,
/// any previously pending terms are flushed as a block first and the term is added
/// to the field index writer.
/// </summary>
/// <param name="text">The term being finished.</param>
/// <param name="stats">Statistics of the term; <c>DocFreq</c> must be positive.</param>
public override void FinishTerm(BytesRef text, TermStats stats)
{
    Debug.Assert(stats.DocFreq > 0);

    if (_fieldIndexWriter.CheckIndexTerm(text, stats))
    {
        // Terms are gathered in RAM and written as one block between index terms,
        // so whatever is pending belongs to the block that ends here.
        if (_pendingCount > 0)
        {
            FlushBlock();
        }
        _fieldIndexWriter.Add(text, stats, _btw._output.FilePointer);
    }

    if (_pendingTerms.Length == _pendingCount)
    {
        // Pending buffer is full: grow it and pre-populate the new slots.
        int grownSize = ArrayUtil.Oversize(_pendingCount + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
        var grown = new TermEntry[grownSize];
        Array.Copy(_pendingTerms, grown, _pendingCount);
        for (var slot = _pendingCount; slot < grown.Length; slot++)
        {
            grown[slot] = new TermEntry();
        }
        _pendingTerms = grown;
    }

    var entry = _pendingTerms[_pendingCount];
    entry.Term.CopyBytes(text);

    var termState = _postingsWriter.NewTermState();
    entry.State = termState;
    termState.DocFreq = stats.DocFreq;
    termState.TotalTermFreq = stats.TotalTermFreq;
    _postingsWriter.FinishTerm(termState);

    _pendingCount++;
    _numTerms++;
}
/// <summary>
/// Selects a term for indexing once <c>_count</c> has reached <c>_interval</c>.
/// The counter restarts at 1 whenever a term is selected.
/// </summary>
/// <param name="term">The term being written (not inspected here).</param>
/// <param name="stats">Statistics of the term (not inspected here).</param>
/// <returns><c>true</c> if the term should be indexed; otherwise <c>false</c>.</returns>
public override bool IsIndexTerm(BytesRef term, TermStats stats)
{
    if (_count < _interval)
    {
        _count++;
        return false;
    }

    _count = 1;
    return true;
}
/// <summary>
/// Called sequentially on every term being written,
/// returning <c>true</c> if this term should be indexed.
/// </summary>
/// <param name="term">The term being written.</param>
/// <param name="stats">Statistics supplied for <paramref name="term"/>.</param>
/// <returns><c>true</c> if the term should be indexed; otherwise <c>false</c>.</returns>
public abstract bool IsIndexTerm(BytesRef term, TermStats stats);
/// <summary>
/// Selects terms at random: a term is indexed when a value drawn from
/// <c>rand.Next(gap)</c> equals <c>gap / 2</c>.
/// </summary>
/// <param name="term">The term being written (not inspected here).</param>
/// <param name="stats">Statistics of the term (not inspected here).</param>
/// <returns><c>true</c> if the term should be indexed; otherwise <c>false</c>.</returns>
public override bool IsIndexTerm(BytesRef term, TermStats stats)
{
    var drawn = rand.Next(gap);
    var target = gap / 2;
    return drawn == target;
}