/// <summary>
/// Called when downsizing bitsets for serialization.
/// </summary>
/// <param name="fieldInfo">The field with sparse set bits (not consulted by this implementation).</param>
/// <param name="initialSet">The bits accumulated.</param>
/// <returns>Whatever <c>initialSet.Downsize</c> yields: null, or a hopefully more densely packed, smaller bitset.</returns>
public FuzzySet Downsize(FieldInfo fieldInfo, FuzzySet initialSet)
{
    // Target a bitset in which roughly 10% of the bits are set, so that
    // about 90% of lookups can fail fast.
    const float saturationGoal = 0.1f;

    FuzzySet resized = initialSet.Downsize(saturationGoal);
    return resized;
}
/// <summary>
/// Opens the segment's bloom filter file, validates its header, creates the delegate
/// postings format's fields producer and loads one bloom filter per recorded field.
/// </summary>
/// <param name="outerInstance">The enclosing postings format; its <c>Name</c> is the codec name checked against the file header.</param>
/// <param name="state">Read state supplying the directory, segment name/suffix, I/O context and field infos.</param>
public BloomFilteredFieldsProducer(BloomFilteringPostingsFormat outerInstance, SegmentReadState state)
{
    var bloomFileName = IndexFileNames.SegmentFileName(
        state.SegmentInfo.Name,
        state.SegmentSuffix,
        BLOOM_EXTENSION);

    ChecksumIndexInput bloomIn = null;
    bool success = false;
    try
    {
        bloomIn = state.Directory.OpenChecksumInput(bloomFileName, state.Context);
        var version = CodecUtil.CheckHeader(
            bloomIn,
            /*BLOOM_CODEC_NAME*/ outerInstance.Name,
            VERSION_START,
            VERSION_CURRENT);

        // NOTE: the hash function is not read from the stream here; the original
        // call is left disabled:
        // hashFunction = HashFunction.forName(bloomIn.readString());

        // The next string in the stream names the delegate postings format.
        _delegateFieldsProducer = ForName(bloomIn.ReadString()).FieldsProducer(state);

        // Each entry is a field number followed by that field's serialized filter.
        int bloomCount = bloomIn.ReadInt32();
        for (int index = 0; index < bloomCount; index++)
        {
            int fieldNumber = bloomIn.ReadInt32();
            var fieldBloom = FuzzySet.Deserialize(bloomIn);
            _bloomsByFieldName.Add(state.FieldInfos.FieldInfo(fieldNumber).Name, fieldBloom);
        }

        if (version < VERSION_CHECKSUM)
        {
            // Versions before VERSION_CHECKSUM get an end-of-file check instead of a footer check.
#pragma warning disable 612, 618
            CodecUtil.CheckEOF(bloomIn);
#pragma warning restore 612, 618
        }
        else
        {
            CodecUtil.CheckFooter(bloomIn);
        }

        IOUtils.Dispose(bloomIn);
        success = true;
    }
    finally
    {
        // On any failure, release both the input and the delegate producer
        // via the exception-handling dispose helper.
        if (!success)
        {
            IOUtils.DisposeWhileHandlingException(bloomIn, _delegateFieldsProducer);
        }
    }
}
/// <summary>
/// Creates the set used for a field by calling
/// <c>FuzzySet.CreateSetBasedOnMaxMemory</c> with a fixed budget of 1024.
/// </summary>
/// <param name="state">The segment write state (not consulted by this implementation).</param>
/// <param name="info">The field the set is created for (not consulted by this implementation).</param>
/// <returns>The newly created set.</returns>
public override FuzzySet GetSetForField(SegmentWriteState state, FieldInfo info)
{
    // NOTE(review): presumably a size in bytes - confirm against FuzzySet.
    const int maxMemoryBudget = 1024;

    return FuzzySet.CreateSetBasedOnMaxMemory(maxMemoryBudget);
}
/// <summary>
/// Stores the terms consumer to delegate to together with the bloom filter
/// associated with it.
/// </summary>
/// <param name="termsConsumer">The terms consumer kept as the delegate.</param>
/// <param name="bloomFilter">The bloom filter kept alongside the delegate.</param>
public WrappedTermsConsumer(TermsConsumer termsConsumer, FuzzySet bloomFilter)
{
    this._bloomFilter = bloomFilter;
    this._delegateTermsConsumer = termsConsumer;
}
/// <summary>
/// Captures the delegate terms, the enum offered for reuse and the bloom filter.
/// No other work is done at construction time.
/// </summary>
/// <param name="delegateTerms">The terms stored as the delegate.</param>
/// <param name="reuseDelegate">The terms enum stored for reuse.</param>
/// <param name="filter">The bloom filter stored for this enum.</param>
public BloomFilteredTermsEnum(Terms delegateTerms, TermsEnum reuseDelegate, FuzzySet filter)
{
    this.filter = filter;
    this._reuseDelegate = reuseDelegate;
    this._delegateTerms = delegateTerms;
}
/// <summary>
/// Stores the terms to delegate to and the bloom filter associated with them.
/// </summary>
/// <param name="terms">The terms kept as the delegate.</param>
/// <param name="filter">The bloom filter kept alongside the delegate.</param>
public BloomFilteredTerms(Terms terms, FuzzySet filter)
{
    this._filter = filter;
    this._delegateTerms = terms;
}
/// <summary>
/// Reports a filter as saturated once its saturation exceeds 0.9.
/// </summary>
/// <param name="bloomFilter">The filter whose saturation is read.</param>
/// <param name="fieldInfo">The associated field (not consulted by this implementation).</param>
/// <returns>true when <c>bloomFilter.GetSaturation()</c> is greater than 0.9.</returns>
public override bool IsSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo)
{
    // Past 90% of bits set the bitset is not worth saving - we don't want to
    // throw any more memory at this problem.
    const float saturationCutoff = 0.9f;

    return bloomFilter.GetSaturation() > saturationCutoff;
}
/// <summary>
/// Creates the set used for a field by calling
/// <c>FuzzySet.CreateSetBasedOnQuality</c> with the segment's document count
/// and a value of 0.10.
/// </summary>
/// <param name="state">The segment write state; its <c>SegmentInfo.DocCount</c> is passed to the factory method.</param>
/// <param name="info">The field the set is created for (not consulted by this implementation).</param>
/// <returns>The newly created set.</returns>
public override FuzzySet GetSetForField(SegmentWriteState state, FieldInfo info)
{
    // Assume every document carries a unique term (e.g. a primary key) ...
    var assumedUniqueTerms = state.SegmentInfo.DocCount;

    // ... and hope to maintain a set with 10% of its bits set.
    const float desiredSaturation = 0.10f;

    return FuzzySet.CreateSetBasedOnQuality(assumedUniqueTerms, desiredSaturation);
}
/// <summary>
/// Reports a filter as saturated once its saturation exceeds 0.9.
/// </summary>
/// <param name="bloomFilter">The filter whose saturation is read.</param>
/// <param name="fieldInfo">The associated field (not consulted by this implementation).</param>
/// <returns>true when <c>bloomFilter.GetSaturation()</c> is greater than 0.9.</returns>
public override bool IsSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo)
{
    // With more than 90% of bits set there is no point saving the bitset -
    // we don't want to throw any more memory at this problem.
    bool overSaturated = bloomFilter.GetSaturation() > 0.9f;
    return overSaturated;
}
/// <summary>
/// Asks the bloom filter factory to downsize the filter and serializes the
/// result, falling back to the original filter when the factory returns null.
/// </summary>
/// <param name="bloomOutput">The output the chosen set is serialized to.</param>
/// <param name="bloomFilter">The filter to downsize; serialized as-is when no downsized set is produced.</param>
/// <param name="fieldInfo">The field passed to the factory along with the filter.</param>
private void SaveAppropriatelySizedBloomFilter(DataOutput bloomOutput, FuzzySet bloomFilter, FieldInfo fieldInfo)
{
    var downsized = _bfpf._bloomFilterFactory.Downsize(fieldInfo, bloomFilter);

    // A null result means no smaller set is available, so keep the original.
    var setToWrite = downsized ?? bloomFilter;

    setToWrite.Serialize(bloomOutput);
}
/// <summary>
/// Captures the delegate terms, the enum offered for reuse and the bloom filter.
/// No other work is done at construction time.
/// </summary>
/// <param name="delegateTerms">The terms stored as the delegate.</param>
/// <param name="reuseDelegate">The terms enum stored for reuse.</param>
/// <param name="filter">The bloom filter stored in <c>FILTER</c>.</param>
public BloomFilteredTermsEnum(Terms delegateTerms, TermsEnum reuseDelegate, FuzzySet filter)
{
    FILTER = filter;
    this._reuseDelegate = reuseDelegate;
    this._delegateTerms = delegateTerms;
}
/// <summary>
/// Used to determine if the given filter has reached saturation and should be
/// retired, i.e. not saved any more.
/// </summary>
/// <param name="bloomFilter">The bloom filter being tested.</param>
/// <param name="fieldInfo">The field with which this filter is associated.</param>
/// <returns>true if the set has reached saturation and should be retired.</returns>
public abstract bool IsSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo);
/// <summary>
/// Never reports saturation: this implementation always returns false.
/// </summary>
/// <param name="bloomFilter">The filter being tested (not consulted by this implementation).</param>
/// <param name="fieldInfo">The associated field (not consulted by this implementation).</param>
/// <returns>Always false.</returns>
public override bool IsSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo)
{
    // For test purposes the BloomFilter is always maintained - even past the
    // point of usefulness when all bits are set.
    const bool saturated = false;
    return saturated;
}
/// <summary>
/// Never reports saturation: this implementation always returns false.
/// </summary>
/// <param name="bloomFilter">The filter being tested (not consulted by this implementation).</param>
/// <param name="fieldInfo">The associated field (not consulted by this implementation).</param>
/// <returns>Always false.</returns>
public override bool IsSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo)
{
    // For test purposes keep maintaining the BloomFilter, even once every bit
    // is set and it is no longer useful.
    return false;
}