/// <summary>
 /// Produces a smaller replacement for a field's bitset ahead of serialization.
 /// </summary>
 /// <param name="fieldInfo">The field whose bits are sparsely set (not consulted by this implementation)</param>
 /// <param name="initialSet">The bitset accumulated for that field</param>
 /// <returns>The result of downsizing <paramref name="initialSet"/> to a 10% target saturation; may be null</returns>
 public FuzzySet Downsize(FieldInfo fieldInfo, FuzzySet initialSet)
 {
     // Target a set in which roughly 10% of the bits are on, so that about
     // 90% of lookups can be rejected immediately.
     FuzzySet shrunk = initialSet.Downsize(0.1f);
     return shrunk;
 }
Example #2
0
            /// <summary>
            /// Reads the bloom filter file for a segment: validates its header, creates the
            /// delegate fields producer named in the file, and loads one <see cref="FuzzySet"/>
            /// per recorded field into <c>_bloomsByFieldName</c>.
            /// </summary>
            /// <param name="outerInstance">Postings format whose <c>Name</c> is used as the expected codec name in the header check</param>
            /// <param name="state">Read state supplying the directory, IO context, segment name/suffix and field infos</param>
            public BloomFilteredFieldsProducer(BloomFilteringPostingsFormat outerInstance, SegmentReadState state)
            {
                var bloomFileName = IndexFileNames.SegmentFileName(
                    state.SegmentInfo.Name, state.SegmentSuffix, BLOOM_EXTENSION);
                ChecksumIndexInput bloomIn = null;
                // Flipped to true only after the whole file has been read and verified
                var success = false;

                try
                {
                    bloomIn = state.Directory.OpenChecksumInput(bloomFileName, state.Context);
                    var version = CodecUtil.CheckHeader(bloomIn, /*BLOOM_CODEC_NAME*/ outerInstance.Name, VERSION_START, VERSION_CURRENT);
                    // Load the hash function used in the BloomFilter
                    // hashFunction = HashFunction.forName(bloomIn.readString());
                    // Load the delegate postings format
                    var delegatePostingsFormat = ForName(bloomIn.ReadString());

                    _delegateFieldsProducer = delegatePostingsFormat
                                              .FieldsProducer(state);
                    // The file records how many per-field bloom filters follow
                    var numBlooms = bloomIn.ReadInt32();
                    for (var i = 0; i < numBlooms; i++)
                    {
                        // Each entry is a field number followed by the serialized set;
                        // the number is resolved to a field name for the lookup table
                        var fieldNum  = bloomIn.ReadInt32();
                        var bloom     = FuzzySet.Deserialize(bloomIn);
                        var fieldInfo = state.FieldInfos.FieldInfo(fieldNum);
                        _bloomsByFieldName.Add(fieldInfo.Name, bloom);
                    }

                    // The end-of-file check depends on the version read from the header:
                    // a footer check from VERSION_CHECKSUM onwards, otherwise an EOF check
                    // (warnings 612/618, the obsolete-member warnings, are suppressed for that call)
                    if (version >= VERSION_CHECKSUM)
                    {
                        CodecUtil.CheckFooter(bloomIn);
                    }
                    else
                    {
#pragma warning disable 612, 618
                        CodecUtil.CheckEOF(bloomIn);
#pragma warning restore 612, 618
                    }

                    // Normal path: the input is released once everything has been loaded
                    IOUtils.Dispose(bloomIn);
                    success = true;
                }
                finally
                {
                    // Failure path: release both the input and the delegate producer.
                    // NOTE(review): DisposeWhileHandlingException presumably suppresses
                    // secondary errors so the original exception propagates - confirm.
                    if (!success)
                    {
                        IOUtils.DisposeWhileHandlingException(bloomIn, _delegateFieldsProducer);
                    }
                }
            }
 /// <summary>
 /// Supplies the set used for a field, created from a fixed max-memory argument of 1024.
 /// </summary>
 /// <param name="state">Segment write state (not consulted by this implementation)</param>
 /// <param name="info">The field (not consulted by this implementation)</param>
 /// <returns>The set produced by <c>FuzzySet.CreateSetBasedOnMaxMemory(1024)</c></returns>
 public override FuzzySet GetSetForField(SegmentWriteState state, FieldInfo info)
 {
     FuzzySet fixedBudgetSet = FuzzySet.CreateSetBasedOnMaxMemory(1024);
     return fixedBudgetSet;
 }
 /// <summary>
 /// Stores the wrapped terms consumer together with the bloom filter passed alongside it.
 /// </summary>
 /// <param name="termsConsumer">The consumer kept as the delegate</param>
 /// <param name="bloomFilter">The bloom filter kept with the delegate</param>
 public WrappedTermsConsumer(TermsConsumer termsConsumer, FuzzySet bloomFilter)
 {
     this._bloomFilter = bloomFilter;
     this._delegateTermsConsumer = termsConsumer;
 }
 /// <summary>
 /// Captures the delegate terms, the delegate enum offered for reuse, and the filter.
 /// </summary>
 /// <param name="delegateTerms">The terms kept as the delegate</param>
 /// <param name="reuseDelegate">The terms enum kept in <c>_reuseDelegate</c></param>
 /// <param name="filter">The bloom filter kept for this enum</param>
 public BloomFilteredTermsEnum(Terms delegateTerms, TermsEnum reuseDelegate, FuzzySet filter)
 {
     // The qualifier is required here: the parameter shadows the field.
     this.filter = filter;
     this._reuseDelegate = reuseDelegate;
     this._delegateTerms = delegateTerms;
 }
 /// <summary>
 /// Stores the delegate terms and the bloom filter that accompanies them.
 /// </summary>
 /// <param name="terms">The terms kept as the delegate</param>
 /// <param name="filter">The bloom filter kept with the delegate</param>
 public BloomFilteredTerms(Terms terms, FuzzySet filter)
 {
     this._filter = filter;
     this._delegateTerms = terms;
 }
 /// <summary>
 /// Reports a filter as saturated once its saturation exceeds 0.9.
 /// </summary>
 /// <param name="bloomFilter">The filter whose saturation is measured</param>
 /// <param name="fieldInfo">The associated field (not consulted by this implementation)</param>
 /// <returns>true when <c>GetSaturation()</c> is greater than 0.9f</returns>
 public override bool IsSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo)
 {
     // Once more than 90% of the bits are on, the bitset is not worth saving:
     // no further memory should be spent on it.
     var saturation = bloomFilter.GetSaturation();
     return saturation > 0.9f;
 }
 /// <summary>
 /// Supplies the set used for a field, sized from the segment's document count.
 /// </summary>
 /// <param name="state">Segment write state; its <c>SegmentInfo.DocCount</c> drives the sizing</param>
 /// <param name="info">The field (not consulted by this implementation)</param>
 /// <returns>The set produced by <c>FuzzySet.CreateSetBasedOnQuality</c> for that count and 0.10f</returns>
 public override FuzzySet GetSetForField(SegmentWriteState state, FieldInfo info)
 {
     // Worst case assumed: every document carries its own unique term (such as
     // a primary key), and the aim is a set with about 10% of its bits set.
     var assumedUniqueTerms = state.SegmentInfo.DocCount;
     return FuzzySet.CreateSetBasedOnQuality(assumedUniqueTerms, 0.10f);
 }
 /// <summary>
 /// Reports a filter as saturated once its saturation exceeds 0.9.
 /// </summary>
 /// <param name="bloomFilter">The filter whose saturation is measured</param>
 /// <param name="fieldInfo">The associated field (not consulted by this implementation)</param>
 /// <returns>true when <c>GetSaturation()</c> is greater than 0.9f</returns>
 public override bool IsSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo)
 {
     // Above 90% of bits set the bitset is no longer saved - more memory
     // is not going to be spent on it.
     if (bloomFilter.GetSaturation() > 0.9f)
     {
         return true;
     }

     return false;
 }
            /// <summary>
            /// Serializes the downsized form of a bloom filter, or the filter itself when
            /// the factory returns null from <c>Downsize</c>.
            /// </summary>
            /// <param name="bloomOutput">Destination the chosen set is serialized to</param>
            /// <param name="bloomFilter">The filter accumulated for the field</param>
            /// <param name="fieldInfo">The field, forwarded to the factory's <c>Downsize</c></param>
            private void SaveAppropriatelySizedBloomFilter(DataOutput bloomOutput,
                FuzzySet bloomFilter, FieldInfo fieldInfo)
            {
                var setToWrite = _bfpf._bloomFilterFactory.Downsize(fieldInfo, bloomFilter);

                // A null result means no smaller set is available: write the original.
                if (object.ReferenceEquals(setToWrite, null))
                {
                    setToWrite = bloomFilter;
                }

                setToWrite.Serialize(bloomOutput);
            }
 /// <summary>
 /// Captures the delegate terms, the delegate enum offered for reuse, and the filter.
 /// </summary>
 /// <param name="delegateTerms">The terms kept as the delegate</param>
 /// <param name="reuseDelegate">The terms enum kept in <c>_reuseDelegate</c></param>
 /// <param name="filter">The bloom filter kept in <c>FILTER</c></param>
 public BloomFilteredTermsEnum(Terms delegateTerms, TermsEnum reuseDelegate, FuzzySet filter)
 {
     FILTER = filter;
     _reuseDelegate = reuseDelegate;
     _delegateTerms = delegateTerms;
 }
 /// <summary>
 /// Stores the delegate terms and the bloom filter that accompanies them.
 /// </summary>
 /// <param name="terms">The terms kept as the delegate</param>
 /// <param name="filter">The bloom filter kept with the delegate</param>
 public BloomFilteredTerms(Terms terms, FuzzySet filter)
 {
     _filter = filter;
     _delegateTerms = terms;
 }
Example #13
0
 /// <summary>
 /// Determines whether the given filter has reached saturation and should be
 /// retired, i.e. no longer saved.
 /// </summary>
 /// <param name="bloomFilter">The bloom filter being tested</param>
 /// <param name="fieldInfo">The field with which this filter is associated</param>
 /// <returns><c>true</c> if the set has reached saturation and should be retired; otherwise <c>false</c></returns>
 public abstract bool IsSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo);
 /// <summary>
 /// Never reports a filter as saturated.
 /// </summary>
 /// <param name="bloomFilter">The filter being tested (not consulted by this implementation)</param>
 /// <param name="fieldInfo">The associated field (not consulted by this implementation)</param>
 /// <returns>Always false</returns>
 public override bool IsSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo)
 {
     // Test-only policy: the BloomFilter is never retired, not even once every
     // bit is set and it has stopped being useful.
     const bool neverSaturated = false;
     return neverSaturated;
 }
 /// <summary>
 /// Never reports a filter as saturated.
 /// </summary>
 /// <param name="bloomFilter">The filter being tested (not consulted by this implementation)</param>
 /// <param name="fieldInfo">The associated field (not consulted by this implementation)</param>
 /// <returns>Always false</returns>
 public override bool IsSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo)
 {
     // In tests the BloomFilter is kept unconditionally, including after all
     // of its bits are set and it no longer filters anything out.
     bool shouldRetire = false;
     return shouldRetire;
 }
Example #16
0
 /// <summary>
 /// Determines whether the given filter has reached saturation and should be
 /// retired, i.e. no longer saved.
 /// </summary>
 /// <param name="bloomFilter">The bloom filter being tested</param>
 /// <param name="fieldInfo">The field with which this filter is associated</param>
 /// <returns><c>true</c> if the set has reached saturation and should be retired; otherwise <c>false</c></returns>
 public abstract bool IsSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo);
 /// <summary>
 /// Stores the wrapped terms consumer together with the bloom filter passed alongside it.
 /// </summary>
 /// <param name="termsConsumer">The consumer kept as the delegate</param>
 /// <param name="bloomFilter">The bloom filter kept with the delegate</param>
 public WrappedTermsConsumer(TermsConsumer termsConsumer, FuzzySet bloomFilter)
 {
     _bloomFilter = bloomFilter;
     _delegateTermsConsumer = termsConsumer;
 }