Exemple #1
0
        /// <summary>
        /// Builds the spell checker used by the tests: a <c>PreloadedSpellChecker</c> that is handed
        /// <paramref name="testData"/> together with a bloom filter using the DJB2 and SDBM hashers.
        /// </summary>
        /// <param name="testData">Data passed to the spell checker; its length plus 23 is passed as the
        /// first constructor argument of the bloom filter.</param>
        /// <returns>The newly created spell checker.</returns>
        private ISpellChecker CreateSpellChecker(string[] testData)
        {
            var log = LogManager.GetLogger(typeof(SpellCheckerTests));

            // NOTE(review): the reason for the extra 23 is not visible here - confirm before changing it.
            var filterSize = testData.Length + 23;
            var hashers    = new IHasher[] { new DJB2Hasher(), new SDBMHasher() };
            var filter     = new SimpleBloomFilter(filterSize, hashers, log);

            return new PreloadedSpellChecker(filter, testData, log);
        }
Exemple #2
0
        /// <summary>
        /// Queries the backend for the entries between <paramref name="from"/> and <paramref name="to"/>
        /// and removes from that result every entry that carries the prepare flag or that is rejected
        /// by the supplied bloom filters. The key range is locked for the duration of the call.
        /// </summary>
        /// <param name="from">Lower bound passed to the backend sub-map query and the range lock.</param>
        /// <param name="to">Upper bound passed to the backend sub-map query and the range lock.</param>
        /// <param name="contentKeyBloomFilter">Filter tested against each entry's content key; <c>null</c> means "do not filter on the content key".</param>
        /// <param name="versionKeyBloomFilter">Filter tested against each entry's version key; <c>null</c> means "do not filter on the version key".</param>
        /// <param name="contentBloomFilter">Filter tested against each entry's value hash; <c>null</c> means "do not filter on the content".</param>
        /// <param name="limit">Passed through to the backend sub-map query.</param>
        /// <param name="ascending">Passed through to the backend sub-map query.</param>
        /// <param name="isBloomFilterAnd">
        /// When <c>true</c>, an entry is kept only if every supplied filter contains it;
        /// when <c>false</c>, an entry is kept only if no supplied filter contains it.
        /// </param>
        /// <returns>The map returned by the backend, with the rejected entries removed.</returns>
        public SortedDictionary <Number640, Data> Get(Number640 from, Number640 to, SimpleBloomFilter <Number160> contentKeyBloomFilter,
                                                      SimpleBloomFilter <Number160> versionKeyBloomFilter, SimpleBloomFilter <Number160> contentBloomFilter, int limit,
                                                      bool ascending, bool isBloomFilterAnd)
        {
            var rLock = RangeLock.Lock(from, to);

            try
            {
                var tmp = _backend.SubMap(from, to, limit, ascending);

                // Iterate over a snapshot so entries can be removed from the map itself.
                foreach (var kvp in tmp.ToList())
                {
                    if (kvp.Value.HasPrepareFlag)
                    {
                        tmp.Remove(kvp.Key);
                        continue;
                    }
                    if (isBloomFilterAnd)
                    {
                        // AND mode: drop the entry as soon as one supplied filter does not contain it.
                        // A null filter is skipped, matching how Digest() treats null filters.
                        if (contentKeyBloomFilter != null && !contentKeyBloomFilter.Contains(kvp.Key.ContentKey))
                        {
                            tmp.Remove(kvp.Key);
                            continue;
                        }
                        if (versionKeyBloomFilter != null && !versionKeyBloomFilter.Contains(kvp.Key.VersionKey))
                        {
                            tmp.Remove(kvp.Key);
                            continue;
                        }
                        if (contentBloomFilter != null && !contentBloomFilter.Contains(kvp.Value.Hash))
                        {
                            tmp.Remove(kvp.Key);
                        }
                    }
                    else
                    {
                        // Exclusion mode: drop the entry as soon as one supplied filter contains it.
                        if (contentKeyBloomFilter != null && contentKeyBloomFilter.Contains(kvp.Key.ContentKey))
                        {
                            tmp.Remove(kvp.Key);
                            continue;
                        }
                        if (versionKeyBloomFilter != null && versionKeyBloomFilter.Contains(kvp.Key.VersionKey))
                        {
                            tmp.Remove(kvp.Key);
                            continue;
                        }
                        if (contentBloomFilter != null && contentBloomFilter.Contains(kvp.Value.Hash))
                        {
                            tmp.Remove(kvp.Key);
                        }
                    }
                }
                return tmp;
            }
            finally
            {
                // Always release the range lock, even if the backend or a filter throws.
                rLock.Unlock();
            }
        }
Exemple #3
0
 /// <summary>
 /// Creates search values that look for several content keys at once by means of a bloom filter.
 /// Because a bloom filter is used, the result may contain false positives.
 /// </summary>
 /// <param name="locationKey">The location key.</param>
 /// <param name="domainKey">The domain key.</param>
 /// <param name="keyBloomFilter">Bloom filter of content keys. For Get() and Remove() the remote
 /// peer indicates whether those keys are present on that peer.</param>
 public SearchValues(Number160 locationKey, Number160 domainKey, SimpleBloomFilter<Number160> keyBloomFilter)
 {
     // Values supplied by the caller.
     LocationKey = locationKey;
     DomainKey = domainKey;
     KeyBloomFilter = keyBloomFilter;

     // The remaining search criteria are explicitly left unset by this constructor.
     ContentKey = null;
     ContentBloomFilter = null;
     From = null;
     To = null;
 }
Exemple #4
0
 /// <summary>
 /// Appends a bloom filter to this message. Unless the content types were preset, the
 /// <c>Content.BloomFilter</c> content type is registered as well.
 /// </summary>
 /// <param name="bloomFilter">The bloom filter to append.</param>
 /// <returns>This message, so that calls can be chained.</returns>
 public Message SetBloomFilter(SimpleBloomFilter <Number160> bloomFilter)
 {
     if (!_presetContentTypes)
     {
         SetContentType(Content.BloomFilter);
     }

     // The backing list is created lazily on first use.
     _bloomFilterList = _bloomFilterList ?? new List <SimpleBloomFilter <Number160> >(1);
     _bloomFilterList.Add(bloomFilter);

     return this;
 }
Exemple #5
0
        /// <summary>
        /// Adds two strings to a bloom filter with 2^16 slots and asserts that querying them yields
        /// <c>Existence.MAYBE</c>, while querying two strings that were never added yields
        /// <c>Existence.NO</c>.
        /// </summary>
        public void TestCase()
        {
            var hashes = new Func<string, uint>[] { StringHashFirstHalf, StringHashLastHalf };
            const int size = 1 << 16; // 2^16, computed without a floating-point round trip

            IBloomFilter<string> bloomFilter = new SimpleBloomFilter<string>(size, hashes);
            bloomFilter.Add("haochi");
            bloomFilter.Add("chen");

            // Assert.AreEqual takes (expected, actual); keeping that order makes failure messages correct.
            Assert.AreEqual(Existence.MAYBE, bloomFilter.Query("haochi"));
            Assert.AreEqual(Existence.MAYBE, bloomFilter.Query("chen"));
            Assert.AreEqual(Existence.NO, bloomFilter.Query("orlando"));
            Assert.AreEqual(Existence.NO, bloomFilter.Query("bloom"));
        }
Exemple #6
0
        /// <summary>
        /// Builds a digest of the entries stored under <paramref name="locationAndDomainKey"/>.
        /// An entry is put into the digest when it passes both bloom filters and does not carry the
        /// prepare flag. The lock for the key is held for the duration of the call.
        /// </summary>
        /// <param name="locationAndDomainKey">Key whose full content/version range is scanned.</param>
        /// <param name="keyBloomFilter">Filter tested against each entry's content key; <c>null</c> disables this check.</param>
        /// <param name="contentBloomFilter">Filter tested against each entry's value hash; <c>null</c> disables this check.</param>
        /// <param name="limit">Passed through to the backend sub-map query.</param>
        /// <param name="ascending">Passed through to the backend sub-map query.</param>
        /// <param name="isBloomFilterAnd">
        /// When <c>true</c>, a filter passes an entry it contains; when <c>false</c>, a filter passes
        /// an entry it does not contain.
        /// </param>
        /// <returns>The digest of all accepted entries.</returns>
        public DigestInfo Digest(Number320 locationAndDomainKey, SimpleBloomFilter <Number160> keyBloomFilter, SimpleBloomFilter <Number160> contentBloomFilter, int limit, bool ascending, bool isBloomFilterAnd)
        {
            var digestInfo = new DigestInfo();
            var rLock      = Lock(locationAndDomainKey);

            try
            {
                var lowerBound = new Number640(locationAndDomainKey, Number160.Zero, Number160.Zero);
                var upperBound = new Number640(locationAndDomainKey, Number160.MaxValue, Number160.MaxValue);

                foreach (var entry in _backend.SubMap(lowerBound, upperBound, limit, ascending))
                {
                    // A supplied filter rejects the entry when its answer differs from the requested mode.
                    if (keyBloomFilter != null && keyBloomFilter.Contains(entry.Key.ContentKey) != isBloomFilterAnd)
                    {
                        continue;
                    }
                    if (contentBloomFilter != null && contentBloomFilter.Contains(entry.Value.Hash) != isBloomFilterAnd)
                    {
                        continue;
                    }
                    if (entry.Value.HasPrepareFlag)
                    {
                        continue;
                    }

                    digestInfo.Put(entry.Key, entry.Value.BasedOnSet);
                }

                return digestInfo;
            }
            finally
            {
                rLock.Unlock();
            }
        }
Exemple #7
0
        /// <summary>
        /// Creates a dummy message carrying eight bloom filters: five filters holding one to five of
        /// the sample keys, followed by the first three of those filters once more.
        /// </summary>
        /// <returns>The dummy message with the bloom filters attached.</returns>
        private static Message CreateMessageBloomFilter()
        {
            // Keys added to each filter, listed in insertion order.
            var keysPerFilter = new Number160[][]
            {
                new Number160[] { _sample160_1 },
                new Number160[] { _sample160_2, _sample160_1 },
                new Number160[] { _sample160_1, _sample160_2, _sample160_3 },
                new Number160[] { _sample160_1, _sample160_2, _sample160_3, _sample160_4 },
                new Number160[] { _sample160_1, _sample160_2, _sample160_3, _sample160_4, _sample160_5 }
            };

            var filters = new SimpleBloomFilter <Number160> [keysPerFilter.Length];

            for (var i = 0; i < keysPerFilter.Length; i++)
            {
                var filter = new SimpleBloomFilter <Number160>(2, 5);

                foreach (var key in keysPerFilter[i])
                {
                    filter.Add(key);
                }
                filters[i] = filter;
            }

            var message = Utils2.CreateDummyMessage();

            // All five filters in order ...
            foreach (var filter in filters)
            {
                message.SetBloomFilter(filter);
            }
            // ... then the first three again, for eight filters in total.
            for (var i = 0; i < 3; i++)
            {
                message.SetBloomFilter(filters[i]);
            }

            return message;
        }
Exemple #8
0
 /// <summary>
 /// Adds a bloom filter to the message. If the content types have not been preset, the
 /// <c>Content.BloomFilter</c> content type is set first.
 /// </summary>
 /// <param name="bloomFilter">The bloom filter to add.</param>
 /// <returns>This message instance.</returns>
 public Message SetBloomFilter(SimpleBloomFilter<Number160> bloomFilter)
 {
     if (!_presetContentTypes)
     {
         SetContentType(Content.BloomFilter);
     }

     var filters = _bloomFilterList;
     if (filters == null)
     {
         // First bloom filter for this message: create the backing list now.
         filters = new List<SimpleBloomFilter<Number160>>(1);
         _bloomFilterList = filters;
     }

     filters.Add(bloomFilter);
     return this;
 }
Exemple #9
0
 /// <summary>
 /// Stores the given bloom filter in <c>ContentBloomFilter</c>.
 /// </summary>
 /// <param name="contentBloomFilter">The bloom filter to store.</param>
 /// <returns>This builder, to allow call chaining.</returns>
 public DigestBuilder SetContentBloomFilter(SimpleBloomFilter <Number160> contentBloomFilter)
 {
     ContentBloomFilter = contentBloomFilter;

     return this;
 }
Exemple #10
0
 /// <summary>
 /// Stores the given bloom filter in <c>KeyBloomFilter</c>.
 /// </summary>
 /// <param name="keyBloomFilter">The bloom filter to store.</param>
 /// <returns>This builder, to allow call chaining.</returns>
 public DigestBuilder SetKeyBloomFilter(SimpleBloomFilter <Number160> keyBloomFilter)
 {
     KeyBloomFilter = keyBloomFilter;

     return this;
 }
Exemple #11
0
        /// <summary>
        /// Collects a digest over all entries stored under <paramref name="locationAndDomainKey"/>.
        /// Entries that pass the key filter and the content filter and do not carry the prepare flag
        /// are put into the digest. The lock for the key is released when the method exits.
        /// </summary>
        /// <param name="locationAndDomainKey">Key whose full content/version range is scanned.</param>
        /// <param name="keyBloomFilter">Filter checked against the content key of each entry; ignored when <c>null</c>.</param>
        /// <param name="contentBloomFilter">Filter checked against the value hash of each entry; ignored when <c>null</c>.</param>
        /// <param name="limit">Passed through to the backend sub-map query.</param>
        /// <param name="ascending">Passed through to the backend sub-map query.</param>
        /// <param name="isBloomFilterAnd">
        /// <c>true</c>: an entry passes a filter that contains it;
        /// <c>false</c>: an entry passes a filter that does not contain it.
        /// </param>
        /// <returns>The digest of the accepted entries.</returns>
        public DigestInfo Digest(Number320 locationAndDomainKey, SimpleBloomFilter<Number160> keyBloomFilter, SimpleBloomFilter<Number160> contentBloomFilter, int limit, bool ascending, bool isBloomFilterAnd)
        {
            var digestInfo = new DigestInfo();
            var rLock = Lock(locationAndDomainKey);
            try
            {
                var rangeStart = new Number640(locationAndDomainKey, Number160.Zero, Number160.Zero);
                var rangeEnd = new Number640(locationAndDomainKey, Number160.MaxValue, Number160.MaxValue);
                var candidates = _backend.SubMap(rangeStart, rangeEnd, limit, ascending);

                foreach (var candidate in candidates)
                {
                    // A filter accepts the entry when it is absent or when its answer equals the requested mode.
                    var keyAccepted = keyBloomFilter == null
                                      || keyBloomFilter.Contains(candidate.Key.ContentKey) == isBloomFilterAnd;
                    var contentAccepted = keyAccepted
                                          && (contentBloomFilter == null
                                              || contentBloomFilter.Contains(candidate.Value.Hash) == isBloomFilterAnd);

                    if (contentAccepted && !candidate.Value.HasPrepareFlag)
                    {
                        digestInfo.Put(candidate.Key, candidate.Value.BasedOnSet);
                    }
                }
                return digestInfo;
            }
            finally
            {
                rLock.Unlock();
            }
        }
Exemple #12
0
        /// <summary>
        /// Queries the backend for the entries between <paramref name="from"/> and <paramref name="to"/>
        /// and removes from that result every entry that carries the prepare flag or that is rejected
        /// by the supplied bloom filters. The key range is locked for the duration of the call.
        /// </summary>
        /// <param name="from">Lower bound passed to the backend sub-map query and the range lock.</param>
        /// <param name="to">Upper bound passed to the backend sub-map query and the range lock.</param>
        /// <param name="contentKeyBloomFilter">Filter tested against each entry's content key; <c>null</c> means "do not filter on the content key".</param>
        /// <param name="versionKeyBloomFilter">Filter tested against each entry's version key; <c>null</c> means "do not filter on the version key".</param>
        /// <param name="contentBloomFilter">Filter tested against each entry's value hash; <c>null</c> means "do not filter on the content".</param>
        /// <param name="limit">Passed through to the backend sub-map query.</param>
        /// <param name="ascending">Passed through to the backend sub-map query.</param>
        /// <param name="isBloomFilterAnd">
        /// When <c>true</c>, an entry is kept only if every supplied filter contains it;
        /// when <c>false</c>, an entry is kept only if no supplied filter contains it.
        /// </param>
        /// <returns>The map returned by the backend, with the rejected entries removed.</returns>
        public SortedDictionary<Number640, Data> Get(Number640 from, Number640 to, SimpleBloomFilter<Number160> contentKeyBloomFilter,
            SimpleBloomFilter<Number160> versionKeyBloomFilter, SimpleBloomFilter<Number160> contentBloomFilter, int limit,
            bool ascending, bool isBloomFilterAnd)
        {
            var rLock = RangeLock.Lock(from, to);
            try
            {
                var tmp = _backend.SubMap(from, to, limit, ascending);

                // Iterate over a snapshot so entries can be removed from the map itself.
                foreach (var kvp in tmp.ToList())
                {
                    if (kvp.Value.HasPrepareFlag)
                    {
                        tmp.Remove(kvp.Key);
                        continue;
                    }
                    if (isBloomFilterAnd)
                    {
                        // AND mode: drop the entry as soon as one supplied filter does not contain it.
                        // A null filter is skipped, matching how Digest() treats null filters.
                        if (contentKeyBloomFilter != null && !contentKeyBloomFilter.Contains(kvp.Key.ContentKey))
                        {
                            tmp.Remove(kvp.Key);
                            continue;
                        }
                        if (versionKeyBloomFilter != null && !versionKeyBloomFilter.Contains(kvp.Key.VersionKey))
                        {
                            tmp.Remove(kvp.Key);
                            continue;
                        }
                        if (contentBloomFilter != null && !contentBloomFilter.Contains(kvp.Value.Hash))
                        {
                            tmp.Remove(kvp.Key);
                        }
                    }
                    else
                    {
                        // Exclusion mode: drop the entry as soon as one supplied filter contains it.
                        if (contentKeyBloomFilter != null && contentKeyBloomFilter.Contains(kvp.Key.ContentKey))
                        {
                            tmp.Remove(kvp.Key);
                            continue;
                        }
                        if (versionKeyBloomFilter != null && versionKeyBloomFilter.Contains(kvp.Key.VersionKey))
                        {
                            tmp.Remove(kvp.Key);
                            continue;
                        }
                        if (contentBloomFilter != null && contentBloomFilter.Contains(kvp.Value.Hash))
                        {
                            tmp.Remove(kvp.Key);
                        }
                    }
                }
                return tmp;
            }
            finally
            {
                // Always release the range lock, even if the backend or a filter throws.
                rLock.Unlock();
            }
        }
Exemple #13
0
 /// <summary>
 /// Stores the given bloom filter in <c>VersionKeyBloomFilter</c>.
 /// </summary>
 /// <param name="versionKeyBloomFilter">The bloom filter to store.</param>
 /// <returns>This builder, to allow call chaining.</returns>
 public GetBuilder SetVersionKeyBloomFilter(SimpleBloomFilter <Number160> versionKeyBloomFilter)
 {
     VersionKeyBloomFilter = versionKeyBloomFilter;

     return this;
 }
        /// <summary>
        /// Similar to <seealso cref="BooleanQueryWithExclusionsFastAlternativeVersion"/>, but uses a bloom filter instead of a HashSet.
        /// Loads the bloom filter with the exclusions, then loops through the base query until we have pageSize + skip items that aren't in the bloom filter.
        /// Expensive when there are LOTS of exclusions, but cheaper when the base query is large because we don't process all of it (we stop when we have enough).
        /// Because a bloom filter can report false positives, an item that is not actually excluded may still be dropped from the results.
        /// </summary>
        /// <param name="type">Query type; selects the tag lookup (and, in DEBUG builds, the field selector used for logging).</param>
        /// <param name="tag">Tag whose entry in the lookup forms the base query.</param>
        /// <param name="excludedTags">Tags whose entries are loaded into the bloom filter as exclusions.</param>
        /// <param name="pageSize">Maximum number of questions to return.</param>
        /// <param name="skip">Number of non-excluded base-query items to skip before taking results.</param>
        /// <returns>The questions for the selected base-query items, at most <paramref name="pageSize"/> of them.</returns>
        internal List <Question> BooleanQueryWithExclusionsBloomFilterVersion(QueryType type, string tag, IList <string> excludedTags, int pageSize, int skip)
        {
            var gcInfo = new GCCollectionInfo();
            var timer  = Stopwatch.StartNew();

            TagByQueryLookup        queryInfo     = GetTagByQueryLookup(type);
            // NOTE: fieldSelector is only referenced inside the DEBUG block at the end of this method.
            Func <Question, string> fieldSelector = GetFieldSelector(type);

            ThrowIfInvalidParameters(tag, pageSize, queryInfo);

            //int bloomFilterSize = 40 * 1000 * 1000; // millions, 40 million produces several false positives
            int bloomFilterSize = 100 * 1000 * 1000; // millions

            // DEBUG builds additionally time and log the creation of the bloom filter.
#if DEBUG
            var bloomFilterCreationTimer = Stopwatch.StartNew();
            var bloomFilter = new SimpleBloomFilter(bloomFilterSize);
            bloomFilterCreationTimer.Stop();
            // NOTE: bloomFilterSize / 8 is an integer division.
            Logger.Log("Took {0} ({1:N2} ms) to create the bloom filter with {2:N0} bits ({3:N2} bytes)",
                       bloomFilterCreationTimer.Elapsed, bloomFilterCreationTimer.Elapsed.TotalMilliseconds, bloomFilterSize, bloomFilterSize / 8);
#else
            var bloomFilter = new SimpleBloomFilter(bloomFilterSize);
#endif

#if DEBUG
            //var tests = new[] { 1066589, 2793150, 364114, 910374 }; // These are the Question Ids, NOT the array index ([]) values!!
            var tests     = new[] { 192257, 616585, 53029, 158368 }; // These ARE the array index ([]) values
            // Exact set kept alongside the bloom filter so that false positives can be detected and logged.
            var debugging = cache.Value.GetCachedHashSet();
#endif
            // Load every item listed under an excluded tag into the bloom filter.
            foreach (var excludedTag in excludedTags)
            {
                foreach (var qu in queryInfo[excludedTag])
                {
                    bloomFilter.Add(qu);
#if DEBUG
                    debugging.Add(qu);

                    if (tests.Contains(qu))
                    {
                        // If it's false, it's DEFINITELY false
                        // If it's true, it could really be false (false positive)
                        var possiblyExists = bloomFilter.PossiblyExists(qu, debugInfo: true);
                        Logger.Log("Bloom Filter.PossiblyExists - {0,8} = {1} ****", qu, possiblyExists);
                        Logger.Log("  DebuggingHashSet.Contains - {0,8} = {1} ****", qu, debugging.Contains(qu));
                    }
#endif
                }
            }
            var baseQuery = queryInfo[tag];
            // Keep only the base-query items the bloom filter reports as definitely not excluded.
            // The DEBUG variant of the predicate also logs every false positive it encounters.
            // Both variants feed the same Skip/Take/Select/ToList chain that follows the #endif.
#if DEBUG
            var result =
                baseQuery.Where(b =>
            {
                var possiblyExists = bloomFilter.PossiblyExists(b);
                if (possiblyExists == false)
                {
                    return(true);        // we can use it
                }
                if (debugging.Contains(b) == false)
                {
                    var qu = questions[b];
                    Logger.Log("FALSE +VE: {0,8}, PossiblyExists = {1}, debugging.Contains() = {2}, Id = {3,8}, Tags = {4}",
                               b, possiblyExists, debugging.Contains(b), qu.Id, string.Join(",", qu.Tags));
                }
                return(false);        // we can't use it
            })
#else
            var result = baseQuery.Where(b => bloomFilter.PossiblyExists(b) == false)
#endif
                .Skip(skip)
                .Take(pageSize)
                .Select(i => questions[i])
                .ToList();
            // The query has been materialised by ToList(), so the timer covers the whole lookup.
            timer.Stop();
            gcInfo.UpdateCollectionInfo();

            Logger.Log("Base Query: {0}, there are {1:N0} Excluded Tags", tag, excludedTags.Count);
            Results.AddData(timer.Elapsed.TotalMilliseconds.ToString("#.##"));
            using (Utils.SetConsoleColour(Utils.GetColorForTimespan(timer.Elapsed)))
            {
                Logger.Log("Boolean Query {0} against tag \"{1}\", pageSize = {2}, skip = {3}, took {4} ({5:N2} ms) - BLOOM",
                           type, tag, pageSize, skip, timer.Elapsed, timer.Elapsed.TotalMilliseconds);
            }
            //Log("Got {0} results, Bloom Filter contains {1:N0} items (some could be dupes), Truthiness {2:N2}",
            //    result.Count(), bloomFilter.NumberOfItems, bloomFilter.Truthiness);
            Logger.Log("Got {0} results, Bloom Filter contains {1:N0} items (some could be dupes)", result.Count(), bloomFilter.NumberOfItems);
            Logger.Log(gcInfo.ToString());
            //var formattedResults = result.Select(r => string.Format("Id: {0,8}, {1}: {2,4}, Tags: {3}, ", r.Id, type, fieldSelector(r), string.Join(",", r.Tags)));
            //Log("  {0}", string.Join("\n  ", formattedResults));
            Logger.Log("");

#if DEBUG
            // Re-check the hand-picked test indexes against both the bloom filter and the exact set.
            foreach (var item in tests)
            {
                var possiblyExists = bloomFilter.PossiblyExists(item, debugInfo: true);
                Logger.Log("Bloom Filter.PossiblyExists - {0,8} = {1}", item, possiblyExists);
                Logger.Log("  DebuggingHashSet.Contains - {0,8} = {1}", item, debugging.Contains(item));
                Logger.Log("");
            }
            // When the values in "tests" represent Question Ids
            //var testResults = tests.Select(t => questions.First(qu => qu.Id == t))
            //                       .Select(r => string.Format("Id: {0,8}, {1}: {2,4}, Tags: {3}, ", r.Id, type, fieldSelector(r), string.Join(",", r.Tags)));
            // When the values in "tests" represent array indexes, i.e. questions[x]
            var testResults = tests.Select(t => questions[t])
                              .Select(r => string.Format("Id: {0,8}, {1}: {2,4}, Tags: {3}, ", r.Id, type, fieldSelector(r), string.Join(",", r.Tags)));
            Logger.Log("  {0}", string.Join("\n  ", testResults));
#endif

            return(result);
        }