// Adds "foo" and "bar" to the shared filter and checks that the never-added
// value "foobar" is reported as not contained.
public void BloomFilter_DoesNotContain_ShouldDetectUncontained()
{
    foreach (var inserted in new[] { "foo", "bar" })
    {
        filter.Add(inserted);
    }

    var reportedAbsent = filter.DoesNotContain("foobar");

    Assert.IsTrue(reportedAbsent);
}
// Fills one filter, builds a second filter whose block size is the first one's
// block size divided by a fold factor, adds the second into the first and checks
// that every item of both data sets is reported by the combined filter.
public void BloomFilterAddDifferentSizesTest()
{
    var addSize = 10000;
    var errorRate = 0.001F;
    var firstBatch = DataGenerator.Generate().Take(addSize).ToArray();
    var secondBatch = DataGenerator.Generate().Skip(addSize).Take(addSize).ToArray();
    var size = firstBatch.Length;
    var configuration = new DefaultBloomFilterConfiguration();

    var bloomFilter = new BloomFilter<TestEntity, long>(configuration);
    bloomFilter.Initialize(4 * size, errorRate);
    foreach (var entity in firstBatch)
    {
        bloomFilter.Add(entity);
    }

    // The second filter has to be a foldable version of the first one, so its
    // block size is derived from the smallest fold factor greater than one.
    var bloomFilter2 = new BloomFilter<TestEntity, long>(configuration);
    var data = bloomFilter.Extract();
    var usableFactors = configuration.FoldingStrategy
        .GetAllFoldFactors(data.BlockSize)
        .Where(f => f > 1);
    var foldFactor = usableFactors.OrderBy(f => f).First();
    bloomFilter2.Initialize(addSize, data.BlockSize / foldFactor, data.HashFunctionCount);
    foreach (var entity in secondBatch)
    {
        bloomFilter2.Add(entity);
    }

    bloomFilter.Add(bloomFilter2);

    var everything = firstBatch.Union(secondBatch);
    var contained = everything.Count(item => bloomFilter.Contains(item));
    Assert.AreEqual(2 * addSize, contained, "Not all items found in added Bloom filters");
}
// Adds two different ASCII byte arrays to an 8-slot filter, compares the
// resulting slot pattern with a fixed expectation and checks both are contained.
public void AddMultipleDifferentItemsToBloomTest()
{
    var config = new BloomConfiguration
    {
        BloomFilterSize = 8 * (long)BloomFilterSizeMultipliers.Bits,
    };
    var bloomFilter = new BloomFilter(config);

    string firstText = $"TestString";
    string secondText = $"TestString2";
    byte[] firstItem = Encoding.ASCII.GetBytes(firstText);
    byte[] secondItem = Encoding.ASCII.GetBytes(secondText);
    bool[] expected = { true, false, true, true, false, true, false, true };

    bloomFilter.Add(firstItem);
    bloomFilter.Add(secondItem);

    bool[] actual = bloomFilter.Filter;
    Assert.True(CompareBoolArrays(expected, actual));
    Assert.True(bloomFilter.Contains(firstItem));
    Assert.True(bloomFilter.Contains(secondItem));
}
// Adds the ten sample values s0..s9 to a 32-slot filter, checks that each one
// is reported as present and that one value that was never added is not.
public void TestAddTen()
{
    var filter = new BloomFilter(32);

    filter.Add(s0);
    filter.Add(s1);
    filter.Add(s2);
    filter.Add(s3);
    filter.Add(s4);
    filter.Add(s5);
    filter.Add(s6);
    filter.Add(s7);
    filter.Add(s8);
    filter.Add(s9);

    // Assert directly on the result instead of comparing it with true/false first.
    Assert.True(filter.IsValue(s0));
    Assert.True(filter.IsValue(s1));
    Assert.True(filter.IsValue(s2));
    Assert.True(filter.IsValue(s3));
    Assert.True(filter.IsValue(s4));
    Assert.True(filter.IsValue(s5));
    Assert.True(filter.IsValue(s6));
    Assert.True(filter.IsValue(s7));
    Assert.True(filter.IsValue(s8));
    Assert.True(filter.IsValue(s9));

    Assert.False(filter.IsValue("awdfbswer0"));
}
// Checks the size of a 32-slot filter, adds `zero` and `one`, verifies that
// exactly slots 5, 13, 27 and 29 are set, and then asserts that all ten sample
// values are reported as present.
// NOTE(review): only `zero` and `one` are added; the assertions for `two`..`nine`
// can only hold if those values map onto the same slots - confirm this is intended.
public static void TestsIsValue()
{
    var testFilter = new BloomFilter(32);
    Assert.AreEqual(32, testFilter.bitArray.Length);
    Assert.AreEqual(32, testFilter.filter_len);

    testFilter.Add(zero);
    testFilter.Add(one);

    for (int slot = 0; slot < testFilter.filter_len; slot++)
    {
        bool shouldBeSet = slot == 5 || slot == 13 || slot == 27 || slot == 29;
        Assert.IsTrue(testFilter.bitArray[slot] == shouldBeSet);
    }

    Assert.IsTrue(testFilter.IsValue(zero));
    Assert.IsTrue(testFilter.IsValue(one));
    Assert.IsTrue(testFilter.IsValue(two));
    Assert.IsTrue(testFilter.IsValue(three));
    Assert.IsTrue(testFilter.IsValue(four));
    Assert.IsTrue(testFilter.IsValue(five));
    Assert.IsTrue(testFilter.IsValue(six));
    Assert.IsTrue(testFilter.IsValue(seven));
    Assert.IsTrue(testFilter.IsValue(eigth));
    Assert.IsTrue(testFilter.IsValue(nine));
}
// Builds a transaction with one signer and one empty witness and checks that
// filter.Test(tx) is false on an empty filter, true once the witness script
// hash has been added, and still true after the transaction hash is added.
public void TestTest()
{
    int m = 7, n = 10;
    uint nTweak = 123456;
    var filter = new BloomFilter(m, n, nTweak);

    var emptyWitness = new Witness
    {
        InvocationScript = new byte[0],
        VerificationScript = new byte[0]
    };
    var signer = new Signer() { Account = (new byte[0]).ToScriptHash() };
    Transaction tx = new Transaction
    {
        Script = TestUtils.GetByteArray(32, 0x42),
        SystemFee = 4200000000,
        Signers = new Signer[] { signer },
        Attributes = Array.Empty<TransactionAttribute>(),
        Witnesses = new[] { emptyWitness }
    };

    filter.Test(tx).Should().BeFalse();

    filter.Add(tx.Witnesses[0].ScriptHash.ToArray());
    filter.Test(tx).Should().BeTrue();

    filter.Add(tx.Hash.ToArray());
    filter.Test(tx).Should().BeTrue();
}
// Adds the ten rotations of the digit string "0123456789" to a 32-slot filter
// and checks that each of them is reported as present afterwards.
public void TestBit_4()
{
    var rotations = new[]
    {
        "0123456789",
        "1234567890",
        "2345678901",
        "3456789012",
        "4567890123",
        "5678901234",
        "6789012345",
        "7890123456",
        "8901234567",
        "9012345678"
    };
    BloomFilter filter = new BloomFilter(32);

    foreach (var rotation in rotations)
    {
        filter.Add(rotation);
    }

    foreach (var rotation in rotations)
    {
        Assert.AreEqual(true, filter.IsValue(rotation));
    }
}
// Builds a transaction with a zero sender and one empty witness and checks that
// filter.Test(tx) is false on an empty filter, true once the witness script
// hash has been added, and still true after the transaction hash is added.
public void TestTest()
{
    int m = 7, n = 10;
    uint nTweak = 123456;
    var filter = new BloomFilter(m, n, nTweak);

    var emptyWitness = new Witness
    {
        InvocationScript = new byte[0],
        VerificationScript = new byte[0]
    };
    Transaction tx = new Transaction
    {
        Script = TestUtils.GetByteArray(32, 0x42),
        Sender = UInt160.Zero,
        SystemFee = 4200000000,
        Attributes = new TransactionAttribute[0],
        Cosigners = new Cosigner[0],
        Witnesses = new[] { emptyWitness }
    };

    filter.Test(tx).Should().BeFalse();

    filter.Add(tx.Witnesses[0].ScriptHash.ToArray());
    filter.Test(tx).Should().BeTrue();

    filter.Add(tx.Hash.ToArray());
    filter.Test(tx).Should().BeTrue();
}
// Fills two equally initialized filters with disjoint slices of generated data,
// adds the second filter into the first and checks that the key of every item
// from both slices is reported by the combined filter.
public void BloomFilterAddTest()
{
    var addSize = 10000;
    var testData = DataGenerator.Generate().Take(addSize).ToArray();
    var testData2 = DataGenerator.Generate().Skip(addSize).Take(addSize).ToArray();
    var errorRate = 0.001F;
    var size = testData.Length;
    var configuration = new DefaultBloomFilterConfiguration();

    var bloomFilter = new BloomFilter<TestEntity, long>(configuration);
    bloomFilter.Initialize(2 * size, errorRate);
    foreach (var itm in testData)
    {
        bloomFilter.Add(itm);
    }

    var bloomFilter2 = new BloomFilter<TestEntity, long>(configuration);
    bloomFilter2.Initialize(2 * size, errorRate);
    foreach (var itm in testData2)
    {
        bloomFilter2.Add(itm);
    }

    bloomFilter.Add(bloomFilter2);

    var contained = testData.Union(testData2).Count(item => bloomFilter.ContainsKey(item.Id));

    // Expected value goes first so a failure message reports the numbers the right way round.
    Assert.AreEqual(2 * addSize, contained, "Not all items found in added Bloom filters");
}
// Creates the subject filter and pre-populates it with four fixed words.
public WellUsedBloomFilterTest()
{
    Subject = new BloomFilter();

    foreach (var word in new[] { "foo", "bar", "baz", "qux" })
    {
        Subject.Add(word);
    }
}
// Basic crawler setup: assigns the request settings, seeds the URL queue with
// the list page(s) to fetch (skipping URLs already in the Bloom filter) and
// finally runs the simulated login.
public void SettingInit()
{
    // Proxy IP mode: start with an empty proxy list.
    Settings.IPProxyList = new List<IPProxy>();
    // Do not add fetched addresses to the database.
    Settings.IgnoreSucceedUrlToDB = true;
    Settings.MaxReTryTimes = 20;
    Settings.ThreadCount = 1;
    Settings.Accept = "application/json, text/plain, */*";
    Settings.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36";
    Settings.HeadSetDic = new Dictionary<string, string>();
    Settings.HeadSetDic.Add("Accept-Encoding", "gzip, deflate, br");

    Console.WriteLine("正在获取已存在的url数据");
    // Bloom-filter based URL initialisation prevents queuing the same URL twice.
    Console.WriteLine("正在初始化选择url队列");

    var allCount = 697591;
    // NOTE(review): allPage is computed but never used; the loop below only
    // covers page 1 - confirm whether the bound was meant to be allPage.
    var allPage = (allCount / takeCount) + 1;

    const int lastPage = 1;
    for (var page = 1; page <= lastPage; page++)
    {
        // NOTE(review): takeCount is passed as a second format argument but the
        // template has no {1} placeholder (limit is fixed at 10) - confirm.
        var url = string.Format("https://jiamitu.mi.com/pet/data/list?page={0}&limit=10&order=generated_id&orderBy=desc&followUp=https:%2F%2Fjiamitu.mi.com%2Fhome%23", page, takeCount);
        if (filter.Contains(url))
        {
            continue;
        }

        // New list page: remember it and queue it.
        filter.Add(url);
        UrlQueue.Instance.EnQueue(new UrlInfo(url) { Depth = 1 });
    }

    // Do not add anything else.
    Settings.RegularFilterExpressions.Add("XXX");

    if (!SimulateLogin())
    {
        Console.WriteLine("初始化失败");
    }
}
// Registers the sample value "1234" (once) in `test`, appends a proxy entry for
// it to `iplist` and appends the current timestamp to the log file.
public void Run()
{
    if (!test.Contains("1234"))
    {
        test.Add("1234");
    }

    iplist.Add(new proxy(Guid.NewGuid().ToString(), "1234", 2));

    // Write synchronously: the previous AppendAllTextAsync call was never awaited,
    // so the write could still be pending (or have failed unobserved) when Run
    // returned, and overlapping calls could collide on the same file.
    // I/O failures now surface to the caller instead of being lost with the task.
    System.IO.File.AppendAllText("d:\\freeIP.txt", $"{DateTime.Now.ToString()}{Environment.NewLine}");
}
// Basic crawler setup: assigns the request settings, queues one POST request per
// doctor record that is not yet marked as schedule-updated, records each doctor
// guid in the guid Bloom filter and finally runs the simulated login.
public void SettingInit()
{
    Settings.IgnoreSucceedUrlToDB = true;
    Settings.ThreadCount = 2;
    Settings.DBSaveCountLimit = 1;
    Settings.MaxReTryTimes = 10;
    Settings.IgnoreFailUrl = true;
    Settings.AccessToken = reqtoken;
    Settings.ContentType = "application/x-www-form-urlencoded";
    this.Settings.UserAgent = "haodf_app/1.0";
    Settings.PostEncoding = Encoding.UTF8;

    var allDoctorList = dataop.FindAllByQuery(DataTableNameDoctor, Query.NE("isScheduleUpdate", "1")).SetFields("guid").ToList();
    foreach (var doctor in allDoctorList)
    {
        var doctorGuid = doctor.Text("guid");

        // The parameter after userId is "&currentUserId"; the source contained the
        // mis-encoded sequence "¤tUserId" (an HTML-decoded "&curren" entity),
        // which corrupted the form body sent to the server.
        var postData = string.Format("app=p&os=android&n=2&m=GT-I9300&v=5.2.5&di={0}&s=hd&doctorId={1}&deviceToken={0}&p=1&userId=0&currentUserId=0&sv=4.4.2&api=1.2", reqtoken, doctorGuid);
        UrlQueue.Instance.EnQueue(new UrlInfo(materialUrl) { Depth = 1, PostData = postData });

        if (!guidFilter.Contains(doctorGuid))
        {
            guidFilter.Add(doctorGuid);
        }
    }

    Console.WriteLine("正在加载账号数据");

    // No address crawling: register a filter expression only.
    Settings.RegularFilterExpressions.Add(@"luckymnXXXXXXXXXXXXXXXXXX");

    if (SimulateLogin())
    {
        // Console.WriteLine("zluckymn模拟登陆成功");
    }
    else
    {
        Console.WriteLine("模拟登陆失败");
    }
}
// Basic crawler setup: assigns the request settings, loads the region and type
// tables, queues the pre-sale certificate list request (unless its URL is
// already in the Bloom filter) and finally runs the simulated login.
public void SettingInit()
{
    // Proxy IP mode: start with an empty proxy list.
    Settings.IPProxyList = new List<IPProxy>();
    // Do not add fetched addresses to the database.
    Settings.IgnoreSucceedUrlToDB = true;
    Settings.MaxReTryTimes = 20;
    Settings.ThreadCount = 10;
    Settings.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8";
    Settings.ContentType = "application/x-www-form-urlencoded";
    Settings.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36";
    Settings.HeadSetDic = new Dictionary<string, string>();
    Settings.HeadSetDic.Add("Accept-Encoding", "gzip, deflate");

    Console.WriteLine("正在获取已存在的url数据");
    // Bloom-filter based URL initialisation prevents queuing the same URL twice.
    Console.WriteLine("正在初始化选择url队列");

    Settings.SimulateCookies = "UM_distinctid=163c909dac3a0b-0a58055a844938-737356c-13c680-163c909dac46fa; JSESSIONID=0000uekkAKP_Q0hcDP33LrLYCY5:-1; CNZZDATA4237675=cnzz_eid%3D1374545162-1528085523-null%26ntime%3D1528091007";

    var regionList = dataop.FindAll(DataTableNameRegion).ToList();
    var typeList = dataop.FindAll(DataTableNameType).ToList();

    // NOTE(review): the loops over regionList/typeList are disabled, so a single
    // request is built from two empty documents - confirm this is intended.
    var region = new BsonDocument();
    var type = new BsonDocument();

    var url = string.Format("http://www.jyfcc.com.cn/PreSellCert_List.do?region={0}&type={1}", region.Text("name"), type.Text("name"));
    var postData = string.Format("region={0}&hsusage={1}&project=&developer=&button=%B2%E9%D1%AF", region.Text("id"), type.Text("id"));
    bool alreadyQueued = filter.Contains(url);
    if (!alreadyQueued)
    {
        // New list request: remember it and queue it.
        filter.Add(url);
        UrlQueue.Instance.EnQueue(new UrlInfo(url) { Depth = 1, PostData = postData });
    }

    // Do not add anything else.
    Settings.RegularFilterExpressions.Add("XXX");

    if (!SimulateLogin())
    {
        Console.WriteLine("初始化失败");
    }
}
// Adds three byte sequences to a filter created with (100, 0.1) and checks the
// reported fill ratio.
public void TestBloomFillRatio()
{
    var f = new BloomFilter(100, 0.1);
    f.Add(A_BYTES);
    f.Add(B_BYTES);
    f.Add(C_BYTES);

    var ratio = f.FillRatio();

    // Compare with a tolerance: exact equality on a computed floating-point value
    // can fail on representation error alone.
    Assert.AreEqual(0.025, ratio, 1e-9);
}
// Adds five fixed integers to a filter of size 10 and checks that the supplied
// value is not reported as contained.
public void is_not_in_filter_int(int value)
{
    var filter = new BloomFilter<int>(10, HashFunctions.HashInt);

    foreach (int member in new[] { 1, 100, 2, 200, 3 })
    {
        filter.Add(member);
    }

    var found = filter.Contains(value);
    Assert.False(found);
}
// Adds five fixed fruit names to a filter of size 10 and checks that the
// supplied value is not reported as contained.
public void is_not_in_filter_string(string value)
{
    var filter = new BloomFilter<string>(10, HashFunctions.HashString);

    foreach (string fruit in new[] { "bananas", "watermelons", "cantaloupes", "grapes", "grapefruits" })
    {
        filter.Add(fruit);
    }

    var found = filter.Contains(value);
    Assert.False(found);
}
// Adds three strings to a (20, 3) filter, checks that an unrelated string is
// not contained and that each added string is.
public void ContainsTest()
{
    var added = new[] { "testing", "nottesting", "testingagain" };
    var bf = new BloomFilter<string>(20, 3);

    foreach (var text in added)
    {
        bf.Add(text);
    }

    Assert.False(bf.Contains("badstring"));
    foreach (var text in added)
    {
        Assert.True(bf.Contains(text));
    }
}
// Adds the first sample line and a null value to a 32-slot filter, then checks
// that the line is present, that both of its hash slots are set, that null is
// reported as present and that slot 0 is set.
public void TestAdd()
{
    var filter = new BloomFilter(32);

    // Slots are computed before anything is added to the filter.
    var firstSlot = filter.Hash1(lines[0]);
    var secondSlot = filter.Hash2(lines[0]);

    filter.Add(lines[0]);
    filter.Add(null);

    var linePresent = filter.IsValue(lines[0]);
    Assert.IsTrue(linePresent);
    Assert.IsTrue(filter.bitArray[firstSlot]);
    Assert.IsTrue(filter.bitArray[secondSlot]);

    var nullPresent = filter.IsValue(null);
    Assert.IsTrue(nullPresent);
    Assert.IsTrue(filter.bitArray[0]);
}
// Adds "foo" and "bar", checks both are contained and "baz" is not, then adds
// "baz" and checks it is contained as well.
public void BasicTest()
{
    var filter = new BloomFilter<string>(10000);

    foreach (var word in new[] { "foo", "bar" })
    {
        filter.Add(word);
    }

    foreach (var word in new[] { "foo", "bar" })
    {
        Assert.IsTrue(filter.Contains(word));
    }

    const string lateWord = "baz";
    Assert.IsFalse(filter.Contains(lateWord));

    filter.Add(lateWord);
    Assert.IsTrue(filter.Contains(lateWord));
}
// Basic crawler setup: assigns the request settings, loads the projects whose
// isUpdate field is not 1, queues the house-list page of each project (skipping
// URLs already in the Bloom filter) and finally runs the simulated login.
public void SettingInit()
{
    // Proxy IP mode: start with an empty proxy list.
    Settings.IPProxyList = new List<IPProxy>();
    // Do not add fetched addresses to the database.
    Settings.IgnoreSucceedUrlToDB = true;
    Settings.MaxReTryTimes = 20;
    Settings.ThreadCount = 10;
    Settings.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8";
    Settings.ContentType = "application/x-www-form-urlencoded";
    Settings.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36";
    Settings.HeadSetDic = new Dictionary<string, string>();
    Settings.HeadSetDic.Add("Accept-Encoding", "gzip, deflate");
    Settings.SimulateCookies = "UM_distinctid=163c909dac3a0b-0a58055a844938-737356c-13c680-163c909dac46fa; JSESSIONID=0000uekkAKP_Q0hcDP33LrLYCY5:-1; CNZZDATA4237675=cnzz_eid%3D1374545162-1528085523-null%26ntime%3D1528091007";
    Settings.Referer = "http://www.jyfcc.com.cn/PreSellCert_Detail.do?pscid=101074";

    Console.WriteLine("正在获取已存在的url数据");
    // Bloom-filter based URL initialisation prevents queuing the same URL twice.
    Console.WriteLine("正在初始化选择url队列");

    projectList = dataop.FindAllByQuery(DataTableName, Query.NE("isUpdate", 1)).ToList();
    foreach (var proj in projectList)
    {
        var mhUrl = string.Format("http://www.jyfcc.com.cn/ifrm_House_List.do?pscid={0}", proj.Text("projId"));
        if (filter.Contains(mhUrl))
        {
            continue;
        }

        // New project page: remember it and queue it, keyed by the project id.
        filter.Add(mhUrl);
        UrlQueue.Instance.EnQueue(new UrlInfo(mhUrl) { Depth = 1, UniqueKey = proj.Text("projId") });
    }

    // Do not add anything else.
    Settings.RegularFilterExpressions.Add("XXX");

    if (!SimulateLogin())
    {
        Console.WriteLine("初始化失败");
    }
}
// For a filter created with n = 1234567 and p = 1e-6: checks that no integer in
// 0..n is reported before insertion and that every one is reported afterwards.
public void always_return_true_if_an_item_was_added_for_large_n()
{
    int n = 1234567;
    double p = 1.0e-6;
    BloomFilter filter = new BloomFilter(n, p);
    int lastItem = n;

    // Nothing has been added yet.
    for (int item = 0; item <= lastItem; item++)
    {
        Assert.IsFalse(filter.MayExist(item));
    }

    // Insert every item.
    for (int item = 0; item <= lastItem; item++)
    {
        filter.Add(item);
    }

    // Every inserted item must now be reported.
    for (int item = 0; item <= lastItem; item++)
    {
        Assert.IsTrue(filter.MayExist(item));
    }
}
// Runs 5000 rounds; in each round up to 50 distinct random integers are added to
// a fresh filter and every added value is then checked to be contained.
public void Random()
{
    Random random = new Random();
    for (int i = 0; i < 5000; i++)
    {
        BloomFilter<int> bf = new BloomFilter<int>();
        List<int> values = new List<int>();

        // One set per round. It used to be re-created on every draw, which made
        // the duplicate check below always pass.
        DataStructures.HashSet<int> hashSet = new DataStructures.HashSet<int>();

        for (int j = 0; j < 50; j++)
        {
            int rand = random.Next();
            if (!hashSet.Contains(rand))
            {
                values.Add(rand);
                hashSet.Add(rand);
                bf.Add(rand);
            }
        }

        foreach (int check in values)
        {
            Assert.True(bf.Contains(check));
        }
    }
}
// Adds a single integer to a default-constructed filter and checks it is contained.
public void Add1()
{
    const int item = 1;
    var bf = new BloomFilter<int>();

    bf.Add(item);

    var found = bf.Contains(item);
    Assert.True(found);
}
// Adds 10000 generated items to a filter, removes the first half and checks how
// many of the original items are still reported against a fixed expected count.
public void BloomFilterRemoveItemTest()
{
    var addSize = 10000;
    var testData = DataGenerator.Generate().Take(addSize).ToArray();
    var errorRate = 0.001F;
    var size = testData.Length;
    var configuration = new DefaultBloomFilterConfiguration();

    var bloomFilter = new BloomFilter<TestEntity, long>(configuration);
    bloomFilter.Initialize(2 * size, errorRate);
    foreach (var itm in testData)
    {
        bloomFilter.Add(itm);
    }

    var contained = testData.Count(item => bloomFilter.Contains(item));

    foreach (var item in testData.Take(addSize / 2))
    {
        bloomFilter.Remove(item);
    }

    var containedAfterRemove = testData.Count(item => bloomFilter.Contains(item));

    // Bloom filter does not behave well under removal.
    // Expected value goes first so a failure message reports the numbers the right way round.
    Assert.AreEqual(4137, containedAfterRemove, "Wrong item count after removal.");
}
// Fills a (1000, .01) filter with 1000 random GUID strings, probes it with the
// 10000 numeric strings "1001".."11000" and checks that the number of false
// positives lies in the range 70..120. The check is probabilistic, so it is
// retried up to three times and only the last failure is reported.
public void Probability_of_false_positive_is_accurate_when_filter_is_at_capacity()
{
    var attempts = 3;

    while (attempts-- > 0)
    {
        var filter = new BloomFilter(1000, .01);
        var stringsInFilter = Enumerable.Range(1, 1000).Select(_ => Guid.NewGuid().ToString());

        foreach (var s in stringsInFilter)
        {
            filter.Add(s);
        }

        var falsePositives = Enumerable.Range(1001, 10000)
                                       .Select(i => i.ToString())
                                       .Where(s => filter.MayContain(s))
                                       .ToList();

        Console.WriteLine(falsePositives.Count + " false positives");
        Console.WriteLine(falsePositives.ToLogString());

        try
        {
            falsePositives.Count.Should().BeInRange(70, 120);
            return;
        }
        catch (Exception) when (attempts > 0)
        {
            // Outlier run with attempts left: swallow the failure and try again.
            // The filter used to be `attempts == 0`, which left every earlier
            // failure uncaught, so the test failed on the first outlier and the
            // retry never happened. On the last attempt the filter is false and
            // the assertion failure propagates.
        }
    }
}
// Builds a filter sized for `numberOfItems` at `targetFalsePositiveRate`, inserts
// the first `numberOfItems` generated items, probes it with 10000 further items
// that were not inserted and asserts that the observed false-positive count is
// within 20% (relative error) of the expected count.
private static void VerifyFalsePositiveRate(int numberOfItems, double targetFalsePositiveRate, int randomSeed)
{
    const int NumberOfIntentionallyAbsentItems = 10000;
    const double AllowedErrorMagnitude = 0.2;

    var parameters = BloomFilter.Parameters.CreateOptimalWithFalsePositiveProbability(numberOfItems, targetFalsePositiveRate);
    var filter = new BloomFilter(parameters);

    var rng = new Random(randomSeed);
    Item[] allItems = GenerateUniqueRandomItems(rng, numberOfItems + NumberOfIntentionallyAbsentItems);

    // The first numberOfItems entries are inserted ...
    for (int present = 0; present < numberOfItems; present++)
    {
        filter.Add(allItems[present].GetHash());
    }

    // ... and the remaining entries are probed as known-absent items.
    int numberOfFalsePositives = 0;
    for (int absent = numberOfItems; absent < allItems.Length; absent++)
    {
        if (!filter.PossiblyContains(allItems[absent].GetHash()))
        {
            continue;
        }

        numberOfFalsePositives++;
    }

    double expectedFalsePositives = targetFalsePositiveRate * NumberOfIntentionallyAbsentItems;
    double error = (expectedFalsePositives - numberOfFalsePositives) / expectedFalsePositives;
    bool withinTolerance = Math.Abs(error) <= AllowedErrorMagnitude;

    XAssert.IsTrue(
        withinTolerance,
        "The false-positive rate was signficantly different than theoretical. This should be unlikely. Actual: {0} Expected: {1} Error ratio: {2} (allowed {3})",
        numberOfFalsePositives,
        expectedFalsePositives,
        error,
        AllowedErrorMagnitude);
}
// Checks that a set of large 64-bit values is not reported before insertion, is
// reported after insertion, and that the values directly below and above each
// inserted value are not reported.
public void support_adding_large_values()
{
    int n = 1234567;
    double p = 1.0e-6;
    BloomFilter filter = new BloomFilter(n, p);
    long[] items =
    {
        192389123812L, 286928492L, 27582928698L, 72669175482L, 1738996371L, 939342020387L,
        37253255484L, 346536436L, 123921398432L, 8324982394329432L, 183874782348723874L, long.MaxValue
    };

    // No items added yet.
    for (int i = 0; i < items.Length; i++)
    {
        Assert.IsFalse(filter.MayExist(items[i]));
    }

    // Add the items.
    for (int i = 0; i < items.Length; i++)
    {
        filter.Add(items[i]);
    }

    // All the items should exist.
    for (int i = 0; i < items.Length; i++)
    {
        Assert.IsTrue(filter.MayExist(items[i]));
    }

    // All the neighbouring items should probably not exist.
    for (int i = 0; i < items.Length; i++)
    {
        // long.MaxValue + 1 wraps around to long.MinValue. The wrap is made
        // explicit so the test does not throw OverflowException when the project
        // is compiled with overflow checking enabled.
        long below = unchecked(items[i] - 1);
        long above = unchecked(items[i] + 1);

        Assert.IsFalse(filter.MayExist(below));
        Assert.IsFalse(filter.MayExist(above));
    }
}
// Adds a single word to a 32-slot filter and checks it is reported as present.
public void TestBit_1()
{
    const string word = "floor";
    var filter = new BloomFilter(32);

    filter.Add(word);

    Assert.AreEqual(true, filter.IsValue(word));
}
// Adds the payload data to the current Bloom filter; does nothing when no
// filter is set.
private void OnFilterAddMessageReceived(FilterAddPayload payload)
{
    if (bloom_filter == null)
    {
        return;
    }

    bloom_filter.Add(payload.Data);
}
// For every n in 1..1000 and every p from 0.1 down by factors of ten while
// p > 1e-7: checks that no integer in 0..n is reported before insertion and that
// every one is reported afterwards.
public void always_return_true_if_an_item_was_added()
{
    for (int n = 1; n <= 1000; n++)
    {
        for (double p = 0.1; p > 1.0e-7; p /= 10.0)
        {
            BloomFilter filter = new BloomFilter(n, p);
            int lastItem = n;

            // Nothing has been added yet.
            for (int item = 0; item <= lastItem; item++)
            {
                Assert.IsFalse(filter.MayExist(item));
            }

            // Insert every item.
            for (int item = 0; item <= lastItem; item++)
            {
                filter.Add(item);
            }

            // Every inserted item must now be reported.
            for (int item = 0; item <= lastItem; item++)
            {
                Assert.IsTrue(filter.MayExist(item));
            }
        }
    }
}
// Adds a random GUID string to a default filter and checks MayContain reports it.
public void For_a_value_that_was_added_Contains_returns_Maybe()
{
    var item = Guid.NewGuid().ToString();
    var filter = new BloomFilter();

    filter.Add(item);

    filter.MayContain(item)
          .Should()
          .BeTrue();
}
// For n = 1234567 and each p from 0.1 down by factors of ten while p > 1e-7:
// inserts the odd numbers up to n, counts how many even numbers are reported and
// asserts the count does not exceed ceil(n * p / 2).
public void have_false_positives_with_probability_p_for_large_n()
{
    int n = 1234567;

    for (double p = 0.1; p > 1.0e-7; p /= 10.0)
    {
        BloomFilter filter = new BloomFilter(n, p);

        // Only odd numbers are inserted.
        for (int odd = 1; odd <= n; odd += 2)
        {
            filter.Add(odd);
        }

        // Upper bound on false positives accepted by this test.
        int expectedFalsePositives = (int)Math.Ceiling(n * p / 2.0);

        // No even number was inserted, so each hit is a false positive.
        int falsePositives = 0;
        for (int even = 2; even <= n; even += 2)
        {
            if (!filter.MayExist(even))
            {
                continue;
            }

            falsePositives++;
        }

        if (falsePositives > 0)
        {
            Console.Out.WriteLine("n: {0}, p:{1}. Found {2} false positives. Expected false positives: {3}", n, p, falsePositives, expectedFalsePositives);
        }

        Assert.LessOrEqual(falsePositives, expectedFalsePositives);
    }
}
// Inserts 10000 random doubles into a filter, measures the false-positive rate
// over one million values that are not in the reference set and compares it
// with the estimated probability to three decimal places.
public void FalsePositivesEstimationIsCorrect()
{
    const int s = 10000, n = s * 7 + 1;
    const int trials = 1000000;

    var set = new HashSet<double>();
    var rng = new Random();
    var hashCount = (int)Math.Round(BloomFilter<double>.GetOptimalNumberOfHashFunctions(n, s));
    var hashFunctions = EnumerateHashFunctions(n).Take(hashCount).ToArray();
    var bloom = new BloomFilter<double>(n, hashFunctions);

    for (int inserted = 0; inserted < s; inserted++)
    {
        double value = 100 * rng.NextDouble();
        set.Add(value);
        bloom.Add(value);
    }

    double estimated = BloomFilter<double>.GetEstimatedFalsePositiveProbability(hashCount, n, set.Count);

    // Only values absent from the reference set count as a trial.
    int errors = 0;
    int completed = 0;
    while (completed < trials)
    {
        double candidate = 100 * rng.NextDouble();
        if (set.Contains(candidate))
        {
            continue;
        }

        if (bloom.Contains(candidate))
        {
            ++errors;
        }

        ++completed;
    }

    double actual = errors / (double)trials;
    Assert.Equal(estimated, actual, 3);
}
// Adds one random GUID string to a default filter and checks that a different
// random GUID string is not reported by MayContain.
public void For_a_value_that_was_not_added_Contains_returns_DefinitelyNot()
{
    var filter = new BloomFilter();

    var added = Guid.NewGuid().ToString();
    filter.Add(added);

    var neverAdded = Guid.NewGuid().ToString();
    var reported = filter.MayContain(neverAdded);

    reported.Should().BeFalse();
}
// Adds 99 items to a (100, 0.1f) filter and checks the reported false-positive
// rate has not exceeded 0.1; then adds three more and checks that it has.
public void FalsePostiveRateCrossesThresholdAtCorrectCount()
{
    const float threshold = 0.1f;
    var filter = new BloomFilter<int>(100, threshold);

    for (int item = 0; item < 99; item++)
    {
        filter.Add(item);
        Assert.IsTrue(filter.Contains(item));
    }

    Assert.IsFalse(filter.FalsePositiveRate > threshold);

    foreach (int extra in new[] { 1000, 1001, 1002 })
    {
        filter.Add(extra);
    }

    Assert.IsTrue(filter.FalsePositiveRate > threshold);
}
// Checks set-like behaviour of a (100, 2) filter: the first Add(10) is asserted
// to return false, the second to return true, 10 is then contained, and each of
// 0..99 is contained right after being added.
public void BasicBloomFilterCorrectlyActsAsASet()
{
    var filter = new BloomFilter<int>(100, 2);

    // 10 cannot already be in the collection on the first insert.
    var firstInsert = filter.Add(10);
    Assert.IsFalse(firstInsert);

    var secondInsert = filter.Add(10);
    Assert.IsTrue(secondInsert);

    // 10 is in the collection.
    Assert.IsTrue(filter.Contains(10));

    // Check a load more numbers.
    for (int number = 0; number < 100; number++)
    {
        filter.Add(number);
        Assert.IsTrue(filter.Contains(number));
    }
}
// Demo: adds three strings to a (20, 3) filter and prints the results of
// Contains, ContainsAll, ContainsAny and the false-positive probability.
static void Main()
{
    var bf = new BloomFilter<string>(20, 3);
    foreach (var text in new[] { "testing", "nottesting", "testingagain" })
    {
        bf.Add(text);
    }

    Console.WriteLine(bf.Contains("badstring")); // False
    Console.WriteLine(bf.Contains("testing"));   // True

    var testItems = new List<string>() { "badstring", "testing", "test" };
    Console.WriteLine(bf.ContainsAll(testItems)); // False
    Console.WriteLine(bf.ContainsAny(testItems)); // True

    // Sample output - False Positive Probability: 0.040894188143892
    var probabilityLine = "False Positive Probability: " + bf.FalsePositiveProbability();
    Console.WriteLine(probabilityLine);
}
// Adds an item, prints the filter's string form, rebuilds a filter from that
// string and checks the rebuilt filter still reports the item.
public void ToBase64String_and_string_constructor_correctly_serialize_and_deserialize_BloomFilter_state()
{
    var item = Guid.NewGuid().ToString();
    var original = new BloomFilter();
    original.Add(item);

    Console.WriteLine(original.ToString());

    var restored = new BloomFilter(original.ToString());

    restored.MayContain(item).Should().BeTrue();
}
// Adds 100000 random GUID strings to a (100, .7) filter. For any added string
// the filter does not report, asserts it is not in the source set - which can
// never hold, so the test fails if the filter produces a false negative.
public void when_Contains_is_DefinitelyNot_then_it_definitely_is_not_in_the_set()
{
    var guids = Enumerable.Range(1, 100000).Select(_ => Guid.NewGuid().ToString());
    var set = new HashSet<string>(guids);
    var filter = new BloomFilter(100, .7);

    foreach (var member in set)
    {
        filter.Add(member);
    }

    var rejected = set.Where(member => !filter.MayContain(member));
    foreach (var member in rejected)
    {
        set.Contains(member).Should().Be(false);
    }
}
// Inserts 10000 random doubles into a filter and a reference set, then checks
// that no value in the reference set is missing from the filter.
public void NoFalseNegativesTest()
{
    const int s = 10000, n = s * 7 + 1;

    var set = new HashSet<double>();
    var rng = new Random();
    var hashCount = (int)Math.Round(BloomFilter<double>.GetOptimalNumberOfHashFunctions(n, s));
    var hashFunctions = EnumerateHashFunctions(n).Take(hashCount).ToArray();
    var bloom = new BloomFilter<double>(n, hashFunctions);

    for (int inserted = 0; inserted < s; inserted++)
    {
        double value = 100 * rng.NextDouble();
        set.Add(value);
        bloom.Add(value);
    }

    bool anyMissing = set.Any(d => !bloom.Contains(d));
    Assert.False(anyMissing);
}
// Adds `testNum` test strings to a filter in parallel, then looks each one up in
// parallel and traces how many were not found.
public void BaseParalleUselTest(int testNum)
{
    var bf = new BloomFilter<string>(100000000, 0.001f);

    Parallel.For(0, testNum, i => bf.Add(GetTestString(i).ToString()));

    var failCount = 0;
    Parallel.For(0, testNum, i =>
    {
        if (!bf.Contains(GetTestString(i).ToString()))
        {
            // The counter is shared by all worker threads; a plain `+= 1` is a
            // read-modify-write race that can lose updates.
            System.Threading.Interlocked.Increment(ref failCount);
        }
    });

    Trace.WriteLine($"测试写入{testNum}个对象, 共有{failCount}个({(float)failCount / testNum:f6})对象不存在");
}
// Fills a (1000, .01) filter with 1000 random GUID strings, probes it with the
// 10000 numeric strings "1001".."11000" and checks that the number of false
// positives lies in the range 70..120.
public void Probability_of_false_positive_is_accurate_when_filter_is_at_capacity()
{
    var filter = new BloomFilter(1000, .01);

    var stringsInFilter = Enumerable.Range(1, 1000).Select(_ => Guid.NewGuid().ToString());
    foreach (var member in stringsInFilter)
    {
        filter.Add(member);
    }

    var probes = Enumerable.Range(1001, 10000).Select(i => i.ToString());
    var falsePositives = probes.Where(probe => filter.MayContain(probe)).ToList();

    Console.WriteLine(falsePositives.Count + " false positives");
    Console.WriteLine(falsePositives.ToLogString());

    falsePositives.Count.Should().BeInRange(70, 120);
}
// Rebuilds the Bloom filter from every key in the primary store, sized to the
// current count plus twice the configured dictionary split size, and resets the
// possible-incoherence counter.
// NOTE(review): the lock is taken on _bloomfilter, which is reassigned inside the
// locked region; callers arriving afterwards lock a different object. Confirm
// whether a dedicated lock object is needed.
public void Reindex()
{
    lock (_bloomfilter)
    {
        var capacity = this.Count() + Configuration.DictionarySplitSize * 2;
        var rebuilt = new BloomFilter<string>(capacity);

        foreach (var store in _primarystore)
        {
            foreach (var key in store.Keys)
            {
                rebuilt.Add(key);
            }
        }

        _bloomfilter = rebuilt;
        _indexpossibleincoerences = 0;
    }
}
// Reads the system word list, adds every other word (starting with the first) to
// a Bloom filter, verifies all added words are found (returning 1 otherwise) and
// prints the occupancy and the false-positive ratio over the words not added.
public static int Main()
{
    // The cryptographic hashes (MD5, SHA1, RIPEMD160, MACTripleDES) are
    // deliberately left out of this list.
    var hashes = new IStringHash[]
    {
        new StandardHash(),
        new MurmurHash2Simple(),
        new SuperFastHashSimple(),
    };
    var bloom = new BloomFilter(1000003, hashes);
    var positive = new List<string>();
    var negative = new List<string>();

    var words = File.ReadAllLines("/usr/share/dict/words");
    for (int index = 0; index < words.Length; index++)
    {
        var word = words[index].Trim();

        // Even positions are inserted, odd positions are kept as absent probes.
        if (index % 2 == 0)
        {
            positive.Add(word);
            bloom.Add(word);
        }
        else
        {
            negative.Add(word);
        }
    }

    Console.WriteLine("occupancy for " + positive.Count + " words: " + bloom.Occupancy());

    foreach (var word in positive)
    {
        if (bloom.Lookup(word))
        {
            continue;
        }

        Console.WriteLine("error!");
        return 1;
    }

    int false_positives = 0;
    foreach (var word in negative)
    {
        if (bloom.Lookup(word))
        {
            ++false_positives;
        }
    }

    Console.WriteLine("false positives: " + ((float)false_positives / negative.Count));
    return 0;
}
// Serializes a filter holding three words to indented JSON, deserializes it and
// checks the copy reports the three words and does not report "false".
// NOTE(review): the method is declared async but contains no await - confirm
// whether the async modifier is needed.
public async Task BloomFilter_can_be_round_tripped_through_JSON_serialization()
{
    var words = new[] { "one", "two", "three" };
    var original = new BloomFilter(capacity: 10000);
    foreach (var word in words)
    {
        original.Add(word);
    }

    var json = JsonConvert.SerializeObject(original, Formatting.Indented);
    Console.WriteLine(json);

    var restored = JsonConvert.DeserializeObject<BloomFilter>(json);

    foreach (var word in words)
    {
        restored.MayContain(word).Should().BeTrue();
    }

    restored.MayContain("false").Should().BeFalse();
}
// Writes 100000 random GUID strings and the string form of a filter containing
// them to two files so their sizes can be compared by hand.
public void Size_comparison_vs_full_set()
{
    var capacity = 100000;
    var filter = new BloomFilter(capacity, .0000001);

    var list = Enumerable.Range(1, capacity).Select(_ => Guid.NewGuid().ToString()).ToList();

    foreach (var s in list)
    {
        filter.Add(s);
    }

    // File.WriteAllText does not create missing directories, so make sure the
    // output folder exists (no-op when it already does).
    Directory.CreateDirectory(@"c:\temp");

    File.WriteAllText(@"c:\temp\list.txt", string.Join("", list));
    File.WriteAllText(@"c:\temp\filter.txt", filter.ToString());
}