/// <summary>
/// Same scenario as bloom_create_insert_serialize, but the filter is constructed
/// with a non-zero tweak (2147483649). Inserts three items, checks membership and
/// compares the serialized filter against a fixed byte sequence.
/// </summary>
public void bloom_create_insert_serialize_with_tweak()
{
    const string firstItem = "99108ad8ed9bb6274d3980bab5a85c048f0950c8";
    const string secondItem = "b5a2c786d9ef4658287ced5914b37a1b4aa32eee";
    const string thirdItem = "b9300670b4c5366e95b2699e8b18bc75e5f729c5";

    var filter = new BloomFilter(3, 0.01, 2147483649, BloomFlags.UPDATE_ALL);

    filter.Insert(ParseHex(firstItem));
    Assert.True(filter.Contains(ParseHex(firstItem)), "BloomFilter doesn't contain just-inserted object!");

    // Same value as firstItem with one bit flipped in the first byte.
    Assert.False(filter.Contains(ParseHex("19108ad8ed9bb6274d3980bab5a85c048f0950c8")), "BloomFilter contains something it shouldn't!");

    filter.Insert(ParseHex(secondItem));
    Assert.True(filter.Contains(ParseHex(secondItem)), "BloomFilter doesn't contain just-inserted object (2)!");

    filter.Insert(ParseHex(thirdItem));
    Assert.True(filter.Contains(ParseHex(thirdItem)), "BloomFilter doesn't contain just-inserted object (3)!");

    // Serialize the filter and compare with the expected wire bytes.
    var buffer = new MemoryStream();
    var writer = new BitcoinStream(buffer, true);
    writer.ReadWrite(filter);

    var expectedBytes = ParseHex("03ce4299050000000100008001");
    AssertEx.CollectionEquals(expectedBytes, buffer.ToArray());
}
/// <summary>
/// Small timing demo: prints the filter's array size, then times repeated
/// Add/Contains calls for a couple of strings and reports each timing.
/// </summary>
static void Main(string[] args)
{
    var filter = new BloomFilter(HashFunction.Fnv | HashFunction.Murmur, 10000000, 0.0001d);
    Console.WriteLine(filter.ArraySize);

    var test = "Hello my name is Terry";

    // Add the same value five times in a row, timing each call.
    for (var run = 0; run < 5; run++)
    {
        Timed(() => filter.Add(test)).Speak();
    }
    Timed(() => filter.Contains(test)).Speak();

    var test2 = "Hello my name is Bob";
    Timed(() => filter.Contains(test2)).Speak();
    Timed(() => filter.Add(test2)).Speak();
    Timed(() => filter.Contains(test2)).Speak();

    Timed(() => filter.Add("an boobus")).Speak();
}
/// <summary>
/// Adds 10,000 generated entities to a Bloom filter initialized for twice that
/// capacity, removes the first half again, and checks how many of the original
/// items are still reported as contained.
/// </summary>
public void BloomFilterRemoveItemTest()
{
    var addSize = 10000;
    var testData = DataGenerator.Generate().Take(addSize).ToArray();
    var errorRate = 0.001F;
    var size = testData.Length;
    var configuration = new DefaultBloomFilterConfiguration();
    var bloomFilter = new BloomFilter<TestEntity, long>(configuration);
    bloomFilter.Initialize(2 * size, errorRate);

    foreach (var itm in testData)
    {
        bloomFilter.Add(itm);
    }

    foreach (var item in testData.Take(addSize / 2))
    {
        bloomFilter.Remove(item);
    }

    var containedAfterRemove = testData.Count(item => bloomFilter.Contains(item));

    // Bloom filter does not behave well under removal, hence the pinned count.
    // Expected value goes first so a failure message reports the values the right way round.
    Assert.AreEqual(4137, containedAfterRemove, "Wrong item count after removal.");
}
/// <summary>
/// Adds two different ASCII strings to an 8-bit filter and verifies both the
/// resulting bit pattern and that each string is reported as contained.
/// </summary>
public void AddMultipleDifferentItemsToBloomTest()
{
    var config = new BloomConfiguration
    {
        BloomFilterSize = 8 * (long)BloomFilterSizeMultipliers.Bits,
    };
    var bloomFilter = new BloomFilter(config);

    byte[] firstPayload = Encoding.ASCII.GetBytes("TestString");
    byte[] secondPayload = Encoding.ASCII.GetBytes("TestString2");

    bloomFilter.Add(firstPayload);
    bloomFilter.Add(secondPayload);

    // Bit pattern expected after both insertions.
    bool[] expectedBits = { true, false, true, true, false, true, false, true };
    bool[] actualBits = bloomFilter.Filter;

    Assert.True(CompareBoolArrays(expectedBits, actualBits));
    Assert.True(bloomFilter.Contains(firstPayload));
    Assert.True(bloomFilter.Contains(secondPayload));
}
/// <summary>
/// Builds a filter with a zero tweak, inserts three items, checks membership and
/// compares the serialized filter against a fixed byte sequence.
/// </summary>
public void bloom_create_insert_serialize()
{
    const string firstItem = "99108ad8ed9bb6274d3980bab5a85c048f0950c8";
    const string secondItem = "b5a2c786d9ef4658287ced5914b37a1b4aa32eee";
    const string thirdItem = "b9300670b4c5366e95b2699e8b18bc75e5f729c5";

    var filter = new BloomFilter(3, 0.01, 0, BloomFlags.UPDATE_ALL);

    filter.Insert(ParseHex(firstItem));
    Assert.True(filter.Contains(ParseHex(firstItem)), "BloomFilter doesn't contain just-inserted object!");

    // Same value as firstItem with one bit flipped in the first byte.
    Assert.False(filter.Contains(ParseHex("19108ad8ed9bb6274d3980bab5a85c048f0950c8")), "BloomFilter contains something it shouldn't!");

    filter.Insert(ParseHex(secondItem));
    Assert.True(filter.Contains(ParseHex(secondItem)), "BloomFilter doesn't contain just-inserted object (2)!");

    filter.Insert(ParseHex(thirdItem));
    Assert.True(filter.Contains(ParseHex(thirdItem)), "BloomFilter doesn't contain just-inserted object (3)!");

    // Serialize the filter and compare with the expected wire bytes.
    var buffer = new MemoryStream();
    var writer = new BitcoinStream(buffer, true);
    writer.ReadWrite(filter);

    var expectedBytes = ParseHex("03614e9b050000000000000001");
    AssertEx.CollectionEquals(expectedBytes, buffer.ToArray());
}
/// <summary>
/// Basic crawler setup: assigns the request settings (proxy list, retries, thread
/// count, headers), seeds the URL queue with the first list page (skipping URLs
/// the filter already knows) and finally runs the simulated login.
/// </summary>
public void SettingInit()
{
    // Seed addresses are de-duplicated through the Bloom filter below.
    Settings.IPProxyList = new List<IPProxy>();   // proxy-IP mode, list starts empty
    Settings.IgnoreSucceedUrlToDB = true;         // do not store succeeded URLs in the database
    Settings.MaxReTryTimes = 20;
    Settings.ThreadCount = 1;
    Settings.Accept = "application/json, text/plain, */*";
    Settings.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36";
    Settings.HeadSetDic = new Dictionary<string, string>();
    Settings.HeadSetDic.Add("Accept-Encoding", "gzip, deflate, br");

    Console.WriteLine("正在获取已存在的url数据");
    Console.WriteLine("正在初始化选择url队列");

    var totalItems = 697591;
    // Computed but not used by the loop below, which only enqueues page 1.
    var totalPages = (totalItems / takeCount) + 1;

    var pageIndex = 1;
    while (pageIndex <= 1)
    {
        // takeCount is passed as a second format argument but the template has no {1} placeholder.
        var url = string.Format("https://jiamitu.mi.com/pet/data/list?page={0}&limit=10&order=generated_id&orderBy=desc&followUp=https:%2F%2Fjiamitu.mi.com%2Fhome%23", pageIndex, takeCount);
        var alreadyQueued = filter.Contains(url);
        if (!alreadyQueued)
        {
            filter.Add(url);
            UrlQueue.Instance.EnQueue(new UrlInfo(url) { Depth = 1 });
        }
        pageIndex++;
    }

    // Placeholder expression so that no other addresses are added.
    Settings.RegularFilterExpressions.Add("XXX");

    var loggedIn = SimulateLogin();
    if (!loggedIn)
    {
        Console.WriteLine("初始化失败");
    }
}
/// <summary>
/// A number that was added is reported as contained; a neighbouring number that
/// was never added is not.
/// </summary>
public void MustBePossibleToAddAndTestANumber()
{
    const int added = 47;
    const int neverAdded = 48;

    var bloom = new BloomFilter(100, 0.95);
    bloom.Add(added);

    Assert.IsTrue(bloom.Contains(added));
    Assert.IsFalse(bloom.Contains(neverAdded));
}
/// <summary>
/// Basic crawler setup: assigns the request settings, enqueues one POST request
/// per doctor whose schedule has not been updated yet, registers each doctor guid
/// in the guid filter, and finally runs the simulated login.
/// </summary>
public void SettingInit()
{
    Settings.IgnoreSucceedUrlToDB = true;
    Settings.ThreadCount = 2;
    Settings.DBSaveCountLimit = 1;
    Settings.MaxReTryTimes = 10;
    Settings.IgnoreFailUrl = true;
    Settings.AccessToken = reqtoken;
    Settings.ContentType = "application/x-www-form-urlencoded";
    this.Settings.UserAgent = "haodf_app/1.0";
    Settings.PostEncoding = Encoding.UTF8;

    // Only doctors whose "isScheduleUpdate" flag is not "1"; only the guid field is fetched.
    var allDoctorList = dataop.FindAllByQuery(DataTableNameDoctor, Query.NE("isScheduleUpdate", "1")).SetFields("guid").ToList();
    foreach (var doctor in allDoctorList)
    {
        var guid = doctor.Text("guid");

        // The original literal contained "userId=0¤tUserId=0": "&curren" had been decoded as
        // the HTML entity for the currency sign, which fused two form fields into one. The
        // intended parameter separator and name are restored here.
        var postData = string.Format("app=p&os=android&n=2&m=GT-I9300&v=5.2.5&di={0}&s=hd&doctorId={1}&deviceToken={0}&p=1&userId=0&currentUserId=0&sv=4.4.2&api=1.2", reqtoken, guid);
        UrlQueue.Instance.EnQueue(new UrlInfo(materialUrl) { Depth = 1, PostData = postData });

        // The request is enqueued regardless; the filter only records which guids were seen.
        if (!guidFilter.Contains(guid))
        {
            guidFilter.Add(guid);
        }
    }

    Console.WriteLine("正在加载账号数据");

    // Placeholder expression so that no discovered addresses are crawled.
    Settings.RegularFilterExpressions.Add(@"luckymnXXXXXXXXXXXXXXXXXX");

    if (SimulateLogin())
    {
        // Console.WriteLine("zluckymn模拟登陆成功");
    }
    else
    {
        Console.WriteLine("模拟登陆失败");
    }
}
/// <summary>
/// Basic crawler setup: assigns the request settings and cookies, loads the region
/// and type tables, enqueues a single pre-sale certificate list request built from
/// empty region/type documents (the per-region/per-type loops are disabled), and
/// finally runs the simulated login.
/// </summary>
public void SettingInit()
{
    // Seed addresses are de-duplicated through the Bloom filter below.
    Settings.IPProxyList = new List<IPProxy>();   // proxy-IP mode, list starts empty
    Settings.IgnoreSucceedUrlToDB = true;         // do not store succeeded URLs in the database
    Settings.MaxReTryTimes = 20;
    Settings.ThreadCount = 10;
    Settings.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8";
    Settings.ContentType = "application/x-www-form-urlencoded";
    Settings.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36";
    Settings.HeadSetDic = new Dictionary<string, string>();
    Settings.HeadSetDic.Add("Accept-Encoding", "gzip, deflate");

    Console.WriteLine("正在获取已存在的url数据");
    Console.WriteLine("正在初始化选择url队列");

    Settings.SimulateCookies = "UM_distinctid=163c909dac3a0b-0a58055a844938-737356c-13c680-163c909dac46fa; JSESSIONID=0000uekkAKP_Q0hcDP33LrLYCY5:-1; CNZZDATA4237675=cnzz_eid%3D1374545162-1528085523-null%26ntime%3D1528091007";

    // Both tables are still loaded even though the loops over them are disabled.
    var regionList = dataop.FindAll(DataTableNameRegion).ToList();
    var typeList = dataop.FindAll(DataTableNameType).ToList();

    // Empty stand-ins used instead of iterating regionList / typeList.
    var region = new BsonDocument();
    var type = new BsonDocument();

    var regionName = region.Text("name");
    var typeName = type.Text("name");
    var url = string.Format("http://www.jyfcc.com.cn/PreSellCert_List.do?region={0}&type={1}", regionName, typeName);

    var regionId = region.Text("id");
    var typeId = type.Text("id");
    var postData = string.Format("region={0}&hsusage={1}&project=&developer=&button=%B2%E9%D1%AF", regionId, typeId);

    var alreadyQueued = filter.Contains(url);
    if (!alreadyQueued)
    {
        filter.Add(url);
        UrlQueue.Instance.EnQueue(new UrlInfo(url) { Depth = 1, PostData = postData });
    }

    // Placeholder expression so that no other addresses are added.
    Settings.RegularFilterExpressions.Add("XXX");

    var loggedIn = SimulateLogin();
    if (!loggedIn)
    {
        Console.WriteLine("初始化失败");
    }
}
/// <summary>
/// Registers the sample key "1234" in the filter if it is not there yet, appends a
/// new proxy entry to the list, and appends the current timestamp as a line to
/// d:\freeIP.txt.
/// </summary>
public void Run()
{
    if (!test.Contains("1234"))
    {
        test.Add("1234");
    }

    iplist.Add(new proxy(Guid.NewGuid().ToString(), "1234", 2));

    // Synchronous write: the previous AppendAllTextAsync call was never awaited, so the
    // method could return before the write happened and any I/O exception went unobserved.
    System.IO.File.AppendAllText("d:\\freeIP.txt", $"{DateTime.Now.ToString()}{Environment.NewLine}");
}
/// <summary>
/// A key that was added is found; a prefix of that key that was never added is not.
/// </summary>
public void KeysAddedCanBeFound()
{
    const string input = "0123456789";
    const string neverAdded = "012345678";

    var bloom = new BloomFilter(capacity: 1200000);
    bloom.Add(input);

    var foundAdded = bloom.Contains(input);
    Assert.IsTrue(foundAdded);

    var foundOther = bloom.Contains(neverAdded);
    Assert.IsFalse(foundOther);
}
/// <summary>
/// After adding only the value 0, the filter must report 0 as present and must not
/// report any of the values 1..1,999,999 as present.
/// </summary>
public void UniqueMillion()
{
    var bloom = new BloomFilter<int>();
    bloom.Add(0);
    Assert.True(bloom.Contains(0));

    const int upperBound = 2000000;
    var candidate = 1;
    while (candidate < upperBound)
    {
        Assert.False(bloom.Contains(candidate));
        candidate++;
    }
}
/// <summary>
/// Adds three strings and checks that each is reported as contained while a string
/// that was never added is not.
/// </summary>
public void ContainsTest()
{
    var bloom = new BloomFilter<string>(20, 3);

    var added = new[] { "testing", "nottesting", "testingagain" };
    foreach (var word in added)
    {
        bloom.Add(word);
    }

    // The negative lookup comes first, matching the original assertion order.
    Assert.False(bloom.Contains("badstring"));
    foreach (var word in added)
    {
        Assert.True(bloom.Contains(word));
    }
}
/// <summary>
/// Added strings are found, a string that has not been added yet is not found, and
/// it is found once it has been added.
/// </summary>
public void BasicTest()
{
    var bloom = new BloomFilter<string>(10000);

    bloom.Add("foo");
    bloom.Add("bar");

    Assert.IsTrue(bloom.Contains("foo"));
    Assert.IsTrue(bloom.Contains("bar"));

    // "baz" is only reported after it has been inserted.
    const string lateArrival = "baz";
    Assert.IsFalse(bloom.Contains(lateArrival));
    bloom.Add(lateArrival);
    Assert.IsTrue(bloom.Contains(lateArrival));
}
/// <summary>
/// Basic crawler setup: assigns the request settings, cookies and referer, loads
/// all projects whose "isUpdate" flag is not 1, enqueues one house-list URL per
/// project (skipping URLs the filter already knows) and finally runs the simulated
/// login.
/// </summary>
public void SettingInit()
{
    // Seed addresses are de-duplicated through the Bloom filter below.
    Settings.IPProxyList = new List<IPProxy>();   // proxy-IP mode, list starts empty
    Settings.IgnoreSucceedUrlToDB = true;         // do not store succeeded URLs in the database
    Settings.MaxReTryTimes = 20;
    Settings.ThreadCount = 10;
    Settings.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8";
    Settings.ContentType = "application/x-www-form-urlencoded";
    Settings.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36";
    Settings.HeadSetDic = new Dictionary<string, string>();
    Settings.HeadSetDic.Add("Accept-Encoding", "gzip, deflate");
    Settings.SimulateCookies = "UM_distinctid=163c909dac3a0b-0a58055a844938-737356c-13c680-163c909dac46fa; JSESSIONID=0000uekkAKP_Q0hcDP33LrLYCY5:-1; CNZZDATA4237675=cnzz_eid%3D1374545162-1528085523-null%26ntime%3D1528091007";
    Settings.Referer = "http://www.jyfcc.com.cn/PreSellCert_Detail.do?pscid=101074";

    Console.WriteLine("正在获取已存在的url数据");
    Console.WriteLine("正在初始化选择url队列");

    projectList = dataop.FindAllByQuery(DataTableName, Query.NE("isUpdate", 1)).ToList();
    foreach (var project in projectList)
    {
        var houseListUrl = string.Format("http://www.jyfcc.com.cn/ifrm_House_List.do?pscid={0}", project.Text("projId"));
        if (filter.Contains(houseListUrl))
        {
            continue;
        }

        // Detail page for this project: remember it and queue it, keyed by the project id.
        filter.Add(houseListUrl);
        UrlQueue.Instance.EnQueue(new UrlInfo(houseListUrl) { Depth = 1, UniqueKey = project.Text("projId") });
    }

    // Placeholder expression so that no other addresses are added.
    Settings.RegularFilterExpressions.Add("XXX");

    var loggedIn = SimulateLogin();
    if (!loggedIn)
    {
        Console.WriteLine("初始化失败");
    }
}
/// <summary>
/// Adds every entry of the planet list to the filter and checks that "earth" and
/// "jupiter" are found while "pluto" is not.
/// </summary>
public void assert_pluto_actually_doesnt_exist_no_hard_feelings_you_guys()
{
    var bloom = new BloomFilter(8, .01, _indexer);
    foreach (var name in _planets)
    {
        bloom.Add(name);
    }

    var hasEarth = bloom.Contains("earth");
    Assert.IsTrue(hasEarth);

    var hasJupiter = bloom.Contains("jupiter");
    Assert.IsTrue(hasJupiter);

    // NOTE(review): the original comment said pluto would collide with another planet
    // because of a high error rate, yet the assertion expects it NOT to be found -
    // presumably pluto is absent from _planets; confirm against the fixture.
    var hasPluto = bloom.Contains("pluto");
    Assert.IsFalse(hasPluto);
}
/// <summary>
/// Fills a filter to capacity with random GUID strings, probes it with the same
/// number of fresh GUID strings and fails when the number of positives exceeds
/// twice the count implied by the configured error rate.
/// </summary>
public void FalsePositivesInRangeTest()
{
    // Filter properties.
    int capacity = 1000000;
    float errorRate = 0.001F; // 0.1%

    // Populate the filter with random strings.
    var target = new BloomFilter<string>(capacity, errorRate);
    int inserted = 0;
    while (inserted < capacity)
    {
        target.Add(Guid.NewGuid().ToString());
        inserted++;
    }

    // Probe with new random strings; roughly errorRate of them should come back positive.
    int testIterations = capacity;
    int expectedFalsePositives = (int)(testIterations * errorRate) * 2;
    int falsePositives = 0;
    for (int probe = 0; probe < testIterations; probe++)
    {
        string candidate = Guid.NewGuid().ToString();
        falsePositives += target.Contains(candidate) ? 1 : 0;
    }

    if (falsePositives <= expectedFalsePositives)
    {
        return;
    }

    Assert.Fail("Number of false positives ({0}) greater than expected ({1}).", falsePositives, expectedFalsePositives);
}
/// <summary>
/// Adds a generated list of inputs to a filter and fails on the first input that
/// the filter does not report as contained.
/// </summary>
public void NoFalseNegativesTest()
{
    // Filter properties.
    int capacity = 10000;
    float errorRate = 0.001F; // 0.1%

    // Input collection and the filter under test.
    var inputs = generateRandomDataList(capacity);
    var target = new BloomFilter<string>(capacity, errorRate);

    foreach (string value in inputs)
    {
        target.Add(value);
    }

    // Every input must be found again; a miss is a false negative.
    foreach (string value in inputs)
    {
        if (target.Contains(value))
        {
            continue;
        }

        Assert.Fail("False negative: {0}", value);
    }
}
/// <summary>
/// A string that was never inserted is not reported as contained.
/// </summary>
public void NotAddedItemShouldReturnFalseWhenChecked()
{
    var filter = new BloomFilter(100, 0.1d);
    filter.Insert("Ashley");

    var found = filter.Contains("Bloom");

    Assert.IsFalse(found);
}
/// <summary>
/// A string that was inserted is reported as contained.
/// </summary>
public void AddedItemShouldReturnTrueWhenChecked()
{
    BloomFilter bloomFilter = new BloomFilter(100, 0.1d);
    bloomFilter.Insert("Ashley");

    // Assert.IsTrue replaces AreEqual(actual, true), which had expected/actual swapped
    // and mirrors the Assert.IsFalse used by the sibling negative test.
    Assert.IsTrue(bloomFilter.Contains("Ashley"));
}
/// <summary>
/// An integer that was added is reported as contained.
/// </summary>
public void Contains_WhenItemHasBeenAdded_ShouldReturnTrue()
{
    const int item = 42;
    var bloom = new BloomFilter<int>(50, 0.02);

    bloom.Add(item);
    var found = bloom.Contains(item);

    Assert.True(found);
}
/// <summary>
/// Limits the request rate. All time values are in milliseconds.
/// </summary>
/// <param name="app">Application builder the middleware is registered on.</param>
/// <param name="reset">Period over which requests are counted.</param>
/// <param name="maxRequest">Maximum number of requests a single IP may make within one counting period.</param>
/// <param name="blockTime">How long a violating IP stays blocked.</param>
/// <param name="maxReq">Request count that triggers the protection.</param>
/// <param name="period">Cycle period of the protection mechanism.</param>
/// <returns>The same <paramref name="app"/> instance.</returns>
public static IApplicationBuilder UseRateLimiter(this IApplicationBuilder app, int reset = 1000, int maxRequest = 10, int blockTime = 3600000, int maxReq = 100, int period = 1000)
{
    // One timer per concern: global counter reset, black-list reset, per-IP counter reset.
    reqReset = new Timer(period);
    reqReset.Elapsed += ReqReset_Elapsed;

    unBlocTimer = new Timer(blockTime);
    unBlocTimer.Elapsed += ResetBlackList;

    resettimer = new Timer(reset);
    resettimer.Elapsed += ResetRequests;

    resettimer.Start();
    unBlocTimer.Start();
    reqReset.Start();

    app.Use(async (context, next) =>
    {
        var ip = context.Connection.RemoteIpAddress;

        // First request from this IP registers it with a count of 0; later ones bump the count.
        var firstRequest = requesterInfos.TryAdd(ip, 0);
        if (!firstRequest)
        {
            requesterInfos[ip]++;
            if (requesterInfos[ip] > maxRequest)
            {
                blackList.Add(ip);
            }
        }

        // The global counter is incremented on every request; the black list is only
        // consulted when the global limit has not been exceeded.
        var seenBefore = req++;
        if (seenBefore > maxReq || blackList.Contains(ip))
        {
            throw new _429Exception();
        }

        await next();
    });

    return app;
}
/// <summary>
/// Handles a received response: parses the JSON body, and for every entry under
/// Data.Datas whose company serial number is not yet in the filter, queues an
/// insert of the company record and registers the serial number in the filter.
/// A missing JSON member surfaces as a NullReferenceException, which the original
/// documentation says the main thread catches.
/// </summary>
/// <param name="args">Event arguments carrying the response HTML.</param>
public void DataReceive(DataReceivedEventArgs args)
{
    var body = args.Html;
    JObject root = JObject.Parse(body);
    var entries = root["Data"]["Datas"];

    foreach (var entry in entries)
    {
        var entSn = entry["EntSn"];
        var entName = entry["EntName"];
        var jobWorkplace = entry["JobWorkplace"];

        // Known company: report it and move on without queuing anything.
        if (entFilter.Contains(entSn.ToString()))
        {
            Console.WriteLine("已存在公司");
            continue;
        }

        var record = new BsonDocument();
        record.Add("cityName", cityName);
        record.Add("entSn", entSn.ToString());
        record.Add("entName", entName.ToString());
        record.Add("jobWorkplace", jobWorkplace.ToString());

        var insert = new StorageData() { Document = record, Name = DataTableName, Type = StorageType.Insert };
        DBChangeQueue.Instance.EnQueue(insert);

        entFilter.Add(entSn.ToString());
    }

    // Reports the number of entries in the response, including skipped ones.
    Console.WriteLine("添加{0}", entries.Count());
}
/// <summary>
/// Fills a filter with random doubles, then measures the false-positive rate on a
/// million values that are known not to be in the reference set and compares it,
/// to three decimal places, with the filter's own estimate.
/// </summary>
public void FalsePositivesEstimationIsCorrect()
{
    const int s = 10000, n = s * 7 + 1;
    const int trials = 1000000;

    var reference = new HashSet<double>();
    var rng = new Random();

    var hashCount = (int)Math.Round(BloomFilter<double>.GetOptimalNumberOfHashFunctions(n, s));
    var hashFunctions = EnumerateHashFunctions(n).Take(hashCount).ToArray();
    var bloom = new BloomFilter<double>(n, hashFunctions);

    for (int added = 0; added < s; added++)
    {
        double value = 100 * rng.NextDouble();
        reference.Add(value);
        bloom.Add(value);
    }

    double estimated = BloomFilter<double>.GetEstimatedFalsePositiveProbability(hashCount, n, reference.Count);

    // Only values absent from the reference set count as a trial.
    int errors = 0;
    int completed = 0;
    while (completed < trials)
    {
        double probe = 100 * rng.NextDouble();
        if (reference.Contains(probe))
        {
            continue;
        }

        if (bloom.Contains(probe))
        {
            ++errors;
        }
        ++completed;
    }

    double actual = errors / (double)trials;
    Assert.Equal(estimated, actual, 3);
}
/// <summary>
/// Tests a message against a named sieve: formats the sieve name and the value
/// from the message context, and passes the message only when the sieve exists
/// and its filter reports the value as contained.
/// </summary>
/// <param name="msg">The message to test.</param>
/// <returns>The result of ThisPassedTheTest or ThisFailedTheTest for the message.</returns>
public override async Task<Message> HandleMessage(Message msg)
{
    return await Task.Run<Message>(() =>
    {
        MessageContext context = new MessageContext(msg, this);
        string sieve = Smart.Format(Sieve, context);
        string value = Smart.Format(From, context);

        // Single lookup instead of ContainsKey followed by the indexer.
        BloomFilter<string> filter;
        if (!Sieves.TryGetValue(sieve, out filter))
        {
            // If the sieve doesn't exist yet, then technically we
            // haven't seen the value before and it failed the test.
            return ThisFailedTheTest(msg);
        }

        if (filter.Contains(value))
        {
            return ThisPassedTheTest(msg);
        }

        return ThisFailedTheTest(msg);
    });
}
/// <summary>
/// Records a value in a named sieve: formats the sieve name and the value from
/// the message context, creates the sieve's filter on first use, adds the value
/// when the filter does not already report it, and counts newly added values.
/// </summary>
/// <param name="msg">The message whose value is recorded.</param>
/// <returns>The same message, unchanged.</returns>
public override async Task<Message> HandleMessage(Message msg)
{
    return await Task.Run<Message>(() =>
    {
        MessageContext context = new MessageContext(msg, this);
        string sieve = Smart.Format(Sieve, context);
        string value = Smart.Format(From, context);

        // Single lookup instead of ContainsKey + Add + indexer.
        // NOTE(review): this get-or-create still runs on a thread-pool thread without
        // synchronization; if Bloom.Sieves is a plain Dictionary, concurrent messages for
        // a new sieve can race here - confirm and guard at the owner of Bloom.Sieves.
        BloomFilter<string> filter;
        if (!Bloom.Sieves.TryGetValue(sieve, out filter))
        {
            filter = new BloomFilter<string>(Bloom.CAPACITY);
            Bloom.Sieves.Add(sieve, filter);
        }

        if (!filter.Contains(value))
        {
            filter.Add(value);
            Interlocked.Increment(ref newSincePersist);
        }

        if (newSincePersist >= PersistEvery)
        {
            // ToDo: Persist sieve to disk
        }

        return msg;
    });
}
/// <summary>
/// Builds two filters of different sizes over disjoint data sets (the second sized
/// as a fold of the first), adds the second filter to the first, and checks that
/// every item of both data sets is found in the combined filter.
/// </summary>
public void BloomFilterAddDifferentSizesTest()
{
    var addSize = 10000;
    var errorRate = 0.001F;
    var firstBatch = DataGenerator.Generate().Take(addSize).ToArray();
    var secondBatch = DataGenerator.Generate().Skip(addSize).Take(addSize).ToArray();
    var size = firstBatch.Length;
    var configuration = new DefaultBloomFilterConfiguration();

    var primary = new BloomFilter<TestEntity, long>(configuration);
    primary.Initialize(4 * size, errorRate);
    foreach (var entity in firstBatch)
    {
        primary.Add(entity);
    }

    var secondary = new BloomFilter<TestEntity, long>(configuration);

    // The second filter has to be a foldable version of the first: take the smallest
    // fold factor above 1 and divide the block size by it.
    var extracted = primary.Extract();
    var foldFactor = configuration.FoldingStrategy
        .GetAllFoldFactors(extracted.BlockSize)
        .Where(factor => factor > 1)
        .OrderBy(factor => factor)
        .First();
    secondary.Initialize(addSize, extracted.BlockSize / foldFactor, extracted.HashFunctionCount);
    foreach (var entity in secondBatch)
    {
        secondary.Add(entity);
    }

    primary.Add(secondary);

    var found = firstBatch.Union(secondBatch).Count(entity => primary.Contains(entity));
    Assert.AreEqual(2 * addSize, found, "Not all items found in added Bloom filters");
}
/// <summary>
/// Exercises a Bloom filter backed by a buffer obtained from Marshal.AllocHGlobal:
/// adds one million GUID byte arrays, verifying each is found right after it is
/// added, prints a progress dot every 100,000 items and finally the elapsed time.
/// </summary>
/// <exception cref="InvalidOperationException">An item was not found immediately after being added.</exception>
private unsafe static void UnmanagedMemory()
{
    Console.WriteLine("Unmanaged Memory");
    const int size = (int)(500 * MEGABYTE);

    var watch = Stopwatch.StartNew();
    var ptr = Marshal.AllocHGlobal(size);
    try
    {
        var filter = new BloomFilter(ptr, size / 4, new IHasher[] { new Murmur3AUnsafe(), new XXHashUnsafe() });
        for (int i = 0; i < 1000000; i++)
        {
            var bytes = Guid.NewGuid().ToByteArray();
            filter.Add(bytes);
            if (!filter.Contains(bytes))
            {
                throw new InvalidOperationException("broken");
            }

            if (i % 100000 == 0)
            {
                Console.Write(".");
            }
        }

        Console.WriteLine();
        Console.WriteLine(watch.Elapsed);
    }
    finally
    {
        // Release the buffer even when the loop throws; previously it leaked on failure.
        Marshal.FreeHGlobal(ptr);
    }
}
/// <summary>
/// Repeats 5000 times: creates a fresh filter, adds up to 50 distinct random
/// integers, and checks that every added value is reported as contained.
/// </summary>
public void Random()
{
    Random random = new Random();
    for (int i = 0; i < 5000; i++)
    {
        BloomFilter<int> bf = new BloomFilter<int>();
        List<int> values = new List<int>();

        // One de-duplication set per filter. It used to be re-created inside the inner
        // loop, so it was always empty and the duplicate check could never trigger.
        DataStructures.HashSet<int> hashSet = new DataStructures.HashSet<int>();

        for (int j = 0; j < 50; j++)
        {
            int rand = random.Next();
            if (!hashSet.Contains(rand))
            {
                values.Add(rand);
                hashSet.Add(rand);
                bf.Add(rand);
            }
        }

        foreach (int check in values)
        {
            Assert.True(bf.Contains(check));
        }
    }
}
/// <summary>
/// A single added integer is reported as contained.
/// </summary>
public void Add1()
{
    const int item = 1;
    var bloom = new BloomFilter<int>();

    bloom.Add(item);

    var found = bloom.Contains(item);
    Assert.True(found);
}
/// <summary>
/// Loads one page (up to 1000 records) of entries whose "isUpdate" flag is not "1"
/// and enqueues the URL of every entry the filter does not already contain. When
/// at least 10,000 records are pending, the page is taken from a random offset.
/// </summary>
private void InitialUrlQueue()
{
    const int takeCount = 1000;

    // Same predicate for the count and the fetch, so the offset is computed against
    // the set that is actually read.
    var query = Query.And(Query.NE("isUpdate", "1"));
    var allCount = dataop.FindCount(DataTableName, query);
    Console.WriteLine("待处理个数:{0}", allCount);

    var skipCount = 0;
    if (allCount >= 10000)
    {
        // Upper bound leaves room for a full page; previously the offset could land within
        // the last few records and return a short or empty page.
        var random = new Random();
        skipCount = random.Next(1000, allCount - takeCount + 1);
    }

    allSchoolList = dataop.FindLimitFieldsByQuery(DataTableName, query, new MongoDB.Driver.SortByDocument() { }, skipCount, takeCount, new string[] { "href" }).ToList();

    foreach (var shchool in allSchoolList)
    {
        var url = string.Format("http://www.todgo.com/{0}", shchool.Text("href"));
        // The URL is only checked here; this method does not add it to the filter.
        if (!filter.Contains(url))
        {
            UrlQueue.Instance.EnQueue(new UrlInfo(url) { });
        }
    }
}
/// <summary>
/// Property: for any generated integer, a freshly created filter does not report
/// it as contained.
/// </summary>
public Property Contains_WithFreshFilter_ShouldReturnFalse()
{
    var anyInt = Arb.Default.Int32();

    return Prop.ForAll(anyInt, candidate =>
    {
        var freshFilter = new BloomFilter<int>(50, 0.02, ByteConverter);
        var found = freshFilter.Contains(candidate);
        Assert.False(found);
    });
}
/// <summary>
/// Checks set-like behaviour: the first Add of a value returns false, a repeated
/// Add returns true, and every added value is reported as contained.
/// </summary>
public void BasicBloomFilterCorrectlyActsAsASet()
{
    const int probe = 10;
    var bloom = new BloomFilter<int>(100, 2);

    // 10 cannot already be in the collection, so the first Add returns false
    // and the second Add of the same value returns true.
    var firstAdd = bloom.Add(probe);
    Assert.IsFalse(firstAdd);
    var secondAdd = bloom.Add(probe);
    Assert.IsTrue(secondAdd);

    // 10 is in the collection.
    Assert.IsTrue(bloom.Contains(probe));

    // Check a load more numbers.
    var number = 0;
    while (number < 100)
    {
        bloom.Add(number);
        Assert.IsTrue(bloom.Contains(number));
        number++;
    }
}
/// <summary>
/// Checks set-like behaviour: the first Add of a value returns false, a repeated
/// Add returns true, and every added value is reported as contained.
/// </summary>
public void BasicBloomFilterCorrectlyActsAsASet()
{
    const int probe = 10;
    var bloom = new BloomFilter<int>(100, 2);

    // 10 cannot already be in the collection, so the first Add returns false
    // and the second Add of the same value returns true.
    var firstAdd = bloom.Add(probe);
    Assert.IsFalse(firstAdd);
    var secondAdd = bloom.Add(probe);
    Assert.IsTrue(secondAdd);

    // 10 is in the collection.
    Assert.IsTrue(bloom.Contains(probe));

    // Check a load more numbers.
    var number = 0;
    while (number < 100)
    {
        bloom.Add(number);
        Assert.IsTrue(bloom.Contains(number));
        number++;
    }
}
/// <summary>
/// Console demo: adds three strings to a filter and prints the results of
/// Contains, ContainsAll, ContainsAny and the false-positive probability.
/// </summary>
static void Main()
{
    var bloom = new BloomFilter<string>(20, 3);
    foreach (var word in new[] { "testing", "nottesting", "testingagain" })
    {
        bloom.Add(word);
    }

    var missing = bloom.Contains("badstring");
    Console.WriteLine(missing); // False
    var present = bloom.Contains("testing");
    Console.WriteLine(present); // True

    var testItems = new List<string>();
    testItems.Add("badstring");
    testItems.Add("testing");
    testItems.Add("test");

    Console.WriteLine(bloom.ContainsAll(testItems)); // False
    Console.WriteLine(bloom.ContainsAny(testItems)); // True

    // Sample output: False Positive Probability: 0.040894188143892
    Console.WriteLine("False Positive Probability: " + bloom.FalsePositiveProbability());
}
/// <summary>
/// Adds 10,000 random doubles to both a hash set and a filter and checks that the
/// filter reports every value of the set as contained.
/// </summary>
public void NoFalseNegativesTest()
{
    const int s = 10000, n = s * 7 + 1;

    var reference = new HashSet<double>();
    var rng = new Random();

    var hashCount = (int)Math.Round(BloomFilter<double>.GetOptimalNumberOfHashFunctions(n, s));
    var hashFunctions = EnumerateHashFunctions(n).Take(hashCount).ToArray();
    var bloom = new BloomFilter<double>(n, hashFunctions);

    var added = 0;
    while (added < s)
    {
        double value = 100 * rng.NextDouble();
        reference.Add(value);
        bloom.Add(value);
        added++;
    }

    // No stored value may be missing from the filter.
    var anyMissing = reference.Any(value => !bloom.Contains(value));
    Assert.False(anyMissing);
}
/// <summary>
/// After 99 insertions into a filter built for 100 items at a 0.1 error rate the
/// reported false-positive rate must not exceed 0.1; after three more insertions
/// it must.
/// </summary>
public void FalsePostiveRateCrossesThresholdAtCorrectCount()
{
    const float threshold = 0.1f;
    var bloom = new BloomFilter<int>(100, threshold);

    var value = 0;
    while (value < 99)
    {
        bloom.Add(value);
        Assert.IsTrue(bloom.Contains(value));
        value++;
    }

    // Still within the configured rate just below capacity.
    Assert.IsFalse(bloom.FalsePositiveRate > threshold);

    // Three extra items push the filter past its capacity.
    foreach (var extra in new[] { 1000, 1001, 1002 })
    {
        bloom.Add(extra);
    }

    Assert.IsTrue(bloom.FalsePositiveRate > threshold);
}
/// <summary>
/// Adds <paramref name="testNum"/> generated strings to a filter in parallel, then
/// looks each of them up in parallel and traces how many were not found.
/// </summary>
/// <param name="testNum">Number of generated strings to add and look up.</param>
public void BaseParalleUselTest(int testNum)
{
    var bf = new BloomFilter<string>(100000000, 0.001f);

    Parallel.For(0, testNum, i => bf.Add(GetTestString(i).ToString()));

    var failCount = 0;
    Parallel.For(0, testNum, i =>
    {
        if (!bf.Contains(GetTestString(i).ToString()))
        {
            // Atomic increment: a plain "failCount += 1" from several worker threads
            // loses updates and under-reports the number of misses.
            System.Threading.Interlocked.Increment(ref failCount);
        }
    });

    Trace.WriteLine($"测试写入{testNum}个对象, 共有{failCount}个({(float)failCount / testNum:f6})对象不存在");
}
/// <summary>
/// Builds a merkle block from a real 7-transaction block using an UPDATE_NONE
/// filter that matches the generation pubkey and the output address of the 4th
/// transaction, then checks that the header hash is preserved and that the filter
/// was not updated with the matched outpoints.
/// </summary>
public void merkle_block_4_test_update_none()
{
    // Random real block (000000000000b731f2eef9e8c63173adfb07e41bd53eb0ef0a6b720d6cb6dea4)
    // with 7 txes. The literal is split into pieces only for readability.
    const string rawBlockHex =
        "0100000082bb869cf3a793432a66e826e05a6fc37469f8efb7421dc880670100000000007f16c5962e8bd963659c793ce370d95f093bc7e367117b3c30c1f8fdd0d9728776381b4d4c86041b554b85290701000000010000000000000000000000000000000000000000000000000000000000000000ffffffff07044c86041b0136ffffffff0100f2052a01000000434104eaafc2314def4ca98ac970241bcab022b9c1e1f4ea423a20f134c876f2c01ec0f0dd5b2e86e7168cefe0d81113c3807420ce13ad1357231a2252247d97a46a91ac000000000100000001bcad20a6a29827d1424f08989255120bf7f3e9e3cdaaa6bb31b0737fe048724300000000494830450220356e834b046cadc0f8ebb5a8a017b02de59c86305403dad52cd77b55af062ea10221009253cd6c119d4729b77c978e1e2aa19f5ea6e0e52b3f16e32fa608cd5bab753901ffffffff02008d380c010000001976a9142b4b8072ecbba129b6453c63e129e643207249ca88ac0065cd1d000000001976a9141b8dd13b994bcfc787b32aeadf58ccb3615cbd5488ac000000000100000003fdacf9b3eb077412e7a968d2e4f11b9a9dee312d666187ed77ee7d26af16cb0b000000008c493046022100ea1608e70911ca0de5af51ba57ad23b9a51db8d28f82c53563c56a05c20f5a87022100a8bdc8b4a8acc8634c6b420410150775eb7f2474f5615f7fccd65af30f310fbf01410465fdf49e29b06b9a1582287b6279014f834edc317695d125ef623c1cc3aaece245bd69fcad7508666e9c74a49dc9056d5fc14338ef38118dc4afae5fe2c585caffffffff309e1913634ecb50f3c4f83e96e70b2df071b497b8973a3e75429df397b5af83000000004948304502202bdb79c596a9ffc24e96f4386199aba386e9bc7b6071516e2b51dda942b3a1ed022100c53a857e76b724fc14d45311eac5019650d415c3abb5428f3aae16d8e69bec2301ffffffff2089e33491695080c9edc18a428f7d834db5b6d372df13ce2b1b0e0cbcb1e6c10000000049483045022100d4ce67c5896ee251c810ac1ff9ceccd328b497c8f553ab6e08431e7d40bad6b5022033119c0c2b7d792d31f1187779c7bd95aefd93d90a715586d73801d9b47471c601ffffffff0100714460030000001976a914c7b55141d097ea5df7a0ed330cf794376e53ec8d88ac0000000001000000045bf0e214aa4069a3e792ecee1e1bf0c1d397cde8dd08138f4b72a0068174344700" +
        "0000008b48304502200c45de8c4f3e2c1821f2fc878cba97b1e6f8807d94930713aa1c86a67b9bf1e40221008581abfef2e30f957815fc89978423746b2086375ca8ecf359c85c2a5b7c88ad01410462bb73f76ca0994fcb8b4271e6fb7561f5c0f9ca0cf6485261c4a0dc894f4ab844c6cdfb97cd0b60ffb5018ffd6238f4d87270efb1d3ae37079b794a92d7ec95ffffffffd669f7d7958d40fc59d2253d88e0f248e29b599c80bbcec344a83dda5f9aa72c000000008a473044022078124c8beeaa825f9e0b30bff96e564dd859432f2d0cb3b72d3d5d93d38d7e930220691d233b6c0f995be5acb03d70a7f7a65b6bc9bdd426260f38a1346669507a3601410462bb73f76ca0994fcb8b4271e6fb7561f5c0f9ca0cf6485261c4a0dc894f4ab844c6cdfb97cd0b60ffb5018ffd6238f4d87270efb1d3ae37079b794a92d7ec95fffffffff878af0d93f5229a68166cf051fd372bb7a537232946e0a46f53636b4dafdaa4000000008c493046022100c717d1714551663f69c3c5759bdbb3a0fcd3fab023abc0e522fe6440de35d8290221008d9cbe25bffc44af2b18e81c58eb37293fd7fe1c2e7b46fc37ee8c96c50ab1e201410462bb73f76ca0994fcb8b4271e6fb7561f5c0f9ca0cf6485261c4a0dc894f4ab844c6cdfb97cd0b60ffb5018ffd6238f4d87270efb1d3ae37079b794a92d7ec95ffffffff27f2b668859cd7f2f894aa0fd2d9e60963bcd07c88973f425f999b8cbfd7a1e2000000008c493046022100e00847147cbf517bcc2f502f3ddc6d284358d102ed20d47a8aa788a62f0db780022100d17b2d6fa84dcaf1c95d88d7e7c30385aecf415588d749afd3ec81f6022cecd701410462bb73f76ca0994fcb8b4271e6fb7561f5c0f9ca0cf6485261c4a0dc894f4ab844c6cdfb97cd0b60ffb5018ffd6238f4d87270efb1d3ae37079b794a92d7ec95ffffffff0100c817a8040000001976a914b6efd80d99179f4f4ff6f4dd0a007d018c385d2188ac000000000100000001834537b2f1ce8ef9373a258e10545ce5a50b758df616cd4356e0032554ebd3c4000000008b483045022100e68f422dd7c34fdce11eeb4509ddae38201773dd62f284e8aa9d96f85099d0b002202243bd399ff96b649a0fad05fa759d6a882f0af8c90cf7632c2840c29070aec20141045e58067e815c2f464c6a2a15f987758374203895710c2d452442e28496ff38ba8f5fd901dc20e29e88477167fe4fc299bf818fd0d9e1632d467b2a3d9503b1aaffffffff0280d7e636030000001976a914f34c3e10eb387efe872acb614c89e78bfca7815d88ac404b4c00000000001976a914a84e272933aaf87e1715d7786c51dfaeb5b65a6f88ac00000000010000000143ac81c8e6f6ef307dfe" +
        "17f3d906d999e23e0189fda838c5510d850927e03ae7000000008c4930460221009c87c344760a64cb8ae6685a3eec2c1ac1bed5b88c87de51acd0e124f266c16602210082d07c037359c3a257b5c63ebd90f5a5edf97b2ac1c434b08ca998839f346dd40141040ba7e521fa7946d12edbb1d1e95a15c34bd4398195e86433c92b431cd315f455fe30032ede69cad9d1e1ed6c3c4ec0dbfced53438c625462afb792dcb098544bffffffff0240420f00000000001976a9144676d1b820d63ec272f1900d59d43bc6463d96f888ac40420f00000000001976a914648d04341d00d7968b3405c034adc38d4d8fb9bd88ac00000000010000000248cc917501ea5c55f4a8d2009c0567c40cfe037c2e71af017d0a452ff705e3f1000000008b483045022100bf5fdc86dc5f08a5d5c8e43a8c9d5b1ed8c65562e280007b52b133021acd9acc02205e325d613e555f772802bf413d36ba807892ed1a690a77811d3033b3de226e0a01410429fa713b124484cb2bd7b5557b2c0b9df7b2b1fee61825eadc5ae6c37a9920d38bfccdc7dc3cb0c47d7b173dbc9db8d37db0a33ae487982c59c6f8606e9d1791ffffffff41ed70551dd7e841883ab8f0b16bf04176b7d1480e4f0af9f3d4c3595768d068000000008b4830450221008513ad65187b903aed1102d1d0c47688127658c51106753fed0151ce9c16b80902201432b9ebcb87bd04ceb2de66035fbbaf4bf8b00d1cfe41f1a1f7338f9ad79d210141049d4cf80125bf50be1709f718c07ad15d0fc612b7da1f5570dddc35f2a352f0f27c978b06820edca9ef982c35fda2d255afba340068c5035552368bc7200c1488ffffffff0100093d00000000001976a9148edb68822f1ad580b043c7b3df2e400f8699eb4888ac00000000";

    var block = new Block();
    block.ReadWrite(ParseHex(rawBlockHex));

    var filter = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_NONE);

    // Match the generation pubkey...
    filter.Insert(ParseHex("04eaafc2314def4ca98ac970241bcab022b9c1e1f4ea423a20f134c876f2c01ec0f0dd5b2e86e7168cefe0d81113c3807420ce13ad1357231a2252247d97a46a91"));
    // ...and the output address of the 4th transaction.
    filter.Insert(ParseHex("b6efd80d99179f4f4ff6f4dd0a007d018c385d21"));

    var merkleBlock = new MerkleBlock(block, filter);
    Assert.True(merkleBlock.Header.GetHash() == block.GetHash());

    // We shouldn't match any outpoints (UPDATE_NONE).
    var firstOutPoint = new OutPoint(uint256.Parse("0x147caa76786596590baa4e98f5d9f48b86c7765e489f7a6ff3360fe5c674360b"), 0);
    Assert.False(filter.Contains(firstOutPoint));

    var secondOutPoint = new OutPoint(uint256.Parse("0x02981fa052f0481dbc5868f4fc2166035a10f27a03cfd2de67326471df5bc041"), 0);
    Assert.False(filter.Contains(secondOutPoint));
}