/// <summary>
/// An item that has been added to the filter must be reported as possibly present.
/// </summary>
public void For_a_value_that_was_added_Contains_returns_Maybe()
 {
     var addedItem = Guid.NewGuid().ToString();
     var bloomFilter = new BloomFilter();

     bloomFilter.Add(addedItem);

     var reportedAsPresent = bloomFilter.MayContain(addedItem);
     reportedAsPresent.Should().BeTrue();
 }
        /// <summary>
        /// Fills a filter created with arguments (1000, .01) with 1000 random strings, probes it with
        /// 10000 strings that were never added, and checks that the number of false positives lies
        /// in the accepted range of 70 to 120.
        /// </summary>
        /// <remarks>
        /// The check is probabilistic, so an occasional outlier is expected. Up to three attempts are
        /// made; the test passes as soon as one attempt is in range, and only the failure of the final
        /// attempt is reported.
        /// </remarks>
        public void Probability_of_false_positive_is_accurate_when_filter_is_at_capacity()
        {
            var attempts = 3;
            while (attempts-- > 0)
            {
                var filter = new BloomFilter(1000, .01);

                var stringsInFilter = Enumerable.Range(1, 1000).Select(_ => Guid.NewGuid().ToString());

                foreach (var s in stringsInFilter)
                {
                    filter.Add(s);
                }

                // None of these numeric strings was added (only GUID strings were), so every hit is a false positive.
                var falsePositives = Enumerable.Range(1001, 10000)
                                               .Select(i => i.ToString())
                                               .Where(s => filter.MayContain(s))
                                               .ToList();

                Console.WriteLine(falsePositives.Count + " false positives");
                Console.WriteLine(falsePositives.ToLogString());

                try
                {
                    falsePositives.Count.Should().BeInRange(70, 120);
                    return;
                }
                catch (Exception) when (attempts > 0)
                {
                    // Deliberately swallowed: retries remain, so loop again with a fresh filter.
                    // When the filter is false (last attempt) the exception is not caught at all
                    // and propagates with its original stack trace, failing the test.
                }
            }
        }
Example #3
0
		/// <summary>
		/// Inserts three byte strings into a filter, checks membership after each insert, and
		/// compares the filter's serialized form with a fixed expected byte sequence.
		/// </summary>
		public void bloom_create_insert_serialize()
		{
			const string firstHex = "99108ad8ed9bb6274d3980bab5a85c048f0950c8";
			const string secondHex = "b5a2c786d9ef4658287ced5914b37a1b4aa32eee";
			const string thirdHex = "b9300670b4c5366e95b2699e8b18bc75e5f729c5";

			var filter = new BloomFilter(3, 0.01, 0, BloomFlags.UPDATE_ALL);

			filter.Insert(ParseHex(firstHex));
			Assert.True(filter.Contains(ParseHex(firstHex)), "BloomFilter doesn't contain just-inserted object!");
			// Same value as the first entry except for one bit in the first byte.
			Assert.True(!filter.Contains(ParseHex("19108ad8ed9bb6274d3980bab5a85c048f0950c8")), "BloomFilter contains something it shouldn't!");

			filter.Insert(ParseHex(secondHex));
			Assert.True(filter.Contains(ParseHex(secondHex)), "BloomFilter doesn't contain just-inserted object (2)!");

			filter.Insert(ParseHex(thirdHex));
			Assert.True(filter.Contains(ParseHex(thirdHex)), "BloomFilter doesn't contain just-inserted object (3)!");

			// Write the filter into an in-memory stream and compare the raw bytes.
			var output = new MemoryStream();
			var writer = new BitcoinStream(output, true);
			writer.ReadWrite(filter);

			var expected = ParseHex("03614e9b050000000000000001");
			AssertEx.CollectionEquals(expected, output.ToArray());
		}
        /// <summary>
        /// Compares the false-positive probability reported by
        /// <c>GetEstimatedFalsePositiveProbability</c> with the rate measured over one million
        /// random probes of values that were never added.
        /// </summary>
        public void FalsePositivesEstimationIsCorrect()
        {
            const int itemsToAdd = 10000;
            const int filterSize = itemsToAdd * 7 + 1;
            const int trials = 1000000;

            var members = new HashSet<double>();
            var random = new Random();
            var hashCount = (int)Math.Round(BloomFilter<double>.GetOptimalNumberOfHashFunctions(filterSize, itemsToAdd));
            var hashFunctions = EnumerateHashFunctions(filterSize).Take(hashCount).ToArray();
            var bloom = new BloomFilter<double>(filterSize, hashFunctions);

            for (var added = 0; added < itemsToAdd; added++)
            {
                double value = 100 * random.NextDouble();
                members.Add(value);
                bloom.Add(value);
            }

            // members.Count is used (not itemsToAdd) because duplicate random values collapse in the set.
            double estimated = BloomFilter<double>.GetEstimatedFalsePositiveProbability(hashCount, filterSize, members.Count);

            var falsePositives = 0;
            var probed = 0;
            while (probed < trials)
            {
                double candidate = 100 * random.NextDouble();
                if (members.Contains(candidate))
                {
                    // A real member cannot be a false positive; draw again without counting the trial.
                    continue;
                }

                if (bloom.Contains(candidate))
                {
                    falsePositives++;
                }

                probed++;
            }

            double actual = falsePositives / (double)trials;
            Assert.Equal(estimated, actual, 3);
        }
 /// <summary>
 /// After one random string has been added, a different random string is expected to be
 /// reported as absent.
 /// </summary>
 public void For_a_value_that_was_not_added_Contains_returns_DefinitelyNot()
 {
     var addedItem = Guid.NewGuid().ToString();
     var otherItem = Guid.NewGuid().ToString();
     var bloomFilter = new BloomFilter();

     bloomFilter.Add(addedItem);

     var reportedAsPresent = bloomFilter.MayContain(otherItem);
     reportedAsPresent.Should().BeFalse();
 }
Example #6
0
 /// <summary>
 /// Creates the DCP manager, the store/parse/request queues and the Bloom filter fields.
 /// </summary>
 /// <remarks>
 /// The queues are built back to front: the request handler is handed the parse queue and the
 /// parse handler is handed the store queue.
 /// </remarks>
 public void MagnesiumInit()
 {
     _dcpManager = new DcpManager();
     _requestQueueOptions = new ExecutionDataflowBlockOptions();

     _storeQueue = new ActionBlock<DataContainer>(container => _dcpManager.StoreHandler(container));
     _parseQueue = new ActionBlock<DataResponse>(response => _dcpManager.ParseHandler(response, _storeQueue));
     _requestQueue = new ActionBlock<DataRequest>(
         request => _dcpManager.RequestHandler(request, _parseQueue),
         _requestQueueOptions);

     // Bloom filter: size 10M, error tolerance 0.001 (translated from the original note).
     _bloomFilter = new BloomFilter<string>(10000000, 0.001f);
 }
        /// <summary>
        /// Round-trips a filter through its string form: a filter rebuilt from
        /// <c>ToString()</c> output must still report the previously added item.
        /// </summary>
        public void ToBase64String_and_string_constructor_correctly_serialize_and_deserialize_BloomFilter_state()
        {
            var addedItem = Guid.NewGuid().ToString();
            var original = new BloomFilter();
            original.Add(addedItem);

            var serialized = original.ToString();
            Console.WriteLine(serialized);

            var restored = new BloomFilter(serialized);

            var reportedAsPresent = restored.MayContain(addedItem);
            reportedAsPresent.Should().BeTrue();
        }
        /// <summary>
        /// Resets the Bloom filter, initialises the settings, starts a crawl with the three
        /// event handlers attached, and then waits for a key press.
        /// </summary>
        public void BeginCrawler()
        {
            filter = new BloomFilter<string>(200000);
            InitSettings();

            var crawlMaster = new CrawlMaster(Settings);

            // Handlers are attached before Crawl() is called.
            crawlMaster.AddUrlEvent += MasterAddUrlEvent;
            crawlMaster.DataReceivedEvent += MasterDataReceivedEvent;
            crawlMaster.CrawlErrorEvent += Master_CrawlErrorEvent;

            crawlMaster.Crawl();

            // Blocks until a key is pressed.
            Console.ReadKey();
        }
Example #9
0
        /// <summary>
        /// Added strings are reported as contained; "baz" is reported as absent until it is added.
        /// </summary>
        public void BasicTest()
        {
            var initialItems = new[] { "foo", "bar" };
            var filter = new BloomFilter<string>(10000);

            foreach (var item in initialItems)
            {
                filter.Add(item);
            }

            foreach (var item in initialItems)
            {
                Assert.IsTrue(filter.Contains(item));
            }

            Assert.IsFalse(filter.Contains("baz"));

            filter.Add("baz");

            Assert.IsTrue(filter.Contains("baz"));
        }
Example #10
0
        /// <summary>
        /// Adds 100000 random strings to a filter created with arguments (100, .7) and checks that
        /// the filter never answers "definitely not" for any of them.
        /// </summary>
        /// <remarks>
        /// Every probed string comes from the set itself, so the assertion inside the loop can only
        /// fail; it is reached solely when the filter produces a false negative.
        /// </remarks>
        public void when_Contains_is_DefinitelyNot_then_it_definitely_is_not_in_the_set()
        {
            var randomStrings = Enumerable.Range(1, 100000).Select(_ => Guid.NewGuid().ToString());
            var members = new HashSet<string>(randomStrings);

            var filter = new BloomFilter(100, .7);
            foreach (var member in members)
            {
                filter.Add(member);
            }

            foreach (var member in members)
            {
                if (filter.MayContain(member))
                {
                    continue;
                }

                members.Contains(member).Should().Be(false);
            }
        }
Example #11
0
        /// <summary>
        /// Configures the crawl settings, attaches the crawl event handlers, starts the crawl and
        /// then waits for a key press.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments; not read by this method.
        /// </param>
        private static void Main(string[] args)
        {
            filter = new BloomFilter<string>(200000);

            // Only referenced by the commented-out examples below.
            const string CityName = "2";

            // Seed address.
            //Settings.SeedsAddress.Add(string.Format("http://www.cnblogs.com/#p{0}", CityName));
            Settings.SeedsAddress.Add(string.Format("http://www.cnblogs.com"));

            // URL keywords.
            //Settings.HrefKeywords.Add(string.Format("/{0}/bj", CityName));
            //Settings.HrefKeywords.Add(string.Format("/{0}/sj", CityName));
            foreach (var keyword in new[] { "pick", "news" })
            {
                Settings.HrefKeywords.Add(string.Format("/{0}", keyword));
            }

            // Number of crawl threads.
            Settings.ThreadCount = 1;

            // Crawl depth.
            Settings.Depth = 7;

            // Links to ignore while crawling, matched by suffix; more than one may be added.
            Settings.EscapeLinks.Add(".jpg");

            // Automatic speed limit (a random 1-5 second interval when enabled).
            Settings.AutoSpeedLimit = false;

            // Host locking: with the second-level domain removed, equal domains count as the same site,
            // e.g. mail.pzcast.com and www.pzcast.com.
            Settings.LockHost = false;

            // settings.UserAgent (User-Agent HTTP header) already has a default; set it only for special needs.
            // settings.Timeout (page request timeout) defaults to 15000 ms; adjust as required.
            // Regular expressions used for filtering can be added like this:
            // settings.RegularFilterExpressions.Add("");

            var crawlMaster = new CrawlMaster(Settings);
            crawlMaster.AddUrlEvent += MasterAddUrlEvent;
            crawlMaster.DataReceivedEvent += MasterDataReceivedEvent;
            crawlMaster.Crawl();

            // Blocks until a key is pressed.
            Console.ReadKey();
        }
Example #12
0
		/// <summary>
		/// Inserts a public key and its hash into a filter and compares the filter's serialized
		/// form with a fixed expected byte sequence.
		/// </summary>
		public void bloom_create_insert_key()
		{
			const string secretText = "5Kg1gnAjaLfKiwhhPpGS3QfRg2m6awQvaj98JCZBZQ5SuS2F15C";
			var secret = Network.Main.CreateBitcoinSecret(secretText);
			var publicKey = secret.PrivateKey.PubKey;

			var filter = new BloomFilter(2, 0.001, 0, BloomFlags.UPDATE_ALL);
			filter.Insert(publicKey.ToBytes());
			filter.Insert(publicKey.Hash.ToBytes());

			// Write the filter into an in-memory stream and compare the raw bytes.
			var output = new MemoryStream();
			var writer = new BitcoinStream(output, true);
			writer.ReadWrite(filter);

			var expected = ParseHex("038fc16b080000000000000001");
			AssertEx.CollectionEquals(expected, output.ToArray());
		}
        /// <summary>
        /// With a filter created with arguments (100, 0.1f), the reported false-positive rate must
        /// not exceed 0.1 after 99 items and must exceed it once three more items are added.
        /// </summary>
        public void FalsePostiveRateCrossesThresholdAtCorrectCount()
        {
            const float threshold = 0.1f;
            var filter = new BloomFilter<int>(100, threshold);

            for (var value = 0; value < 99; value++)
            {
                filter.Add(value);
                Assert.IsTrue(filter.Contains(value));
            }

            Assert.IsFalse(filter.FalsePositiveRate > threshold);

            // Three further items take the total to 102.
            foreach (var extra in new[] { 1000, 1001, 1002 })
            {
                filter.Add(extra);
            }

            Assert.IsTrue(filter.FalsePositiveRate > threshold);
        }
        /// <summary>
        /// Checks the result of <c>Add</c> for a new and a repeated value, and that every added
        /// value is subsequently reported as contained.
        /// </summary>
        public void BasicBloomFilterCorrectlyActsAsASet()
        {
            var filter = new BloomFilter<int>(100, 2);

            // Add is expected to return false for the first insertion of 10 and true for the second.
            // NOTE(review): presumably Add reports whether the item was already present - confirm.
            var firstAddResult = filter.Add(10);
            Assert.IsFalse(firstAddResult);

            var secondAddResult = filter.Add(10);
            Assert.IsTrue(secondAddResult);

            // 10 is now in the collection.
            Assert.IsTrue(filter.Contains(10));

            // Repeat the add-then-contains check for 0..99.
            for (var value = 0; value < 100; value++)
            {
                filter.Add(value);
                Assert.IsTrue(filter.Contains(value));
            }
        }
Example #15
0
        /// <summary>
        /// Adds 10000 random doubles to a filter and checks that every one of them is reported as
        /// contained.
        /// </summary>
        public void NoFalseNegativesTest()
        {
            const int itemsToAdd = 10000;
            const int filterSize = itemsToAdd * 7 + 1;

            var members = new HashSet<double>();
            var random = new Random();
            var hashCount = (int)Math.Round(BloomFilter<double>.GetOptimalNumberOfHashFunctions(filterSize, itemsToAdd));
            var hashFunctions = EnumerateHashFunctions(filterSize).Take(hashCount).ToArray();
            var bloom = new BloomFilter<double>(filterSize, hashFunctions);

            for (var added = 0; added < itemsToAdd; added++)
            {
                double value = 100 * random.NextDouble();
                members.Add(value);
                bloom.Add(value);
            }

            // A member that the filter does not report as contained is a false negative.
            var hasFalseNegative = members.Any(value => !bloom.Contains(value));
            Assert.False(hasFalseNegative);
        }
Example #16
0
        /// <summary>
        /// Adds <paramref name="testNum"/> test strings to a Bloom filter in parallel, looks them
        /// all up again in parallel, and traces how many were reported as missing.
        /// </summary>
        /// <param name="testNum">Number of test strings to add and then look up.</param>
        public void BaseParalleUselTest(int testNum)
        {
            var bf = new BloomFilter<string>(100000000, 0.001f);

            Parallel.For(0, testNum, i => bf.Add(GetTestString(i).ToString()));

            var failCount = 0;

            Parallel.For(0, testNum, i =>
            {
                if (!bf.Contains(GetTestString(i).ToString()))
                {
                    // The loop body runs on several threads at once; a plain "failCount += 1" is a
                    // non-atomic read-modify-write and can lose increments.
                    System.Threading.Interlocked.Increment(ref failCount);
                }
            });

            // Both parallel loops have completed here, so failCount can be read directly.
            Trace.WriteLine($"测试写入{testNum}个对象, 共有{failCount}个({(float)failCount / testNum:f6})对象不存在");
        }
        /// <summary>
        /// Small console demonstration of <c>Add</c>, <c>Contains</c>, <c>ContainsAll</c>,
        /// <c>ContainsAny</c> and <c>FalsePositiveProbability</c>.
        /// </summary>
        static void Main()
        {
            var bf = new BloomFilter<string>(20, 3);

            foreach (var word in new[] { "testing", "nottesting", "testingagain" })
            {
                bf.Add(word);
            }

            // Trailing comments show the output recorded by the original author.
            Console.WriteLine(bf.Contains("badstring")); // False
            Console.WriteLine(bf.Contains("testing")); // True

            var testItems = new List<string> { "badstring", "testing", "test" };

            Console.WriteLine(bf.ContainsAll(testItems)); // False
            Console.WriteLine(bf.ContainsAny(testItems)); // True

            // False Positive Probability: 0.040894188143892
            Console.WriteLine("False Positive Probability: " + bf.FalsePositiveProbability());
        }
Example #18
0
        /// <summary>
        /// Fills a filter created with arguments (1000, .01) with 1000 random strings, probes it with
        /// 10000 strings that were never added, and checks that the number of false positives lies
        /// in the accepted range of 70 to 120.
        /// </summary>
        public void Probability_of_false_positive_is_accurate_when_filter_is_at_capacity()
        {
            var filter = new BloomFilter(1000, .01);

            for (var added = 0; added < 1000; added++)
            {
                filter.Add(Guid.NewGuid().ToString());
            }

            // None of these numeric strings was added (only GUID strings were), so every hit is a false positive.
            var falsePositives = new List<string>();
            foreach (var number in Enumerable.Range(1001, 10000))
            {
                var candidate = number.ToString();
                if (filter.MayContain(candidate))
                {
                    falsePositives.Add(candidate);
                }
            }

            Console.WriteLine(falsePositives.Count + " false positives");
            Console.WriteLine(falsePositives.ToLogString());

            falsePositives.Count.Should().BeInRange(70, 120);
        }
Example #19
0
 /// <summary>
 /// Verifies the result of <c>BloomFilter.GetHash("Test")</c> through <c>Approvals.Verify</c>.
 /// </summary>
 public void GetHashTest()
 {
     var hash = BloomFilter.GetHash("Test");

     Approvals.Verify("Hash of Test:" + hash);
 }
Example #20
0
 // URLs that have no county/city (translated from the original note).
 List<BsonDocument> allLandUrlList = new List<BsonDocument>();

 /// <summary>
 /// Creates the crawler and stores the supplied settings, Bloom filter and data operation object.
 /// </summary>
 /// <param name="_Settings">Crawl settings, stored in <c>Settings</c>.</param>
 /// <param name="_filter">Bloom filter of strings, stored in <c>filter</c>.</param>
 /// <param name="_dataop">Data operation object, stored in <c>dataop</c>.</param>
 public ProfileCompanyDetailCrawler(CrawlSettings _Settings, BloomFilter<string> _filter, DataOperation _dataop)
 {
     Settings = _Settings;
     filter = _filter;
     dataop = _dataop;
 }
Example #21
0
        /// <summary>
        /// Checks <c>IsRelevantAndUpdate</c> against a fixed transaction for a series of freshly
        /// created filters, each holding a single inserted item: items that should match (hashes,
        /// signature, public key, output addresses, outpoints) and items that should not.
        /// </summary>
        public void bloom_match()
        {
            // Random real transaction (b4749f017444b051c44dfd2720e88f314ff94f3dd6d56d40ef65854fcd7fff6b)
            Transaction tx = new Transaction();

            tx.ReadWrite(ParseHex("01000000010b26e9b7735eb6aabdf358bab62f9816a21ba9ebdb719d5299e88607d722c190000000008b4830450220070aca44506c5cef3a16ed519d7c3c39f8aab192c4e1c90d065f37b8a4af6141022100a8e160b856c2d43d27d8fba71e5aef6405b8643ac4cb7cb3c462aced7f14711a0141046d11fee51b0e60666d5049a9101a72741df480b96ee26488a4d3466b95c9a40ac5eeef87e10a5cd336c19a84565f80fa6c547957b7700ff4dfbdefe76036c339ffffffff021bff3d11000000001976a91404943fdd508053c75000106d3bc6e2754dbcff1988ac2f15de00000000001976a914a266436d2965547608b9e15d9032a7b9d64fa43188ac00000000"));


            // and one which spends it (e2769b09e784f32f62ef849763d4f45b98e07ba658647343b915ff832b110436)
            var ch  = new byte[] { 0x01, 0x00, 0x00, 0x00, 0x01, 0x6b, 0xff, 0x7f, 0xcd, 0x4f, 0x85, 0x65, 0xef, 0x40, 0x6d, 0xd5, 0xd6, 0x3d, 0x4f, 0xf9, 0x4f, 0x31, 0x8f, 0xe8, 0x20, 0x27, 0xfd, 0x4d, 0xc4, 0x51, 0xb0, 0x44, 0x74, 0x01, 0x9f, 0x74, 0xb4, 0x00, 0x00, 0x00, 0x00, 0x8c, 0x49, 0x30, 0x46, 0x02, 0x21, 0x00, 0xda, 0x0d, 0xc6, 0xae, 0xce, 0xfe, 0x1e, 0x06, 0xef, 0xdf, 0x05, 0x77, 0x37, 0x57, 0xde, 0xb1, 0x68, 0x82, 0x09, 0x30, 0xe3, 0xb0, 0xd0, 0x3f, 0x46, 0xf5, 0xfc, 0xf1, 0x50, 0xbf, 0x99, 0x0c, 0x02, 0x21, 0x00, 0xd2, 0x5b, 0x5c, 0x87, 0x04, 0x00, 0x76, 0xe4, 0xf2, 0x53, 0xf8, 0x26, 0x2e, 0x76, 0x3e, 0x2d, 0xd5, 0x1e, 0x7f, 0xf0, 0xbe, 0x15, 0x77, 0x27, 0xc4, 0xbc, 0x42, 0x80, 0x7f, 0x17, 0xbd, 0x39, 0x01, 0x41, 0x04, 0xe6, 0xc2, 0x6e, 0xf6, 0x7d, 0xc6, 0x10, 0xd2, 0xcd, 0x19, 0x24, 0x84, 0x78, 0x9a, 0x6c, 0xf9, 0xae, 0xa9, 0x93, 0x0b, 0x94, 0x4b, 0x7e, 0x2d, 0xb5, 0x34, 0x2b, 0x9d, 0x9e, 0x5b, 0x9f, 0xf7, 0x9a, 0xff, 0x9a, 0x2e, 0xe1, 0x97, 0x8d, 0xd7, 0xfd, 0x01, 0xdf, 0xc5, 0x22, 0xee, 0x02, 0x28, 0x3d, 0x3b, 0x06, 0xa9, 0xd0, 0x3a, 0xcf, 0x80, 0x96, 0x96, 0x8d, 0x7d, 0xbb, 0x0f, 0x91, 0x78, 0xff, 0xff, 0xff, 0xff, 0x02, 0x8b, 0xa7, 0x94, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x19, 0x76, 0xa9, 0x14, 0xba, 0xde, 0xec, 0xfd, 0xef, 0x05, 0x07, 0x24, 0x7f, 0xc8, 0xf7, 0x42, 0x41, 0xd7, 0x3b, 0xc0, 0x39, 0x97, 0x2d, 0x7b, 0x88, 0xac, 0x40, 0x94, 0xa8, 0x02, 0x00, 0x00, 0x00, 0x00, 0x19, 0x76, 0xa9, 0x14, 0xc1, 0x09, 0x32, 0x48, 0x3f, 0xec, 0x93, 0xed, 0x51, 0xf5, 0xfe, 0x95, 0xe7, 0x25, 0x59, 0xf2, 0xcc, 0x70, 0x43, 0xf9, 0x88, 0xac, 0x00, 0x00, 0x00, 0x00, 0x00 };
            // vch is ch without its final element.
            var vch = ch.Take(ch.Length - 1).ToArray();

            Transaction spendingTx = new Transaction();

            spendingTx.ReadWrite(vch);

            // Each check below starts from a freshly constructed filter so that earlier inserts cannot influence it.
            BloomFilter filter = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);

            filter.Insert(uint256.Parse("0xb4749f017444b051c44dfd2720e88f314ff94f3dd6d56d40ef65854fcd7fff6b"));
            Assert.True(filter.IsRelevantAndUpdate(tx), "Simple Bloom filter didn't match tx hash");

            filter = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
            // byte-reversed tx hash
            filter.Insert(ParseHex("6bff7fcd4f8565ef406dd5d63d4ff94f318fe82027fd4dc451b04474019f74b4"));
            Assert.True(filter.IsRelevantAndUpdate(tx), "Simple Bloom filter didn't match manually serialized tx hash");

            filter = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
            filter.Insert(ParseHex("30450220070aca44506c5cef3a16ed519d7c3c39f8aab192c4e1c90d065f37b8a4af6141022100a8e160b856c2d43d27d8fba71e5aef6405b8643ac4cb7cb3c462aced7f14711a01"));
            Assert.True(filter.IsRelevantAndUpdate(tx), "Simple Bloom filter didn't match input signature");

            filter = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
            filter.Insert(ParseHex("046d11fee51b0e60666d5049a9101a72741df480b96ee26488a4d3466b95c9a40ac5eeef87e10a5cd336c19a84565f80fa6c547957b7700ff4dfbdefe76036c339"));
            Assert.True(filter.IsRelevantAndUpdate(tx), "Simple Bloom filter didn't match input pub key");

            filter = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
            filter.Insert(ParseHex("04943fdd508053c75000106d3bc6e2754dbcff19"));
            Assert.True(filter.IsRelevantAndUpdate(tx), "Simple Bloom filter didn't match output address");
            // The same filter instance (no reset) is then expected to match the spending transaction as well.
            Assert.True(filter.IsRelevantAndUpdate(spendingTx), "Simple Bloom filter didn't add output");

            filter = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
            filter.Insert(ParseHex("a266436d2965547608b9e15d9032a7b9d64fa431"));
            Assert.True(filter.IsRelevantAndUpdate(tx), "Simple Bloom filter didn't match output address");

            filter = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
            filter.Insert(new OutPoint(uint256.Parse("0x90c122d70786e899529d71dbeba91ba216982fb6ba58f3bdaab65e73b7e9260b"), 0));
            Assert.True(filter.IsRelevantAndUpdate(tx), "Simple Bloom filter didn't match COutPoint");

            filter = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
            OutPoint prevOutPoint = new OutPoint(uint256.Parse("0x90c122d70786e899529d71dbeba91ba216982fb6ba58f3bdaab65e73b7e9260b"), 0);

            // Same outpoint as the previous check, inserted as its ToBytes() result instead of as an OutPoint.
            {
                var data = prevOutPoint.ToBytes();
                filter.Insert(data);
            }
            Assert.True(filter.IsRelevantAndUpdate(tx), "Simple Bloom filter didn't match manually serialized COutPoint");

            // Negative cases: the inserted items below are expected not to match tx.
            filter = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
            filter.Insert(uint256.Parse("00000009e784f32f62ef849763d4f45b98e07ba658647343b915ff832b110436"));
            Assert.True(!filter.IsRelevantAndUpdate(tx), "Simple Bloom filter matched random tx hash");

            filter = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
            filter.Insert(ParseHex("0000006d2965547608b9e15d9032a7b9d64fa431"));
            Assert.True(!filter.IsRelevantAndUpdate(tx), "Simple Bloom filter matched random address");

            filter = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
            filter.Insert(new OutPoint(uint256.Parse("0x90c122d70786e899529d71dbeba91ba216982fb6ba58f3bdaab65e73b7e9260b"), 1));
            Assert.True(!filter.IsRelevantAndUpdate(tx), "Simple Bloom filter matched COutPoint for an output we didn't care about");

            filter = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
            filter.Insert(new OutPoint(uint256.Parse("0x000000d70786e899529d71dbeba91ba216982fb6ba58f3bdaab65e73b7e9260b"), 0));
            Assert.True(!filter.IsRelevantAndUpdate(tx), "Simple Bloom filter matched COutPoint for an output we didn't care about");
        }
Example #22
0
        /// <summary>
        /// A filter that has just been created must report a fill ratio of zero.
        /// </summary>
        public void FillRatio_WithNewFilter_ShouldBeZero()
        {
            const double expectedFillRatio = 0d;

            var newFilter = BloomFilter<int>.Create(1000, 0.05);

            Assert.Equal(expectedFillRatio, newFilter.FillRatio);
        }
Example #23
0
        /// <summary>
        /// A filter constructed from <c>dictionary</c> must report an item count equal to the
        /// dictionary's length.
        /// </summary>
        public void InitBloomFilter_GetDictionaryListSize_ReturnDictionaryListSize()
        {
            var bloomFilter = new BloomFilter(dictionary, 150, 2);

            var countMatchesDictionary = bloomFilter.ItemCount == dictionary.Length;

            Assert.IsTrue(countMatchesDictionary);
        }
Example #24
0
 public LiteralInfo(BloomFilter literalsFilter) =>
Example #25
0
        /// <summary>
        /// Copies the options selected on the form into the crawler settings and validates the
        /// configuration row returned by <c>GetModelByRow</c>.
        /// </summary>
        /// <param name="tag">Invocation mode; 0 means single-click mode, in which the seed address list is replaced by the row's URL.</param>
        /// <returns>
        /// <c>false</c> when the seed URL is empty, or when a pattern whose type is "正则表达式" is empty
        /// (a message box is shown for each problem); otherwise <c>true</c>.
        /// </returns>
        private bool SettingCustomValues(Int32 tag)
        {
            var settingsValid = true;
            configModel = GetModelByRow();

            // Crawler configuration.
            filter = new BloomFilter<string>(200000);

            // Thread count: custom value from the text box, or 1.
            if (radioThreadC.Checked)
            {
                var threadText = txtThread.Text.Trim();
                if (!String.IsNullOrEmpty(threadText))
                {
                    Settings.ThreadCount = Convert.ToByte(threadText);
                }
            }
            if (radioThreadM.Checked)
            {
                Settings.ThreadCount = 1;
            }

            // Depth: custom value from the text box, or derived from the row's page total.
            if (radioDepthC.Checked)
            {
                var depthText = txtDepth.Text.Trim();
                if (!String.IsNullOrEmpty(depthText))
                {
                    Settings.Depth = Convert.ToByte(depthText);
                }
            }
            if (radioDepthM.Checked)
            {
                if (configModel.kPageTotal == null)
                {
                    Settings.Depth = Convert.ToInt32(100);
                }
                else
                {
                    Settings.Depth = Convert.ToInt32(configModel.kPageTotal + 1);
                }
            }

            // Speed limit (1-5). The two radio buttons are checked independently, "yes" last.
            if (radioSpeedNo.Checked)
            {
                Settings.AutoSpeedLimit = false;
            }
            if (radioSpeedYes.Checked)
            {
                Settings.AutoSpeedLimit = true;
            }

            // Seed address.
            if (string.IsNullOrEmpty(configModel.kUrl))
            {
                settingsValid = false;
                MessageBox.Show("种子地址为空");
            }
            else if (tag == 0) // 0 = single-click mode
            {
                Settings.SeedsAddress.Clear();
                Settings.SeedsAddress.Add(configModel.kUrl);
            }

            // Links to ignore while crawling, matched by suffix; more than one may be added.
            Settings.EscapeLinks.Add(".jpg");

            // URL keywords can be set like this:
            // Settings.HrefKeywords.Add(string.Format("/{0}/bj", CityName));

            // Host locking: with the second-level domain removed, equal domains count as the same site,
            // e.g. mail.pzcast.com and www.pzcast.com.
            Settings.LockHost = false;

            // URL configuration notes:
            // settings.UserAgent (User-Agent HTTP header) already has a default; set it only for special needs.
            // Page request timeout defaults to 15000 ms:
            //Settings.Timeout = 60000; // adjust as required
            // Regular expressions used for filtering can be added like this:
            //Settings.RegularFilterExpressions.Add("<a .+ href='(.+)'>下一页</a>");//  string strReg = "<a .+ href='(.+)'>下一页</a>";

            // A pattern of the regular-expression type must not be empty.
            if (configModel.kDetailPatternType == "正则表达式" && string.IsNullOrEmpty(configModel.kDetailPattern))
            {
                settingsValid = false;
                MessageBox.Show("详细页提取模板为空");
            }

            if (configModel.kNextPagePatternType == "正则表达式" && string.IsNullOrEmpty(configModel.kNextPagePattern))
            {
                settingsValid = false;
                MessageBox.Show("下一页提取模板为空");
            }

            return settingsValid;
        }
Example #26
0
		/// <summary>
		/// Builds a Bloom filter containing every item returned by <c>GetDataToTrack</c>.
		/// </summary>
		/// <param name="fp">Passed to the <c>BloomFilter</c> constructor as its second argument.</param>
		/// <param name="flags">Bloom flags passed to the constructor; defaults to <c>UPDATE_ALL</c>.</param>
		/// <returns>The populated filter.</returns>
		public BloomFilter CreateBloomFilter(double fp, BloomFlags flags = BloomFlags.UPDATE_ALL)
		{
			var trackedData = GetDataToTrack().ToArray();

			// The filter is constructed with the number of non-internal tracked scripts, not with trackedData.Length.
			var externalScriptCount = _TrackedScripts.Count(entry => !entry.Value.IsInternal);
			var bloomFilter = new BloomFilter(externalScriptCount, fp, _Tweak, flags);

			foreach(var item in trackedData)
			{
				bloomFilter.Insert(item);
			}

			return bloomFilter;
		}
 /// <summary>
 /// Writes a Bloom filter as a binary value, followed by the number of hash functions used
 /// to construct it.
 /// </summary>
 /// <param name="filter">The Bloom filter to write.</param>
 /// <seealso cref="BloomFilter.Filter"/>
 public void WriteFilter(BloomFilter.Filter<long> filter)
 {
     var filterBytes = new SqlBinary(filter.convertToByteArray());
     this.Write(filterBytes);

     // The count is narrowed to a short before it is written.
     var hashFunctionCount = (short) filter.hashFunctionCount;
     this.Write(hashFunctionCount);
 }
Example #28
0
 /// <summary>
 /// Verifies the result of <c>BloomFilter.ComputeHash(1, 2, 2)</c> through <c>Approvals.Verify</c>.
 /// </summary>
 public void ComputeHashTest()
 {
     var hash = BloomFilter.ComputeHash(1, 2, 2);

     Approvals.Verify("ComputeHash of 1,2,2:" + hash);
 }
 /// <summary>
 /// Creates the filter under test, constructed with the argument 1000.
 /// </summary>
 public BloomFilterTests()
 {
     var filterUnderTest = new BloomFilter<string>(1000);
     TestObject = filterUnderTest;
 }
 /// <summary>
 /// Creates the crawler and stores the supplied settings, Bloom filter and data operation object.
 /// </summary>
 /// <param name="_Settings">Crawl settings, stored in <c>Settings</c>.</param>
 /// <param name="_filter">Bloom filter of strings, stored in <c>filter</c>.</param>
 /// <param name="_dataop">Data operation object, stored in <c>dataop</c>.</param>
 public FangProjectDetailCrawler_JiangYin(CrawlSettings _Settings, BloomFilter<string> _filter, DataOperation _dataop)
 {
     Settings = _Settings;
     filter = _filter;
     dataop = _dataop;
 }
Example #31
0
        /// <summary>
        /// Configures the application, seeds the crawler with every URL from the source file that
        /// the article service does not already report, starts the crawl and then waits for a key
        /// press.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        private static void Main(string[] args)
        {
            Crawler.Sample.BootStrapper.Startup.Configure();

            _IArticlesService = IocContainer.Default.Resolve<IArticlesService>();

            // Start the logging component.
            log4net.Config.XmlConfigurator.Configure();

            // Start the index manager.
            IndexManager.Instance.Start();

            /* Alternative source: URLs from the IE browser favourites.
             * BrowserCollection browserCollection = new BrowserCollection();
             * List<string> urlList = browserCollection.GetBrowserCollectionsUrl();
             */

            List<string> urlList = GetHtmlUrlLink(ReadFile(sourceFile));

            // Extra seed URLs can be appended here, e.g.:
            //urlList.Add("http://www.cnblogs.com/yangecnu/p/Introduce-RabbitMQ.html");

            filter = new BloomFilter<string>(200000);

            foreach (var url in urlList)
            {
                // Skip blank entries before calling the article service.
                if (string.IsNullOrEmpty(url))
                {
                    continue;
                }

                var result = _IArticlesService.GetByUrl(url);
                if (!result)
                {
                    // Add the URL as-is. Passing it through string.Format would treat it as a
                    // format string and throw FormatException for URLs containing '{' or '}'.
                    Settings.SeedsAddress.Add(url);
                }
            }

            // URL keywords can be set like this:
            //Settings.HrefKeywords.Add(string.Format("/{0}/bj", CityName));
            //Settings.HrefKeywords.Add(string.Format("/{0}/sj", CityName));

            // Number of crawl threads.
            Settings.ThreadCount = 5;

            // Crawl depth.
            Settings.Depth = 1;

            // Links to ignore while crawling, matched by suffix; more than one may be added.
            //Settings.EscapeLinks.Add(".jpg");

            // Automatic speed limit (a random 1-5 second interval when enabled).
            Settings.AutoSpeedLimit = false;

            // Host locking: with the second-level domain removed, equal domains count as the same site,
            // e.g. mail.pzcast.com and www.pzcast.com.
            Settings.LockHost = false;

            // settings.UserAgent (User-Agent HTTP header) already has a default; set it only for special needs.
            // settings.Timeout (page request timeout) defaults to 15000 ms; adjust as required.
            // Regular expressions used for filtering can be added like this:
            // settings.RegularFilterExpressions.Add("");
            var master = new CrawlMaster(Settings);

            master.AddUrlEvent       += MasterAddUrlEvent;
            master.DataReceivedEvent += MasterDataReceivedEvent;
            master.Crawl();

            // Blocks until a key is pressed.
            Console.ReadKey();
        }
Example #32
0
        /// <summary>
        /// Once the blockchain headers have been synchronised this method will attempt to find all transactions relevant to a single address.
        /// To find the transactions there are two options: first option the full blocks can be completely downloaded and searched which is what a full node
        /// would do; second option is to set a bloom filter and then request the desired blocks from a connected full node.
        /// This method uses the second option: it loads a bloom filter for the address on the node and requests the
        /// blocks in the period one at a time as filtered blocks.
        /// </summary>
        /// <param name="chain">Chain whose blocks are searched.</param>
        /// <param name="node">Connected node that receives the filter and the block requests.</param>
        /// <param name="addr">Address whose hash is inserted into the bloom filter.</param>
        /// <param name="start">Exclusive lower bound on the block time of the blocks to search.</param>
        /// <param name="end">Exclusive upper bound on the block time of the blocks to search.</param>
        /// <param name="ct">Token that cancels the search before it starts or while waiting for completion.</param>
        /// <returns>The IDs of the matched transactions; empty when no block falls inside the period.</returns>
        /// <exception cref="OperationCanceledException">Thrown when <paramref name="ct"/> is cancelled.</exception>
        private static async Task<List<uint256>> GetTransactions(ConcurrentChain chain, Node node, BitcoinPubKeyAddress addr, DateTimeOffset start, DateTimeOffset end, CancellationToken ct)
        {
            logger.DebugFormat("Transaction search task commencing...");

            ct.ThrowIfCancellationRequested();

            ManualResetEventSlim searchCompleteSignal = new ManualResetEventSlim();
            List<uint256> txs = new List<uint256>();

            var searchBlocks = chain.ToEnumerable(true).Where(x => x.Header.BlockTime > start && x.Header.BlockTime < end).ToList();

            // Only search if there are some blocks in the period of interest.
            if (searchBlocks.Count == 0)
            {
                return txs;
            }

            int searchBlocksIndex = 0;

            BloomFilter filter = new BloomFilter(_nElements, _falsePositiveRate, _nTweakIn, BloomFlags.UPDATE_NONE);
            // The hash is passed as a format argument rather than concatenated into the format string.
            logger.DebugFormat("Setting bloom for address {0}.", addr.Hash);
            filter.Insert(addr.Hash.ToBytes());

            // NOTE(review): this handler is never unsubscribed, so it stays attached to the node after the
            // search completes. Removing it requires naming the event's delegate type, which is not visible here.
            node.MessageReceived += (node1, message) =>
            {
                switch (message.Message.Payload)
                {
                case MerkleBlockPayload merkleBlk:
                    foreach (var tx in merkleBlk.Object.PartialMerkleTree.GetMatchedTransactions())
                    {
                        logger.DebugFormat("Matched merkle block TX ID {0}.", tx);
                        txs.Add(tx);
                    }

                    // Each merkle block response triggers the request for the next block,
                    // or signals completion once every block has been requested.
                    if (searchBlocksIndex < searchBlocks.Count)
                    {
                        var dp = new GetDataPayload(new InventoryVector(InventoryType.MSG_FILTERED_BLOCK, searchBlocks[searchBlocksIndex++].HashBlock));
                        node.SendMessage(dp);
                    }
                    else
                    {
                        searchCompleteSignal.Set();
                    }

                    break;

                case TxPayload tx:
                    logger.DebugFormat("TX ID {0}.", tx.Object.GetHash());
                    break;
                }
            };

            node.SendMessage(new FilterLoadPayload(filter));

            // Kick off the chain of requests with the first block; the handler above requests the rest.
            var dataPayload = new GetDataPayload(new InventoryVector(InventoryType.MSG_FILTERED_BLOCK, searchBlocks[searchBlocksIndex++].HashBlock));
            node.SendMessage(dataPayload);

            await Task.Run(() =>
            {
                searchCompleteSignal.Wait(ct);
                logger.DebugFormat("Block search task completed.");
            });

            return txs;
        }
Example #33
0
 /// <summary>
 /// Creates the crawler and stores the supplied settings, Bloom filter and data operation object.
 /// </summary>
 /// <param name="_Settings">Crawl settings, stored in <c>Settings</c>.</param>
 /// <param name="_filter">Bloom filter of strings, stored in <c>filter</c>.</param>
 /// <param name="_dataop">Data operation object, stored in <c>dataop</c>.</param>
 public LandFangCityRegionEXCrawler(CrawlSettings _Settings, BloomFilter<string> _filter, DataOperation _dataop)
 {
     Settings = _Settings;
     filter = _filter;
     dataop = _dataop;
 }
Example #34
0
		public void merkle_block_1()
		{
			// Random real block (0000000000013b8ab2cd513b0261a14096412195a72a0c4827d229dcc7e0f7af)
			// With 9 txes
			Block block = new Block();
			block.ReadWrite(ParseHex("0100000090f0a9f110702f808219ebea1173056042a714bad51b916cb6800000000000005275289558f51c9966699404ae2294730c3c9f9bda53523ce50e9b95e558da2fdb261b4d4c86041b1ab1bf930901000000010000000000000000000000000000000000000000000000000000000000000000ffffffff07044c86041b0146ffffffff0100f2052a01000000434104e18f7afbe4721580e81e8414fc8c24d7cfacf254bb5c7b949450c3e997c2dc1242487a8169507b631eb3771f2b425483fb13102c4eb5d858eef260fe70fbfae0ac00000000010000000196608ccbafa16abada902780da4dc35dafd7af05fa0da08cf833575f8cf9e836000000004a493046022100dab24889213caf43ae6adc41cf1c9396c08240c199f5225acf45416330fd7dbd022100fe37900e0644bf574493a07fc5edba06dbc07c311b947520c2d514bc5725dcb401ffffffff0100f2052a010000001976a914f15d1921f52e4007b146dfa60f369ed2fc393ce288ac000000000100000001fb766c1288458c2bafcfec81e48b24d98ec706de6b8af7c4e3c29419bfacb56d000000008c493046022100f268ba165ce0ad2e6d93f089cfcd3785de5c963bb5ea6b8c1b23f1ce3e517b9f022100da7c0f21adc6c401887f2bfd1922f11d76159cbc597fbd756a23dcbb00f4d7290141042b4e8625a96127826915a5b109852636ad0da753c9e1d5606a50480cd0c40f1f8b8d898235e571fe9357d9ec842bc4bba1827daaf4de06d71844d0057707966affffffff0280969800000000001976a9146963907531db72d0ed1a0cfb471ccb63923446f388ac80d6e34c000000001976a914f0688ba1c0d1ce182c7af6741e02658c7d4dfcd388ac000000000100000002c40297f730dd7b5a99567eb8d27b78758f607507c52292d02d4031895b52f2ff010000008b483045022100f7edfd4b0aac404e5bab4fd3889e0c6c41aa8d0e6fa122316f68eddd0a65013902205b09cc8b2d56e1cd1f7f2fafd60a129ed94504c4ac7bdc67b56fe67512658b3e014104732012cb962afa90d31b25d8fb0e32c94e513ab7a17805c14ca4c3423e18b4fb5d0e676841733cb83abaf975845c9f6f2a8097b7d04f4908b18368d6fc2d68ecffffffffca5065ff9617cbcba45eb23726df6498a9b9cafed4f54cbab9d227b0035ddefb000000008a473044022068010362a13c7f9919fa832b2dee4e788f61f6f5d344a7c2a0da6ae740605658022006d1af525b9a14a35c003b78b72bd59738cd676f845d1ff3fc25049e01003614014104732012cb962afa90d31b25d8fb0e32c94e513ab7a17805c14ca4c3423e18b4fb5d0e676841733cb83abaf975845c9f6f2a8097b7d04f4908b
18368d6fc2d68ecffffffff01001ec4110200000043410469ab4181eceb28985b9b4e895c13fa5e68d85761b7eee311db5addef76fa8621865134a221bd01f28ec9999ee3e021e60766e9d1f3458c115fb28650605f11c9ac000000000100000001cdaf2f758e91c514655e2dc50633d1e4c84989f8aa90a0dbc883f0d23ed5c2fa010000008b48304502207ab51be6f12a1962ba0aaaf24a20e0b69b27a94fac5adf45aa7d2d18ffd9236102210086ae728b370e5329eead9accd880d0cb070aea0c96255fae6c4f1ddcce1fd56e014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffff02404b4c00000000001976a9142b6ba7c9d796b75eef7942fc9288edd37c32f5c388ac002d3101000000001976a9141befba0cdc1ad56529371864d9f6cb042faa06b588ac000000000100000001b4a47603e71b61bc3326efd90111bf02d2f549b067f4c4a8fa183b57a0f800cb010000008a4730440220177c37f9a505c3f1a1f0ce2da777c339bd8339ffa02c7cb41f0a5804f473c9230220585b25a2ee80eb59292e52b987dad92acb0c64eced92ed9ee105ad153cdb12d001410443bd44f683467e549dae7d20d1d79cbdb6df985c6e9c029c8d0c6cb46cc1a4d3cf7923c5021b27f7a0b562ada113bc85d5fda5a1b41e87fe6e8802817cf69996ffffffff0280651406000000001976a9145505614859643ab7b547cd7f1f5e7e2a12322d3788ac00aa0271000000001976a914ea4720a7a52fc166c55ff2298e07baf70ae67e1b88ac00000000010000000586c62cd602d219bb60edb14a3e204de0705176f9022fe49a538054fb14abb49e010000008c493046022100f2bc2aba2534becbdf062eb993853a42bbbc282083d0daf9b4b585bd401aa8c9022100b1d7fd7ee0b95600db8535bbf331b19eed8d961f7a8e54159c53675d5f69df8c014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffff03ad0e58ccdac3df9dc28a218bcf6f1997b0a93306faaa4b3a28ae83447b2179010000008b483045022100be12b2937179da88599e27bb31c3525097a07cdb52422d165b3ca2f2020ffcf702200971b51f853a53d644ebae9ec8f3512e442b1bcb6c315a5b491d119d10624c83014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffff2acfcab629bbc8685792603762c921580030ba144af553d271716
a95089e107b010000008b483045022100fa579a840ac258871365dd48cd7552f96c8eea69bd00d84f05b283a0dab311e102207e3c0ee9234814cfbb1b659b83671618f45abc1326b9edcc77d552a4f2a805c0014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffffdcdc6023bbc9944a658ddc588e61eacb737ddf0a3cd24f113b5a8634c517fcd2000000008b4830450221008d6df731df5d32267954bd7d2dda2302b74c6c2a6aa5c0ca64ecbabc1af03c75022010e55c571d65da7701ae2da1956c442df81bbf076cdbac25133f99d98a9ed34c014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffffe15557cd5ce258f479dfd6dc6514edf6d7ed5b21fcfa4a038fd69f06b83ac76e010000008b483045022023b3e0ab071eb11de2eb1cc3a67261b866f86bf6867d4558165f7c8c8aca2d86022100dc6e1f53a91de3efe8f63512850811f26284b62f850c70ca73ed5de8771fb451014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffff01404b4c00000000001976a9142b6ba7c9d796b75eef7942fc9288edd37c32f5c388ac00000000010000000166d7577163c932b4f9690ca6a80b6e4eb001f0a2fa9023df5595602aae96ed8d000000008a4730440220262b42546302dfb654a229cefc86432b89628ff259dc87edd1154535b16a67e102207b4634c020a97c3e7bbd0d4d19da6aa2269ad9dded4026e896b213d73ca4b63f014104979b82d02226b3a4597523845754d44f13639e3bf2df5e82c6aab2bdc79687368b01b1ab8b19875ae3c90d661a3d0a33161dab29934edeb36aa01976be3baf8affffffff02404b4c00000000001976a9144854e695a02af0aeacb823ccbc272134561e0a1688ac40420f00000000001976a914abee93376d6b37b5c2940655a6fcaf1c8e74237988ac0000000001000000014e3f8ef2e91349a9059cb4f01e54ab2597c1387161d3da89919f7ea6acdbb371010000008c49304602210081f3183471a5ca22307c0800226f3ef9c353069e0773ac76bb580654d56aa523022100d4c56465bdc069060846f4fbf2f6b20520b2a80b08b168b31e66ddb9c694e240014104976c79848e18251612f8940875b2b08d06e6dc73b9840e8860c066b7e87432c477e9a59a453e71e6d76d5fe34058b800a098fc1740ce3012e8fc8a00c96af966ffffffff02c0e1e40
0000000001976a9144134e75a6fcb6042034aab5e18570cf1f844f54788ac404b4c00000000001976a9142b6ba7c9d796b75eef7942fc9288edd37c32f5c388ac00000000"));

			BloomFilter filter = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
			// Match the last transaction
			filter.Insert(uint256.Parse("0x74d681e0e03bafa802c8aa084379aa98d9fcd632ddc2ed9782b586ec87451f20"));

			MerkleBlock merkleBlock = block.Filter(filter);
			Assert.True(merkleBlock.Header.GetHash() == block.GetHash());
			Assert.True(merkleBlock.PartialMerkleTree.Check(block.Header.HashMerkleRoot));

			var vMatchedTxn = merkleBlock.PartialMerkleTree.GetMatchedTransactions().ToList();
			Assert.True(vMatchedTxn.Count == 1);

			AssertMatch(block, vMatchedTxn, "0x74d681e0e03bafa802c8aa084379aa98d9fcd632ddc2ed9782b586ec87451f20", 0, 8);

			// Also match the 8th transaction
			filter.Insert(uint256.Parse("0xdd1fd2a6fc16404faf339881a90adbde7f4f728691ac62e8f168809cdfae1053"));
			merkleBlock = block.Filter(filter);
			vMatchedTxn = merkleBlock.PartialMerkleTree.GetMatchedTransactions().ToList();
			Assert.True(merkleBlock.PartialMerkleTree.Check(block.Header.HashMerkleRoot));

			Assert.True(merkleBlock.Header.GetHash() == block.GetHash());
			Assert.True(vMatchedTxn.Count == 2);

			AssertMatch(block, vMatchedTxn, "0x74d681e0e03bafa802c8aa084379aa98d9fcd632ddc2ed9782b586ec87451f20", 1, 8);


			AssertMatch(block, vMatchedTxn, "0xdd1fd2a6fc16404faf339881a90adbde7f4f728691ac62e8f168809cdfae1053", 0, 7);
		}
Example #35
0
        /// <summary>
        /// Builds the string Bloom filter stored in <c>_bloomFilter</c> from the
        /// <c>ItemsToInsert</c> and <c>MaximumErrorRate</c> values.
        /// </summary>
        public void Setup_BloomFilter()
        {
            // A ByteConverterStringMarshal is handed to the filter as its third constructor argument.
            _bloomFilter = new BloomFilter<string>(
                ItemsToInsert,
                MaximumErrorRate,
                new ByteConverterStringMarshal());
        }
Example #36
0
		/// <summary>
		/// Filters a single-transaction block by that transaction's id, then checks the
		/// partial merkle tree, the matched transaction list and the serialized bytes of
		/// the resulting merkle block.
		/// </summary>
		public void merkle_block_3_and_serialize()
		{
			// Id of the only transaction in the block; used for both the filter and the match check.
			const string onlyTxId = "0x63194f18be0af63f2c6bc9dc0f777cbefed3d9415c4af83f3ee3a3d669c00cb5";

			// Random real block (000000000000dab0130bbcc991d3d7ae6b81aa6f50a798888dfe62337458dc45)
			// With one tx
			var fullBlock = new Block();
			fullBlock.ReadWrite(ParseHex("0100000079cda856b143d9db2c1caff01d1aecc8630d30625d10e8b4b8b0000000000000b50cc069d6a3e33e3ff84a5c41d9d3febe7c770fdcc96b2c3ff60abe184f196367291b4d4c86041b8fa45d630101000000010000000000000000000000000000000000000000000000000000000000000000ffffffff08044c86041b020a02ffffffff0100f2052a01000000434104ecd3229b0571c3be876feaac0442a9f13c5a572742927af1dc623353ecf8c202225f64868137a18cdd85cbbb4c74fbccfd4f49639cf1bdc94a5672bb15ad5d4cac00000000"));

			// Match the only transaction
			var bloom = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
			bloom.Insert(uint256.Parse(onlyTxId));

			var filtered = new MerkleBlock(fullBlock, bloom);
			Assert.True(filtered.Header.GetHash() == fullBlock.GetHash());
			Assert.True(filtered.PartialMerkleTree.Check(fullBlock.Header.HashMerkleRoot));

			var matched = filtered.PartialMerkleTree.GetMatchedTransactions().ToList();
			Assert.True(matched.Count == 1);

			AssertMatch(fullBlock, matched, onlyTxId, 0, 0);

			// Bytes the filtered block is expected to serialize to.
			byte[] expectedBytes = ParseHex("0100000079cda856b143d9db2c1caff01d1aecc8630d30625d10e8b4b8b0000000000000b50cc069d6a3e33e3ff84a5c41d9d3febe7c770fdcc96b2c3ff60abe184f196367291b4d4c86041b8fa45d630100000001b50cc069d6a3e33e3ff84a5c41d9d3febe7c770fdcc96b2c3ff60abe184f19630101");

			AssertEx.CollectionEquals(filtered.ToBytes(), expectedBytes);
		}
Example #37
0
        /// <summary>
        /// Checks that a filter built with the arguments (dictionary, 150, 2)
        /// reports a <c>HashCount</c> of 2.
        /// </summary>
        public void InitBloomFilter_GetHashCount_ReturnHashCount()
        {
            var sut = new BloomFilter(dictionary, 150, 2);

            var hashCount = sut.HashCount;

            Assert.IsTrue(hashCount == 2);
        }
Example #38
0
 /// <summary>
 /// Creates the crawler and stores the supplied settings, Bloom filter and data
 /// operation object in the <c>Settings</c>, <c>filter</c> and <c>dataop</c> members.
 /// </summary>
 /// <param name="_Settings">Value assigned to <c>Settings</c>.</param>
 /// <param name="_filter">Value assigned to <c>filter</c>.</param>
 /// <param name="_dataop">Value assigned to <c>dataop</c>.</param>
 public FangListCrawler_WenZhou(CrawlSettings _Settings, BloomFilter<string> _filter, DataOperation _dataop)
 {
     Settings = _Settings;
     filter = _filter;
     dataop = _dataop;
 }
Example #39
0
 /// <summary>
 /// Stores the supplied filter in <c>bloom_filter</c>.
 /// </summary>
 /// <param name="filter">The filter to keep.</param>
 private void OnSetFilter(BloomFilter filter) => bloom_filter = filter;
Example #40
0
        /// <summary>
        /// A filter that has just been created, with nothing added to it, must
        /// report that it does not contain the value 42.
        /// </summary>
        public void Contains_WithFreshFilter_ShouldReturnFalse()
        {
            var emptyFilter = BloomFilter<int>.Create(50, 0.02);

            var found = emptyFilter.Contains(42);

            Assert.False(found);
        }
Example #41
0
        /// <summary>
        /// Program entry point: creates the shared Bloom filter, configures the crawl
        /// settings, runs the blog initialisation, starts the crawl and then waits for a
        /// key press.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments; not read by this method.
        /// </param>
        private static void Main(string[] args)
        {
            filter = new BloomFilter<string>(200000);

            // Only referenced by the disabled seed/keyword lines below.
            const string CityName = "beijing";

            // Seed addresses (currently disabled):
            //Settings.SeedsAddress.Add(string.Format("http://jobs.zhaopin.com/{0}", CityName));
            // Settings.SeedsAddress.Add(string.Format("http://www.fzhouse.com.cn:7002/result_new.asp"));

            // URL keywords (currently disabled):
            //Settings.HrefKeywords.Add(string.Format("/{0}/bj", CityName));
            //Settings.HrefKeywords.Add(string.Format("/{0}/sj", CityName));

            //Settings.HrefKeywords.Add(string.Format("building.asp?ProjectID="));
            //Settings.HrefKeywords.Add(string.Format("result_new"));

            // Number of crawl threads.
            Settings.ThreadCount = 5;
            // Settings.ThreadCount = 1;

            // Crawl depth.
            Settings.Depth = 27;

            // Links to ignore while crawling, matched by suffix; more than one can be added.
            Settings.EscapeLinks.Add(".jpg");

            // Automatic speed limit (a random 1-5 second interval); switched off here.
            Settings.AutoSpeedLimit = false;

            // Host locking: after stripping the second-level domain, equal domains are
            // treated as the same site (e.g. mail.pzcast.com and www.pzcast.com); switched off here.
            Settings.LockHost = false;

            // User-Agent HTTP header: settings.UserAgent already has a default value;
            // set it only if there is a special requirement.

            // Page request timeout: defaults to 15000 ms; set settings.Timeout as required.

            // Regular expressions used for filtering:
            // settings.RegularFilterExpressions.Add("");

            // YunFeng blog initialisation (disabled):
            //YunFengBlogInit();
            JGZFBlogInit();

            CrawlMaster crawlMaster = new CrawlMaster(Settings);
            crawlMaster.AddUrlEvent += MasterAddUrlEvent;
            crawlMaster.DataReceivedEvent += MasterDataReceivedEvent;
            crawlMaster.Crawl();

            // Disabled: a Console.WriteLine announcing that the traversal has finished.
            Console.ReadKey();
        }
        ///// <summary>
        /////  分类信息
        ///// </summary>
        //public string DataTableNameCategory
        //{
        //    get { return "CategoryInfo_MT"; }

        //}

        /// <summary>
        /// Constructor: stores the supplied settings, Bloom filter and data operation
        /// object, and creates a new string Bloom filter for <c>guidFilter</c>.
        /// </summary>
        /// <param name="_Settings">Value assigned to <c>Settings</c>.</param>
        /// <param name="_filter">Value assigned to <c>filter</c>.</param>
        /// <param name="_dataop">Value assigned to <c>dataop</c>.</param>
        public DoctorHospitalDoctorSheduleAPPCrawler(CrawlSettings _Settings, BloomFilter<string> _filter, DataOperation _dataop)
        {
            Settings = _Settings;
            filter = _filter;
            dataop = _dataop;

            // Independent of the filter passed in; constructed with 9000000 as its argument.
            guidFilter = new BloomFilter<string>(9000000);
        }
Example #43
0
        /// <summary>
        /// Checks that a filter built with the arguments (dictionary, 150, 2)
        /// reports a <c>BloomFilterSize</c> of 150.
        /// </summary>
        public void InitBloomFilter_GetBloomFilterSize_ReturnBloomFilterSize()
        {
            var sut = new BloomFilter(dictionary, 150, 2);

            var filterSize = sut.BloomFilterSize;

            Assert.IsTrue(filterSize == 150);
        }
Example #44
0
        /// <summary>
        /// Loads a Bloom filter for ten known addresses onto a node, requests a filtered
        /// block, and checks the returned partial merkle tree and matched transactions.
        /// Also checks that trimming the tree to one transaction leaves only that
        /// transaction, that a cloned filter still matches, and that a filtered request
        /// for the regtest genesis block does not report the known transaction.
        /// </summary>
        public void CanGetMerkleRoot()
        {
            using (var builder = NodeBuilderEx.Create())
            {
                var node       = builder.CreateNode(true);
                var rpc        = node.CreateRPCClient();
                var nodeClient = node.CreateNodeClient();
                rpc.Generate(101);

                // Queue twenty "sendtoaddress" commands of 0.5 each to freshly generated keys.
                var knownAddresses = new List <TxDestination>();
                var batch          = rpc.PrepareBatch();
                for (var i = 0; i < 20; i++)
                {
                    var address = new Key().PubKey.GetAddress(rpc.Network);
                    knownAddresses.Add(address.Hash);
#pragma warning disable CS4014
                    batch.SendCommandAsync("sendtoaddress", address.ToString(), "0.5");
#pragma warning restore CS4014
                }

                batch.SendBatch();

                // Only the first ten destinations are put into the filter below.
                knownAddresses = knownAddresses.Take(10).ToList();
                var blockId = rpc.Generate(1)[0];
                var block   = rpc.GetBlock(blockId);
                Assert.Equal(21, block.Transactions.Count);
                var knownTx = block.Transactions[1].GetHash();
                nodeClient.VersionHandshake();
                using (var list = nodeClient.CreateListener()
                                  .Where(m => m.Message.Payload is MerkleBlockPayload || m.Message.Payload is TxPayload))
                {
                    var filter = new BloomFilter(1, 0.0001, 50, BloomFlags.UPDATE_NONE);
                    foreach (var a in knownAddresses)
                    {
                        filter.Insert(a.ToBytes());
                    }

                    nodeClient.SendMessageAsync(new FilterLoadPayload(filter));
                    nodeClient.SendMessageAsync(
                        new GetDataPayload(new InventoryVector(InventoryType.MSG_FILTERED_BLOCK, block.GetHash())));
                    var merkle = list.ReceivePayload <MerkleBlockPayload>();
                    var tree   = merkle.Object.PartialMerkleTree;
                    Assert.True(tree.Check(block.Header.HashMerkleRoot));
                    Assert.True(tree.GetMatchedTransactions().Count() >= 10);
                    Assert.True(tree.GetMatchedTransactions().Contains(knownTx));

                    // Receive one transaction payload per matched transaction. The count is
                    // taken once instead of re-enumerating the matches on every iteration.
                    var matchedCount = tree.GetMatchedTransactions().Count();
                    var matched      = new List <Transaction>();
                    for (var i = 0; i < matchedCount; i++)
                    {
                        matched.Add(list.ReceivePayload <TxPayload>().Object);
                    }

                    Assert.True(matched.Count >= 10);
                    tree = tree.Trim(knownTx);
                    Assert.True(tree.GetMatchedTransactions().Count() == 1);
                    Assert.True(tree.GetMatchedTransactions().Contains(knownTx));

                    // The lambda captures the 'filter' variable itself, so the second call
                    // below runs against the clone assigned in between.
                    Action act = () =>
                    {
                        foreach (var match in matched)
                        {
                            Assert.True(filter.IsRelevantAndUpdate(match));
                        }
                    };
                    act();
                    filter = filter.Clone();
                    act();

                    // Request the regtest genesis block through the same filter; its
                    // matched set must not include the known transaction.
                    nodeClient.SendMessageAsync(new GetDataPayload(new InventoryVector(InventoryType.MSG_FILTERED_BLOCK,
                                                                                       Network.RegTest.GetGenesis().GetHash())));

                    merkle = list.ReceivePayload <MerkleBlockPayload>();
                    tree   = merkle.Object.PartialMerkleTree;
                    Assert.True(tree.Check(merkle.Object.Header.HashMerkleRoot));
                    Assert.True(!tree.GetMatchedTransactions().Contains(knownTx));
                }
            }
        }
Example #45
0
		/// <summary>
		/// Filters a four-transaction block with an UPDATE_NONE Bloom filter, first by a
		/// transaction id alone and then with a public key added, and checks which
		/// transactions the resulting merkle block reports as matched.
		/// </summary>
		public void merkle_block_2_with_update_none()
		{
			// Random real block (000000005a4ded781e667e06ceefafb71410b511fe0d5adc3e5a27ecbec34ae6)
			// With 4 txes
			Block block = new Block();
			// The hex is written as adjacent literals joined with '+', because a regular
			// string literal cannot contain a raw line break.
			block.ReadWrite(ParseHex(
				"0100000075616236cc2126035fadb38deb65b9102cc2c41c09cdf29fc051906800000000fe7d5e12ef0ff901f6050211249919b1c0653771832b3a80c66cea42847f0ae1d4d26e49ffff001d00f0a4410401000000010000000000000000000000000000000000000000000000000000000000000000ffffffff0804ffff001d029105ffffffff0100f2052a010000004341046d8709a041d34357697dfcb30a9d05900a6294078012bf3bb09c6f9b525f1d16d5503d7905db1ada9501446ea00728668fc5719aa80be2fdfc8a858a4dbdd4fbac00000000010000000255605dc6f5c3dc148b6da58442b0b2cd422be385eab2ebea4119ee9c268d28350000000049483045022100aa46504baa86df8a33b1192b1b9367b4d729dc41e389f2c04f3e5c7f0559aae702205e82253a54bf5c4f65b7428551554b2045167d6d206dfe6a2e198127d3f7df1501ffffffff55605dc6f5c3dc148b6da58442b0b2cd422be385eab2ebea4119ee9c268d2835010000004847304402202329484c35fa9d6bb32a55a70c0982f606ce0e3634b69006138683bcd12cbb6602200c28feb1e2555c3210f1dddb299738b4ff8bbe9667b68cb8764b5ac17b7adf0001ffffffff0200e1f505000000004341046a0765b5865641ce08dd39690aade26dfbf5511430ca428a3089261361cef170e3929a68aee3d8d4848b0c5111b0a37b82b86ad559fd2a745b44d8e8d9dfdc0cac00180d8f000000004341044a656f065871a353f216ca26cef8dde2f03e8c16202d2e8ad769f02032cb86a5eb5e56842e92e19141d60a01928f8dd2c875a390f67c1f6c94cfc617c0ea45afac0000000001000000025f9a06d3acdceb56be1bfeaa3e8a25e62d182fa24fefe899d1c17f1dad4c2028000000004847304402205d6058484157235b06028c30736c15613a28bdb768ee628094ca8b0030d4d6eb0220328789c9a2ec27ddaec0ad5ef58efded42e6ea17c2e1ce838f3d6913f5e95db601ffffffff5f9a06d3acdceb56be1bfeaa3e8a25e62d182fa24fefe899d1c17f1dad4c2028010000004a493046022100c45af050d3cea806cedd0ab22520c53ebe63b987b8954146cdca42487b84bdd6022100b9b027716a6b59e640da50a864d6dd8a0ef24c76ce62391fa3eabaf4d2886d2d01ffffffff0200e1f505000000004341046a0765b5865641ce08dd39690aade26dfbf5511430ca428a3089261361cef170e3929a68aee3d8d4848b0c5111b0a37b82b86ad559fd2a745b44d8e8d9dfdc0cac00180d8f000000004341046a0765b5865641ce08dd39690aade26dfbf5511430ca428a3089261361cef170e3929a68aee3d8d4848b0c5111b0a37b82b86ad559fd2a745b44d8e" +
				"8d9dfdc0cac000000000100000002e2274e5fea1bf29d963914bd301aa63b64daaf8a3e88f119b5046ca5738a0f6b0000000048473044022016e7a727a061ea2254a6c358376aaa617ac537eb836c77d646ebda4c748aac8b0220192ce28bf9f2c06a6467e6531e27648d2b3e2e2bae85159c9242939840295ba501ffffffffe2274e5fea1bf29d963914bd301aa63b64daaf8a3e88f119b5046ca5738a0f6b010000004a493046022100b7a1a755588d4190118936e15cd217d133b0e4a53c3c15924010d5648d8925c9022100aaef031874db2114f2d869ac2de4ae53908fbfea5b2b1862e181626bb9005c9f01ffffffff0200e1f505000000004341044a656f065871a353f216ca26cef8dde2f03e8c16202d2e8ad769f02032cb86a5eb5e56842e92e19141d60a01928f8dd2c875a390f67c1f6c94cfc617c0ea45afac00180d8f000000004341046a0765b5865641ce08dd39690aade26dfbf5511430ca428a3089261361cef170e3929a68aee3d8d4848b0c5111b0a37b82b86ad559fd2a745b44d8e8d9dfdc0cac00000000"));

			BloomFilter filter = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_NONE);
			// Match the first transaction
			filter.Insert(uint256.Parse("0xe980fe9f792d014e73b95203dc1335c5f9ce19ac537a419e6df5b47aecb93b70"));

			MerkleBlock merkleBlock = new MerkleBlock(block, filter);
			Assert.True(merkleBlock.Header.GetHash() == block.GetHash());
			Assert.True(merkleBlock.PartialMerkleTree.Check(block.Header.HashMerkleRoot));

			var vMatchedTxn = merkleBlock.PartialMerkleTree.GetMatchedTransactions().ToList();

			Assert.True(vMatchedTxn.Count == 1);

			AssertMatch(block, vMatchedTxn, "0xe980fe9f792d014e73b95203dc1335c5f9ce19ac537a419e6df5b47aecb93b70", 0, 0);


			// Match an output from the second transaction (the pubkey for address 1DZTzaBHUDM7T3QvUKBz4qXMRpkg8jsfB5)
			// This should not match the third transaction though it spends the output matched
			// It will match the fourth transaction, which has another pay-to-pubkey output to the same address
			filter.Insert(ParseHex("044a656f065871a353f216ca26cef8dde2f03e8c16202d2e8ad769f02032cb86a5eb5e56842e92e19141d60a01928f8dd2c875a390f67c1f6c94cfc617c0ea45af"));

			// Rebuild the merkle block with the extended filter and repeat the checks.
			merkleBlock = new MerkleBlock(block, filter);
			Assert.True(merkleBlock.Header.GetHash() == block.GetHash());
			Assert.True(merkleBlock.PartialMerkleTree.Check(block.Header.HashMerkleRoot));

			vMatchedTxn = merkleBlock.PartialMerkleTree.GetMatchedTransactions().ToList();

			Assert.True(vMatchedTxn.Count == 3);

			AssertMatch(block, vMatchedTxn, "0xe980fe9f792d014e73b95203dc1335c5f9ce19ac537a419e6df5b47aecb93b70", 0, 0);
			AssertMatch(block, vMatchedTxn, "0x28204cad1d7fc1d199e8ef4fa22f182de6258a3eaafe1bbe56ebdcacd3069a5f", 1, 1);
			AssertMatch(block, vMatchedTxn, "0x3c1d7e82342158e4109df2e0b6348b6e84e403d8b4046d7007663ace63cddb23", 2, 3);


		}
Example #46
0
        /// <summary>
        /// Serializes a filter holding three strings to JSON, deserializes it again, and
        /// checks that the copy still reports the three strings as possibly present and
        /// reports "false" as absent.
        /// </summary>
        /// <returns>An already-completed task; all of the work runs synchronously.</returns>
        public Task BloomFilter_can_be_round_tripped_through_JSON_serialization()
        {
            var filter = new BloomFilter(capacity: 10000);

            filter.Add("one");
            filter.Add("two");
            filter.Add("three");

            var json = JsonConvert.SerializeObject(filter, Formatting.Indented);

            Console.WriteLine(json);

            var filter2 = JsonConvert.DeserializeObject<BloomFilter>(json);

            filter2.MayContain("one").Should().BeTrue();
            filter2.MayContain("two").Should().BeTrue();
            filter2.MayContain("three").Should().BeTrue();
            filter2.MayContain("false").Should().BeFalse();

            // Nothing here is awaited, so the method is not marked async (that would raise
            // CS1998); the Task return type is kept for callers.
            return Task.CompletedTask;
        }
Example #47
0
		/// <summary>
		/// Filters a seven-transaction block with an UPDATE_NONE Bloom filter that holds a
		/// public key and an output address, then checks that the filter does not contain
		/// two specific outpoints after the merkle block has been built.
		/// </summary>
		public void merkle_block_4_test_update_none()
		{
			// Random real block (000000000000b731f2eef9e8c63173adfb07e41bd53eb0ef0a6b720d6cb6dea4)
			// With 7 txes
			Block block = new Block();
			// The hex is written as adjacent literals joined with '+', because a regular
			// string literal cannot contain a raw line break.
			block.ReadWrite(ParseHex(
				"0100000082bb869cf3a793432a66e826e05a6fc37469f8efb7421dc880670100000000007f16c5962e8bd963659c793ce370d95f093bc7e367117b3c30c1f8fdd0d9728776381b4d4c86041b554b85290701000000010000000000000000000000000000000000000000000000000000000000000000ffffffff07044c86041b0136ffffffff0100f2052a01000000434104eaafc2314def4ca98ac970241bcab022b9c1e1f4ea423a20f134c876f2c01ec0f0dd5b2e86e7168cefe0d81113c3807420ce13ad1357231a2252247d97a46a91ac000000000100000001bcad20a6a29827d1424f08989255120bf7f3e9e3cdaaa6bb31b0737fe048724300000000494830450220356e834b046cadc0f8ebb5a8a017b02de59c86305403dad52cd77b55af062ea10221009253cd6c119d4729b77c978e1e2aa19f5ea6e0e52b3f16e32fa608cd5bab753901ffffffff02008d380c010000001976a9142b4b8072ecbba129b6453c63e129e643207249ca88ac0065cd1d000000001976a9141b8dd13b994bcfc787b32aeadf58ccb3615cbd5488ac000000000100000003fdacf9b3eb077412e7a968d2e4f11b9a9dee312d666187ed77ee7d26af16cb0b000000008c493046022100ea1608e70911ca0de5af51ba57ad23b9a51db8d28f82c53563c56a05c20f5a87022100a8bdc8b4a8acc8634c6b420410150775eb7f2474f5615f7fccd65af30f310fbf01410465fdf49e29b06b9a1582287b6279014f834edc317695d125ef623c1cc3aaece245bd69fcad7508666e9c74a49dc9056d5fc14338ef38118dc4afae5fe2c585caffffffff309e1913634ecb50f3c4f83e96e70b2df071b497b8973a3e75429df397b5af83000000004948304502202bdb79c596a9ffc24e96f4386199aba386e9bc7b6071516e2b51dda942b3a1ed022100c53a857e76b724fc14d45311eac5019650d415c3abb5428f3aae16d8e69bec2301ffffffff2089e33491695080c9edc18a428f7d834db5b6d372df13ce2b1b0e0cbcb1e6c10000000049483045022100d4ce67c5896ee251c810ac1ff9ceccd328b497c8f553ab6e08431e7d40bad6b5022033119c0c2b7d792d31f1187779c7bd95aefd93d90a715586d73801d9b47471c601ffffffff0100714460030000001976a914c7b55141d097ea5df7a0ed330cf794376e53ec8d88ac0000000001000000045bf0e214aa4069a3e792ecee1e1bf0c1d397cde8dd08138f4b72a00681743447000000008b48304502200c45de8c4f3e2c1821f2fc878cba97b1e6f8807d94930713aa1c86a67b9bf1e40221008581abfef2e30f957815fc89978423746b2086375ca8ecf359c85c2a5b7c88ad01410462bb73f76ca0994fc" +
				"b8b4271e6fb7561f5c0f9ca0cf6485261c4a0dc894f4ab844c6cdfb97cd0b60ffb5018ffd6238f4d87270efb1d3ae37079b794a92d7ec95ffffffffd669f7d7958d40fc59d2253d88e0f248e29b599c80bbcec344a83dda5f9aa72c000000008a473044022078124c8beeaa825f9e0b30bff96e564dd859432f2d0cb3b72d3d5d93d38d7e930220691d233b6c0f995be5acb03d70a7f7a65b6bc9bdd426260f38a1346669507a3601410462bb73f76ca0994fcb8b4271e6fb7561f5c0f9ca0cf6485261c4a0dc894f4ab844c6cdfb97cd0b60ffb5018ffd6238f4d87270efb1d3ae37079b794a92d7ec95fffffffff878af0d93f5229a68166cf051fd372bb7a537232946e0a46f53636b4dafdaa4000000008c493046022100c717d1714551663f69c3c5759bdbb3a0fcd3fab023abc0e522fe6440de35d8290221008d9cbe25bffc44af2b18e81c58eb37293fd7fe1c2e7b46fc37ee8c96c50ab1e201410462bb73f76ca0994fcb8b4271e6fb7561f5c0f9ca0cf6485261c4a0dc894f4ab844c6cdfb97cd0b60ffb5018ffd6238f4d87270efb1d3ae37079b794a92d7ec95ffffffff27f2b668859cd7f2f894aa0fd2d9e60963bcd07c88973f425f999b8cbfd7a1e2000000008c493046022100e00847147cbf517bcc2f502f3ddc6d284358d102ed20d47a8aa788a62f0db780022100d17b2d6fa84dcaf1c95d88d7e7c30385aecf415588d749afd3ec81f6022cecd701410462bb73f76ca0994fcb8b4271e6fb7561f5c0f9ca0cf6485261c4a0dc894f4ab844c6cdfb97cd0b60ffb5018ffd6238f4d87270efb1d3ae37079b794a92d7ec95ffffffff0100c817a8040000001976a914b6efd80d99179f4f4ff6f4dd0a007d018c385d2188ac000000000100000001834537b2f1ce8ef9373a258e10545ce5a50b758df616cd4356e0032554ebd3c4000000008b483045022100e68f422dd7c34fdce11eeb4509ddae38201773dd62f284e8aa9d96f85099d0b002202243bd399ff96b649a0fad05fa759d6a882f0af8c90cf7632c2840c29070aec20141045e58067e815c2f464c6a2a15f987758374203895710c2d452442e28496ff38ba8f5fd901dc20e29e88477167fe4fc299bf818fd0d9e1632d467b2a3d9503b1aaffffffff0280d7e636030000001976a914f34c3e10eb387efe872acb614c89e78bfca7815d88ac404b4c00000000001976a914a84e272933aaf87e1715d7786c51dfaeb5b65a6f88ac00000000010000000143ac81c8e6f6ef307dfe17f3d906d999e23e0189fda838c5510d850927e03ae7000000008c4930460221009c87c344760a64cb8ae6685a3eec2c1ac1bed5b88c87de51acd0e124f266c16602210082d07c037359c3a257b5c63ebd90f5a5edf97b2" +
				"ac1c434b08ca998839f346dd40141040ba7e521fa7946d12edbb1d1e95a15c34bd4398195e86433c92b431cd315f455fe30032ede69cad9d1e1ed6c3c4ec0dbfced53438c625462afb792dcb098544bffffffff0240420f00000000001976a9144676d1b820d63ec272f1900d59d43bc6463d96f888ac40420f00000000001976a914648d04341d00d7968b3405c034adc38d4d8fb9bd88ac00000000010000000248cc917501ea5c55f4a8d2009c0567c40cfe037c2e71af017d0a452ff705e3f1000000008b483045022100bf5fdc86dc5f08a5d5c8e43a8c9d5b1ed8c65562e280007b52b133021acd9acc02205e325d613e555f772802bf413d36ba807892ed1a690a77811d3033b3de226e0a01410429fa713b124484cb2bd7b5557b2c0b9df7b2b1fee61825eadc5ae6c37a9920d38bfccdc7dc3cb0c47d7b173dbc9db8d37db0a33ae487982c59c6f8606e9d1791ffffffff41ed70551dd7e841883ab8f0b16bf04176b7d1480e4f0af9f3d4c3595768d068000000008b4830450221008513ad65187b903aed1102d1d0c47688127658c51106753fed0151ce9c16b80902201432b9ebcb87bd04ceb2de66035fbbaf4bf8b00d1cfe41f1a1f7338f9ad79d210141049d4cf80125bf50be1709f718c07ad15d0fc612b7da1f5570dddc35f2a352f0f27c978b06820edca9ef982c35fda2d255afba340068c5035552368bc7200c1488ffffffff0100093d00000000001976a9148edb68822f1ad580b043c7b3df2e400f8699eb4888ac00000000"));

			BloomFilter filter = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_NONE);
			// Match the generation pubkey
			filter.Insert(ParseHex("04eaafc2314def4ca98ac970241bcab022b9c1e1f4ea423a20f134c876f2c01ec0f0dd5b2e86e7168cefe0d81113c3807420ce13ad1357231a2252247d97a46a91"));
			// ...and the output address of the 4th transaction
			filter.Insert(ParseHex("b6efd80d99179f4f4ff6f4dd0a007d018c385d21"));

			MerkleBlock merkleBlock = new MerkleBlock(block, filter);
			Assert.True(merkleBlock.Header.GetHash() == block.GetHash());

			// We shouldn't match any outpoints (UPDATE_NONE)
			Assert.True(!filter.Contains(new OutPoint(uint256.Parse("0x147caa76786596590baa4e98f5d9f48b86c7765e489f7a6ff3360fe5c674360b"), 0)));
			Assert.True(!filter.Contains((new OutPoint(uint256.Parse("0x02981fa052f0481dbc5868f4fc2166035a10f27a03cfd2de67326471df5bc041"), 0))));
		}
Example #48
0
 /// <summary>
 /// Approval test for the text produced by <c>BloomFilter.BitArrayToString</c>.
 /// </summary>
 public void BitArrayToStringTest()
 {
     // BitArray(int[]) copies the bits of each integer, so this array holds the
     // 32 bits of the value 1024 rather than 1024 separate bits.
     var bits = new BitArray(new int[] { 1024 });
     var rendered = BloomFilter.BitArrayToString(bits);

     Approvals.Verify("Bit Aray of 1024:" + rendered);
 }
Example #49
0
		/// <summary>
		/// Checks <c>IsRelevantAndUpdate</c> against one real transaction: a fresh filter is
		/// seeded with a single element each time (tx hash, signature, public key, output
		/// address, outpoint) and must match, while near-miss elements must not match.
		/// </summary>
		public void bloom_match()
		{
			// Random real transaction (b4749f017444b051c44dfd2720e88f314ff94f3dd6d56d40ef65854fcd7fff6b)
			var fundingTx = new Transaction();
			fundingTx.ReadWrite(ParseHex("01000000010b26e9b7735eb6aabdf358bab62f9816a21ba9ebdb719d5299e88607d722c190000000008b4830450220070aca44506c5cef3a16ed519d7c3c39f8aab192c4e1c90d065f37b8a4af6141022100a8e160b856c2d43d27d8fba71e5aef6405b8643ac4cb7cb3c462aced7f14711a0141046d11fee51b0e60666d5049a9101a72741df480b96ee26488a4d3466b95c9a40ac5eeef87e10a5cd336c19a84565f80fa6c547957b7700ff4dfbdefe76036c339ffffffff021bff3d11000000001976a91404943fdd508053c75000106d3bc6e2754dbcff1988ac2f15de00000000001976a914a266436d2965547608b9e15d9032a7b9d64fa43188ac00000000"));

			// and one which spends it (e2769b09e784f32f62ef849763d4f45b98e07ba658647343b915ff832b110436)
			// The array carries one extra trailing byte, which is dropped before parsing.
			var paddedBytes = new byte[] { 0x01, 0x00, 0x00, 0x00, 0x01, 0x6b, 0xff, 0x7f, 0xcd, 0x4f, 0x85, 0x65, 0xef, 0x40, 0x6d, 0xd5, 0xd6, 0x3d, 0x4f, 0xf9, 0x4f, 0x31, 0x8f, 0xe8, 0x20, 0x27, 0xfd, 0x4d, 0xc4, 0x51, 0xb0, 0x44, 0x74, 0x01, 0x9f, 0x74, 0xb4, 0x00, 0x00, 0x00, 0x00, 0x8c, 0x49, 0x30, 0x46, 0x02, 0x21, 0x00, 0xda, 0x0d, 0xc6, 0xae, 0xce, 0xfe, 0x1e, 0x06, 0xef, 0xdf, 0x05, 0x77, 0x37, 0x57, 0xde, 0xb1, 0x68, 0x82, 0x09, 0x30, 0xe3, 0xb0, 0xd0, 0x3f, 0x46, 0xf5, 0xfc, 0xf1, 0x50, 0xbf, 0x99, 0x0c, 0x02, 0x21, 0x00, 0xd2, 0x5b, 0x5c, 0x87, 0x04, 0x00, 0x76, 0xe4, 0xf2, 0x53, 0xf8, 0x26, 0x2e, 0x76, 0x3e, 0x2d, 0xd5, 0x1e, 0x7f, 0xf0, 0xbe, 0x15, 0x77, 0x27, 0xc4, 0xbc, 0x42, 0x80, 0x7f, 0x17, 0xbd, 0x39, 0x01, 0x41, 0x04, 0xe6, 0xc2, 0x6e, 0xf6, 0x7d, 0xc6, 0x10, 0xd2, 0xcd, 0x19, 0x24, 0x84, 0x78, 0x9a, 0x6c, 0xf9, 0xae, 0xa9, 0x93, 0x0b, 0x94, 0x4b, 0x7e, 0x2d, 0xb5, 0x34, 0x2b, 0x9d, 0x9e, 0x5b, 0x9f, 0xf7, 0x9a, 0xff, 0x9a, 0x2e, 0xe1, 0x97, 0x8d, 0xd7, 0xfd, 0x01, 0xdf, 0xc5, 0x22, 0xee, 0x02, 0x28, 0x3d, 0x3b, 0x06, 0xa9, 0xd0, 0x3a, 0xcf, 0x80, 0x96, 0x96, 0x8d, 0x7d, 0xbb, 0x0f, 0x91, 0x78, 0xff, 0xff, 0xff, 0xff, 0x02, 0x8b, 0xa7, 0x94, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x19, 0x76, 0xa9, 0x14, 0xba, 0xde, 0xec, 0xfd, 0xef, 0x05, 0x07, 0x24, 0x7f, 0xc8, 0xf7, 0x42, 0x41, 0xd7, 0x3b, 0xc0, 0x39, 0x97, 0x2d, 0x7b, 0x88, 0xac, 0x40, 0x94, 0xa8, 0x02, 0x00, 0x00, 0x00, 0x00, 0x19, 0x76, 0xa9, 0x14, 0xc1, 0x09, 0x32, 0x48, 0x3f, 0xec, 0x93, 0xed, 0x51, 0xf5, 0xfe, 0x95, 0xe7, 0x25, 0x59, 0xf2, 0xcc, 0x70, 0x43, 0xf9, 0x88, 0xac, 0x00, 0x00, 0x00, 0x00, 0x00 };
			var spendingTxBytes = paddedBytes.Take(paddedBytes.Length - 1).ToArray();

			var spendingTx = new Transaction();
			spendingTx.ReadWrite(spendingTxBytes);

			// Each case below starts from a brand-new filter so that cases cannot affect one another.

			// Transaction hash.
			var bloom = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
			bloom.Insert(uint256.Parse("0xb4749f017444b051c44dfd2720e88f314ff94f3dd6d56d40ef65854fcd7fff6b"));
			Assert.True(bloom.IsRelevantAndUpdate(fundingTx), "Simple Bloom filter didn't match tx hash");

			// byte-reversed tx hash
			bloom = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
			bloom.Insert(ParseHex("6bff7fcd4f8565ef406dd5d63d4ff94f318fe82027fd4dc451b04474019f74b4"));
			Assert.True(bloom.IsRelevantAndUpdate(fundingTx), "Simple Bloom filter didn't match manually serialized tx hash");

			// Input signature.
			bloom = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
			bloom.Insert(ParseHex("30450220070aca44506c5cef3a16ed519d7c3c39f8aab192c4e1c90d065f37b8a4af6141022100a8e160b856c2d43d27d8fba71e5aef6405b8643ac4cb7cb3c462aced7f14711a01"));
			Assert.True(bloom.IsRelevantAndUpdate(fundingTx), "Simple Bloom filter didn't match input signature");

			// Input public key.
			bloom = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
			bloom.Insert(ParseHex("046d11fee51b0e60666d5049a9101a72741df480b96ee26488a4d3466b95c9a40ac5eeef87e10a5cd336c19a84565f80fa6c547957b7700ff4dfbdefe76036c339"));
			Assert.True(bloom.IsRelevantAndUpdate(fundingTx), "Simple Bloom filter didn't match input pub key");

			// First output address; the same filter must then also match the spending transaction.
			bloom = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
			bloom.Insert(ParseHex("04943fdd508053c75000106d3bc6e2754dbcff19"));
			Assert.True(bloom.IsRelevantAndUpdate(fundingTx), "Simple Bloom filter didn't match output address");
			Assert.True(bloom.IsRelevantAndUpdate(spendingTx), "Simple Bloom filter didn't add output");

			// Second output address.
			bloom = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
			bloom.Insert(ParseHex("a266436d2965547608b9e15d9032a7b9d64fa431"));
			Assert.True(bloom.IsRelevantAndUpdate(fundingTx), "Simple Bloom filter didn't match output address");

			// Outpoint inserted as an OutPoint object.
			bloom = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
			bloom.Insert(new OutPoint(uint256.Parse("0x90c122d70786e899529d71dbeba91ba216982fb6ba58f3bdaab65e73b7e9260b"), 0));
			Assert.True(bloom.IsRelevantAndUpdate(fundingTx), "Simple Bloom filter didn't match COutPoint");

			// The same outpoint inserted as its serialized bytes.
			bloom = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
			var spentOutPoint = new OutPoint(uint256.Parse("0x90c122d70786e899529d71dbeba91ba216982fb6ba58f3bdaab65e73b7e9260b"), 0);
			bloom.Insert(spentOutPoint.ToBytes());
			Assert.True(bloom.IsRelevantAndUpdate(fundingTx), "Simple Bloom filter didn't match manually serialized COutPoint");

			// Negative cases: a different tx hash, address, output index and outpoint hash.
			bloom = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
			bloom.Insert(uint256.Parse("00000009e784f32f62ef849763d4f45b98e07ba658647343b915ff832b110436"));
			Assert.True(!bloom.IsRelevantAndUpdate(fundingTx), "Simple Bloom filter matched random tx hash");

			bloom = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
			bloom.Insert(ParseHex("0000006d2965547608b9e15d9032a7b9d64fa431"));
			Assert.True(!bloom.IsRelevantAndUpdate(fundingTx), "Simple Bloom filter matched random address");

			bloom = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
			bloom.Insert(new OutPoint(uint256.Parse("0x90c122d70786e899529d71dbeba91ba216982fb6ba58f3bdaab65e73b7e9260b"), 1));
			Assert.True(!bloom.IsRelevantAndUpdate(fundingTx), "Simple Bloom filter matched COutPoint for an output we didn't care about");

			bloom = new BloomFilter(10, 0.000001, 0, BloomFlags.UPDATE_ALL);
			bloom.Insert(new OutPoint(uint256.Parse("0x000000d70786e899529d71dbeba91ba216982fb6ba58f3bdaab65e73b7e9260b"), 0));
			Assert.True(!bloom.IsRelevantAndUpdate(fundingTx), "Simple Bloom filter matched COutPoint for an output we didn't care about");
		}
 /// <summary>
 /// Resets the duplicate check: sets the counter back to zero and replaces the
 /// bloom filter with a new one built from the configured options.
 /// </summary>
 public void ResetDuplicateCheck()
 {
     // Interlocked.Exchange returns the value the variable held BEFORE the swap.
     // Assigning that result back to _counter (as the previous code did) restored
     // the old count immediately after zeroing it, so the counter was never reset.
     Interlocked.Exchange(ref _counter, 0);

     // Clear the filter currently in use, then swap in a fresh instance.
     _bloomFilter.Clear();
     _bloomFilter = new BloomFilter(_options.FalsePositiveProbability, _options.ExpectedInsertions);
 }
Example #51
0
        /// <summary>
        /// Program entry point: creates the URL bloom filter, fills in the crawl settings,
        /// wires the crawl event handlers, calls <c>Crawl()</c> and then waits for a key press.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments (not read by this method).
        /// </param>
        private static void Main(string[] args)
        {
            filter = new BloomFilter<string>(200000);
            //const string CityName = "beijing";

            // Seed address(es) the crawl starts from.
            //Settings.SeedsAddress.Add(string.Format("http://jobs.zhaopin.com/{0}", CityName));//
            //Settings.SeedsAddress.Add("http://news.sdau.edu.cn/list.php?pid=3"); sdau
            Settings.SeedsAddress.Add("http://www.shdrc.gov.cn/gcxm/sub1.jsp?lb=001001");
            //Settings.SeedsAddress.Add("   ");

            // URL keywords (currently none are configured).
            //Settings.HrefKeywords.Add(string.Format("/{0}/bj", CityName));
            //Settings.HrefKeywords.Add(string.Format("/{0}/sj", CityName));

            // Number of crawl threads.
            Settings.ThreadCount = 1;

            // Crawl depth: number of pages + 1.
            Settings.Depth = 62;

            // Links to ignore while crawling, matched by file suffix; more than one may be added.
            Settings.EscapeLinks.Add(".jpg");

            // Automatic speed limit (a random 1-5 second interval between requests); turned off here.
            Settings.AutoSpeedLimit = false;

            // Host locking: after stripping the sub-domain, hosts whose domains are equal are
            // treated as the same site, e.g. mail.pzcast.com and www.pzcast.com. Turned off here.
            Settings.LockHost = false;

            // User-Agent HTTP header value for requests:
            // settings.UserAgent already has a default; set it only for special requirements.

            // Page request timeout, default 15000 ms:
            // settings.Timeout can be set to whatever the caller requires.

            // Regular expressions used for filtering ("下一页" is the "next page" link text).
            //Settings.RegularFilterExpressions.Add("<a .+ href='(.+)'>下一页</a>");//  string strReg = "<a .+ href='(.+)'>下一页</a>";

            var crawlMaster = new CrawlMaster(Settings);
            crawlMaster.AddUrlEvent += MasterAddUrlEvent;
            crawlMaster.DataReceivedEvent += MasterDataReceivedEvent;
            // crawlMaster.CustomParseLinkEvent2 += Master_CustomParseLinkEvent2;
            crawlMaster.CustomParseLinkEvent3 += Master_CustomParseLinkEvent3;
            crawlMaster.Crawl();

            // Wait for a key press before Main returns.
            Console.ReadKey();
        }
Example #52
0
 /// <summary>
 /// Handles a filter-load message by building a new <c>BloomFilter</c> from the payload
 /// and storing it in <c>bloom_filter</c>.
 /// </summary>
 /// <param name="payload">Payload supplying the <c>Filter</c>, <c>K</c> and <c>Tweak</c> values.</param>
 private void OnFilterLoadMessageReceived(FilterLoadPayload payload)
 {
     // Eight per element of payload.Filter — presumably the filter size in bits
     // if Filter is a byte array; confirm against the BloomFilter constructor.
     var sizeArgument = payload.Filter.Length * 8;

     bloom_filter = new BloomFilter(sizeArgument, payload.K, payload.Tweak, payload.Filter);
 }
Example #53
0
        /// <summary>
        /// Integration test: loads a bloom filter containing a known address onto a node connection,
        /// requests a filtered block, and checks the returned partial merkle tree and the
        /// transactions that follow it. It then requests the RegTest genesis block and checks that
        /// the known transaction is not among its matches.
        /// </summary>
        public void CanGetMerkleRoot()
        {
            using (var builder = NodeBuilder.Create())
            {
                // Client connection to a node created by the builder; the chain set-up below
                // is driven through builder.Nodes[0].
                var node = builder.CreateNode(true).CreateNodeClient();
                builder.Nodes[0].Generate(101);
                var rpc = builder.Nodes[0].CreateRPCClient();
                builder.Nodes[0].Split(Money.Coins(50m), 50);
                builder.Nodes[0].SelectMempoolTransactions();
                builder.Nodes[0].Generate(1);
                // Send 20 payments of 0.5 coins, each to the address of a freshly created key.
                for (int i = 0; i < 20; i++)
                {
                    rpc.SendToAddress(new Key().PubKey.GetAddress(rpc.Network), Money.Coins(0.5m));
                }
                builder.Nodes[0].SelectMempoolTransactions();
                builder.Nodes[0].Generate(1);
                // Block 103 — presumably the block just generated (101 + 1 + 1); TODO confirm.
                var block        = builder.Nodes[0].CreateRPCClient().GetBlock(103);
                // The block's first transaction and the destination of its first output are the
                // "known" items the filter is expected to match.
                var knownTx      = block.Transactions[0].GetHash();
                var knownAddress = block.Transactions[0].Outputs[0].ScriptPubKey.GetDestination();
                node.VersionHandshake();
                // Listen only for the two payload types this test consumes.
                using (var list = node.CreateListener()
                                  .Where(m => m.Message.Payload is MerkleBlockPayload || m.Message.Payload is TxPayload))
                {
                    // Load a filter holding only knownAddress, then request the block in filtered form.
                    BloomFilter filter = new BloomFilter(1, 0.005, 50, BloomFlags.UPDATE_NONE);
                    filter.Insert(knownAddress.ToBytes());
                    node.SendMessageAsync(new FilterLoadPayload(filter));
                    node.SendMessageAsync(new GetDataPayload(new InventoryVector(InventoryType.MSG_FILTERED_BLOCK, block.GetHash())));
                    var merkle = list.ReceivePayload <MerkleBlockPayload>();
                    var tree   = merkle.Object.PartialMerkleTree;
                    // The partial tree must check against the block's merkle root and match
                    // more than one transaction, including knownTx.
                    Assert.True(tree.Check(block.Header.HashMerkleRoot));
                    Assert.True(tree.GetMatchedTransactions().Count() > 1);
                    Assert.True(tree.GetMatchedTransactions().Contains(knownTx));

                    // Receive one TxPayload per matched transaction.
                    List <Transaction> matched = new List <Transaction>();
                    for (int i = 0; i < tree.GetMatchedTransactions().Count(); i++)
                    {
                        matched.Add(list.ReceivePayload <TxPayload>().Object);
                    }
                    Assert.True(matched.Count > 1);
                    // After trimming to knownTx, it must be the only matched transaction left.
                    tree = tree.Trim(knownTx);
                    Assert.True(tree.GetMatchedTransactions().Count() == 1);
                    Assert.True(tree.GetMatchedTransactions().Contains(knownTx));

                    // Every received transaction must be relevant to the filter; the same check is
                    // run a second time against a clone of the filter.
                    Action act = () =>
                    {
                        foreach (var match in matched)
                        {
                            Assert.True(filter.IsRelevantAndUpdate(match));
                        }
                    };
                    act();
                    filter = filter.Clone();
                    act();

                    // NOTE(review): unknownBlock is never used — the request below asks for the
                    // RegTest genesis block instead. Confirm which block was intended.
                    var unknownBlock = uint256.Parse("00000000ad262227291eaf90cafdc56a8f8451e2d7653843122c5bb0bf2dfcdd");
                    node.SendMessageAsync(new GetDataPayload(new InventoryVector(InventoryType.MSG_FILTERED_BLOCK, Network.RegTest.GetGenesis().GetHash())));

                    // The second merkle block is checked against its own header's merkle root and
                    // must not list knownTx among its matches.
                    merkle = list.ReceivePayload <MerkleBlockPayload>();
                    tree   = merkle.Object.PartialMerkleTree;
                    Assert.True(tree.Check(merkle.Object.Header.HashMerkleRoot));
                    Assert.True(!tree.GetMatchedTransactions().Contains(knownTx));
                }
            }
        }
Example #54
0
 /// <summary>
 /// Creates the crawler, storing the supplied settings, bloom filter and data-operation object.
 /// </summary>
 /// <param name="_Settings">Crawl settings, stored in <c>Settings</c>.</param>
 /// <param name="_filter">String bloom filter, stored in <c>filter</c>.</param>
 /// <param name="_dataop">Data-operation object, stored in <c>dataop</c>.</param>
 public LandFangUserUpdateCrawler(CrawlSettings _Settings, BloomFilter<string> _filter, DataOperation _dataop)
 {
     Settings = _Settings;
     filter = _filter;
     dataop = _dataop;
 }
Example #55
0
        /// <summary>
        /// Adds <c>capacity</c> random GUID strings to a bloom filter, then writes the concatenated
        /// strings and the filter's <c>ToString()</c> output to two files (per the test name,
        /// presumably so their sizes can be compared by hand).
        /// </summary>
        public void Size_comparison_vs_full_set()
        {
            var capacity = 100000;
            var filter = new BloomFilter(capacity, .0000001);

            // Materialise the full set of random keys before touching the filter.
            var keys = (from index in Enumerable.Range(1, capacity)
                        select Guid.NewGuid().ToString()).ToList();

            keys.ForEach(key => filter.Add(key));

            // Full set: every key joined with no separator.
            var joinedKeys = string.Join("", keys);
            File.WriteAllText(@"c:\temp\list.txt", joinedKeys);

            // Filter: whatever its ToString() produces.
            var filterText = filter.ToString();
            File.WriteAllText(@"c:\temp\filter.txt", filterText);
        }
Example #56
0
 /// <summary>
 /// Creates a duplicate remover: stores both sizing parameters, then builds the initial
 /// bloom filter through <c>RebuildBloomFilter()</c>.
 /// </summary>
 /// <param name="expectedInsertions">Stored in <c>_expectedInsertions</c>.</param>
 /// <param name="fpp">Stored in <c>_fpp</c> (presumably the false-positive probability — confirm).</param>
 public BloomFilterDuplicateRemover(int expectedInsertions, double fpp)
 {
     _fpp = fpp;
     _expectedInsertions = expectedInsertions;

     // Both parameters are stored before the filter is built.
     _bloomFilter = RebuildBloomFilter();
 }
 /// <summary>
 /// Creates a <c>LiteralInfo</c> backed by the given literals filter.
 /// </summary>
 /// <param name="literalsFilter">Filter stored in <c>_literalsFilter</c>; must not be null.</param>
 /// <exception cref="ArgumentNullException"><paramref name="literalsFilter"/> is null.</exception>
 public LiteralInfo(BloomFilter literalsFilter)
 {
     if (literalsFilter is null)
     {
         throw new ArgumentNullException(nameof(literalsFilter));
     }

     _literalsFilter = literalsFilter;
 }
 /// <summary>
 /// Constructor: stores the options, builds the bloom filter from them and starts the counter at zero.
 /// </summary>
 /// <param name="options">
 /// Options supplying <c>FalsePositiveProbability</c> and <c>ExpectedInsertions</c> for the filter.
 /// </param>
 public BloomFilterDuplicateRemover(BloomFilterOptions options)
 {
     _counter = 0;
     _options = options;
     _bloomFilter = new BloomFilter(options.FalsePositiveProbability, options.ExpectedInsertions);
 }
Example #59
0
 /// <summary>
 /// Handles a filter-clear message by discarding the current bloom filter
 /// (sets <c>bloom_filter</c> to null).
 /// </summary>
 private void OnFilterClearMessageReceived() => bloom_filter = null;
Example #60
0
 /// <summary>
 /// Creates the crawler, storing the supplied settings, bloom filter and data-operation object.
 /// </summary>
 /// <param name="_Settings">Crawl settings, stored in <c>Settings</c>.</param>
 /// <param name="_filter">String bloom filter, stored in <c>filter</c>.</param>
 /// <param name="_dataop">Data-operation object, stored in <c>dataop</c>.</param>
 public PlantDetailListCrawler(CrawlSettings _Settings, BloomFilter<string> _filter, DataOperation _dataop)
 {
     Settings = _Settings;
     filter = _filter;
     dataop = _dataop;
 }