Beispiel #1
0
        /// <summary>
        /// Console entry point: seeds the spider with listing pages 0 through 10, runs a
        /// depth-1 crawl with 8 workers, then executes <c>Parse</c> on a task and prints
        /// every element of <c>InfoList</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            const string listingPagePrefix = "http://www.cozy.com/cn/actresses/currentPage/";
            const int listingPageCount = 11; // pages 0..10 inclusive

            Console.WriteLine("Init");
            InitRecvThread();

            // One seed URL per listing page.
            var startUrls = new SpiderSeeds();
            for (int page = 0; page < listingPageCount; page++)
            {
                startUrls.AddSeed(listingPagePrefix + page);
            }

            // Case-insensitive substring match on the star-page URL fragment.
            IUrlMatch urlMatch = new FindStringMatch
            {
                StringFind = "www.cozy.com/cn/star/",
                NoCase = true
            };
            IUrlFilter urlFilter = new BloomFilter();
            IUrlReader urlReader = new DefaultReader();

            var crawlSetting = new SpiderSetting
            {
                Depth = 1,
                WorkerCount = 8,
                Seeds = startUrls,
                Match = urlMatch,
                Filter = urlFilter,
                Reader = urlReader
            };

            var spider = new SpiderMaster();
            spider.Init(crawlSetting);

            // All three spider events are routed to the same OnEvent handler.
            spider.AddUrlEventHandler += OnEvent;
            spider.DataReceivedEventHandler += OnEvent;
            spider.ErrorEventHandler += OnEvent;

            Console.WriteLine("Begin");
            // NOTE(review): Stop() follows immediately, so Crawl() presumably blocks
            // until the crawl is done - confirm against SpiderMaster.
            spider.Crawl();

            spider.Stop();
            StopRecvThread();
            Console.WriteLine("search Finish");

            // Run Parse with state argument 5 on a task and block until it completes.
            var parseJob = new Task(Parse, 5);
            parseJob.Start();
            parseJob.Wait();

            Console.WriteLine("Parse Finish");
            foreach (var info in InfoList)
            {
                Console.WriteLine(info.ToString());
            }

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
Beispiel #2
0
        /// <summary>
        /// Console entry point: seeds the spider with a single start URL, runs a depth-2
        /// crawl with 8 workers, and waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            Console.WriteLine("Init");
            InitRecvThread();

            var startUrls = new SpiderSeeds();
            startUrls.AddSeed("http://www.javfee.com/cn");

            // Case-insensitive substring match on the site host name.
            IUrlMatch urlMatch = new FindStringMatch
            {
                StringFind = "www.javfee.com",
                NoCase = true
            };
            IUrlFilter urlFilter = new BloomFilter();
            IUrlReader urlReader = new DefaultReader();

            var crawlSetting = new SpiderSetting
            {
                Depth = 2,
                WorkerCount = 8,
                Seeds = startUrls,
                Match = urlMatch,
                Filter = urlFilter,
                Reader = urlReader
            };

            var spider = new SpiderMaster();
            spider.Init(crawlSetting);

            // All three spider events are routed to the same OnEvent handler.
            spider.AddUrlEventHandler += OnEvent;
            spider.DataReceivedEventHandler += OnEvent;
            spider.ErrorEventHandler += OnEvent;

            Console.WriteLine("Begin");
            // NOTE(review): Stop() follows immediately, so Crawl() presumably blocks
            // until the crawl is done - confirm against SpiderMaster.
            spider.Crawl();

            spider.Stop();
            StopRecvThread();
            Console.WriteLine("Finish");

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }