Exemplo n.º 1
0
        /// <summary>
        /// Console entry point: queues 101 randomized getPassCodeNew URLs as seeds,
        /// crawls them with an <c>ImageReader</c> on 8 workers, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Console.WriteLine("Init");
            InitRecvThread();

            SpiderSeeds seeds = new SpiderSeeds();
            for (int i = 0; i <= 100; ++i)
            {
                // The random suffix only serves to make each seed URL distinct.
                // A formatted double can be shorter than 10 characters (e.g. "0.5"),
                // in which case Substring(0, 10) would throw, so truncate only when
                // the text is actually longer than 10 characters.
                string token = r.NextDouble().ToString();
                if (token.Length > 10)
                {
                    token = token.Substring(0, 10);
                }

                seeds.AddSeed("https://kyfw.12306.cn/otn/passcodeNew/getPassCodeNew?module=login&rand=sjrand&" + token);
            }

            IUrlReader reader = new ImageReader();

            // No Match/Filter is configured here; only the seeds themselves are read.
            SpiderSetting setting = new SpiderSetting();
            setting.Depth = 1;
            setting.WorkerCount = 8;
            setting.Seeds = seeds;
            setting.Reader = reader;

            SpiderMaster master = new SpiderMaster();
            master.Init(setting);
            master.DataReceivedEventHandler += OnDataEvent;

            Console.WriteLine("Begin");
            master.Crawl();

            master.Stop();
            StopRecvThread();
            Console.WriteLine("search Finish");
            Console.ReadKey();
        }
Exemplo n.º 2
0
 /// <summary>
 /// Reads <paramref name="url"/> through the reader configured on <paramref name="setting"/>.
 /// </summary>
 /// <param name="url">Address to read; when null, <paramref name="setting"/> is not consulted.</param>
 /// <param name="setting">Spider configuration whose <c>Reader</c> performs the read.</param>
 /// <returns>The result of <c>Reader.Read</c>, or null when the URL or the reader is missing.</returns>
 public static string UrlRead(string url, SpiderSetting setting)
 {
     bool canRead = url != null && setting.Reader != null;
     return canRead ? setting.Reader.Read(url) : null;
 }
Exemplo n.º 3
0
 /// <summary>
 /// Asks the matcher configured on <paramref name="setting"/> whether <paramref name="url"/> matches.
 /// </summary>
 /// <param name="url">Address to test; when null, <paramref name="setting"/> is not consulted.</param>
 /// <param name="setting">Spider configuration whose <c>Match</c> makes the decision.</param>
 /// <returns>The result of <c>Match.Match</c>, or false when the URL or the matcher is missing.</returns>
 public static bool UrlMatch(string url, SpiderSetting setting)
 {
     // Short-circuit evaluation keeps the null checks ahead of the actual call.
     return url != null
         && setting.Match != null
         && setting.Match.Match(url);
 }
Exemplo n.º 4
0
 /// <summary>
 /// Passes <paramref name="url"/> to the filter configured on <paramref name="setting"/>.
 /// </summary>
 /// <param name="url">Address to check; when null, <paramref name="setting"/> is not consulted.</param>
 /// <param name="setting">Spider configuration whose <c>Filter</c> makes the decision.</param>
 /// <returns>The result of <c>Filter.Filter</c>, or false when the URL or the filter is missing.</returns>
 public static bool UrlFilter(string url, SpiderSetting setting)
 {
     if (url == null || setting.Filter == null)
     {
         return false;
     }

     return setting.Filter.Filter(url);
 }
Exemplo n.º 5
0
 /// <summary>
 /// Delegates reading of <paramref name="url"/> to <c>setting.Reader</c>.
 /// </summary>
 /// <param name="url">Address to read; a null value returns null before <paramref name="setting"/> is touched.</param>
 /// <param name="setting">Spider configuration supplying the reader.</param>
 /// <returns>What <c>Reader.Read</c> returns, or null if either the URL or the reader is null.</returns>
 public static string UrlRead(string url, SpiderSetting setting)
 {
     if (url == null)
     {
         return null;
     }

     if (setting.Reader == null)
     {
         return null;
     }

     return setting.Reader.Read(url);
 }
Exemplo n.º 6
0
 /// <summary>
 /// Delegates the match decision for <paramref name="url"/> to <c>setting.Match</c>.
 /// </summary>
 /// <param name="url">Address to test; a null value returns false before <paramref name="setting"/> is touched.</param>
 /// <param name="setting">Spider configuration supplying the matcher.</param>
 /// <returns>What <c>Match.Match</c> returns, or false if either the URL or the matcher is null.</returns>
 public static bool UrlMatch(string url, SpiderSetting setting)
 {
     if (url == null || setting.Match == null)
     {
         return false;
     }

     return setting.Match.Match(url);
 }
Exemplo n.º 7
0
 /// <summary>
 /// Delegates the filter decision for <paramref name="url"/> to <c>setting.Filter</c>.
 /// </summary>
 /// <param name="url">Address to check; a null value returns false before <paramref name="setting"/> is touched.</param>
 /// <param name="setting">Spider configuration supplying the filter.</param>
 /// <returns>What <c>Filter.Filter</c> returns, or false if either the URL or the filter is null.</returns>
 public static bool UrlFilter(string url, SpiderSetting setting)
 {
     // Short-circuit evaluation keeps the null checks ahead of the actual call.
     return url != null
         && setting.Filter != null
         && setting.Filter.Filter(url);
 }
Exemplo n.º 8
0
 /// <summary>
 /// Stores <paramref name="setting"/> and prepares the worker list: builds it over
 /// <c>urlQueue</c>, requests <c>setting.WorkerCount</c> workers, assigns
 /// <c>WorkerAction</c> as their work action, and starts them.
 /// </summary>
 /// <param name="setting">Spider configuration to keep and to size the worker list from.</param>
 public void Init(SpiderSetting setting)
 {
     Setting = setting;

     var workerList = new SpiderWorkerList(urlQueue);
     Workers = workerList;

     // Configure first, start last.
     workerList.CreateWorker(setting.WorkerCount);
     workerList.SetWorkAction(WorkerAction);
     workerList.Start();
 }
Exemplo n.º 9
0
 /// <summary>
 /// Remembers the supplied configuration and brings up the workers.
 /// </summary>
 /// <remarks>
 /// The worker list is created over <c>urlQueue</c>, told to create
 /// <c>setting.WorkerCount</c> workers, given <c>WorkerAction</c> as the work
 /// action, and then started, in that order.
 /// </remarks>
 /// <param name="setting">Spider configuration to store in <c>Setting</c>.</param>
 public void Init(SpiderSetting setting)
 {
     Setting = setting;

     var pool = new SpiderWorkerList(urlQueue);
     Workers = pool;

     pool.CreateWorker(setting.WorkerCount);
     pool.SetWorkAction(WorkerAction);
     pool.Start();
 }
Exemplo n.º 10
0
        /// <summary>
        /// Console entry point: seeds listing pages 0 through 10, crawls them with a
        /// string matcher, a <c>BloomFilter</c> and a <c>DefaultReader</c>, then runs
        /// <c>Parse</c> on a task and prints every entry of <c>InfoList</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Console.WriteLine("Init");
            InitRecvThread();

            // Seed the listing pages numbered 0 to 10 inclusive.
            var seeds = new SpiderSeeds();
            for (int page = 0; page < 11; page++)
            {
                seeds.AddSeed("http://www.cozy.com/cn/actresses/currentPage/" + page);
            }

            var starMatch = new FindStringMatch();
            starMatch.StringFind = "www.cozy.com/cn/star/";
            starMatch.NoCase = true;
            IUrlMatch match = starMatch;

            IUrlFilter filter = new BloomFilter();
            IUrlReader reader = new DefaultReader();

            var setting = new SpiderSetting
            {
                Depth = 1,
                WorkerCount = 8,
                Seeds = seeds,
                Match = match,
                Filter = filter,
                Reader = reader
            };

            var master = new SpiderMaster();
            master.Init(setting);

            // The same handler receives all three kinds of notification.
            master.AddUrlEventHandler += OnEvent;
            master.DataReceivedEventHandler += OnEvent;
            master.ErrorEventHandler += OnEvent;

            Console.WriteLine("Begin");
            master.Crawl();

            master.Stop();
            StopRecvThread();
            Console.WriteLine("search Finish");

            // Run Parse with state value 5 on a task and block until it completes.
            var parseTask = new Task(Parse, 5);
            parseTask.Start();
            parseTask.Wait();

            Console.WriteLine("Parse Finish");
            foreach (var info in InfoList)
            {
                Console.WriteLine(info.ToString());
            }

            Console.ReadKey();
        }
Exemplo n.º 11
0
 /// <summary>
 /// Copies every seed of <paramref name="setting"/> into <paramref name="urlQueue"/>,
 /// wrapping each one in a <c>UrlInfo</c> whose second constructor argument is 0
 /// (presumably the crawl depth; confirm against <c>UrlInfo</c>).
 /// </summary>
 /// <param name="urlQueue">Destination queue; nothing happens when it is null.</param>
 /// <param name="setting">Spider configuration; nothing happens when its <c>Seeds</c> is null.</param>
 public static void Seed2Queue(
     UrlAddressQueue urlQueue,
     SpiderSetting setting)
 {
     if (setting.Seeds == null || urlQueue == null)
     {
         return;
     }

     foreach (var seed in setting.Seeds.GetSeeds())
     {
         urlQueue.EnQueue(new UrlInfo(seed, 0));
     }
 }
Exemplo n.º 12
0
 /// <summary>
 /// Enqueues each seed returned by <c>setting.Seeds.GetSeeds()</c> onto
 /// <paramref name="urlQueue"/> as <c>new UrlInfo(seed, 0)</c>.
 /// </summary>
 /// <param name="urlQueue">Queue that receives the seeds; a null queue makes this a no-op.</param>
 /// <param name="setting">Spider configuration; a null <c>Seeds</c> makes this a no-op.</param>
 public static void Seed2Queue(
     UrlAddressQueue urlQueue,
     SpiderSetting setting)
 {
     bool nothingToDo = setting.Seeds == null || urlQueue == null;
     if (nothingToDo)
     {
         return;
     }

     var seedUrls = setting.Seeds.GetSeeds();
     foreach (var seedUrl in seedUrls)
     {
         var entry = new UrlInfo(seedUrl, 0);
         urlQueue.EnQueue(entry);
     }
 }
Exemplo n.º 13
0
        /// <summary>
        /// Console entry point: crawls from a single seed URL to depth 2 on 8 workers
        /// using a string matcher, a <c>BloomFilter</c> and a <c>DefaultReader</c>,
        /// then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Console.WriteLine("Init");
            InitRecvThread();

            var seeds = new SpiderSeeds();
            seeds.AddSeed("http://www.javfee.com/cn");

            var siteMatch = new FindStringMatch();
            siteMatch.StringFind = "www.javfee.com";
            siteMatch.NoCase = true;
            IUrlMatch match = siteMatch;

            IUrlFilter filter = new BloomFilter();
            IUrlReader reader = new DefaultReader();

            var setting = new SpiderSetting
            {
                Depth = 2,
                WorkerCount = 8,
                Seeds = seeds,
                Match = match,
                Filter = filter,
                Reader = reader
            };

            var master = new SpiderMaster();
            master.Init(setting);

            // The same handler receives all three kinds of notification.
            master.AddUrlEventHandler += OnEvent;
            master.DataReceivedEventHandler += OnEvent;
            master.ErrorEventHandler += OnEvent;

            Console.WriteLine("Begin");
            master.Crawl();

            master.Stop();
            StopRecvThread();

            Console.WriteLine("Finish");
            Console.ReadKey();
        }