/// <summary>
/// Console entry point: seeds the spider with 101 captcha-image URLs, crawls them
/// with an <c>ImageReader</c>, then shuts everything down and waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Upper bound on the length of the random suffix appended to each seed URL.
    const int MaxTokenLength = 10;

    Console.WriteLine("Init");
    InitRecvThread();

    SpiderSeeds seeds = new SpiderSeeds();
    for (int i = 0; i <= 100; ++i)
    {
        // Random suffix appended to the query string (presumably so that every
        // seed URL is distinct -- confirm against the server's expectations).
        // The formatted double can be shorter than 10 characters (e.g. "0.5"),
        // in which case an unconditional Substring(0, 10) would throw
        // ArgumentOutOfRangeException; truncate only when there is something to cut.
        string token = r.NextDouble().ToString();
        if (token.Length > MaxTokenLength)
        {
            token = token.Substring(0, MaxTokenLength);
        }

        seeds.AddSeed("https://kyfw.12306.cn/otn/passcodeNew/getPassCodeNew?module=login&rand=sjrand&" + token);
    }

    IUrlReader reader = new ImageReader();

    SpiderSetting setting = new SpiderSetting();
    setting.Depth = 1;
    setting.WorkerCount = 8;
    setting.Seeds = seeds;
    setting.Reader = reader;

    SpiderMaster master = new SpiderMaster();
    master.Init(setting);
    master.DataReceivedEventHandler += OnDataEvent;

    Console.WriteLine("Begin");
    master.Crawl();
    master.Stop();
    StopRecvThread();
    Console.WriteLine("search Finish");

    // Keep the console window open until the user presses a key.
    Console.ReadKey();
}
/// <summary>
/// Reads <paramref name="url"/> through the reader configured on <paramref name="setting"/>.
/// </summary>
/// <param name="url">URL to read; may be null.</param>
/// <param name="setting">Spider settings whose <c>Reader</c> is used.</param>
/// <returns>
/// The value returned by <c>setting.Reader.Read(url)</c>, or null when
/// <paramref name="url"/> or <c>setting.Reader</c> is null.
/// </returns>
public static string UrlRead(string url, SpiderSetting setting)
{
    // Nothing to read, or nothing to read it with.
    if (url == null || setting.Reader == null)
    {
        return null;
    }

    return setting.Reader.Read(url);
}
/// <summary>
/// Runs <paramref name="url"/> through the matcher configured on <paramref name="setting"/>.
/// </summary>
/// <param name="url">URL to test; may be null.</param>
/// <param name="setting">Spider settings whose <c>Match</c> is used.</param>
/// <returns>
/// The value returned by <c>setting.Match.Match(url)</c>, or false when
/// <paramref name="url"/> or <c>setting.Match</c> is null.
/// </returns>
public static bool UrlMatch(string url, SpiderSetting setting)
{
    // Without a URL or a matcher there is nothing to evaluate.
    if (url == null || setting.Match == null)
    {
        return false;
    }

    return setting.Match.Match(url);
}
/// <summary>
/// Runs <paramref name="url"/> through the filter configured on <paramref name="setting"/>.
/// </summary>
/// <param name="url">URL to test; may be null.</param>
/// <param name="setting">Spider settings whose <c>Filter</c> is used.</param>
/// <returns>
/// The value returned by <c>setting.Filter.Filter(url)</c>, or false when
/// <paramref name="url"/> or <c>setting.Filter</c> is null.
/// </returns>
public static bool UrlFilter(string url, SpiderSetting setting)
{
    // Without a URL or a filter there is nothing to evaluate.
    if (url == null || setting.Filter == null)
    {
        return false;
    }

    return setting.Filter.Filter(url);
}
/// <summary>
/// Reads <paramref name="url"/> with <c>setting.Reader</c> when both are available.
/// </summary>
/// <param name="url">URL to read; may be null.</param>
/// <param name="setting">Spider settings supplying the reader.</param>
/// <returns>
/// The result of <c>setting.Reader.Read(url)</c>; null if <paramref name="url"/>
/// is null or no reader is configured.
/// </returns>
public static string UrlRead(string url, SpiderSetting setting)
{
    bool canRead = url != null && setting.Reader != null;
    return canRead ? setting.Reader.Read(url) : null;
}
/// <summary>
/// Tests <paramref name="url"/> with <c>setting.Match</c> when both are available.
/// </summary>
/// <param name="url">URL to test; may be null.</param>
/// <param name="setting">Spider settings supplying the matcher.</param>
/// <returns>
/// The result of <c>setting.Match.Match(url)</c>; false if <paramref name="url"/>
/// is null or no matcher is configured.
/// </returns>
public static bool UrlMatch(string url, SpiderSetting setting)
{
    // Short-circuit evaluation: the matcher is only invoked when both checks pass.
    return url != null
        && setting.Match != null
        && setting.Match.Match(url);
}
/// <summary>
/// Tests <paramref name="url"/> with <c>setting.Filter</c> when both are available.
/// </summary>
/// <param name="url">URL to test; may be null.</param>
/// <param name="setting">Spider settings supplying the filter.</param>
/// <returns>
/// The result of <c>setting.Filter.Filter(url)</c>; false if <paramref name="url"/>
/// is null or no filter is configured.
/// </returns>
public static bool UrlFilter(string url, SpiderSetting setting)
{
    // Short-circuit evaluation: the filter is only invoked when both checks pass.
    return url != null
        && setting.Filter != null
        && setting.Filter.Filter(url);
}
/// <summary>
/// Stores the given settings and brings up the worker list: a new
/// <c>SpiderWorkerList</c> is built over <c>urlQueue</c>, asked to create
/// <c>setting.WorkerCount</c> workers, given <c>WorkerAction</c> as its work
/// action, and started.
/// </summary>
/// <param name="setting">Settings to keep in <c>Setting</c> and to size the worker list from.</param>
public void Init(SpiderSetting setting)
{
    Setting = setting;

    // Publish the list through Workers before configuring it, so the member is
    // already set by the time the workers are started.
    var pool = new SpiderWorkerList(urlQueue);
    Workers = pool;

    pool.CreateWorker(setting.WorkerCount);
    pool.SetWorkAction(WorkerAction);
    pool.Start();
}
/// <summary>
/// Console entry point: seeds the spider with listing pages 0 through 10, crawls
/// them, runs <c>Parse</c> as a task, and prints every entry of <c>InfoList</c>.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    Console.WriteLine("Init");
    InitRecvThread();

    // Seed URLs: one listing page per index, 0..10 inclusive.
    SpiderSeeds seedList = new SpiderSeeds();
    for (int page = 0; page < 11; page++)
    {
        seedList.AddSeed("http://www.cozy.com/cn/actresses/currentPage/" + page);
    }

    IUrlMatch urlMatch = new FindStringMatch { StringFind = "www.cozy.com/cn/star/", NoCase = true };
    IUrlFilter urlFilter = new BloomFilter();
    IUrlReader urlReader = new DefaultReader();

    SpiderSetting config = new SpiderSetting
    {
        Depth = 1,
        WorkerCount = 8,
        Seeds = seedList,
        Match = urlMatch,
        Filter = urlFilter,
        Reader = urlReader
    };

    SpiderMaster spider = new SpiderMaster();
    spider.Init(config);

    // The same handler receives all three kinds of notification.
    spider.AddUrlEventHandler += OnEvent;
    spider.DataReceivedEventHandler += OnEvent;
    spider.ErrorEventHandler += OnEvent;

    Console.WriteLine("Begin");
    spider.Crawl();
    spider.Stop();
    StopRecvThread();
    Console.WriteLine("search Finish");

    // Run Parse as a task with state 5 and block until it completes.
    Task parseJob = new Task(Parse, 5);
    parseJob.Start();
    parseJob.Wait();
    Console.WriteLine("Parse Finish");

    foreach (var entry in InfoList)
    {
        Console.WriteLine(entry.ToString());
    }

    // Keep the console window open until the user presses a key.
    Console.ReadKey();
}
/// <summary>
/// Enqueues every seed from <c>setting.Seeds</c> into <paramref name="urlQueue"/>,
/// wrapping each one in a <c>UrlInfo</c> constructed with 0 as its second argument.
/// Does nothing when <c>setting.Seeds</c> or <paramref name="urlQueue"/> is null.
/// </summary>
/// <param name="urlQueue">Queue that receives the seed entries; may be null.</param>
/// <param name="setting">Spider settings supplying the seeds.</param>
public static void Seed2Queue(UrlAddressQueue urlQueue, SpiderSetting setting)
{
    // No seeds or no destination queue: nothing to do.
    if (setting.Seeds == null || urlQueue == null)
    {
        return;
    }

    foreach (var seed in setting.Seeds.GetSeeds())
    {
        urlQueue.EnQueue(new UrlInfo(seed, 0));
    }
}
/// <summary>
/// Console entry point: crawls from a single seed URL with depth 2 and eight
/// workers, routing all spider events to <c>OnEvent</c>, then waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    Console.WriteLine("Init");
    InitRecvThread();

    SpiderSeeds seedList = new SpiderSeeds();
    seedList.AddSeed("http://www.javfee.com/cn");

    IUrlMatch urlMatch = new FindStringMatch { StringFind = "www.javfee.com", NoCase = true };
    IUrlFilter urlFilter = new BloomFilter();
    IUrlReader urlReader = new DefaultReader();

    SpiderSetting config = new SpiderSetting
    {
        Depth = 2,
        WorkerCount = 8,
        Seeds = seedList,
        Match = urlMatch,
        Filter = urlFilter,
        Reader = urlReader
    };

    SpiderMaster spider = new SpiderMaster();
    spider.Init(config);

    // The same handler receives all three kinds of notification.
    spider.AddUrlEventHandler += OnEvent;
    spider.DataReceivedEventHandler += OnEvent;
    spider.ErrorEventHandler += OnEvent;

    Console.WriteLine("Begin");
    spider.Crawl();
    spider.Stop();
    StopRecvThread();
    Console.WriteLine("Finish");

    // Keep the console window open until the user presses a key.
    Console.ReadKey();
}