/// <summary>
/// Processes a downloaded web page: enqueues every link that contains
/// "http://" for later crawling, then prints the page title and URL
/// when a non-empty title is found.
/// </summary>
/// <param name="html">Raw HTML of the page; null or empty input is ignored.</param>
protected override void Process(string html)
{
    // Nothing to scan; avoids an ArgumentNullException inside Regex.
    if (string.IsNullOrEmpty(html))
    {
        return;
    }

    try
    {
        // The static Regex methods reuse the framework's pattern cache, so the
        // patterns are not re-parsed for every page.
        // NOTE(review): the unquoted alternative is listed first, so it wins whenever
        // a '>' follows the href; the capture may then still carry quotes, which
        // RemoveQuotation presumably strips - confirm against its definition.
        MatchCollection links = Regex.Matches(
            html,
            @"href=(?<web_url>[\s\S]*?)>|href=""(?<web_url>[\s\S]*?)""|href='(?<web_url>[\s\S]*?)'");

        foreach (Match link in links)
        {
            string url = this.RemoveQuotation(link.Groups["web_url"].Value);

            // Ordinal search: URL schemes are not culture-sensitive text.
            if (url.IndexOf("http://", StringComparison.Ordinal) != -1)
            {
                UrlQueue.GetInstance().Enqueue(url);
            }
        }

        // Match case-insensitively on the original HTML instead of lower-casing it,
        // so the title is reported exactly as written on the page.
        Match titleMatch = Regex.Match(
            html,
            @"<title[\s\S]*?>(?<title>[\s\S]*?)</title>",
            RegexOptions.IgnoreCase);
        string title = titleMatch.Groups["title"].Value;

        if (!string.IsNullOrEmpty(title))
        {
            Console.WriteLine(string.Format("网页标题:{0}", title));
            Console.WriteLine(string.Format("网页URL:{0}", this.Url));
        }
    }
    catch (Exception ex)
    {
        // Stay best-effort (one bad page must not stop the crawl), but report the
        // failure instead of hiding it.
        Console.Error.WriteLine(string.Format("Failed to process page {0}: {1}", this.Url, ex.Message));
    }
}
/// <summary>
/// Application entry point: applies the optional console title from the
/// "Title" app setting, seeds the URL queue with the "URL" app setting,
/// starts the thread manager, then waits for a line of console input.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    try
    {
        // AppSettings returns null for a missing key; a missing title is not
        // a reason to abort the crawl.
        string title = System.Configuration.ConfigurationManager.AppSettings["Title"];
        if (!string.IsNullOrEmpty(title))
        {
            Console.Title = title;
        }

        Console.WriteLine("Process is running!");

        string url = System.Configuration.ConfigurationManager.AppSettings["URL"];
        if (string.IsNullOrEmpty(url))
        {
            // Without a seed URL there is nothing to crawl; say so explicitly.
            Console.Error.WriteLine("App setting \"URL\" is missing or empty; nothing to crawl.");
        }
        else
        {
            UrlQueue.GetInstance().Enqueue(url);

            ThreadManager thread = new ThreadManager();
            thread.Start();
        }
    }
    catch (Exception ex)
    {
        // Report startup failures instead of swallowing them silently.
        Console.Error.WriteLine("Startup failed: " + ex);
    }

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}