/// <summary>
/// Worker loop: while <c>ParserWork</c> is true, drains <c>WaitingQueue</c>, running
/// <c>DoJob</c> on each dequeued element and then moving it to <c>DoneQueue</c>.
/// </summary>
/// <remarks>
/// <c>ParserWork</c> is only re-checked between drain passes, so a pass that has
/// started always empties the queue before the loop can exit.
/// </remarks>
private static void Parser()
{
    while (ParserWork)
    {
        // Process everything that is currently waiting.
        for (; WaitingQueue.Count != 0;)
        {
            QueryElement current = WaitingQueue.Dequeue();
            current.DoJob();
            DoneQueue.Enqueue(current);
        }

        // Nothing left to do: pause 50 ms before polling the queue again.
        Thread.Sleep(50);
    }
}
/// <summary>
/// Downloads the page for <paramref name="urlGroup"/>, records the entry in
/// <c>DoneQueue</c>, and enqueues the page's not-yet-seen, non-asset links into
/// <c>UrlQueue</c> at depth <paramref name="depath"/> + 1.
/// </summary>
/// <param name="urlGroup">Entry to fetch; the call returns immediately if <c>DoneQueue</c> already contains it.</param>
/// <param name="depath">
/// Depth of <paramref name="urlGroup"/>. When <paramref name="depath"/> + 1 exceeds
/// <c>MaxSearchDepth</c> the page is still fetched and scanned, but no links are enqueued.
/// </param>
public void HandUrl(UrlGroup urlGroup, int depath)
{
    if (DoneQueue.Contains(urlGroup))
    {
        return;
    }

    string html = HttpHelper.GetString(urlGroup.Url, Encoding.Default);
    lock (DoneQueueLock)
    {
        DoneQueue.Enqueue(urlGroup);
    }

    // 1. Collect every URL found on the page.
    List<string> listAllUrl = MatchDomainURL(html);

    // 2. Filter: drop off-site, js, image and similar URLs. Only the extension and
    //    already-seen checks happen here; off-site filtering is presumably done by
    //    MatchDomainURL - TODO confirm.
    string[] extArray = new string[] { ".jpg", ".png", ".gif", ".js" };
    List<string> mlstUrl = new List<string>();
    foreach (string url in listAllUrl)
    {
        // Invariant lower-casing: culture-sensitive ToLower() can turn "I" into a
        // dotless "ı" (e.g. Turkish), which would let ".GIF" slip past the filter.
        string tmpurl = url.ToLowerInvariant();
        bool isfile = false;
        foreach (string ext in extArray)
        {
            if (tmpurl.Contains(ext))
            {
                isfile = true;
                break;
            }
        }

        // Match special URLs (runs for every URL, including filtered-out ones).
        SepcialMatch(url);

        // NOTE(review): the queues are filled with UrlGroup items but probed here with
        // a plain string - confirm that Contains can actually match across those types.
        if (!isfile && !DoneQueue.Contains(url) && !UrlQueue.Contains(url))
        {
            mlstUrl.Add(url);
        }
    }

    if (depath + 1 > MaxSearchDepth)
    {
        return; // Maximum search depth reached.
    }

    foreach (string url in mlstUrl)
    {
        int queuedCount;
        lock (UrlQueueLock)
        {
            UrlQueue.Enqueue(new UrlGroup() { Url = url, DepathNumber = depath + 1 });
            queuedCount = UrlQueue.Count;
        }

        // Back-pressure. The pause happens after the lock is released so that any thread
        // needing UrlQueueLock to drain the queue is not blocked while this one sleeps.
        // The larger threshold is tested first; with the smaller one first, the
        // 50000 branch could never be reached.
        if (queuedCount > 50000)
        {
            Thread.Sleep(10000);
        }
        else if (queuedCount > 10000)
        {
            Thread.Sleep(3000);
        }
    }
}