/// <summary>
/// Downloads one queued URL, enqueues the links found on the page (subject to the
/// depth and kind filters) and merges the numbers extracted from the page into the
/// shared result list.
/// </summary>
/// <param name="item">Queue entry to process; its <c>Url</c> is downloaded and its
/// <c>Depth</c> decides whether links on the page are followed.</param>
private void _begin(UrlItem item)
{
    var request = new Request();
    var html = request.Get(item.Url);

    // NOTE(review): assumes Request.Get can hand back null/empty when a download
    // fails - confirm. Regex.IsMatch throws on a null input, and there is nothing
    // to extract from an empty page, so stop here.
    if (string.IsNullOrEmpty(html))
    {
        return;
    }

    // Depth control: follow links only while below the configured depth
    // (a Depth of zero or less disables the limit).
    if (item.Depth < Depth || Depth <= 0)
    {
        var htmlHelper = new Html(html);
        var links = htmlHelper.GetLinks(item.Url);
        var comparer = new UrlItemComparer();

        foreach (var link in links)
        {
            var newItem = new UrlItem(link, item.Depth + 1, UrlStatus.Waiting);

            // When Kind is 1 a link is kept only if Html.Comparer accepts it
            // against the current page URL.
            if (Kind == 1 && !htmlHelper.Comparer(item.Url, link))
            {
                continue;
            }

            // The membership test and the insertion run together on the dispatcher
            // thread. Testing on the worker thread (as before) enumerated UrlsList
            // while other workers were appending to it, and let two workers enqueue
            // the same URL between the test and the add.
            Application.Current.Dispatcher.Invoke(() =>
            {
                if (!UrlsList.Contains<UrlItem>(newItem, comparer))
                {
                    UrlsList.Add(newItem);
                }
            });
        }
    }

    // Pages whose content matches the keyword pattern contribute no numbers.
    if (!string.IsNullOrEmpty(_pattern) && Regex.IsMatch(html, _pattern))
    {
        return;
    }

    var helper = new NumberHelper();
    var lists = helper.GetNumberWithHtml(html);

    lock (_object)
    {
        _numberList.AddRange(lists);
        _numberList = _numberList.Distinct().ToList();

        // Invoked while the lock is still held so the count shown always belongs
        // to the most recent merge.
        Application.Current.Dispatcher.Invoke(() =>
        {
            Message = $"目前已提取{_numberList.Count}个号码!";
        });
    }
}
/// <summary>
/// Starts the crawler: seeds the URL queue from <c>BaseUrl</c> when it is empty,
/// builds the keyword pattern, then launches two background loops - a dispatcher
/// that processes the queue in batches of up to <c>Count</c> downloads, and a timer
/// that periodically exports and clears the collected numbers.
/// </summary>
private void ExecuteStartCommand()
{
    if (UrlsList.Count == 0)
    {
        UrlsList.Add(new UrlItem(UrlHelper.GetUrl(BaseUrl)));
    }

    Message = "自动爬虫启动!";
    IsEnable = false;
    _init();

    if (!string.IsNullOrWhiteSpace(Keywords))
    {
        // Split instead of Replace(" ", "|"): repeated, leading or trailing spaces
        // used to produce an empty alternative ("a||b", "|a"), and an empty
        // alternative matches every page.
        var keywords = Keywords.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
        _pattern = string.Join("|", keywords);
    }

    // The loops capture this local source so they always cancel the run that
    // started them, even if the field is replaced later.
    var tokenSource = new CancellationTokenSource();
    _tokenSource = tokenSource;
    var token = tokenSource.Token;

    // Dispatcher loop: hands the queue out to download tasks, one batch at a time.
    Task.Factory.StartNew(() =>
    {
        try
        {
            var index = 0;
            while (!token.IsCancellationRequested)
            {
                // Clamp to at least one task: a Count of zero or less would make
                // ContinueWhenAll throw (empty array) or the array length negative.
                var tasksLength = Math.Min(Math.Max(1, Count), UrlsList.Count - index);
                if (tasksLength <= 0)
                {
                    break;
                }

                var tasks = new Task[tasksLength];
                for (var i = 0; i < tasksLength; i++)
                {
                    // Copy the index so each task captures its own value.
                    var index1 = index;
                    tasks[i] = new Task(() => { _begin(UrlsList[index1]); });
                    index++;
                }

                // The continuation completes when every download has finished, or
                // as soon as the token is cancelled.
                var continuation = Task.Factory.ContinueWhenAll(tasks, _ => { }, token);
                foreach (var task in tasks)
                {
                    task.Start();
                }

                // WaitAny blocks until the continuation reaches any final state and,
                // unlike Wait, does not throw for cancelled or faulted tasks; this
                // replaces the former one-second polling loop.
                Task.WaitAny(continuation);

                _changedStatus(index - tasksLength, tasksLength, UrlStatus.Completed);

                if (index >= UrlsList.Count)
                {
                    break;
                }
            }
        }
        finally
        {
            // Runs on every exit path, including an unexpected exception, so the
            // export loop is told to stop and the UI is never left disabled.
            if (!token.IsCancellationRequested)
            {
                tokenSource.Cancel();
            }

            Application.Current.Dispatcher.Invoke(() => { IsEnable = true; });
        }
    }, token, TaskCreationOptions.LongRunning, TaskScheduler.Default);

    // Timer loop: performs the automatic export.
    Task.Factory.StartNew(() =>
    {
        var time = Space;
        while (!token.IsCancellationRequested)
        {
            Thread.Sleep(10000);
            time--;

            // Keep waiting until the countdown expires; a cancellation falls
            // through so one last export is written before the loop ends.
            if (time > 0 && !token.IsCancellationRequested)
            {
                continue;
            }

            // Export and clear under the same lock the workers use to merge
            // results. Reading the list outside the lock raced with those merges,
            // and numbers added between the export and the clear were lost.
            lock (_object)
            {
                var file = ExportHelper.ExportRandomName(_numberList, "爬虫导出-");
                _numberList.Clear();
                Message = "执行自动保存操作,保存到:" + file;
            }

            time = Space;
        }
    }, token, TaskCreationOptions.LongRunning, TaskScheduler.Default);
}