/// <summary>
/// Retries the characters recorded in <c>failCharSet</c>. When <c>failCode</c> has a value only
/// records with that fail code are retried; otherwise every record is retried.
/// </summary>
/// <remarks>
/// For each record the character is fetched again through <c>CrawlerWorkItem.GetOneChinese</c>.
/// On success the character is added to <c>chineseCharSet</c> (unless an entry with the same
/// Unicode value already exists) and the fail record is removed. A null result is recorded with
/// fail code 0, an exception with fail code 1. Every iteration ends with a 200 ms delay.
/// </remarks>
/// <returns>A task that completes after all records were processed and the outcome was reported.</returns>
public Task Run()
{
    IAggregateFluent<FailChar> query;
    if (failCode.HasValue)
    {
        query = failCharSet.Where(w => w.FailCode == failCode.Value);
    }
    else
    {
        query = failCharSet.Aggregate();
    }

    // One client is shared by all iterations instead of creating (and tearing down) a new
    // HttpClient per record; it is disposed in the continuation below.
    var httpClient = new HttpClient();
    httpClient.MaxResponseContentBufferSize = 256000;
    httpClient.DefaultRequestHeaders.Add("user-agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36");

    return query.ForEachAsync(async f =>
    {
        try
        {
            ChineseChar chineseChar = await CrawlerWorkItem.GetOneChinese(httpClient, uri, f.Unicode, len1, len2);
            if (chineseChar == null)
            {
                // Nothing could be fetched for this code point: keep it as a code-0 failure.
                CrawlerWorkItem.PrintFail(0, f.Unicode, f.Text);
                CrawlerWorkItem.AddFailChar(failCharSet, 0, f.Unicode, f.Text, "");
                return;
            }

            if (chineseCharSet.Any(a => a.Unicode == f.Unicode) == false)
            {
                chineseCharSet.Add(chineseChar);
            }

            failCharSet.Remove(f.Id);
            Console.WriteLine($"已成功修复 Unicode:{f.Unicode} 内容:{f.Text}");
        }
        catch (Exception ex)
        {
            CrawlerWorkItem.PrintFail(1, f.Unicode, f.Text);
            CrawlerWorkItem.AddFailChar(failCharSet, 1, f.Unicode, f.Text, ex.Message);
        }
        finally
        {
            // Pause between requests.
            await Task.Delay(200);
        }
    }).ContinueWith(t =>
    {
        httpClient.Dispose();

        var foregroundColor = ConsoleColor.White;
        // t.Exception is null when the task was canceled rather than faulted, so guard the
        // dereference to avoid a NullReferenceException inside the continuation.
        if (t.IsCompletedSuccessfully == false && t.Exception != null)
        {
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine(t.Exception.ToString());
            Console.ForegroundColor = foregroundColor;
        }

        t.Dispose();
    });
}
/// <summary>
/// Loads the progress record for the range <c>beginUnicode</c>..<c>endUnicode</c> from
/// <c>crawlerInfoSet</c> into <c>crawlerInfo</c>, creating and storing a fresh record
/// (with <c>Unicode = 0</c>) when none exists yet.
/// </summary>
async Task Init()
{
    await Task.Delay(100);

    crawlerInfo = crawlerInfoSet.FirstOrDefault(a => a.BeginUnicode == beginUnicode && a.EndUnicode == endUnicode);
    if (crawlerInfo != null)
    {
        // A record for this range already exists; reuse it.
        return;
    }

    // No record yet: start a new one for this range and persist it.
    var created = new CrawlerInfo
    {
        BeginUnicode = beginUnicode,
        EndUnicode = endUnicode,
        Unicode = 0,
        ModifiedTime = DateTime.Now,
    };
    crawlerInfo = created;
    crawlerInfoSet.Add(created);
}
/// <summary>
/// Records a failed character in <paramref name="failCharSet"/>. If a record with the same
/// Unicode value already exists its fail code, message and modification time are updated;
/// otherwise a new record is inserted.
/// </summary>
/// <param name="failCharSet">Collection holding the fail records.</param>
/// <param name="failCode">Fail code to store on the record.</param>
/// <param name="unicode">Unicode value of the character that failed.</param>
/// <param name="text">Text of the character; only written when a new record is inserted.</param>
/// <param name="failMessage">Message describing the failure.</param>
public static void AddFailChar(IMongoCollection<FailChar> failCharSet, int failCode, ushort unicode, string text, string failMessage)
{
    // Read the clock once so CreatedTime and ModifiedTime of a new record are identical.
    DateTime now = DateTime.Now;

    FailChar failChar = failCharSet.FirstOrDefault(f => f.Unicode == unicode);
    if (failChar != null)
    {
        failChar.FailCode = failCode;
        failChar.FailMessage = failMessage;
        failChar.ModifiedTime = now;
        failCharSet.Update(failChar);
        return;
    }

    failCharSet.Add(new FailChar
    {
        // Store the caller's fail code; this used to be hard-coded to 1, so failures
        // reported with another code were persisted with the wrong value.
        FailCode = failCode,
        Unicode = unicode,
        Text = text,
        FailMessage = failMessage,
        CreatedTime = now,
        ModifiedTime = now,
    });
}
/// <summary>
/// Starts the crawl for the range <c>beginUnicode</c>..<c>endUnicode</c> on the thread pool.
/// </summary>
/// <remarks>
/// The crawl resumes from <c>crawlerInfo.Unicode</c> when that is non-zero, skipping that code
/// point if it is already present in <c>chineseCharSet</c>. Each fetched character is stored
/// and the progress record is updated; a null result is recorded with fail code 0 and an
/// exception with fail code 1. Every iteration ends with a 200 ms delay. The continuation
/// reports errors, prints the completion message, disposes <c>httpClient</c> and marks the
/// progress record as completed when the crawl task succeeded.
/// </remarks>
/// <returns>The continuation task, which completes after the final bookkeeping is done.</returns>
public Task GetTask()
{
    Task task = Task.Run(async () =>
    {
        await Init();

        ushort resumeFrom = crawlerInfo.Unicode == 0 ? beginUnicode : crawlerInfo.Unicode;
        if (crawlerInfo.IsCompleted)
        {
            return;
        }

        // Iterate with an int counter: a ushort counter wraps from 65535 to 0, which made the
        // loop endless when endUnicode is 65535 and restarted the crawl from 0 when the
        // resume point itself was 65535.
        int first = resumeFrom;
        if (chineseCharSet.Any(a => a.Unicode == resumeFrom))
        {
            first++;
        }

        for (int code = first; code <= endUnicode; ++code)
        {
            // NOTE(review): assumes endUnicode never exceeds ushort.MaxValue - confirm its declared type.
            ushort i = (ushort)code;
            string text = Convert.ToChar(i).ToString();
            try
            {
                ChineseChar chineseChar = await GetOneChinese(httpClient, uri, i, len1, len2);
                if (chineseChar == null)
                {
                    PrintFail(0, i, text);
                    AddFailChar(failCharSet, 0, i, text, "");
                    continue;
                }

                chineseCharSet.Add(chineseChar);

                // Persist progress so a later run can resume from this code point.
                crawlerInfo.Unicode = i;
                crawlerInfo.ModifiedTime = DateTime.Now;
                crawlerInfoSet.Update(crawlerInfo);
                Console.WriteLine($"完成记录 Unicode编码:{i} 字符内容:{text}");
            }
            catch (Exception ex)
            {
                PrintFail(1, i, text);
                AddFailChar(failCharSet, 1, i, text, ex.Message);
            }
            finally
            {
                // Pause between requests.
                await Task.Delay(200);
            }
        }
    }).ContinueWith(t =>
    {
        var foregroundColor = ConsoleColor.White;
        // t.Exception is null when the task was canceled rather than faulted, so guard the
        // dereference to avoid a NullReferenceException inside the continuation.
        if (t.IsCompletedSuccessfully == false && t.Exception != null)
        {
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine(t.Exception.ToString());
            Console.ForegroundColor = foregroundColor;
        }

        // The completion message is printed regardless of the outcome.
        Console.ForegroundColor = ConsoleColor.Green;
        Console.WriteLine($"{beginUnicode}~{endUnicode} 爬虫完成");
        Console.ForegroundColor = foregroundColor;

        httpClient.Dispose();

        if (t.IsCompletedSuccessfully)
        {
            crawlerInfo.IsCompleted = true;
            crawlerInfoSet.Update(crawlerInfo);
        }

        t.Dispose();
    });

    return task;
}