/// <summary>
/// Reports a failed URL on the console in red and hands it to <c>CollectionStore.MoveUrl</c>.
/// </summary>
/// <param name="processUrl">URL whose processing failed.</param>
/// <param name="processLevel">Level value associated with <paramref name="processUrl"/>.</param>
/// <param name="ex">Exception describing the failure; rendered through <c>CollectTools.GetExceptionMessages</c>.</param>
public void OnFailed(string processUrl, int processLevel, Exception ex)
{
    // Print "<level>, <url>, <exception messages>" in red, then restore the console colours.
    Console.ForegroundColor = ConsoleColor.Red;
    string failureLine = $"{processLevel}, {processUrl}, {CollectTools.GetExceptionMessages(ex)}";
    Console.WriteLine(failureLine);
    Console.ResetColor();

    // Original note: "delete url from 0", "move url to 1".
    // NOTE(review): 0 and 1 presumably identify states/buckets inside CollectionStore - confirm there.
    CollectionStore.UrlModel failedEntry = new CollectionStore.UrlModel()
    {
        Url = processUrl,
        Level = processLevel,
        localFileName = null
    };
    CollectionStore.MoveUrl(rootUrl, failedEntry);
}
/// <summary>
/// Fetches the HTML for the <c>url</c> field through <c>CollectTools.GetHtmlContent</c>, parses it
/// into an <c>HtmlDocument</c> and stores that document's <c>DocumentNode</c> in <c>htmlNode</c>.
/// </summary>
/// <returns>The HTML text that was fetched and parsed.</returns>
/// <exception cref="ArgumentNullException">The <c>url</c> field is null or empty.</exception>
/// <exception cref="InvalidOperationException">The fetch returned null or empty content.</exception>
public string LoadHtml()
{
    if (string.IsNullOrEmpty(this.url))
    {
        throw new ArgumentNullException("Url");
    }

    string htmlContent = CollectTools.GetHtmlContent(url, encoding);
    if (string.IsNullOrEmpty(htmlContent))
    {
        // A specific exception type instead of the bare System.Exception; callers that catch
        // Exception are unaffected. The {encoding} hole formats exactly like ToString() for a
        // non-null value but does not throw NullReferenceException when encoding is null,
        // which would otherwise hide this error.
        throw new InvalidOperationException($"Load html content by url = {url} and encoding = {encoding} failure.");
    }

    HtmlDocument document = new HtmlDocument();
    document.LoadHtml(htmlContent);
    this.htmlNode = document.DocumentNode;

    return htmlContent;
}
/// <summary>
/// Appends an extraction result to a newly named text file inside a directory derived from the root URL.
/// </summary>
/// <param name="rootUrl">Root URL; its URL-encoded form is the directory name under <c>E:\hwl-collect</c>.</param>
/// <param name="result">Extraction result whose origin URL, level and JSON content are written, one per line.</param>
/// <returns>
/// The path of the file written, or <c>null</c> when <paramref name="result"/> or its
/// <c>ContentResults</c> is null, when a present "title" or "content" entry reports
/// <c>HasValue()</c> as false, or when writing fails (the failure is printed to the console in red).
/// </returns>
public string Save(string rootUrl, ExtractResult result)
{
    if (result == null || result.ContentResults == null)
    {
        return null;
    }

    // A key that is present but has no value rejects the result; an absent key is tolerated.
    var contents = result.ContentResults;
    foreach (string requiredKey in new string[] { "title", "content" })
    {
        if (contents.ContainsKey(requiredKey) && !contents[requiredKey].HasValue())
        {
            return null;
        }
    }

    string targetDirectory = $"E:\\hwl-collect\\{HttpUtility.UrlEncode(rootUrl)}";
    string targetFile = $"{targetDirectory}\\{Guid.NewGuid().ToString()}.txt";

    try
    {
        if (!Directory.Exists(targetDirectory))
        {
            Directory.CreateDirectory(targetDirectory);
        }

        // File layout: line 1 = origin URL, line 2 = level, line 3 = content as JSON.
        string[] fileLines = new string[]
        {
            result.OriginUrl,
            result.Level.ToString(),
            result.ConvertContentToJsonString()
        };
        File.AppendAllLines(targetFile, fileLines, Encoding.UTF8);

        return targetFile;
    }
    catch (Exception ex)
    {
        // Best effort: report the failure and signal it to the caller with null.
        Console.ForegroundColor = ConsoleColor.Red;
        Console.WriteLine($"Save html content to path of {targetFile} failed, {CollectTools.GetExceptionMessages(ex)}.");
        Console.ResetColor();

        return null;
    }
}