/// <summary>
/// Tests whether a target node exists and can be wrapped by <c>HtmlHandler.GetImplement</c>.
/// </summary>
/// <param name="xpath">
/// Optional callback that configures an <c>XPathHandler</c>; the resulting path is evaluated
/// against the current node with <c>SelectSingleNode</c>. When <c>null</c>, the current node
/// itself is the target.
/// </param>
/// <returns>
/// <c>true</c> when the target node is non-null and <c>HtmlHandler.GetImplement</c> yields a
/// non-null value for it; otherwise <c>false</c>.
/// </returns>
public bool Any(Func<XPathHandler, XPathHandler> xpath)
{
    var pathBuilder = new XPathHandler();
    var target = this._currentNode;

    if (xpath != null)
    {
        // Narrow the target to the first node matching the caller-supplied path.
        var path = xpath(pathBuilder).GetPath();
        target = target.SelectSingleNode(path);
    }

    if (target == null)
    {
        return false;
    }

    var wrapped = HtmlHandler.GetImplement(_logPrefix, target);
    return wrapped != null;
}
/// <summary>
/// Loads the HTML for the task registered under <paramref name="key"/>, retrying on failure.
/// </summary>
/// <param name="key">Key of the entry in <c>_tasks</c> describing what to load.</param>
/// <param name="retry">
/// Number of retries still allowed. The value logged as <c>Retry[...]</c> is
/// <c>taskData.Retry - retry</c>.
/// </param>
/// <returns>
/// The loaded content when the load status is <c>LoadStatusEnum.Success</c>, the content is
/// non-empty and the optional task filter accepts it; otherwise an empty string once no
/// retries remain.
/// </returns>
public async Task<string> GetHtmlAsync(string key, int retry)
{
    // Pause between a failed attempt and the next one.
    const int retryDelayMilliseconds = 1500;

    // Iterative retry: every attempt re-reads the task entry and uses a fresh web handler,
    // exactly as each recursive call did before.
    while (true)
    {
        var taskData = _tasks[key];
        var webHandler = WebHandler.GetImplement(_logPrefix, taskData.ParserEnging, _proxy);
        Task task = Task.Run(() => webHandler.LoadHtml(taskData.Url, taskData.HttpTimeout), _cancellationToken);
        Logger.Info($"URL[{taskData.Url}] Parser Start. Retry[{taskData.Retry - retry}] use VPS[{webHandler.ProxyName}]");

        // The watchdog fires two seconds before HttpTimeout. Clamp to zero so that a
        // timeout below two seconds cannot make Task.Delay throw ArgumentOutOfRangeException.
        var wait = TimeSpan.FromSeconds(taskData.HttpTimeout - 2);
        if (wait < TimeSpan.Zero)
        {
            wait = TimeSpan.Zero;
        }

        bool timedOut;
        using (var delayCts = System.Threading.CancellationTokenSource.CreateLinkedTokenSource(_cancellationToken))
        {
            var finished = await Task.WhenAny(task, Task.Delay(wait, delayCts.Token));
            timedOut = finished != task;

            // Stop the watchdog timer as soon as the race is decided so it does not linger
            // for the rest of the timeout when the load finished first.
            delayCts.Cancel();
        }

        if (timedOut)
        {
            Logger.Info($"URL[{taskData.Url}] Parser Failure, because timeout. Retry[{taskData.Retry - retry}] use VPS[{webHandler.ProxyName}]");
            webHandler.Cancel();
        }
        else if (task.IsFaulted)
        {
            // Reading task.Exception observes the fault; without this the reason for the
            // failed load would be lost silently.
            Logger.Info($"URL[{taskData.Url}] Parser Failure, because exception[{task.Exception.GetBaseException().Message}]. Retry[{taskData.Retry - retry}] use VPS[{webHandler.ProxyName}]");
        }

        var content = webHandler.Content;
        var parserResult = !string.IsNullOrEmpty(content);
        if (parserResult && taskData.Filter != null)
        {
            // The optional filter decides whether non-empty content is acceptable.
            parserResult = taskData.Filter(HtmlHandler.GetImplement(_logPrefix, content), content);
        }

        var loaded = webHandler.IsLoadSuccess == LoadStatusEnum.Success;
        if (loaded && parserResult)
        {
            Logger.Info($"URL[{taskData.Url}] Parser Success. Retry[{taskData.Retry - retry}] use VPS[{webHandler.ProxyName}]");
            return content;
        }

        if (loaded)
        {
            Logger.Info($"URL[{taskData.Url}] Parser Success. but content filter failure. Retry[{taskData.Retry - retry}] use VPS[{webHandler.ProxyName}]");
        }

        if (retry <= 0)
        {
            Logger.Info($"URL[{taskData.Url}] Parser Failure. use VPS[{webHandler.ProxyName}]");
            return "";
        }

        await Task.Delay(retryDelayMilliseconds);
        retry--;
    }
}
/// <summary>
/// Collects the processed inner HTML of several nodes, keyed by their zero-based position.
/// </summary>
/// <param name="xpath">
/// Optional callback that configures an <c>XPathHandler</c>; the resulting path is evaluated
/// against the current node with <c>SelectNodes</c>. When <c>null</c>, the current node's
/// <c>ChildNodes</c> are used.
/// </param>
/// <param name="strHandler">String callback forwarded to <c>StrHandle</c> for every node's inner HTML.</param>
/// <returns>
/// A dictionary mapping each node's index to its processed inner HTML, or <c>null</c> when
/// the node set is <c>null</c> or empty.
/// </returns>
public IDictionary<int, string> Gets(Func<XPathHandler, XPathHandler> xpath, Func<string, string> strHandler)
{
    var pathBuilder = new XPathHandler();
    var current = this._currentNode;
    var matches = current.ChildNodes;

    if (xpath != null)
    {
        var path = xpath(pathBuilder).GetPath();
        matches = current.SelectNodes(path);
    }

    if (matches == null || matches.Count <= 0)
    {
        return null;
    }

    // Pair each node's processed HTML with its position in a single pass.
    return matches
        .Select((item, position) => new
        {
            position,
            text = this.StrHandle(strHandler, HtmlHandler.GetImplement(_logPrefix, item).InnerHtml)
        })
        .ToDictionary(entry => entry.position, entry => entry.text);
}
/// <summary>
/// Reads the inner HTML of a single node, post-processes it and converts it to
/// <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">
/// Target type. Converted with <see cref="Convert.ChangeType(object, Type)"/>; for
/// <c>Nullable&lt;X&gt;</c> the underlying type <c>X</c> is used for the conversion.
/// </typeparam>
/// <param name="xpath">
/// Optional callback that configures an <c>XPathHandler</c>; the resulting path is evaluated
/// against the current node with <c>SelectSingleNode</c>. When <c>null</c>, the current node
/// itself is read.
/// </param>
/// <param name="strHandler">String callback forwarded to <c>StrHandle</c> together with the node's inner HTML.</param>
/// <returns>
/// The converted value, or <c>default(T)</c> when no node is found, no handler can be
/// obtained for it, or the processed text is null or empty after trimming.
/// </returns>
/// <exception cref="FormatException">The text is not in a format recognised for the target type.</exception>
/// <exception cref="InvalidCastException">The target type does not support conversion from <see cref="string"/>.</exception>
/// <exception cref="OverflowException">The text represents a number outside the range of the target type.</exception>
public T Get<T>(Func<XPathHandler, XPathHandler> xpath, Func<string, string> strHandler)
{
    XPathHandler xpathHandler = new XPathHandler();
    var node = this._currentNode;
    if (xpath != null)
    {
        node = node.SelectSingleNode(xpath(xpathHandler).GetPath());
    }

    if (node == null)
    {
        return default(T);
    }

    // Any() treats a null result from GetImplement as "nothing there"; do the same here
    // instead of dereferencing it blindly.
    var handler = HtmlHandler.GetImplement(_logPrefix, node);
    if (handler == null)
    {
        return default(T);
    }

    var result = this.StrHandle(strHandler, handler.InnerHtml)?.Trim();
    if (string.IsNullOrEmpty(result))
    {
        return default(T);
    }

    // Convert.ChangeType cannot target Nullable<X> directly; convert to X and let the
    // cast to T wrap the boxed value.
    var targetType = Nullable.GetUnderlyingType(typeof(T)) ?? typeof(T);
    return (T)Convert.ChangeType(result, targetType);
}