Esempio n. 1
0
        /// <summary>
        /// Reports whether a node can be resolved and wrapped by <c>HtmlHandler.GetImplement</c>.
        /// </summary>
        /// <param name="xpath">
        /// Optional builder callback; when supplied, the path it produces is used to select a
        /// single node relative to the current node. When <c>null</c>, the current node itself is tested.
        /// </param>
        /// <returns>
        /// <c>true</c> when the resolved node is not <c>null</c> and <c>HtmlHandler.GetImplement</c>
        /// returns a non-null result for it; otherwise <c>false</c>.
        /// </returns>
        public bool Any(Func <XPathHandler, XPathHandler> xpath)
        {
            var pathBuilder = new XPathHandler();
            var target      = this._currentNode;

            if (xpath != null)
            {
                // Narrow the target to the first node matching the built path.
                var path = xpath(pathBuilder).GetPath();
                target = target.SelectSingleNode(path);
            }

            // Short-circuit keeps GetImplement from being called with a missing node.
            return target != null && HtmlHandler.GetImplement(_logPrefix, target) != null;
        }
Esempio n. 2
0
        /// <summary>
        /// Loads the HTML for the task registered under <paramref name="key"/> and retries on failure.
        /// </summary>
        /// <param name="key">Key into <c>_tasks</c> identifying the task data (URL, timeout, parser engine, optional filter).</param>
        /// <param name="retry">Number of retries still allowed; every failed attempt recurses with <c>retry - 1</c>.</param>
        /// <returns>
        /// The loaded content when the load status is <c>LoadStatusEnum.Success</c> and the content is
        /// non-empty and accepted by the optional filter; otherwise an empty string once
        /// <paramref name="retry"/> reaches zero.
        /// </returns>
        public async Task <string> GetHtmlAsync(string key, int retry)
        {
            var taskData   = _tasks[key];
            var webHandler = WebHandler.GetImplement(_logPrefix, taskData.ParserEnging, _proxy);

            Task task = Task.Run(() => webHandler.LoadHtml(taskData.Url, taskData.HttpTimeout), _cancellationToken);

            Logger.Info($"URL[{taskData.Url}] Parser Start. Retry[{taskData.Retry - retry}] use VPS[{webHandler.ProxyName}]");

            // The watchdog fires two seconds before the HTTP timeout. Task.Delay rejects negative
            // intervals with ArgumentOutOfRangeException, which would abandon the already-started
            // load without cancelling it, so clamp small timeouts to zero instead.
            var watchdog = TimeSpan.FromSeconds(taskData.HttpTimeout - 2);
            if (watchdog < TimeSpan.Zero)
            {
                watchdog = TimeSpan.Zero;
            }

            if (await Task.WhenAny(task, Task.Delay(watchdog, _cancellationToken)) != task)
            {
                Logger.Info($"URL[{taskData.Url}] Parser Failure, because timeout. Retry[{taskData.Retry - retry}] use VPS[{webHandler.ProxyName}]");
                webHandler.Cancel();
            }
            else if (task.IsFaulted)
            {
                // Reading task.Exception observes the fault so it is reported instead of being dropped silently;
                // the attempt then falls through to the normal failure/retry handling below.
                Logger.Info($"URL[{taskData.Url}] Parser Failure, because exception[{task.Exception.GetBaseException().Message}]. Retry[{taskData.Retry - retry}] use VPS[{webHandler.ProxyName}]");
            }

            var content = webHandler.Content;

            var parserResult = !string.IsNullOrEmpty(content);

            // The filter only runs on non-empty content and may veto an otherwise successful load.
            if (parserResult && taskData.Filter != null)
            {
                parserResult = taskData.Filter(HtmlHandler.GetImplement(_logPrefix, content), content);
            }

            if (webHandler.IsLoadSuccess == LoadStatusEnum.Success && parserResult)
            {
                Logger.Info($"URL[{taskData.Url}] Parser Success. Retry[{taskData.Retry - retry}] use VPS[{webHandler.ProxyName}]");
                return(content);
            }

            if (webHandler.IsLoadSuccess == LoadStatusEnum.Success && !parserResult)
            {
                Logger.Info($"URL[{taskData.Url}] Parser Success. but content filter failure. Retry[{taskData.Retry - retry}] use VPS[{webHandler.ProxyName}]");
            }

            if (retry > 0)
            {
                // Brief pause before the next attempt.
                await Task.Delay(1500);

                return(await GetHtmlAsync(key, (retry - 1)));
            }

            Logger.Info($"URL[{taskData.Url}] Parser Failure. use VPS[{webHandler.ProxyName}]");
            return("");
        }
Esempio n. 3
0
        /// <summary>
        /// Collects the processed inner HTML of a set of nodes, keyed by their zero-based position.
        /// </summary>
        /// <param name="xpath">
        /// Optional builder callback; when supplied, the path it produces selects the nodes relative
        /// to the current node. When <c>null</c>, the current node's child nodes are used.
        /// </param>
        /// <param name="strHandler">String callback forwarded to <c>StrHandle</c> together with each node's inner HTML.</param>
        /// <returns>
        /// A dictionary mapping each node's index to its processed inner HTML, or <c>null</c> when
        /// no nodes were found.
        /// </returns>
        public IDictionary <int, string> Gets(Func <XPathHandler, XPathHandler> xpath, Func <string, string> strHandler)
        {
            var pathBuilder = new XPathHandler();
            var root        = this._currentNode;
            var matches     = root.ChildNodes;

            if (xpath != null)
            {
                var path = xpath(pathBuilder).GetPath();
                matches = root.SelectNodes(path);
            }

            // Nothing selected: callers receive null rather than an empty dictionary.
            if (matches == null || matches.Count <= 0)
            {
                return null;
            }

            return matches
                   .Select(child => this.StrHandle(strHandler, HtmlHandler.GetImplement(_logPrefix, child).InnerHtml))
                   .Select((text, position) => new { position, text })
                   .ToDictionary(entry => entry.position, entry => entry.text);
        }
Esempio n. 4
0
        /// <summary>
        /// Reads the processed, trimmed inner HTML of a node and converts it to <typeparamref name="T"/>.
        /// </summary>
        /// <typeparam name="T">
        /// Target type of the conversion. Nullable value types such as <c>int?</c> are supported by
        /// converting to their underlying type.
        /// </typeparam>
        /// <param name="xpath">
        /// Optional builder callback; when supplied, the path it produces selects a single node
        /// relative to the current node. When <c>null</c>, the current node itself is read.
        /// </param>
        /// <param name="strHandler">String callback forwarded to <c>StrHandle</c> together with the node's inner HTML.</param>
        /// <returns>
        /// The converted value, or <c>default(T)</c> when no node was found or the processed text
        /// is null or empty after trimming.
        /// </returns>
        public T Get <T>(Func <XPathHandler, XPathHandler> xpath, Func <string, string> strHandler)
        {
            XPathHandler xpathHandler = new XPathHandler();

            var node = this._currentNode;

            if (xpath != null)
            {
                node = node.SelectSingleNode(xpath(xpathHandler).GetPath());
            }
            if (node == null)
            {
                return(default(T));
            }

            // The string handler may yield null; treat that like empty text instead of throwing on Trim().
            var result = this.StrHandle(strHandler, HtmlHandler.GetImplement(_logPrefix, node).InnerHtml)?.Trim();
            if (string.IsNullOrEmpty(result))
            {
                return(default(T));
            }

            // Convert.ChangeType cannot target Nullable<TValue> directly, so convert to the
            // underlying type; the cast to T then wraps the value back into the nullable.
            var targetType = Nullable.GetUnderlyingType(typeof(T)) ?? typeof(T);
            return((T)Convert.ChangeType(result, targetType));
        }