Beispiel #1
0
        /// <summary>
        /// Collects the books listed on one category page: downloads the page, extracts
        /// the book links from it, and runs a <c>WorkerController</c> for each link.
        /// </summary>
        /// <param name="o">
        /// A boxed <c>KeyValuePair&lt;string, string&gt;</c> whose key is the category page URL
        /// and whose value is the category name. Any other runtime type makes the cast throw
        /// <see cref="InvalidCastException"/>.
        /// </param>
        private static void _startCollectionTaskSort(object o)
        {
            KeyValuePair<string, string> pair = (KeyValuePair<string, string>)o;
            string sortUrl  = pair.Key;
            string sortName = pair.Value;

            // Download the category page through the loader's GBK request method.
            URLLoader UrlLoader = new URLLoader();
            string    result    = UrlLoader.RequestByGBK(sortUrl);

            // LoadHtml throws on null, and an empty page has no links to extract.
            if (string.IsNullOrEmpty(result))
            {
                return;
            }

            HtmlDocument document = new HtmlDocument();
            document.LoadHtml(result);

            // SelectNodes returns null (not an empty collection) when the XPath matches
            // nothing, so it must be checked before it is enumerated.
            HtmlNodeCollection linkNodes =
                document.DocumentNode.SelectNodes("//div[@class='title-info']/h2");

            if (linkNodes == null)
            {
                return;
            }

            // Gather every link first so that all parsing is finished before any worker runs.
            List<string> list = new List<string>();

            foreach (var liNode in linkNodes)
            {
                var url = liNode.SelectSingleNode("a")?.GetAttributeValue("href", "");

                // Headings without an anchor (null) or without an href ("") give no usable URL.
                if (!string.IsNullOrEmpty(url))
                {
                    list.Add(url);
                }
            }

            // `interval` is defined outside this method and is passed through unchanged.
            foreach (var item in list)
            {
                WorkerController controller = new WorkerController(sortName, item);
                controller.Execute(interval);
            }
        }
Beispiel #2
0
        /************************************/

        /// <summary>
        /// Resolves the request URL from <c>InternalUrl</c>, downloads the page, and passes
        /// a non-empty response to <c>parseHtmlString</c>.
        /// </summary>
        /// <param name="isUTF8">
        /// <c>true</c> to request via <c>RequestByUTF8</c>; <c>false</c> to request via
        /// <c>RequestByGBK</c>.
        /// </param>
        /// <exception cref="Exception">
        /// Thrown when <c>getRealUrl</c> returns a null or empty URL.
        /// </exception>
        public virtual void Load(bool isUTF8)
        {
            // A "POST:[...]" segment inside the URL supplies the text given to SetPostBody.
            Match postMatch = new Regex(@"POST:\[(.+?)\]").Match(InternalUrl);

            if (postMatch.Success)
            {
                SetPostBody(postMatch.Groups[1].Value);
            }

            InternalRealUrl = getRealUrl(InternalUrl);

            if (string.IsNullOrEmpty(InternalRealUrl))
            {
                throw new Exception("没有初始化Url设置");
            }

            // Create a loader only when none has been assigned yet.
            if (UrlLoader == null)
            {
                UrlLoader = new URLLoader();
            }

            string html = isUTF8
                ? UrlLoader.RequestByUTF8(InternalRealUrl)
                : UrlLoader.RequestByGBK(InternalRealUrl);

            // An empty response is ignored without any further action.
            if (string.IsNullOrEmpty(html))
            {
                return;
            }

            parseHtmlString(html);
        }
Beispiel #3
0
        /************************************/

        /// <summary>
        /// Resolves the request URL from <c>InternalUrl</c>, downloads the page, and passes
        /// a non-empty response to <c>parseHtmlString</c>. An empty response, or a parse
        /// failure outside debug mode, sets <c>FinalData</c> to null and is logged.
        /// </summary>
        /// <param name="isUTF8">
        /// <c>true</c> to request via <c>RequestByUTF8</c>; <c>false</c> to request via
        /// <c>RequestByGBK</c>.
        /// </param>
        /// <param name="bookId">Forwarded unchanged to <c>parseHtmlString</c>.</param>
        /// <exception cref="Exception">
        /// Thrown when <c>getRealUrl</c> returns a null or empty URL. When
        /// <c>PluginGeneral.DEBUG_MODE</c> is set, exceptions from <c>parseHtmlString</c>
        /// also propagate to the caller.
        /// </exception>
        public virtual void Load(bool isUTF8, int bookId = 0)
        {
            // A "POST:[...]" segment inside the URL supplies the text given to SetPostBody.
            Match postMatch = new Regex(@"POST:\[(.+?)\]").Match(InternalUrl);

            if (postMatch.Success)
            {
                SetPostBody(postMatch.Groups[1].Value);
            }

            InternalRealUrl = getRealUrl(InternalUrl);

            if (string.IsNullOrEmpty(InternalRealUrl))
            {
                throw new Exception("没有初始化Url设置");
            }

            // Create a loader only when none has been assigned yet.
            if (UrlLoader == null)
            {
                UrlLoader = new URLLoader();
            }

            Log.ShowLine("请求:" + InternalRealUrl, ConsoleColor.DarkGray);

            string html = isUTF8
                ? UrlLoader.RequestByUTF8(InternalRealUrl)
                : UrlLoader.RequestByGBK(InternalRealUrl);

            // Failure path: nothing came back, so clear the result and report it.
            if (string.IsNullOrEmpty(html))
            {
                FinalData = null;
                Log.ShowLine(InternalRealUrl + " 错误~!", ConsoleColor.Red);
                _logger.FatalFormat("访问网页:{0}。出错。返回内容为空", InternalRealUrl);
                return;
            }

            // Detect the page type from the content only when it has not been set already.
            if (PageType == PageTypeEnum.NONE)
            {
                PageType = PageFeature.MatchHtml(html);
            }

            // In debug mode the parse runs unguarded so exceptions reach the caller.
            if (PluginGeneral.DEBUG_MODE)
            {
                parseHtmlString(html, bookId);
                return;
            }

            try
            {
                parseHtmlString(html, bookId);
            }
            catch (Exception exp)
            {
                FinalData = null;
                Log.ShowLine(exp, ConsoleColor.Red);
                _logger.FatalFormat("访问网页:{0}。出错{1}。返回内容为空", InternalRealUrl, exp.Message);
            }
        }