Пример #1
0
        /// <summary>
        /// 开始爬取高手操作数据
        /// </summary>
        /// <param name="pageNum"></param>
        /// <param name="loopTime"></param>
        public static Message Excute(int pageNum = 1, int loopTime = 0)
        {
            System.GC.Collect();
            LogUtility.LogAction(string.Format(@"爬取第{0}页", pageNum));

            isStopPoint = false;
            string _refererUrl = "http://group.eastmoney.com/Master.html?type=GSCZ";
            string url         = "http://group.eastmoney.com/findmaster{0}.html";

            IHttpProvider httpProvider = new HttpProvider();

            url = string.Format(url, pageNum);

            //Get请求方式
            HttpResponseParameter responseParameter1 = httpProvider.Excute(new HttpRequestParameter
            {
                Url        = url,
                RefererUrl = _refererUrl,
                IsPost     = false,
                Encoding   = Encoding.UTF8
            });
            string htmlContent = responseParameter1.Body;

            HtmlDocument htmlDoc = new HtmlDocument();

            htmlDoc.LoadHtml(htmlContent);
            string   nodeName  = "//tbody";
            HtmlNode tbodyNode = htmlDoc.DocumentNode.SelectSingleNode(nodeName);

            if (tbodyNode != null && !string.IsNullOrWhiteSpace(tbodyNode.InnerText.Trim()))
            {
                bool isSave = AnalysisHtml(htmlDoc, pageNum);

                #region 入库失败,重试

                if (!isSave && loopTime <= 3)
                {
                    Thread.Sleep(5000);
                    LogUtility.LogAction(string.Format(@"入库失败,重新爬取第{0}次", loopTime + 1));
                    Excute(pageNum, loopTime + 1);
                }
                if (loopTime > 3)
                {
                    LogUtility.LogAction(string.Format(@"已重新爬取{0}次,退出重试", loopTime + 1));
                }
                #endregion

                if (!isStopPoint)
                {
                    Excute(pageNum + 1);
                }
            }
            else
            {
                LogUtility.LogAction("爬取到最后一页,从头开始爬取");
                //加载停止点
                EastMoneyMain.InitInfo();
                //从第一页开始爬取
                Excute();
            }
            return(new Message());
        }
Пример #2
0
        /// <summary>
        /// 开始解析html,并入库
        /// </summary>
        /// <param name="htmlContent"></param>
        /// <returns></returns>
        private static bool AnalysisHtml(HtmlDocument htmlDoc, int pageNum)
        {
            List <EM_MasterOperation> masterOperation = new List <EM_MasterOperation>();

            #region 解析

            //获取所有节点为<tbody>的节点
            string   nodeName  = "//tbody";
            HtmlNode tbodyNode = htmlDoc.DocumentNode.SelectSingleNode(nodeName);
            if (tbodyNode != null)
            {
                var trNodes = tbodyNode.SelectNodes("tr");
                foreach (HtmlNode nodes in trNodes)
                {
                    var nodeItems = nodes.SelectNodes("td");
                    if (nodeItems != null)
                    {
                        var master = new EM_MasterOperation()
                        {
                            OpreDate         = Convert.ToDateTime(DateTime.Now.Year + "-" + nodeItems[0].InnerText),
                            GroupName        = nodeItems[1].InnerText,
                            GroupNameUrl     = "http://group.eastmoney.com/" + nodeItems[1].FirstChild.Attributes["href"].Value,
                            OpreType         = nodeItems[2].InnerText,
                            ZhengQuanName    = nodeItems[3].InnerText,
                            ZhengQuanNameUrl = nodeItems[3].FirstChild.Attributes["href"].Value,
                            MasterType       = nodeItems[4].InnerText,
                            MasterRate       = nodeItems[5].InnerText,
                            RiRate           = nodeItems[6].InnerText,
                            DealWinCnt       = nodeItems[7].InnerText,
                            WinCntRate       = nodeItems[8].InnerText,
                            ManageName       = nodeItems[9].InnerText,
                            ManageNameUrl    = "http://group.eastmoney.com/" + nodeItems[9].FirstChild.Attributes["href"].Value
                        };

                        #region 判断停止点


                        if (StopPoint.MasterOperationSP != null && ((master.OpreDate == StopPoint.MasterOperationSP.OpreDate && master.GroupName == StopPoint.MasterOperationSP.GroupName &&
                                                                     master.ZhengQuanName == StopPoint.MasterOperationSP.ZhengQuanName) || master.OpreDate < StopPoint.MasterOperationSP.OpreDate))
                        {
                            isStopPoint = true;
                            break;
                        }

                        #endregion

                        masterOperation.Add(master);
                    }
                }
            }
            #endregion

            //保存入库
            bool isSave = eastMoneyImp.SaveFindMast(masterOperation);
            if (isSave && isStopPoint && pageNum == 1 && masterOperation.Count > 0)
            {
                StopPoint.MasterOperationSP = masterOperation.First();
            }
            else if (isSave && isStopPoint && masterOperation.Count > 0)
            {
                EastMoneyMain.InitInfo();
            }

            return(isSave);
        }