示例#1
0
#pragma warning disable 1998
        /// <summary>
        /// Builds an <see cref="InvestEvt"/> from the markup of a single event-list row.
        /// </summary>
        /// <param name="sn">Event serial number; stored on the result and used to build its URL.</param>
        /// <param name="html">Row markup, walked cell by cell with <c>_regexTd</c>.</param>
        /// <returns>The populated event.</returns>
        protected override async Task <BaseEvt> ParseHtml(string sn, string html)
#pragma warning restore 1998
        {
            // Row layout this method was written against (one <td> per cell):
            //   <tr class="table-plate3" data-url="//www.cyzone.cn/event/489229.html">
            //     cell 0  <td class="tp1">      logo image                      (skipped)
            //     cell 1  <td class="tp2">      title link, then company legal name on its own line
            //     cell 2  <td class="tp-mean">  <div class="money"> amount
            //     cell 3  <td>                  funding round
            //     cell 4  <td class="tp3">      investor link(s)
            //     cell 5  <td>                  industry link
            //     cell 6  <td>                  date, e.g. 2019-01-03
            //     cell 7  <td>                  "details" link                  (not read)
            //   </tr>
            // NOTE(review): assumes _regexTd yields one match per <td>; no match is checked
            // for success, so a row with an empty cell 1 would fail on the indexer below.
            var result = new InvestEvt
            {
                Sn  = sn,
                Url = $"{domain}/event/{sn}.html",
            };

            // The first cell is the image, so step straight past it.
            var cell = _regexTd.Match(html).NextMatch();

            // Cell 1 carries two lines: the first is the title, the last is the company name.
            var lineBreaks = new[] { '\r', '\n' };
            var nameLines  = TrimVal(cell).Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);
            result.Title      = nameLines[0];
            result.RecieveEnt = nameLines[nameLines.Length - 1];

            cell         = cell.NextMatch();
            result.Money = TrimVal(cell);

            cell          = cell.NextMatch();
            result.Rounds = TrimVal(cell);

            cell          = cell.NextMatch();
            result.PayEnt = TrimVal(cell);

            cell            = cell.NextMatch();
            result.Industry = TrimVal(cell);

            cell        = cell.NextMatch();
            result.Date = TrimVal(cell);

            return result;
        }
        /// <summary>
        /// Fills <paramref name="evt"/> from the HTML of an event detail page.
        /// </summary>
        /// <remarks>
        /// Parsing starts at the page marker literal (Chinese for "investment event details"),
        /// takes the first <c>_regexMain</c> match from there, and reads the title with
        /// <c>_regexTitle</c> and six positional values with <c>_regexData</c> from that match.
        /// The values are assigned, in order, to RecieveEnt, PayEnt, Money, Rounds, Date and Industry.
        /// </remarks>
        /// <param name="evt">Event to populate; its <c>Url</c> is also used in log messages.</param>
        /// <param name="html">Raw page HTML. A null value is treated like a page without the marker.</param>
        /// <returns>
        /// <c>true</c> when the title and all six values were matched. Otherwise <c>false</c>,
        /// after logging through <c>Util.Error</c>; <paramref name="evt"/> is then left unmodified.
        /// </returns>
        internal bool RealParse(InvestEvt evt, string html)
        {
            // Number of positional values expected from _regexData.
            const int FieldCount = 6;

            // A null page would otherwise surface as a NullReferenceException from IndexOf.
            var idx = html == null ? -1 : html.IndexOf("投资事件详情", StringComparison.Ordinal);

            if (idx < 0)
            {
                Util.Error($"no data: {evt.Url}: {html}");
                return false;
            }

            var match = _regexMain.Match(html, idx);

            if (!match.Success)
            {
                Util.Error($"no match data: {evt.Url}: {html}");
                return false;
            }

            var dataStr  = match.Value;
            var matTitle = _regexTitle.Match(dataStr);

            if (!matTitle.Success)
            {
                Util.Error($"no match title: {evt.Url}: {dataStr}");
                return false;
            }
            var title = TrimVal(matTitle, 1);

            // Values are identified purely by position, so every match has to be verified:
            // NextMatch() on an exhausted sequence yields a failed match whose groups are empty,
            // which would otherwise be stored silently as blank values.
            var values  = new string[FieldCount];
            var matData = _regexData.Match(dataStr);

            for (var i = 0; i < FieldCount; i++)
            {
                if (i > 0)
                {
                    matData = matData.NextMatch();
                }

                if (!matData.Success)
                {
                    Util.Error(i == 0
                               ? $"no match dataStr: {evt.Url}: {dataStr}"
                               : $"only {i} of {FieldCount} data fields matched: {evt.Url}: {dataStr}");
                    return false;
                }
                values[i] = TrimVal(matData, 1);
            }

            // Everything parsed; only now touch the event so a failure never leaves it half-filled.
            evt.Title      = title;
            evt.RecieveEnt = values[0];
            evt.PayEnt     = values[1];
            evt.Money      = values[2];
            evt.Rounds     = values[3];
            // Rewrite the year/month/day suffix characters into a dash-separated date.
            evt.Date       = values[4].Replace("年", "-").Replace("月", "-").Replace("日", "");
            evt.Industry   = values[5];
            return true;
        }
        /// <summary>
        /// Creates an <see cref="InvestEvt"/> for <paramref name="sn"/>, points it at the detail
        /// URL built from <c>domain</c>, and fills it through the single-event ParseHtml overload.
        /// </summary>
        /// <param name="sn">Event serial number; stored on the event and embedded in its URL.</param>
        /// <param name="html">Not read by this override.</param>
        /// <returns>The populated event, or <c>null</c> when the overload reports failure.</returns>
        protected override async Task <BaseEvt> ParseHtml(string sn, string html)
        {
            var detail = new InvestEvt
            {
                Sn  = sn,
                Url = $"{domain}/inv/show{sn}/",
            };

            var parsed = await ParseHtml(detail);

            return parsed ? detail : null;
        }
        /// <summary>
        /// Downloads the page at <c>evt.Url</c> and passes it to <c>RealParse</c> to fill
        /// <paramref name="evt"/>.
        /// </summary>
        /// <param name="evt">Event to populate; its <c>Url</c> must already be set.</param>
        /// <returns>
        /// <c>false</c> when the download throws (the exception is logged through
        /// <c>Util.Error</c>); otherwise whatever <c>RealParse</c> returns.
        /// </returns>
        /// <exception cref="ArgumentException"><c>evt.Url</c> is null or empty.</exception>
        async Task <bool> ParseHtml(InvestEvt evt)
        {
            var url = evt.Url;

            if (string.IsNullOrEmpty(url))
            {
                throw new ArgumentException("未赋值Url,无法执行");
            }

            string page;

            try
            {
                page = await Util.GetPage(url);
            }
            catch (Exception fetchError)
            {
                // Best effort: a failed download is reported and skipped, never rethrown.
                Util.Error($"error: {url}: {fetchError}");
                return false;
            }

            // Parsing stays outside the try block so its own exceptions are not swallowed.
            return RealParse(evt, page);
        }
示例#5
0
        /// <summary>
        /// Downloads the page at <c>evt.Url</c> and fills <paramref name="evt"/> from it.
        /// </summary>
        /// <remarks>
        /// Parsing starts at the page marker literal (Chinese for "investment event details"),
        /// takes the first <c>_regexMain</c> match from there, and reads the title with
        /// <c>_regexTitle</c> and six positional values with <c>_regexData</c> from that match.
        /// The values are assigned, in order, to RecieveEnt, PayEnt, Money, Rounds, Date and Industry.
        /// </remarks>
        /// <param name="evt">Event to populate; its <c>Url</c> must already be set.</param>
        /// <returns>
        /// <c>true</c> when the title and all six values were matched. Otherwise <c>false</c>,
        /// after logging through <c>Util.Error</c> (download failure, missing marker, or any
        /// missing match); <paramref name="evt"/> is then left unmodified.
        /// </returns>
        /// <exception cref="ArgumentException"><c>evt.Url</c> is null or empty.</exception>
        async Task <bool> ParseHtml(InvestEvt evt)
        {
            // Number of positional values expected from _regexData.
            const int FieldCount = 6;

            if (string.IsNullOrEmpty(evt.Url))
            {
                throw new ArgumentException("未赋值Url,无法执行");
            }
            string html;

            try
            {
                html = await Util.GetPage(evt.Url);
            }
            catch (Exception exp)
            {
                // Best effort: a failed download is reported and skipped, never rethrown.
                Util.Error($"error: {evt.Url}: {exp}");
                return false;
            }

            // A null page would otherwise surface as a NullReferenceException from IndexOf.
            var idx = html == null ? -1 : html.IndexOf("投资事件详情", StringComparison.Ordinal);

            if (idx < 0)
            {
                Util.Error($"no data: {evt.Url}: {html}");
                return false;
            }

            var match = _regexMain.Match(html, idx);

            if (!match.Success)
            {
                Util.Error($"no match data: {evt.Url}: {html}");
                return false;
            }

            var dataStr  = match.Value;
            var matTitle = _regexTitle.Match(dataStr);

            if (!matTitle.Success)
            {
                Util.Error($"no match title: {evt.Url}: {dataStr}");
                return false;
            }
            var title = TrimVal(matTitle, 1);

            // Values are identified purely by position, so every match has to be verified:
            // NextMatch() on an exhausted sequence yields a failed match whose groups are empty,
            // which would otherwise be stored silently as blank values.
            var values  = new string[FieldCount];
            var matData = _regexData.Match(dataStr);

            for (var i = 0; i < FieldCount; i++)
            {
                if (i > 0)
                {
                    matData = matData.NextMatch();
                }

                if (!matData.Success)
                {
                    Util.Error(i == 0
                               ? $"no match dataStr: {evt.Url}: {dataStr}"
                               : $"only {i} of {FieldCount} data fields matched: {evt.Url}: {dataStr}");
                    return false;
                }
                values[i] = TrimVal(matData, 1);
            }

            // Everything parsed; only now touch the event so a failure never leaves it half-filled.
            evt.Title      = title;
            evt.RecieveEnt = values[0];
            evt.PayEnt     = values[1];
            evt.Money      = values[2];
            evt.Rounds     = values[3];
            evt.Date       = values[4];
            evt.Industry   = values[5];
            return true;
        }