#pragma warning disable 1998
/// <summary>
/// Builds an <see cref="InvestEvt"/> from the HTML of a single row of the event list.
/// </summary>
/// <param name="sn">Event serial number; stored in <c>Sn</c> and used to build the event URL.</param>
/// <param name="html">HTML of one table row (see the sample inside the method body).</param>
/// <returns>The populated event.</returns>
/// <exception cref="FormatException">
/// The title/company cell produced no text, so neither the title nor the receiving company can be read.
/// </exception>
protected override async Task<BaseEvt> ParseHtml(string sn, string html)
#pragma warning restore 1998
{
    /*
     * Sample of one row as served by the site:
     *
     * <tr class="table-plate3" data-url="//www.cyzone.cn/event/489229.html">
     *   <td class="tp1">
     *     <a href="//www.cyzone.cn/company/292562.html"><img src="//oss.cyzone.cn/2015/0601/20150601023926587.png?x-oss-process=image/resize,w_140,h_140,limit_0"></a>
     *   </td>
     *   <td class="tp2">
     *     <span class="tp2_tit"><a href="//www.cyzone.cn/company/292562.html" target="_blank">云迹科技</a></span><br>
     *     <span class="tp2_com">北京云迹科技有限公司</span>
     *   </td>
     *   <td class="tp-mean">
     *     <div class="money">未公开</div>
     *   </td>
     *   <td>战略投资</td>
     *   <td class="tp3" title="292562">
     *     <a href="//www.cyzone.cn/capital/201245.html" target="_blank">携程</a><br></td>
     *   <td><a href="//www.cyzone.cn/event/list-3551-1-0-0-0-0/">人工智能</a><br>
     *   </td>
     *   <td>2019-01-03</td>
     *   <td><a href="//www.cyzone.cn/event/489229.html" class="show-detail" target="_blank" rel="nofollow">详情</a></td>
     * </tr>
     */
    var evt = new InvestEvt();
    evt.Sn = sn;
    evt.Url = $"{domain}/event/{sn}.html";

    // The first match is the image cell, so it is skipped.
    var match = _regexTd.Match(html);
    match = match.NextMatch();

    // Second cell: short title and full company name, separated by line breaks.
    var titleAndEnt = TrimVal(match).Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
    if (titleAndEnt.Length == 0)
    {
        // Without this guard an empty cell surfaced as a bare IndexOutOfRangeException
        // with no hint about which event was malformed.
        throw new FormatException($"no title/company text in row: {evt.Url}");
    }

    evt.Title = titleAndEnt[0];
    // When the cell has a single line, title and company are the same entry.
    evt.RecieveEnt = titleAndEnt[titleAndEnt.Length - 1];

    // Remaining cells are read in row order: money, round, investor, industry, date.
    match = match.NextMatch();
    evt.Money = TrimVal(match);

    match = match.NextMatch();
    evt.Rounds = TrimVal(match);

    match = match.NextMatch();
    evt.PayEnt = TrimVal(match);

    match = match.NextMatch();
    evt.Industry = TrimVal(match);

    match = match.NextMatch();
    evt.Date = TrimVal(match);

    return evt;
}
/// <summary>
/// Fills <paramref name="evt"/> from the HTML of an event detail page.
/// </summary>
/// <param name="evt">Event to populate; its <c>Url</c> is only used in error messages here.</param>
/// <param name="html">HTML of the detail page.</param>
/// <returns>
/// <c>true</c> when the title and the first data field were found; <c>false</c> (after reporting
/// through <c>Util.Error</c>) when the anchor text, the main section, the title or the data is missing.
/// </returns>
internal bool RealParse(InvestEvt evt, string html)
{
    // Every failure path reports the message and yields false.
    Func<string, bool> fail = message =>
    {
        Util.Error(message);
        return false;
    };

    // Parsing starts at the "投资事件详情" (investment event details) marker text.
    var anchor = html.IndexOf("投资事件详情", StringComparison.Ordinal);
    if (anchor < 0)
    {
        return fail($"no data: {evt.Url}: {html}");
    }

    var mainMatch = _regexMain.Match(html, anchor);
    if (!mainMatch.Success)
    {
        return fail($"no match data: {evt.Url}: {html}");
    }

    var section = mainMatch.Value;

    var titleMatch = _regexTitle.Match(section);
    if (!titleMatch.Success)
    {
        return fail($"no match title: {evt.Url}: {section}");
    }

    evt.Title = TrimVal(titleMatch, 1);

    var field = _regexData.Match(section);
    if (!field.Success)
    {
        return fail($"no match dataStr: {evt.Url}: {section}");
    }

    // Data fields are consumed in page order:
    // receiving company, investor, amount, round, date, industry.
    evt.RecieveEnt = TrimVal(field, 1);

    field = field.NextMatch();
    evt.PayEnt = TrimVal(field, 1);

    field = field.NextMatch();
    evt.Money = TrimVal(field, 1);

    field = field.NextMatch();
    evt.Rounds = TrimVal(field, 1);

    // Turn the "年/月/日" separators into a dash-separated date.
    field = field.NextMatch();
    var rawDate = TrimVal(field, 1);
    evt.Date = rawDate.Replace("年", "-").Replace("月", "-").Replace("日", "");

    field = field.NextMatch();
    evt.Industry = TrimVal(field, 1);

    return true;
}
/// <summary>
/// Creates the event for <paramref name="sn"/> and fills it via <c>ParseHtml(InvestEvt)</c>.
/// </summary>
/// <param name="sn">Event serial number; stored in <c>Sn</c> and used to build the detail URL.</param>
/// <param name="html">Not read by this override.</param>
/// <returns>The populated event, or <c>null</c> when <c>ParseHtml(InvestEvt)</c> returns <c>false</c>.</returns>
protected override async Task<BaseEvt> ParseHtml(string sn, string html)
{
    var investEvt = new InvestEvt
    {
        Sn = sn,
        Url = $"{domain}/inv/show{sn}/",
    };

    var parsed = await ParseHtml(investEvt);
    return parsed ? investEvt : null;
}
/// <summary>
/// Fetches the detail page of a single event and parses it into <paramref name="evt"/>.
/// </summary>
/// <param name="evt">Event whose <c>Url</c> is already set; receives the parsed fields.</param>
/// <returns>
/// <c>false</c> when fetching the page throws (the exception is reported through <c>Util.Error</c>);
/// otherwise the result of <c>RealParse</c>.
/// </returns>
/// <exception cref="ArgumentException"><c>evt.Url</c> is null or empty.</exception>
async Task<bool> ParseHtml(InvestEvt evt)
{
    if (string.IsNullOrEmpty(evt.Url))
    {
        throw new ArgumentException("未赋值Url,无法执行");
    }

    string pageHtml;
    try
    {
        pageHtml = await Util.GetPage(evt.Url);
    }
    catch (Exception fetchError)
    {
        // A failed download is reported and turned into a false result.
        Util.Error($"error: {evt.Url}: {fetchError}");
        return false;
    }

    // Parsing stays outside the try block so its exceptions are not swallowed here.
    return RealParse(evt, pageHtml);
}
/// <summary>
/// Fetches the detail page of a single event and parses it into <paramref name="evt"/>.
/// </summary>
/// <param name="evt">Event whose <c>Url</c> is already set; receives the parsed fields.</param>
/// <returns>
/// <c>true</c> when the page was fetched and the title and first data field were found;
/// <c>false</c> (after reporting through <c>Util.Error</c>) when the download throws or when the
/// anchor text, the main section, the title or the data is missing.
/// </returns>
/// <exception cref="ArgumentException"><c>evt.Url</c> is null or empty.</exception>
async Task<bool> ParseHtml(InvestEvt evt)
{
    if (string.IsNullOrEmpty(evt.Url))
    {
        throw new ArgumentException("未赋值Url,无法执行");
    }

    string html;
    try
    {
        html = await Util.GetPage(evt.Url);
    }
    catch (Exception exp)
    {
        // A failed download is reported and turned into a false result.
        Util.Error($"error: {evt.Url}: {exp}");
        return false;
    }

    // Parsing starts at the "投资事件详情" (investment event details) marker text.
    var idx = html.IndexOf("投资事件详情", StringComparison.Ordinal);
    if (idx < 0)
    {
        Util.Error($"no data: {evt.Url}: {html}");
        return false;
    }

    var match = _regexMain.Match(html, idx);
    if (!match.Success)
    {
        Util.Error($"no match data: {evt.Url}: {html}");
        return false;
    }

    var dataStr = match.Value;

    var matTitle = _regexTitle.Match(dataStr);
    if (!matTitle.Success)
    {
        Util.Error($"no match title: {evt.Url}: {dataStr}");
        return false;
    }

    evt.Title = TrimVal(matTitle, 1);

    var matData = _regexData.Match(dataStr);
    if (!matData.Success)
    {
        Util.Error($"no match dataStr: {evt.Url}: {dataStr}");
        return false;
    }

    // Data fields are consumed in page order:
    // receiving company, investor, amount, round, date, industry.
    evt.RecieveEnt = TrimVal(matData, 1);

    matData = matData.NextMatch();
    evt.PayEnt = TrimVal(matData, 1);

    matData = matData.NextMatch();
    evt.Money = TrimVal(matData, 1);

    matData = matData.NextMatch();
    evt.Rounds = TrimVal(matData, 1);

    // Turn the "年/月/日" separators into a dash-separated date so the stored value has
    // the same format as the one produced by RealParse; a value that is already
    // dash-separated passes through unchanged.
    matData = matData.NextMatch();
    evt.Date = TrimVal(matData, 1).Replace("年", "-").Replace("月", "-").Replace("日", "");

    matData = matData.NextMatch();
    evt.Industry = TrimVal(matData, 1);

    return true;
}