/// <summary>
/// Rebuilds an HTML document string from the first node matching the tag name "TR"
/// in <paramref name="sourceHtml"/>.
/// </summary>
/// <remarks>
/// The first child of that node is emitted verbatim after the opening html tag, the second
/// child is treated as the body, and the first child of the body is treated as a wrapper:
/// its <c>GetText()</c> becomes the opening tag and only its tag children are copied.
/// The wrapper is always closed with a literal <c>&lt;/div&gt;</c>.
/// </remarks>
/// <param name="sourceHtml">HTML text; line breaks are stripped before parsing.</param>
/// <returns>The reassembled HTML document.</returns>
private string HtmlText(string sourceHtml)
{
    string flattened = sourceHtml.Replace(System.Environment.NewLine, "");
    hParser.Parser htmlParser = hParser.Parser.CreateParser(flattened, "utf-8");

    hParser.NodeFilter rowFilter = new TagNameFilter("TR");
    hParser.INode root = htmlParser.Parse(rowFilter)[0];

    string headHtml = root.Children[0].ToHtml();
    hParser.INode bodyNode = root.Children[1];
    hParser.INode wrapper = bodyNode.Children[0];

    // Collect the HTML of the wrapper's tag children only; text nodes are dropped.
    StringBuilder innerHtml = new StringBuilder();
    int position = 0;
    while (position < wrapper.Children.Count)
    {
        hParser.INode child = wrapper.Children[position];
        if (child is hParser.ITag)
        {
            innerHtml.Append(child.ToHtml());
        }
        position++;
    }

    StringBuilder document = new StringBuilder();
    document.Append("<html>");
    document.Append(headHtml);
    document.Append("<body>");
    document.Append("<" + wrapper.GetText() + ">");
    document.Append(innerHtml.ToString());
    document.Append("</div>");
    document.Append("</body>");
    document.Append("</html>");
    return document.ToString();
}
/// <summary>
/// Parses <paramref name="htmlContent"/>, locates the single node matching
/// <c>articleFilter</c>, and appends one extracted link to <c>blogLinkList</c> for every
/// descendant matching <c>wrapFilter</c> that has exactly one direct anchor child.
/// Mismatches are reported on the console.
/// </summary>
/// <param name="htmlContent">HTML text of the page to scan.</param>
static void GetBlogLink(string htmlContent)
{
    Parser htmlParser = new Parser(new Lexer(htmlContent));
    NodeList articles = htmlParser.Parse(articleFilter);

    if (articles.Count != 1)
    {
        Console.WriteLine("获取包含日志列表出错!");
        return;
    }

    NodeList wrappers = articles[0].Children.ExtractAllNodesThatMatch(wrapFilter, true);
    for (int entry = 0; entry < wrappers.Count; entry++)
    {
        // Non-recursive search: only anchors that are direct children of the wrapper count.
        NodeList anchors = wrappers[entry].Children.ExtractAllNodesThatMatch(new NodeClassFilter(typeof(ATag)), false);
        if (anchors.Count != 1)
        {
            Console.WriteLine("第" + entry + "个条目中,判断链接出错!");
            continue;
        }

        ATag anchor = (ATag)anchors[0];
        blogLinkList.Add(anchor.ExtractLink());
    }
}
/// <summary>
/// Returns the plain text of the first <paramref name="tag"/> element that has an attribute
/// whose name contains <paramref name="attribute"/> and whose value equals
/// <paramref name="attValue"/>.
/// </summary>
/// <param name="html">HTML text to search.</param>
/// <param name="tag">Tag name used to pre-filter the document.</param>
/// <param name="attribute">Attribute name; matched as a substring of the stored key.</param>
/// <param name="attValue">Exact attribute value to look for.</param>
/// <returns>The plain text inside the matching tag, or <c>null</c> when nothing matches.</returns>
public static string getValue(string html, string tag, string attribute, string attValue)
{
    Parser parser = new Parser(new Lexer(html));
    NodeList nodeList = parser.Parse(new TagNameFilter(tag));

    for (int i = 0; i < nodeList.Count; i++)
    {
        ITag tagNode = nodeList[i] as ITag;
        // Skip anything that is not a tag or carries no attributes instead of dereferencing null.
        if (tagNode == null || tagNode.Attributes == null || tagNode.Attributes.Count == 0)
        {
            continue;
        }

        foreach (string key in tagNode.Attributes.Keys)
        {
            // NOTE(review): presumably the parser's pseudo-entry holding the tag name itself - confirm.
            if (key.Contains("<TAGNAME>"))
            {
                continue;
            }

            if (!key.Contains(attribute))
            {
                continue;
            }

            object rawValue = tagNode.Attributes[key];
            if (rawValue != null && rawValue.ToString() == attValue)
            {
                return tagNode.ToPlainTextString();
            }
        }
    }

    return null;
}
/// <summary>
/// Replaces the stored 1x2 odds history of a schedule with rows scraped from the
/// history page of each listed company.
/// </summary>
/// <remarks>
/// Nothing is written when the two id lists differ in length. Otherwise the existing rows
/// for <paramref name="scheduleID"/> are deleted first and the pages are processed one by
/// one, so a failure part-way leaves the schedule partially populated.
/// </remarks>
/// <param name="scheduleID">Schedule identifier; parsed as an integer for each stored row.</param>
/// <param name="companyids">Comma-separated company ids.</param>
/// <param name="historyids">Comma-separated history ids, matched to <paramref name="companyids"/> by position.</param>
/// <param name="time">Cut-off; rows whose computed timestamp is later than this are skipped.</param>
/// <returns>The string form of a <c>JSONHelper</c> with <c>success = true</c> and the number of rows added.</returns>
public string Add(string scheduleID, string companyids, string historyids, DateTime time)
{
    WebClientBLL bll = new WebClientBLL();
    string[] companyidArr = companyids.Split(',');
    string[] historyidArr = historyids.Split(',');
    // Odds are machine-readable numbers; parse them independently of the server's UI culture.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    int count = 0;

    if (companyidArr.Length == historyidArr.Length)
    {
        dal.Delete(scheduleID);
        for (int i = 0; i < companyidArr.Length; i++)
        {
            string s = bll.GetOddsHistoryContent(historyidArr[i]);
            Lexer lexer = new Lexer(s);
            Parser parser = new Parser(lexer);
            NodeList bodyNodes = parser.Parse(new TagNameFilter("HTML"))[0].Children.ExtractAllNodesThatMatch(new TagNameFilter("BODY"))[0].Children;
            ITag table = bodyNodes.SearchFor(typeof(Winista.Text.HtmlParser.Tags.TableTag))[0] as ITag;
            NodeList tableRows = table.Children.SearchFor(typeof(Winista.Text.HtmlParser.Tags.TableRow));

            for (int f = 0; f < tableRows.Count; f++)
            {
                ITag row = tableRows[f] as ITag;
                // Constant-first Equals: a row without ALIGN/BGCOLOR yields null from the
                // attribute lookup and is skipped instead of throwing.
                if (row == null
                    || !"center".Equals(row.Attributes["ALIGN"])
                    || !"#FFFFFF".Equals(row.Attributes["BGCOLOR"]))
                {
                    continue;
                }

                Odds1x2History model = new Odds1x2History();
                model.companyid = int.Parse(companyidArr[i]);
                model.scheduleid = int.Parse(scheduleID);
                model.home = float.Parse(row.Children[0].ToPlainTextString(), invariant);
                model.draw = float.Parse(row.Children[1].ToPlainTextString(), invariant);
                model.away = float.Parse(row.Children[2].ToPlainTextString(), invariant);
                this.FillOdds1x2History(model);

                // Fourth cell holds a "showtime(y,m,d,h,mi,s)" call; only the first two
                // characters of the month field are used.
                string[] t2 = row.Children[3].ToPlainTextString().Replace("showtime(", "").Replace(")", "").Split(',');
                int yy = int.Parse(t2[0]);
                int mm = int.Parse(t2[1].Remove(2));
                int dd = int.Parse(t2[2]);
                int hh = int.Parse(t2[3]);
                int mi = int.Parse(t2[4]);
                int ss = int.Parse(t2[5]);
                // Page time is treated as UTC and shifted by eight hours.
                model.time = new DateTime(yy, mm, dd, hh, mi, ss, DateTimeKind.Utc).AddHours(8d);
                if (model.time > time)
                {
                    continue;
                }

                dal.Add(model);
                count++;
            }
        }
    }

    JSONHelper json = new JSONHelper();
    json.success = true;
    json.totlalCount = count;
    return json.ToString();
}
/// <summary>
/// Reads the local catalogue page "catalogue.htm", downloads every chapter it links to from
/// www.mlxiaoshuo.com, and writes the paragraph text of each chapter to "革命逸事.txt".
/// </summary>
/// <remarks>
/// All readers, responses and the output writer are disposed deterministically, including
/// when a download or parse step throws. An empty catalogue produces an empty output file.
/// </remarks>
static void GetStoryOfRevolution()
{
    string catalogueHtml;
    using (StreamReader reader = new StreamReader("catalogue.htm"))
    {
        catalogueHtml = reader.ReadToEnd();
    }

    Lexer lexer = new Lexer(catalogueHtml);
    Parser parser = new Parser(lexer);

    // Chapter links: anchors with the link class that sit (at any depth) under the chapter list.
    HasAttributeFilter linkFilterByParent = new HasAttributeFilter("class", "row zhangjieUl");
    HasAttributeFilter linkFilterByClass = new HasAttributeFilter("class", "fontStyle2 colorStyleLink");
    AndFilter linkFilter = new AndFilter(new HasParentFilter(linkFilterByParent, true), linkFilterByClass);
    NodeList linkNodeList = parser.Parse(linkFilter);

    List<string> chapterHtmlContentList = new List<string>(linkNodeList.Size());
    for (int i = 0; i < linkNodeList.Size(); i++)
    {
        ATag linkNode = (ATag)linkNodeList[i];
        HttpWebRequest httpWebRequest = HttpWebRequest.CreateHttp("http://www.mlxiaoshuo.com" + linkNode.Link);

        using (WebResponse response = httpWebRequest.GetResponse())
        using (StreamReader chapterReader = new StreamReader(new BufferedStream(response.GetResponseStream(), 4 * 200 * 1024)))
        {
            chapterHtmlContentList.Add(chapterReader.ReadToEnd());
        }

        Console.WriteLine("第" + (i + 1) + "个页面获取完毕!");
    }

    HasAttributeFilter praghFilter = new HasAttributeFilter("class", "textP fontStyle2 colorStyleText");
    using (StreamWriter writer = new StreamWriter("革命逸事.txt"))
    {
        for (int i = 0; i < chapterHtmlContentList.Count; i++)
        {
            writer.WriteLine("第" + (i + 1) + "章");

            lexer = new Lexer(chapterHtmlContentList[i]);
            parser = new Parser(lexer);
            NodeList praghNodeList = parser.Parse(praghFilter);
            if (praghNodeList.Size() != 1)
            {
                Console.WriteLine("第" + (i + 1) + "页中,判断段落的标准出错!");
                continue;
            }

            NodeList children = praghNodeList[0].Children;
            for (int j = 0; j < children.Size(); j++)
            {
                // Exact type match: only plain paragraph tags are written out.
                if (children[j].GetType().Equals(typeof(ParagraphTag)))
                {
                    ParagraphTag praghTag = (ParagraphTag)children[j];
                    writer.WriteLine(" " + praghTag.StringText);
                }
            }

            writer.WriteLine();
        }
    }
}
/// <summary>
/// Returns the title of the page when exactly one node matches <c>titleFilter</c>.
/// </summary>
/// <param name="htmlContent">HTML text of the page.</param>
/// <returns>The title text, or an empty string (after a console message) when the match count is not one.</returns>
static string GetBlogTitle(string htmlContent)
{
    Parser htmlParser = new Parser(new Lexer(htmlContent));
    NodeList titles = htmlParser.Parse(titleFilter);

    if (titles.Count != 1)
    {
        Console.WriteLine("获取标题信息出错!");
        return "";
    }

    return ((TitleTag)titles[0]).Title;
}
/// <summary>
/// Downloads the live-odds page at <paramref name="urlparams"/> and, when it has the expected
/// layout, stores one <c>OddsLiveHistory</c> snapshot (home/draw/away) for <paramref name="matchid"/>.
/// </summary>
/// <remarks>
/// This is best-effort: any failure is logged through <c>System.Diagnostics.Trace</c> and the
/// method still returns normally. The returned list is never populated by this method.
/// </remarks>
/// <param name="matchid">Identifier stored on the history record.</param>
/// <param name="urlparams">Full address handed to <c>HttpHelper.GetHtml</c>.</param>
/// <returns>An empty list.</returns>
public List<OddsLiveMatch> GetMatchScrollOdds(string matchid, string urlparams)
{
    List<OddsLiveMatch> liveMatchList = new List<OddsLiveMatch>();
    try
    {
        HttpHelper h = new HttpHelper();
        Cookie lng = new Cookie("lng", "2");
        lng.Domain = domain;
        h.CookieContainer.Add(lng);

        string zoudi = h.GetHtml(urlparams);
        if (string.IsNullOrEmpty(zoudi))
        {
            return liveMatchList;
        }

        // Walk HTML > BODY > FORM > first DIV.
        Lexer lexer = new Lexer(zoudi);
        Parser parser = new Parser(lexer);
        NodeList bodyNodes = parser.Parse(new TagNameFilter("HTML"))[0].Children.ExtractAllNodesThatMatch(new TagNameFilter("BODY"))[0].Children;
        ITag divNode = bodyNodes.ExtractAllNodesThatMatch(new TagNameFilter("FORM"))[0].Children.ExtractAllNodesThatMatch(new TagNameFilter("DIV"))[0] as ITag;

        // Constant-first Equals tolerates a DIV without an ID attribute.
        if (divNode == null || !"PageBody".Equals(divNode.Attributes["ID"]))
        {
            return liveMatchList;
        }

        NodeList dataDivList = divNode.Children.SearchFor(typeof(Winista.Text.HtmlParser.Tags.Div));
        // Indices 0..7 are read below; bail out early on a shorter layout.
        if (dataDivList.Count >= 8
            && dataDivList[0].ToPlainTextString() == "走地盤"
            && dataDivList[2].ToPlainTextString() == "全場賽果")
        {
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            OddsLiveHistory liveHistory = new OddsLiveHistory();
            liveHistory.matchid = matchid;
            // Each odds cell starts with the number followed by a space and further text.
            liveHistory.home = float.Parse(dataDivList[3].ToPlainTextString().Split(' ')[0], invariant);
            liveHistory.draw = float.Parse(dataDivList[5].ToPlainTextString().Split(' ')[0], invariant);
            liveHistory.away = float.Parse(dataDivList[7].ToPlainTextString().Split(' ')[0], invariant);
            liveHistory.time = DateTime.Now;
            dal.AddHistory(liveHistory);
        }
    }
    catch (Exception ex)
    {
        // Still best-effort, but no longer silent: leave a trace of what went wrong.
        System.Diagnostics.Trace.TraceError("GetMatchScrollOdds failed for match {0}: {1}", matchid, ex);
    }

    return liveMatchList;
}
/// <summary>
/// Parses the current page HTML, passes every table row to <c>parserTR</c>, and shows
/// <c>parseResult</c> in a message box (or a notice when the page contains no table rows).
/// </summary>
/// <remarks>
/// In offline mode (<c>IsOffline</c> equals "1") the canned <c>s_htmlFake</c> page is used.
/// The live browser document is not wired up in this method, so the online path fails
/// with an explicit <see cref="InvalidOperationException"/> instead of a null dereference.
/// </remarks>
/// <exception cref="InvalidOperationException">Online mode is requested but no browser document is available.</exception>
public void StartCrawl() // formerly: private void BtnDownload_Click(object sender, RoutedEventArgs e)
{
    this.parseResult = "";

    // Kept for its side effect: construction throws when TargetUri is not a valid URI.
    Uri uri = new Uri(this.TargetUri);

    // The page is meant to come from the WebBrowser control so that the user performs the
    // authorisation manually; downloading through HttpHelper would require the program to
    // implement authentication itself.
    mshtml.IHTMLDocument2 doc2 = null; //(mshtml.IHTMLDocument2)webBox.Document;

    bool offline = string.Equals(this.IsOffline, "1", StringComparison.InvariantCultureIgnoreCase);
    if (!offline && (doc2 == null || doc2.body == null))
    {
        throw new InvalidOperationException("No browser document is available; StartCrawl can currently only run in offline mode.");
    }

    string html = offline ? s_htmlFake : doc2.body.innerHTML;
    Debug.WriteLine(html);

    // Extract every table row with HtmlParser.
    Lexer lexer = new Lexer(html);
    hParser.Parser parser = new hParser.Parser(lexer);
    hParser.NodeFilter filter = new NodeClassFilter(typeof(Winista.Text.HtmlParser.Tags.TableRow));
    NodeList nodeList = parser.Parse(filter);

    if (nodeList.Count == 0)
    {
        MessageBox.Show("没有符合要求的节点");
        return;
    }

    for (int i = 0; i < nodeList.Count; i++)
    {
        parserTR(nodeList[i]);
    }

    // NOTE(review): parserTR presumably accumulates its output into parseResult - confirm.
    MessageBox.Show(parseResult);
}
// Analyses the HtmlContents entry at the given index and extracts commodity name/price pairs.
// Every successfully parsed pair is appended to TradeList; entries whose price node cannot be
// identified unambiguously increment badRecordCount.
private static void GetInfoFromHtml(int index)
{
    // Parse the page with the Winista.HtmlParser library.
    Parser pageParser = new Parser(new Lexer(HtmlContents[index]));

    // Attribute filters: the two arguments are the attribute name and the value to match.
    HasAttributeFilter nameFilter = new HasAttributeFilter("class", "lrg");
    HasAttributeFilter priceFilter = new HasAttributeFilter("class", "bld lrg red");

    // All nodes satisfying the name filter.
    NodeList nameNodes = pageParser.Parse(nameFilter);
    for (int pos = 0; pos < nameNodes.Size(); pos++)
    {
        // The name node is expected to be a <span>; HtmlDecode turns HTML entities back into
        // plain text so that Chinese characters display correctly.
        Span nameSpan = (Span)nameNodes[pos];
        string name = HttpUtility.HtmlDecode(nameSpan.StringText);

        // Navigate from the name node to the block that holds the price: two levels up, then
        // two siblings forward. Whitespace-only text counts as a node for this parser, which
        // is why two NextSibling hops are required. The final 'true' makes the search recurse
        // into child nodes.
        NodeList priceNodes = nameNodes[pos].Parent.Parent.NextSibling.NextSibling.Children.ExtractAllNodesThatMatch(priceFilter, true);

        if (priceNodes.Size() != 1)
        {
            badRecordCount++;
            continue;
        }

        string priceText = ((Span)priceNodes[0]).StringText;
        // The first two characters are skipped before the numeric part.
        double price = Double.Parse(priceText.Substring(2, priceText.Length - 2));
        TradeList.Add(new Commodity(name, price, "RMB"));
    }

    Console.WriteLine("第" + (index + 1) + "个页面处理完成!");
}
/// <summary>
/// Extracts follower ("fan") records from <c>currentHtmlContent</c> and appends them to
/// <paramref name="fansList"/>.
/// </summary>
/// <remarks>
/// One <c>Fan</c> is added per node matching <c>fanFilter</c>, even when some of its fields
/// could not be read; every such problem is reported on the console with the fan's index.
/// </remarks>
/// <param name="fansList">List that receives the crawled fans.</param>
public void GetInfoFromHtml(List<Fan> fansList)
{
    Lexer lexer = new Lexer(currentHtmlContent);
    Parser parser = new Parser(lexer);

    // One <li> node per fan.
    NodeList fansNodeList = parser.Parse(fanFilter);
    for (int i = 0; i < fansNodeList.Size(); i++)
    {
        Fan fan = new Fan();
        Bullet fanBullet = (Bullet)fansNodeList[i];

        ReadFanPortrait(fan, fanBullet, i);
        ReadFanCounts(fan, fanBullet, i);
        ReadFanIntroduction(fan, fanBullet, i);
        ReadFanIdentityAndLocation(fan, fanBullet, i);
        ReadFanFollowMethod(fan, fanBullet, i);

        fansList.Add(fan);
    }
}

// Reads the fan's name from the portrait image's ALT text and starts downloading the portrait.
private void ReadFanPortrait(Fan fan, Bullet fanBullet, int i)
{
    NodeList fanPortraitNodeList = fanBullet.Children.ExtractAllNodesThatMatch(portraitFilter, true);
    if (fanPortraitNodeList.Size() != 1)
    {
        Console.WriteLine("第" + i + "个粉丝中,获取粉丝头像的标准出错!");
        return;
    }

    Div fanPortraitDiv = (Div)fanPortraitNodeList[0];
    NodeList imgNodeList = fanPortraitDiv.Children.ExtractAllNodesThatMatch(new NodeClassFilter(typeof(ImageTag)), true);
    if (imgNodeList.Size() != 1)
    {
        Console.WriteLine("第" + i + "个粉丝中,获取img标记出错!");
        return;
    }

    ImageTag imgNode = (ImageTag)imgNodeList[0];
    if (!imgNode.Attributes.ContainsKey("SRC") || !imgNode.Attributes.ContainsKey("ALT"))
    {
        Console.WriteLine("第" + i + "个粉丝中,<img>标记缺少必要的属性!");
        return;
    }

    string imgUrl = imgNode.GetAttribute("SRC");
    string imgName = imgNode.GetAttribute("ALT");
    fan.Name = imgName;

    // A plain WebClient is enough because portraits can be fetched without the login cookie.
    WebClient wc = new WebClient();
    // Subscribe before starting the transfer so the completion event cannot be missed.
    wc.DownloadFileCompleted += wc_DownloadFileCompleted;
    wc.DownloadFileAsync(new Uri(imgUrl), @"portrait\" + imgName + ".jpg");
}

// Reads the follow / fans / feeds counters from the three links of the "connect" block.
private void ReadFanCounts(Fan fan, Bullet fanBullet, int i)
{
    NodeList fanConnectNodeList = fanBullet.Children.ExtractAllNodesThatMatch(fanConnectFilter, true);
    if (fanConnectNodeList.Size() != 1)
    {
        Console.WriteLine("第" + i + "个粉丝中,获取粉丝关注数/粉丝数/微博数的标准出错!");
        return;
    }

    NodeList ATagList = fanConnectNodeList[0].Children.ExtractAllNodesThatMatch(new NodeClassFilter(typeof(ATag)), true);
    if (ATagList.Size() != 3)
    {
        Console.WriteLine("第" + i + "个粉丝中,获取粉丝关注数/粉丝数/微博数的数量出错!");
        return;
    }

    ATag followTag = (ATag)ATagList[0];
    if (followTag.Attributes.ContainsKey("HREF") && followTag.GetAttribute("HREF").Contains("follow"))
    {
        fan.FollowCount = Int32.Parse(followTag.StringText);
    }
    else
    {
        Console.WriteLine("第" + i + "个粉丝中,获取粉丝的关注数出错!");
    }

    ATag fansTag = (ATag)ATagList[1];
    if (fansTag.Attributes.ContainsKey("HREF") && fansTag.GetAttribute("HREF").Contains("fans"))
    {
        fan.FansCount = Int32.Parse(fansTag.StringText);
    }
    else
    {
        Console.WriteLine("第" + i + "个粉丝中,获取粉丝的粉丝数出错!");
    }

    // The third link is taken as the feed count without checking its HREF.
    ATag feedsTag = (ATag)ATagList[2];
    fan.FeedsCount = Int32.Parse(feedsTag.StringText);
}

// Reads the optional self-introduction; a missing block yields an empty introduction.
private void ReadFanIntroduction(Fan fan, Bullet fanBullet, int i)
{
    NodeList fanInfoNodeList = fanBullet.Children.ExtractAllNodesThatMatch(fanInfoFilter, true);
    int matches = fanInfoNodeList.Size();
    if (matches == 0)
    {
        fan.Introduction = "";
        return;
    }

    if (matches != 1)
    {
        Console.WriteLine("第" + i + "个粉丝中,获取粉丝简介的标准出错!");
        return;
    }

    Div fanInfoDiv = (Div)fanInfoNodeList[0];
    string intro = fanInfoDiv.StringText;
    // StartsWith instead of Substring so that text shorter than the label cannot throw.
    if (intro != null && intro.StartsWith("简介", StringComparison.Ordinal))
    {
        // Skip the two-character label plus the one character that follows it.
        string body = intro.Length > 3 ? intro.Substring(3) : "";
        fan.Introduction = body.Replace("\n", " ").Replace("\t", " ");
    }
}

// Reads user id, profile link, location and gender, and cross-checks the displayed name.
private void ReadFanIdentityAndLocation(Fan fan, Bullet fanBullet, int i)
{
    NodeList fanLocationNodeList = fanBullet.Children.ExtractAllNodesThatMatch(fanNameFilter, true);
    if (fanLocationNodeList.Size() != 1)
    {
        Console.WriteLine("第" + i + "个粉丝中,获取该粉丝的UserID、地点和性别信息的标准出错!");
        return;
    }

    // User id and profile link come from the first anchor in the name block.
    NodeList aTagNodeList = fanLocationNodeList[0].Children.ExtractAllNodesThatMatch(new NodeClassFilter(typeof(ATag)), true);
    if (aTagNodeList.Size() >= 1)
    {
        ATag nameNode = (ATag)aTagNodeList[0];
        if (nameNode.Attributes.ContainsKey("USERCARD") && nameNode.Attributes.ContainsKey("HREF"))
        {
            string uidStr = nameNode.GetAttribute("USERCARD");
            if (uidStr.StartsWith("id=", StringComparison.Ordinal))
            {
                fan.UserID = uidStr.Substring(3);
            }

            string linkUrl = nameNode.GetAttribute("HREF");
            fan.LinkURL = "http://www.weibo.com" + linkUrl;
        }
        else
        {
            Console.WriteLine("第" + i + "个粉丝中,包含用户id和链接的<a>标记中缺少必要的属性!");
        }

        // The anchor text should equal the name taken from the portrait's ALT text.
        if (!nameNode.StringText.Equals(fan.Name))
        {
            Console.WriteLine("第" + i + "个粉丝中,用户名与用户头像文字描述不一致!");
        }
    }

    // Location text and gender marker live in the element with class "addr".
    NodeList locationNodeList = fanLocationNodeList[0].Children.ExtractAllNodesThatMatch(new HasAttributeFilter("class", "addr"), true);
    if (locationNodeList.Size() != 1)
    {
        Console.WriteLine("第" + i + "个粉丝中,获取粉丝地点的标准出错!");
        return;
    }

    string locationStr = "";
    for (int j = 0; j < locationNodeList[0].Children.Size(); j++)
    {
        INode node = locationNodeList[0].Children[j];
        if (node.GetType().Equals(typeof(TextNode)))
        {
            locationStr += ((TextNode)node).ToPlainTextString();
        }

        if (node.GetType().Equals(typeof(TagNode)))
        {
            TagNode genderNode = (TagNode)node;
            if (genderNode.Attributes.ContainsKey("CLASS"))
            {
                string cssClass = genderNode.GetAttribute("CLASS");
                // "female" must be tested first because it also contains "male".
                if (cssClass.Contains("female"))
                {
                    fan.Gender = "female";
                }
                else if (cssClass.Contains("male"))
                {
                    fan.Gender = "male";
                }
                else
                {
                    fan.Gender = "unknown";
                    Console.WriteLine("第" + i + "个粉丝性别不明!");
                }
            }
        }
    }

    fan.Location = locationStr.Trim();
}

// Reads how this fan came to follow the user (text of the single direct link in the block).
private void ReadFanFollowMethod(Fan fan, Bullet fanBullet, int i)
{
    NodeList followMethodNodeList = fanBullet.Children.ExtractAllNodesThatMatch(followMethodFilter, true);
    if (followMethodNodeList.Size() != 1)
    {
        Console.WriteLine("第" + i + "个粉丝中,获取该粉丝关注用户的方式的标准出错!");
        return;
    }

    NodeList methodNodeList = followMethodNodeList[0].Children.ExtractAllNodesThatMatch(new NodeClassFilter(typeof(ATag)));
    if (methodNodeList.Size() != 1)
    {
        Console.WriteLine("第" + i + "个粉丝中,获取该粉丝关注用户的方式的数量出错!");
        return;
    }

    ATag methodNode = (ATag)methodNodeList[0];
    fan.FollowMethod = methodNode.StringText.Trim();
}
/// <summary>
/// Extracts point-of-interest records from <c>currentHtml</c> and appends them to <c>poiList</c>.
/// </summary>
/// <remarks>
/// Exactly one node must match <c>poiListFilter</c>; each of its direct children matching
/// <c>poiFilter</c> whose tag name is "DD" becomes one <c>POI</c>. Fields that cannot be
/// located are left at their defaults and reported on the console.
/// </remarks>
/// <param name="currentPage">Page number stored on every extracted record.</param>
public void GetInfoFromHtml(int currentPage)
{
    Lexer lexer = new Lexer(currentHtml);
    Parser parser = new Parser(lexer);
    NodeList poiHeadList = parser.Parse(poiListFilter);
    if (poiHeadList.Count != 1)
    {
        Console.WriteLine("获取POI列表出错");
        return;
    }

    NodeList poiNodeList = poiHeadList[0].Children.ExtractAllNodesThatMatch(poiFilter, false);
    int numCount = 0;
    for (int i = 0; i < poiNodeList.Count; i++)
    {
        POI poi = new POI();
        DefinitionListBullet poiNode = (DefinitionListBullet)poiNodeList[i];
        if (!poiNode.TagName.Equals("DD"))
        {
            continue;
        }

        numCount++;
        poi.Page = currentPage;
        poi.Number = numCount;

        ReadPoiRatings(poi, poiNode, i);
        ReadPoiAverageCost(poi, poiNode, i);
        ReadPoiCommentCount(poi, poiNode, i);
        ReadPoiName(poi, poiNode, i);
        ReadPoiAddressAndPhone(poi, poiNode, i);
        ReadPoiTags(poi, poiNode, i);

        poiList.Add(poi);
    }
}

// Reads the taste / environment / service scores and, next to them, the star rank.
private void ReadPoiRatings(POI poi, DefinitionListBullet poiNode, int i)
{
    NodeList tasteNodeList = poiNode.Children.ExtractAllNodesThatMatch(tasteFilter, true);
    NodeList environmentNodeList = poiNode.Children.ExtractAllNodesThatMatch(environmentFilter, true);
    NodeList serviceNodeList = poiNode.Children.ExtractAllNodesThatMatch(serviceFilter, true);
    if (tasteNodeList.Count != 1 || environmentNodeList.Count != 1 || serviceNodeList.Count != 1)
    {
        Console.WriteLine("第" + i + "条POI中,判断口味、环境和服务的标准出错!");
        return;
    }

    // A score shown as "-" means "no value" and leaves the field untouched.
    Span spanNode = (Span)tasteNodeList[0];
    if (!spanNode.ToPlainTextString().Equals("-"))
    {
        poi.TasteRemark = Int32.Parse(spanNode.ToPlainTextString());
    }

    spanNode = (Span)environmentNodeList[0];
    if (!spanNode.ToPlainTextString().Equals("-"))
    {
        poi.EnvironmentRemark = Int32.Parse(spanNode.ToPlainTextString());
    }

    spanNode = (Span)serviceNodeList[0];
    if (!spanNode.ToPlainTextString().Equals("-"))
    {
        poi.ServiceRemark = Int32.Parse(spanNode.ToPlainTextString());
    }

    // The rank holder is the second sibling after the service score's parent.
    INode afterParent = spanNode.Parent == null ? null : spanNode.Parent.NextSibling;
    INode rankNodeOfParent = afterParent == null ? null : afterParent.NextSibling;
    if (rankNodeOfParent == null || rankNodeOfParent.Children == null || rankNodeOfParent.Children.Count < 1)
    {
        return;
    }

    INode rankNodeCandidate = rankNodeOfParent.Children[0];
    if (!rankNodeCandidate.GetType().Equals(typeof(Span)))
    {
        return;
    }

    string rank = ((Span)rankNodeCandidate).GetAttribute("TITLE");
    if (rank == null)
    {
        return;
    }

    // The TITLE text contains a Chinese numeral; check from five down to one.
    if (rank.Contains("五"))
    {
        poi.Rank = 5;
    }
    else if (rank.Contains("四"))
    {
        poi.Rank = 4;
    }
    else if (rank.Contains("三"))
    {
        poi.Rank = 3;
    }
    else if (rank.Contains("二"))
    {
        poi.Rank = 2;
    }
    else if (rank.Contains("一"))
    {
        poi.Rank = 1;
    }
}

// Reads the average cost from the text node two siblings after the cost label.
private void ReadPoiAverageCost(POI poi, DefinitionListBullet poiNode, int i)
{
    NodeList averageNodeList = poiNode.Children.ExtractAllNodesThatMatch(averageFilter, true);
    if (averageNodeList.Count != 1)
    {
        Console.WriteLine("第" + i + "条POI中,判断平均消费的标准出错!");
        return;
    }

    INode averageNode = averageNodeList[0];
    INode firstSibling = averageNode.NextSibling;
    INode costNode = firstSibling == null ? null : firstSibling.NextSibling;
    if (costNode != null && costNode.GetType().Equals(typeof(TextNode)))
    {
        string cost = ((TextNode)costNode).ToPlainTextString();
        poi.AverageCost = Int32.Parse(cost);
    }
}

// Reads the number of reviews from the comment link, dropping the trailing unit text.
private void ReadPoiCommentCount(POI poi, DefinitionListBullet poiNode, int i)
{
    NodeList commentNodeList = poiNode.Children.ExtractAllNodesThatMatch(commentFilter, true);
    if (commentNodeList.Count != 1)
    {
        Console.WriteLine("第" + i + "条POI中,判断点评数的标准出错!");
        return;
    }

    INode commentNode = commentNodeList[0];
    if (commentNode.GetType().Equals(typeof(ATag)))
    {
        string commentNum = ((ATag)commentNode).StringText;
        // EndsWith instead of Substring so that text shorter than the suffix cannot throw.
        if (commentNum.EndsWith("封点评", StringComparison.Ordinal))
        {
            commentNum = commentNum.Substring(0, commentNum.Length - 3);
        }

        poi.CommentCount = Int32.Parse(commentNum);
    }
}

// Reads the shop name from the name link.
private void ReadPoiName(POI poi, DefinitionListBullet poiNode, int i)
{
    NodeList nameNodeList = poiNode.Children.ExtractAllNodesThatMatch(nameFilter, true);
    if (nameNodeList.Count != 1)
    {
        Console.WriteLine("第" + i + "条POI中,判断店名的标准出错!");
        return;
    }

    INode nameNode = nameNodeList[0];
    if (nameNode.GetType().Equals(typeof(ATag)))
    {
        poi.Name = ((ATag)nameNode).StringText;
    }
}

// Reads the address (district link text plus the following text) and the phone number,
// which is taken as the last eight characters of that following text.
private void ReadPoiAddressAndPhone(POI poi, DefinitionListBullet poiNode, int i)
{
    NodeList addressNodeList = poiNode.Children.ExtractAllNodesThatMatch(addressFilter, true);
    if (addressNodeList.Count != 1)
    {
        Console.WriteLine("第" + i + "条POI中,判断地址的标准出错!");
        return;
    }

    NodeList districtNodeList = addressNodeList[0].Children.ExtractAllNodesThatMatch(new NodeClassFilter(typeof(ATag)));
    if (districtNodeList.Count != 1)
    {
        Console.WriteLine("第" + i + "条POI中,判断含地址的<a>标记的标准出错!");
        return;
    }

    ATag districtTag = (ATag)districtNodeList[0];
    string address = districtTag.ToPlainTextString();

    INode following = districtTag.NextSibling;
    if (following != null && following.GetType().Equals(typeof(TextNode)))
    {
        string detailAddress = ((TextNode)following).ToPlainTextString().Trim();
        if (detailAddress.Length >= 8)
        {
            poi.Phone = detailAddress.Substring(detailAddress.Length - 8, 8);
            address += detailAddress.Substring(0, detailAddress.Length - 8);
        }
        else
        {
            // Too short to end with an eight-character phone number: keep it all as address.
            address += detailAddress;
        }
    }

    // Remove every space, newline and tab, wherever it occurs.
    char[] removeChrVector = { ' ', '\n', '\t' };
    foreach (char c in removeChrVector)
    {
        address = address.Replace(c.ToString(), "");
    }

    poi.Address = address;
}

// Collects the text of every link directly inside the tags block.
private void ReadPoiTags(POI poi, DefinitionListBullet poiNode, int i)
{
    NodeList tagsNodeList = poiNode.Children.ExtractAllNodesThatMatch(tagsFilter, true);
    if (tagsNodeList.Count != 1)
    {
        Console.WriteLine("第" + i + "条POI中,判断标签的标准出错!");
        return;
    }

    INode tagsNode = tagsNodeList[0];
    if (tagsNode.Children == null)
    {
        return;
    }

    for (int j = 0; j < tagsNode.Children.Count; j++)
    {
        INode node = tagsNode.Children[j];
        if (node.GetType().Equals(typeof(ATag)))
        {
            poi.Tags.Add(node.ToPlainTextString());
        }
    }
}
/// <summary>
/// Downloads <paramref name="url"/> via <c>GetHtml</c> and appends to <c>ItemLink</c> every
/// node that is both a link matching the Taobao item-URL pattern and has class "EventCanSelect".
/// </summary>
/// <param name="url">Address of the listing page.</param>
public void GetLinkForPage(string url)
{
    Parser pageParser = new Parser(new Lexer(GetHtml(url)));
    pageParser.Encoding = "gb2312";

    // Both conditions must hold for a node to be collected.
    AndFilter itemLinkFilter = new AndFilter(
        new LinkRegexFilter(@"^http\://item\.taobao\.com/item\.htm\?id\=\d+$"),
        new HasAttributeFilter("class", "EventCanSelect"));

    NodeList matches = pageParser.Parse(itemLinkFilter);
    int total = matches.Count;
    int position = 0;
    while (position < total)
    {
        ItemLink.Add(matches[position]);
        position++;
    }
}
/// <summary>
/// Collects the category links found under the first element with class "fenlei_list".
/// </summary>
/// <remarks>
/// A direct child of that element qualifies when one of its children is a link matching the
/// product-index URL pattern; the link returned is the qualifying node's first child.
/// Entries whose first child is not an anchor are skipped, so the result never contains null.
/// </remarks>
/// <param name="html">HTML text of the page.</param>
/// <returns>The category anchors; empty when the navigation element is missing.</returns>
public List<ATag> ParseCatelog(string html)
{
    List<ATag> atags = new List<ATag>();

    Parser parser = new Parser(new Lexer(html));
    NodeList navNodes = parser.Parse(new HasAttributeFilter("class", "fenlei_list"));
    // No navigation block (or an empty one) means there is nothing to collect.
    if (navNodes == null || navNodes.Count == 0 || navNodes[0].Children == null)
    {
        return atags;
    }

    NodeFilter catelog = new HasChildFilter(new LinkRegexFilter(@"^\.\./product/index\.php\?cplm\=\-\d\d\d\-$"));
    NodeList catelogNodes = navNodes[0].Children.ExtractAllNodesThatMatch(catelog);
    if (catelogNodes == null)
    {
        return atags;
    }

    int length = catelogNodes.Count;
    for (int i = 0; i < length; i++)
    {
        INode node = catelogNodes[i];
        if (node.Children == null || node.Children.Count == 0)
        {
            continue;
        }

        ATag a = node.Children[0] as ATag;
        if (a != null)
        {
            atags.Add(a);
        }
    }

    return atags;
}
/// <summary>
/// Helper: reads <c>max_id</c> from the HTML, taking the "MID" attribute of the last node
/// that matches the id filter selected by <c>Type</c>.
/// </summary>
/// <param name="htmlContent">HTML text.</param>
/// <returns><c>true</c> when at least one node matched and <c>max_id</c> was assigned; otherwise <c>false</c>.</returns>
private bool GetMaxIdFromHtml(string htmlContent)
{
    Parser htmlParser = new Parser(new Lexer(htmlContent));
    NodeList feeds = htmlParser.Parse(idFilter[(int)Type]);

    int feedCount = feeds.Size();
    if (feedCount < 1)
    {
        return false;
    }

    TagNode lastFeed = (TagNode)feeds[feedCount - 1];
    max_id = lastFeed.GetAttribute("MID");
    return true;
}
/// <summary>
/// Helper: reads <c>end_id</c> from the HTML, taking the "MID" attribute of the first node
/// that matches the id filter selected by <c>Type</c>.
/// </summary>
/// <param name="htmlContent">HTML text.</param>
/// <returns>
/// <c>true</c> when the first match is exactly a <c>Div</c> carrying a "MID" attribute and
/// <c>end_id</c> was assigned; otherwise <c>false</c>.
/// </returns>
private bool GetEndIdFromHtml(string htmlContent)
{
    Parser htmlParser = new Parser(new Lexer(htmlContent));
    NodeList feeds = htmlParser.Parse(idFilter[(int)Type]);

    if (feeds.Size() < 1)
    {
        return false;
    }

    if (!feeds[0].GetType().Equals(typeof(Div)))
    {
        return false;
    }

    TagNode firstFeed = (TagNode)feeds[0];
    if (!firstFeed.Attributes.ContainsKey("MID"))
    {
        return false;
    }

    end_id = firstFeed.GetAttribute("MID");
    return true;
}
/// <summary>
/// Extracts feeds (posts) from every page in <c>htmlContentList</c> and appends them to
/// <paramref name="feedList"/>.
/// </summary>
/// <remarks>
/// Each node matching <c>feedFilter</c> yields one <c>Feed</c>. When a feed announces
/// additional forwards of the same original post, those similar feeds are appended as well
/// (before the feed itself). Structural mismatches are reported on the console with the
/// feed's index and the affected fields are left at their defaults.
/// </remarks>
/// <param name="currentPage">Page number stored on every extracted feed.</param>
/// <param name="feedList">List that receives the crawled feeds.</param>
public void GetInfoFromHtml(int currentPage, List<Feed> feedList)
{
    foreach (string htmlContent in htmlContentList)
    {
        Lexer lexer = new Lexer(htmlContent);
        Parser parser = new Parser(lexer);

        // One div per feed.
        NodeList feedNodeList = parser.Parse(feedFilter);
        for (int i = 0; i < feedNodeList.Size(); i++)
        {
            serialNumber++;
            Feed feed = new Feed();
            feed.Page = currentPage;
            feed.Number = serialNumber;

            // Number of additional forwards of the same original post.
            int similarFeedCount = 0;
            TagNode feedDiv = (TagNode)feedNodeList[i];

            // Does the feed contain an "X more forwards of the original post" marker?
            NodeList similarfeedCountNodeList = feedDiv.Children.ExtractAllNodesThatMatch(similarFeedCountFilter, true);
            switch (similarfeedCountNodeList.Size())
            {
                case 1:
                    // The parser apparently does not recognise the <b> tag and exposes its
                    // content as the marker's next sibling, so the count is read from there.
                    similarFeedCount = Int32.Parse(((TextNode)(similarfeedCountNodeList[0].NextSibling)).ToPlainTextString());
                    break;
                case 0:
                    similarFeedCount = 0;
                    break;
                default:
                    Console.WriteLine("第" + i + "条微博中,判断是否含有类似微博转发的标准出错!");
                    break;
            }

            // ---- Author ----
            NodeList feedAuthorNodeList = feedDiv.Children.ExtractAllNodesThatMatch(feedAuthorFilter, true);
            // Within one feed div the author filter matches this feed's author plus the author
            // of each similar forward, hence (1 + similarFeedCount).
            if (feedAuthorNodeList.Size() == (1 + similarFeedCount))
            {
                ATag feedAuthorTag = (ATag)feedAuthorNodeList[0].Children[0];
                feed.Author = feedAuthorTag.GetAttribute("TITLE");

                // Optional remark name: a <span> immediately following the author link.
                INode remarkNameNode = feedAuthorTag.NextSibling;
                if (remarkNameNode != null && remarkNameNode.GetType().Equals(typeof(Span)))
                {
                    string remarkName = ((Span)remarkNameNode).StringText;
                    // Strip the surrounding brackets; shorter text cannot hold a bracketed name.
                    if (remarkName != null && remarkName.Length >= 2)
                    {
                        feed.RemarkName = remarkName.Substring(1, remarkName.Length - 2);
                    }
                }
            }
            else if (!user.NickName.Equals(""))
            {
                // On the home timeline feeds come from different followed users and carry an
                // author node; on a personal page every feed belongs to that user and there is
                // no author node, so the known user is used instead.
                feed.Author = user.NickName;
                feed.RemarkName = user.RemarkName;
            }
            else
            {
                Console.WriteLine("第" + i + "条微博中,判断微博作者的标准出错!");
            }

            // ---- Forwarded post / content ----
            NodeList reFeedNodeList = feedDiv.Children.ExtractAllNodesThatMatch(reFeedFilter, true);
            // (1 + similarFeedCount) for the same reason as for the author.
            if (reFeedNodeList.Size() == (1 + similarFeedCount))
            {
                TagNode reFeedDiv = (TagNode)reFeedNodeList[0];
                // First read the information of this forward itself.
                GetReFeedInfo(i, feed, reFeedDiv, feedDiv);

                if (similarFeedCount > 0)
                {
                    NodeList similarFeedNodeList = feedDiv.Children.ExtractAllNodesThatMatch(similarFeedFilter, true);
                    if (similarFeedNodeList.Size() == similarFeedCount)
                    {
                        for (int j = 0; j < similarFeedCount; j++)
                        {
                            feedList.Add(GetSimilarFeed(currentPage, i, (TagNode)similarFeedNodeList[j], feed.OriginalAuthor, feed.Content));
                        }
                    }
                    else
                    {
                        Console.WriteLine("第" + i + "条微博中,获取转发微博的数量出错!");
                    }
                }
            }
            else if (reFeedNodeList.Size() == 0)
            {
                // Not a forward: the feed's own text is its content.
                NodeList feedContentNodeList = feedDiv.Children.ExtractAllNodesThatMatch(feedContentFilter, true);
                if (feedContentNodeList.Size() == 1)
                {
                    feed.Content = GetContentFromChildren(feed, feedContentNodeList[0], false);

                    // A forward whose original post was deleted may not match the forward
                    // filter at all, so check explicitly for the "deleted" marker.
                    NodeList deletedFeedList = feedDiv.Children.ExtractAllNodesThatMatch(refeedDeletedFilter2, true);
                    if (deletedFeedList.Size() > 0)
                    {
                        feed.OriginalAuthor = "Unknown";
                        feed.ReFeedOrNot = true;
                        feed.ReFeedReason = feed.Content;
                        feed.Content = "微博已删除";
                    }
                }
                else
                {
                    Console.WriteLine("第" + i + "条微博中,判断微博内容的标准出错!");
                }
            }
            else
            {
                Console.WriteLine("第" + i + "条微博中,判断转发微博的标准出错!");
            }

            // ---- Remaining per-feed fields, each read by its own helper ----
            feed.Location = GetLocationInfo(i, feedDiv);
            feed.LikeCount = GetFeedLikeInfo(i, feedDiv);
            feed.ReFeedCount = GetFeedForwardCount(i, feedDiv);
            feed.CommentCount = GetFeedCommentCount(i, feedDiv);
            feed.Time = GetFeedTimeInfo(i, feedDiv);
            feed.Device = GetFeedSendTypeInfo(i, feedDiv);

            feedList.Add(feed);
        }
    }
}
/// <summary>
/// Downloads the page at <c>urlBase</c> via <c>GetHtml</c> and stores in <c>_linkResult</c>
/// every link whose URL matches the paging pattern <c>s=&lt;digits&gt;#J_FilterTabBar</c>.
/// </summary>
public void InitPage()
{
    Parser pageParser = new Parser(new Lexer(GetHtml(urlBase)));
    pageParser.Encoding = "gb2312";

    NodeFilter pagingLinkFilter = new LinkRegexFilter(@"s\=\d+\#J_FilterTabBar");
    _linkResult = pageParser.Parse(pagingLinkFilter);
}
/// <summary>
/// Parses the current page HTML, converts its valid table rows into invoice DTOs, and
/// hands them to <c>CallWS</c> for synchronisation.
/// </summary>
/// <remarks>
/// In offline mode (<c>IsOffline</c> equals "1") the canned <c>s_htmlFake</c> page is used.
/// The live browser document is not wired up in this method, so the online path fails
/// with an explicit <see cref="InvalidOperationException"/> instead of a null dereference.
/// The user is informed through message boxes when no rows or no invoice data are found,
/// and when the page has been synchronised.
/// </remarks>
/// <exception cref="InvalidOperationException">Online mode is requested but no browser document is available.</exception>
public void StartCrawl() // formerly: private void BtnDownload_Click(object sender, RoutedEventArgs e)
{
    List<ImportInvoiceDTO> list = new List<ImportInvoiceDTO>();
    List<hParser.Tags.TableRow> validRowList = new List<hParser.Tags.TableRow>();
    this.parseResult = "";

    // Kept for its side effect: construction throws when TargetUri is not a valid URI.
    Uri uri = new Uri(this.TargetUri);

    // The page is meant to come from the WebBrowser control so that the user performs the
    // authorisation manually; downloading through HttpHelper would require the program to
    // implement authentication itself.
    mshtml.IHTMLDocument2 doc2 = null; //(mshtml.IHTMLDocument2)webBox.Document;

    bool offline = string.Equals(this.IsOffline, "1", StringComparison.InvariantCultureIgnoreCase);
    if (!offline && (doc2 == null || doc2.body == null))
    {
        throw new InvalidOperationException("No browser document is available; StartCrawl can currently only run in offline mode.");
    }

    string html = offline ? s_htmlFake : doc2.body.innerHTML;
    Debug.WriteLine(html);

    // Extract every table row with HtmlParser.
    Lexer lexer = new Lexer(html);
    hParser.Parser parser = new hParser.Parser(lexer);
    hParser.NodeFilter filter = new NodeClassFilter(typeof(Winista.Text.HtmlParser.Tags.TableRow));
    NodeList nodeList = parser.Parse(filter);

    if (nodeList.Count == 0)
    {
        MessageBox.Show("没有符合要求的节点");
    }
    else
    {
        for (int i = 0; i < nodeList.Count; i++)
        {
            // parserTR returns null for rows that are not valid data rows.
            var tagTR = parserTR(nodeList[i]);
            if (tagTR != null)
            {
                validRowList.Add(tagTR);
            }
        }

        // May replace 'list' (passed by reference), hence the null check below.
        parserValidTR(validRowList, ref list);
    }

    if (list == null || list.Count == 0)
    {
        MessageBox.Show("该页面上没有检测到预期数据");
        return;
    }

    ImportInvoiceListDTO soap = new ImportInvoiceListDTO
    {
        List = list,
        Result = new ImportInvoiceResultDTO
        {
            Message = "CALLBACK",
            Status = 9
        }
    };

    // Synchronisation is delegated to CallWS.
    CallWS(soap);
    MessageBox.Show("本页已同步完成,请点击下一页继续同步");
}
/// <summary>
/// Opens the category page behind <paramref name="a"/> and, for every element with class
/// "photoyi", resolves its product link, prints it, and passes it to <c>ParseProduct</c>.
/// </summary>
/// <param name="a">Category link; a leading "../" in its URL is replaced by the site root.</param>
public void ParseProducts(ATag a)
{
    string pageUrl = a.Link.Replace("../", "http://rrxf.cn/");
    Parser pageParser = new Parser(new Lexer(GetHtml(pageUrl)));

    NodeList photoNodes = pageParser.Parse(new HasAttributeFilter("class", "photoyi"));
    if (photoNodes == null)
    {
        return;
    }

    int total = photoNodes.Count;
    int position = 0;
    while (position < total)
    {
        ATag productLink = ParseProductUrl(photoNodes[position].ToHtml());
        Console.WriteLine(productLink.Link);
        ParseProduct(productLink);
        position++;
    }
}
/// <summary>
/// Downloads the page at <c>this.url</c>, reads isotope rows from its first
/// "tbody" node, and writes the resulting atoms and their peaks to
/// <paramref name="fileName"/>.
/// </summary>
/// <param name="fileName">Path of the tab-separated text file to write.</param>
/// <returns>A single-element array containing <paramref name="fileName"/>.</returns>
public override IEnumerable<string> Process(string fileName)
{
    string html;

    // WebClient is disposable; release it as soon as the download has finished.
    using (System.Net.WebClient aWebClient = new System.Net.WebClient())
    {
        aWebClient.Encoding = System.Text.Encoding.Default;
        html = aWebClient.DownloadString(this.url);
    }

    Lexer lexer = new Lexer(html);
    Winista.Text.HtmlParser.Parser parser = new Winista.Text.HtmlParser.Parser(lexer);
    NodeList htmlNodes = parser.Parse(null);

    List<IsotopicAtom> atoms = new List<IsotopicAtom>();
    var node = FindFirstNode(htmlNodes, "tbody");
    INode nextNode;
    IsotopicAtom atom = null;

    // Walk the "tr" nodes: locate the next row from the current position,
    // handle it, then continue from the sibling that follows it.
    while ((nextNode = FindFirstNode(node, "tr")) != null)
    {
        if (nextNode.Children != null)
        {
            var tds = nextNode.Children.ExtractAllNodesThatMatch(new NameFilter("td"));

            // A single-cell row clears the current atom, so following rows are
            // ignored until a new atom row appears.
            if (tds.Count == 1)
            {
                atom = null;
            }

            // NOTE(review): this guard only guarantees three cells, yet the atom-row
            // branch reads cells 3 and 4 — confirm how NodeList behaves past its count.
            if (tds.Count >= 3 && tds[0].FirstChild != null)
            {
                var t1 = tds[0].FirstChild.GetText().Trim();
                var t2 = tds[1].FirstChild.GetText().Trim();

                if (Char.IsDigit(t1[0]) && Char.IsLetter(t2[0]))
                {
                    // Atom row: first cell starts with a digit and second with a letter.
                    // The second cell names the atom; cells 3 and 4 hold its first peak.
                    atom = new IsotopicAtom();
                    atoms.Add(atom);
                    atom.Name = t2;

                    Peak p = new Peak();
                    p.Mz = GetDouble(tds[3]);
                    p.Intensity = GetDouble(tds[4]);
                    atom.Isotopics.Add(p);
                }
                else if (atom != null)
                {
                    // Continuation row for the current atom. A leading cell that starts
                    // with a letter is dropped so the peak values sit in cells 1 and 2.
                    if (t1.Length > 0 && Char.IsLetter(t1[0]))
                    {
                        tds.Remove(0);
                    }

                    Peak p = new Peak();
                    p.Mz = GetDouble(tds[1]);
                    p.Intensity = GetDouble(tds[2]);
                    atom.Isotopics.Add(p);
                }
            }
        }

        node = nextNode.NextSibling;
        if (node == null)
        {
            break;
        }
    }

    // Drop zero-intensity peaks, then atoms left without any peak.
    atoms.ForEach(m => m.Isotopics.RemoveAll(n => n.Intensity == 0.0));
    atoms.RemoveAll(m => m.Isotopics.Count == 0);

    // The lookup is built before the synthetic entries are added, so it holds
    // only atoms read from the page. Duplicate names make ToDictionary throw.
    var dic = atoms.ToDictionary(m => m.Name);

    var x = new IsotopicAtom();
    x.Name = "X";
    x.Isotopics.Add(new Peak(1, 0.9));
    x.Isotopics.Add(new Peak(2, 0.1));
    atoms.Insert(0, x);

    atoms.Add(AddHevayAtom("(H2)", "H", 1, dic));
    atoms.Add(AddHevayAtom("(C13)", "C", 1, dic));
    atoms.Add(AddHevayAtom("(N15)", "N", 1, dic));
    atoms.Add(AddHevayAtom("(O18)", "O", 2, dic));

    // Per atom: a "name<TAB>peak count" header, one "mz<TAB>intensity" line per peak, then a blank line.
    using (StreamWriter sw = new StreamWriter(fileName))
    {
        atoms.ForEach(m =>
        {
            sw.WriteLine("{0}\t{1}", m.Name, m.Isotopics.Count);
            m.Isotopics.ForEach(n => sw.WriteLine("{0:0.000000}\t{1:0.000000}", n.Mz, n.Intensity));
            sw.WriteLine();
        });
    }

    return(new string[] { fileName });
}
/// <summary>
/// Gets the plain text of the first element in the markup that has the
/// requested tag name and is accepted by a <c>HasAttributeFilter</c> built
/// from the given attribute name and value.
/// </summary>
/// <param name="html">HTML markup to search.</param>
/// <param name="tag">Tag name the element must have; when null or empty, elements of any tag are considered.</param>
/// <param name="attribute">Name of the attribute to look for.</param>
/// <param name="attValue">Value the attribute is matched against.</param>
/// <returns>The plain text of the first matching element, or <c>null</c> when nothing matches.</returns>
public static string getValue(string html, string tag, string attribute, string attValue)
{
    Lexer lexer = new Lexer(html);
    Parser parser = new Parser(lexer);

    NodeFilter nodeFilter = new HasAttributeFilter(attribute, attValue);
    if (!string.IsNullOrEmpty(tag))
    {
        // Honour the requested tag name; matching on the attribute alone would
        // let an element of a different tag win when it appears first.
        nodeFilter = new AndFilter(new TagNameFilter(tag), nodeFilter);
    }

    NodeList nodeList = parser.Parse(nodeFilter);
    if (nodeList == null || nodeList.Count == 0)
    {
        return null;
    }

    ITag tagNode = nodeList[0] as ITag;
    if (tagNode == null)
    {
        return null;
    }

    return tagNode.ToPlainTextString();
}
/// <summary>
/// Finds the first link in the given markup whose address matches the
/// product-detail pattern and rewrites its <c>Link</c> to an absolute address.
/// </summary>
/// <param name="html">Markup fragment expected to contain a product link.</param>
/// <returns>The matched link tag, with "http://rrxf.cn/product/" prepended to its <c>Link</c>.</returns>
/// <remarks>
/// NOTE(review): a fragment without a matching link is not handled here —
/// the first result is used without a check.
/// </remarks>
public ATag ParseProductUrl(string html)
{
    Parser fragmentParser = new Parser(new Lexer(html));
    NodeList matches = fragmentParser.Parse(new LinkRegexFilter(@"lookcp\.php\?cpid\=\d{0,}"));

    ATag productLink = matches[0] as ATag;
    productLink.Link = string.Concat("http://rrxf.cn/product/", productLink.Link);
    return productLink;
}
/// <summary>
/// Fetches the in-play page and returns one <c>OddsLiveMatch</c> for every
/// div with class "menuRow" found under the "PageBody" container.
/// </summary>
/// <returns>
/// The matches collected so far. The list is empty when the page is empty,
/// does not have the expected structure, or reports "全場賽果" in its third div.
/// Any exception is logged to debug output and whatever was collected before it
/// is returned.
/// </returns>
public List<OddsLiveMatch> GetScrollMatchList()
{
    List<OddsLiveMatch> liveMatchList = new List<OddsLiveMatch>();
    try
    {
        HttpHelper h = new HttpHelper();
        Cookie lng = new Cookie("lng", "2");
        lng.Domain = domain;
        h.CookieContainer.Add(lng);

        string zoudi = h.GetHtml("https://" + domain + "/default.aspx" + zoudiUrl);
        if (string.IsNullOrEmpty(zoudi))
        {
            return liveMatchList;
        }

        // Drill down HTML -> BODY -> FORM -> first DIV, which should be the page container.
        Lexer lexer = new Lexer(zoudi);
        Parser parser = new Parser(lexer);
        NodeList bodyNodes = parser.Parse(new TagNameFilter("HTML"))[0]
            .Children.ExtractAllNodesThatMatch(new TagNameFilter("BODY"))[0]
            .Children;
        ITag divNode = bodyNodes.ExtractAllNodesThatMatch(new TagNameFilter("FORM"))[0]
            .Children.ExtractAllNodesThatMatch(new TagNameFilter("DIV"))[0] as ITag;

        if (!divNode.Attributes["ID"].Equals("PageBody"))
        {
            return liveMatchList;
        }

        NodeList dataDivList = divNode.Children.SearchFor(typeof(Winista.Text.HtmlParser.Tags.Div));

        // The first div must be the in-play heading; otherwise this is not the expected page.
        if (dataDivList[0].ToPlainTextString() != "走地盤")
        {
            return liveMatchList;
        }

        // When the third div is the full-time results heading, no matches are collected.
        if (dataDivList[2].ToPlainTextString() == "全場賽果")
        {
            return liveMatchList;
        }

        for (int i = 0; i < dataDivList.Count; i++)
        {
            ITag div = dataDivList[i] as ITag;
            if (div.Attributes["CLASS"] != null && div.Attributes["CLASS"].Equals("menuRow"))
            {
                OddsLiveMatch oddsLive = new OddsLiveMatch();
                oddsLive.urlparams = (div.FirstChild as ITag).Attributes["HREF"].ToString();

                // The id is the first '&'-separated segment of the link with its first four characters removed.
                oddsLive.id = oddsLive.urlparams.Split('&')[0].Substring(4);
                oddsLive.time = DateTime.Now;
                oddsLive.name = div.ToPlainTextString();
                liveMatchList.Add(oddsLive);
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: callers still get whatever was collected, but the failure
        // is no longer invisible while debugging.
        System.Diagnostics.Debug.WriteLine("GetScrollMatchList failed: " + ex);
    }

    return liveMatchList;
}
/// <summary>
/// Reads the invoice rows of the page currently shown in the browser (or of the
/// canned offline markup), converts the valid rows into invoice DTOs, and then
/// optionally logs them and passes them to <c>CallWS</c>.
/// </summary>
/// <param name="wb">Browser whose document is read; not touched in offline mode.</param>
/// <param name="isOffline">When true, <c>s_htmlFake</c> is parsed instead of the live document.</param>
/// <param name="IsUnConfirmChecked">
/// Selects the table element "example1" (true) or "example" (false), and the
/// log line layout; it is also forwarded to <c>parserValidTR</c>.
/// </param>
/// <param name="hasValidData">
/// Set to false when the table, its body, its records or any parsed data are
/// missing. This method never sets it to true.
/// </param>
private void CrawlCurrentPage(WebBrowser wb, bool isOffline, bool IsUnConfirmChecked, ref bool hasValidData)
{
    mshtml.IHTMLDocument2 doc2 = isOffline ? null : (mshtml.IHTMLDocument2)wb.Document;
    string html = isOffline ? s_htmlFake : doc2.body.innerHTML;
    Debug.WriteLine(html);

    List<ImportInvoiceDTO> list = new List<ImportInvoiceDTO>();
    List<hParser.Tags.TableRow> validRowList = new List<hParser.Tags.TableRow>();

    // Markup the row parser works on: the fake page when offline, otherwise the
    // body of the selected table taken from the live DOM.
    string rowsHtml;
    if (isOffline)
    {
        // There is no DOM offline (doc2 is null), so the table lookup below cannot
        // run; parse the rows straight from the canned markup instead.
        rowsHtml = html;
    }
    else
    {
        mshtml.HTMLTableClass table = IsUnConfirmChecked
            ? (mshtml.HTMLTableClass)doc2.all.item("example1", 0)
            : (mshtml.HTMLTableClass)doc2.all.item("example", 0);
        if (table == null)
        {
            hasValidData = false;
            return;
        }

        mshtml.HTMLTableSectionClass tbody = (mshtml.HTMLTableSectionClass)table.lastChild;
        if (tbody == null)
        {
            hasValidData = false;
            return;
        }

        rowsHtml = tbody.innerHTML;

        // The page shows this placeholder text when the table has no records.
        if (0 == string.Compare(tbody.innerText, "没找到记录", StringComparison.InvariantCultureIgnoreCase))
        {
            hasValidData = false;
            return;
        }

#if RESEARCH
        // Scratch area for experiments with driving the page through the WPF WebBrowser DOM.

        // Read / write element attributes.
        mshtml.IHTMLElement login_pass = (mshtml.IHTMLElement)doc2.all.item("login_pass", 0);
        mshtml.IHTMLElement password = (mshtml.IHTMLElement)doc2.all.item("password", 0);
        password.setAttribute("value", "12345678");
        login_pass.setAttribute("style", "");
        mshtml.IHTMLElement login_pass1 = (mshtml.IHTMLElement)doc2.all.item("login_pass1", 0);
        mshtml.IHTMLElement password1 = (mshtml.IHTMLElement)doc2.all.item("password1", 0);
        login_pass1.setAttribute("style", "display:none;");

        // Trigger an event: click the confirm button.
        loginBT.click();

        // Run JavaScript that lives in the page.
        mshtml.IHTMLWindow2 win = (mshtml.IHTMLWindow2)doc2.parentWindow;
        win.execScript("Login('12345678', '', 1)", "javascript");
        return;
#endif
    }

    // Collect every <tr> element from the markup with HtmlParser.
    Lexer lexer = new Lexer(rowsHtml);
    hParser.Parser parser = new hParser.Parser(lexer);
    hParser.NodeFilter filter = new NodeClassFilter(typeof(Winista.Text.HtmlParser.Tags.TableRow));
    NodeList nodeList = parser.Parse(filter);
    if (nodeList.Count == 0)
    {
        hasValidData = false;
        MessageBox.Show("没有符合要求的节点");
    }
    else
    {
        for (int i = 0; i < nodeList.Count; i++)
        {
            // parserTR yields null for rows that should not be imported; keep only the valid ones.
            var tagTR = parserTR(nodeList[i]);
            if (tagTR != null)
            {
                validRowList.Add(tagTR);
            }
        }

        parserValidTR(validRowList, IsUnConfirmChecked, ref list);
    }

    // Logging, export and persistence.
    // NOTE(review): unlike StartCrawl, an empty result does not return here, so the
    // steps below still run with an empty list — confirm that this is intended.
    if (list == null || list.Count == 0)
    {
        MessageBox.Show("该页面上没有检测到预期数据");
        hasValidData = false;
    }

    ImportInvoiceListDTO soap = new ImportInvoiceListDTO
    {
        List = list,
        Result = new ImportInvoiceResultDTO
        {
            Message = "CALLBACK",
            Status = 9
        }
    };
    Debug.Write(soap);

    if (this.IfLog == "1")
    {
        soap.List.ForEach(impinfo =>
        {
            if (IsUnConfirmChecked)
            {
                LogHelper.WriteLog(typeof(WebBoxView), string.Format("发票代码{0} 发票号码{1} 开票日期{2} 销方税号{3} 金额{4} 税额{5} 来源{6} 发票状态{7} 勾选标志{8} 操作时间{9}", impinfo.InvoiceCode, impinfo.InvoiceNumber, impinfo.CreateDate, impinfo.SalesTaxNumber, impinfo.Amount, impinfo.Tax, impinfo.From, impinfo.Status, impinfo.SelectTag, impinfo.ChosenTime));
            }
            else
            {
                LogHelper.WriteLog(typeof(WebBoxView), string.Format("发票代码{0} 发票号码{1} 开票日期{2} 销方税号{3} 金额{4} 税额{5} 来源{6} 发票状态{7} 确认月份{8}", impinfo.InvoiceCode, impinfo.InvoiceNumber, impinfo.CreateDate, impinfo.SalesTaxNumber, impinfo.Amount, impinfo.Tax, impinfo.From, impinfo.Status, impinfo.SelectTag));
            }
        });
    }

    if (this.IfCallWS == "1")
    {
        CallWS(soap);
    }

    Debug.Write("本页已同步完成,请点击下一页继续同步");
}