/// <summary>
/// Checks whether the given HTML contains an <c>input</c> element with
/// <c>id="code_input"</c> whose <c>placeholder</c> attribute is exactly
/// "请输入验证码" (Chinese for "please enter the verification code").
/// </summary>
/// <param name="html">The HTML text to inspect.</param>
/// <returns>
/// <c>true</c> when the first matching element is an input tag carrying that
/// placeholder; otherwise <c>false</c>.
/// </returns>
protected bool IsVailCode(string html)
{
    // Nothing to parse: treat as "no verification-code input present".
    if (string.IsNullOrEmpty(html))
    {
        return false;
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList nodeList = parser.ExtractAllNodesThatMatch(
        new AndFilter(new TagNameFilter("input"), new HasAttributeFilter("id", "code_input")));
    if (nodeList == null || nodeList.Count <= 0)
    {
        return false;
    }

    // The "as" cast yields null when the node is not an InputTag; guard it
    // instead of dereferencing and failing with a NullReferenceException.
    InputTag input = nodeList[0] as InputTag;
    if (input == null)
    {
        return false;
    }

    return input.GetAttribute("placeholder") == "请输入验证码";
}
/// <summary>
/// Gets the <c>__EVENTVALIDATION</c> value from an ASP.NET page.
/// </summary>
/// <param name="parser">Parser over the page; it is reset before being searched.</param>
/// <returns>
/// The <c>value</c> attribute of the first <c>input</c> element named
/// <c>__EVENTVALIDATION</c>, or an empty string when no such element is found.
/// </returns>
public static string GetAspNetEventValidation(Parser parser)
{
    parser.Reset();

    AndFilter hiddenFieldFilter = new AndFilter(
        new TagNameFilter("input"),
        new HasAttributeFilter("name", "__EVENTVALIDATION"));
    NodeList matches = parser.ExtractAllNodesThatMatch(hiddenFieldFilter);

    if (matches == null || matches.Count <= 0)
    {
        return string.Empty;
    }

    InputTag hiddenField = (InputTag)matches[0];
    return hiddenField.GetAttribute("value");
}
/// <summary>
/// Gets the <c>__VIEWSTATE</c> value from an ASP.NET page.
/// </summary>
/// <param name="parser">Parser over the page; it is reset before being searched.</param>
/// <returns>
/// The <c>value</c> attribute of the first <c>input</c> element named
/// <c>__VIEWSTATE</c>, or an empty string when no such element is found.
/// </returns>
public static string GetAspNetViewState(Parser parser)
{
    parser.Reset();

    AndFilter hiddenFieldFilter = new AndFilter(
        new TagNameFilter("input"),
        new HasAttributeFilter("name", "__VIEWSTATE"));
    NodeList matches = parser.ExtractAllNodesThatMatch(hiddenFieldFilter);

    if (matches == null || matches.Count <= 0)
    {
        return string.Empty;
    }

    InputTag hiddenField = (InputTag)matches[0];
    return hiddenField.GetAttribute("value");
}
/// <summary>
/// Crawls the paginated listing at <c>SiteUrl</c>: for every data row of the
/// <c>inside_table</c> listing table it downloads the linked detail page,
/// flattens the detail table into "label:value" lines and builds an
/// <c>InviteInfo</c> record from the collected fields.
/// </summary>
/// <param name="crawlAll">
/// When <c>false</c>, crawling stops as soon as the result list holds at least
/// <c>MaxCount</c> items; when <c>true</c>, every page is processed.
/// </param>
/// <returns>
/// The collected <c>InviteInfo</c> items. The list is empty when the first
/// listing page cannot be downloaded.
/// </returns>
protected override IList ExecuteCrawl(bool crawlAll)
{
    IList list = new List<InviteInfo>();
    int pageInt = 1;
    string html;

    try
    {
        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
    }
    catch (Exception)
    {
        // Without the first page there is nothing to crawl.
        return list;
    }

    // The pager text is read from span.input-group-addon; the page count is
    // the text between "项" and "页" with the item count and markers removed.
    Parser parser = new Parser(new Lexer(html));
    NodeList tdNodes = parser.ExtractAllNodesThatMatch(
        new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("class", "input-group-addon")));
    if (tdNodes != null && tdNodes.Count > 0)
    {
        try
        {
            string reTemp = tdNodes.AsString().GetRegexBegEnd("共", "项");
            string pageTemp = tdNodes.AsString().GetRegexBegEnd("项", "页").GetReplace("共,项,页," + reTemp + ",,");
            pageInt = int.Parse(pageTemp);
        }
        catch (Exception)
        {
            // Best effort: if the pager cannot be parsed, only the first page is crawled.
        }
    }

    // Built once instead of once per listing row.
    Regex regPrjAddr = new Regex(@"工程地址:[^\r\n]+\r\n");
    Regex regoType = new Regex(@"工程类型:[^\r\n]+\r\n");

    for (int i = 1; i <= pageInt; i++)
    {
        if (i > 1)
        {
            try
            {
                // Pages after the first are requested with a zero-based "pi" query value.
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "?pi=" + (i - 1), Encoding.UTF8);
            }
            catch
            {
                continue;
            }
        }

        parser = new Parser(new Lexer(html));
        NodeList nodeList = parser.ExtractAllNodesThatMatch(
            new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "inside_table")));
        if (nodeList == null || nodeList.Count <= 0)
        {
            continue;
        }

        TableTag table = (TableTag)nodeList[0];

        // Row 0 is skipped (the loop starts at 1).
        for (int j = 1; j < table.RowCount; j++)
        {
            string code = string.Empty, endDate = string.Empty, remark = string.Empty;

            TableRow tr = table.Rows[j];

            // Columns 1..3 are read below; skip malformed rows instead of
            // letting an out-of-range index abort the whole crawl.
            if (tr.ColumnCount < 4)
            {
                continue;
            }

            string prjName = tr.Columns[1].ToPlainTextString().Trim();
            string buildUnit = tr.Columns[2].ToPlainTextString().Trim();
            string beginDate = tr.Columns[3].ToPlainTextString().Trim();
            string InfoUrl = "http://www.bajsjy.com/" + tr.Columns[1].GetATagHref();

            string htmldetail;
            try
            {
                // Header cells are rewritten as ordinary cells so they are seen as table columns.
                // NOTE(review): the last Replace removes the character between the quotes as it
                // appears in this file; confirm it is the intended one (e.g. a non-breaking space).
                htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).Replace("<th", "<td").Replace("</th>", "</td>").Replace(" ", "");
            }
            catch (Exception)
            {
                continue;
            }

            Parser parserdetail = new Parser(new Lexer(htmldetail));
            NodeList nodeDetailList = parserdetail.ExtractAllNodesThatMatch(
                new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "inside_table")));
            if (nodeDetailList == null || nodeDetailList.Count <= 0)
            {
                continue;
            }

            string HtmlTxt = nodeDetailList.AsHtml();
            string inviteCtx = BuildInviteContext((TableTag)nodeDetailList[0]);

            string prjAddress = regPrjAddr.Match(inviteCtx).Value.Replace("工程地址:", "").Trim();
            string oType = regoType.Match(inviteCtx).Value.Replace("工程类型:", "").Trim();
            string otherType = MapProjectCategory(oType);

            string msgType = "深圳市建设工程交易中心宝安分中心";
            string specType = "建设工程";
            string bidType = ToolHtml.GetInviteTypes(prjName);

            InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳宝安区工程", "宝安区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, bidType, specType, otherType, InfoUrl, HtmlTxt);
            list.Add(info);

            if (!crawlAll && list.Count >= this.MaxCount)
            {
                return list;
            }
        }
    }

    return list;
}

/// <summary>
/// Flattens a detail table into "label:value" lines, reading each row's cells as (label, value) pairs.
/// </summary>
private static string BuildInviteContext(TableTag detailTable)
{
    StringBuilder context = new StringBuilder();

    for (int r = 0; r < detailTable.RowCount; r++)
    {
        TableRow row = detailTable.Rows[r];

        // Cells are consumed two at a time. Requiring c + 1 < ColumnCount means a
        // trailing unpaired cell is ignored rather than indexing past the row's end.
        for (int c = 0; c + 1 < row.ColumnCount; c += 2)
        {
            string label = row.Columns[c].ToPlainTextString().Trim();
            string fieldValue = row.Columns[c + 1].ToPlainTextString().Trim();

            NodeList inputs = row.Columns[c + 1].SearchFor(typeof(InputTag), true);
            NodeList selects = row.Columns[c + 1].SearchFor(typeof(SelectTag), true);

            if (inputs != null && inputs.Count > 0)
            {
                if (inputs.Count > 1)
                {
                    // Several inputs in one cell: take the value of the input marked
                    // checked="checked" (the last one wins if several are checked).
                    for (int inp = 0; inp < inputs.Count; inp++)
                    {
                        InputTag candidate = (InputTag)inputs[inp];
                        if (candidate.GetAttribute("checked") == "checked")
                        {
                            fieldValue = candidate.GetAttribute("value");
                        }
                    }
                }
                else
                {
                    fieldValue = ((InputTag)inputs[0]).GetAttribute("value");
                }
            }

            if (selects != null && selects.Count > 0)
            {
                // A select element overrides any input value: use the text of its selected option(s).
                SelectTag selectTag = (SelectTag)selects[0];
                NodeList selectedOptions = new NodeList();
                selectTag.CollectInto(selectedOptions, new HasAttributeFilter("selected", "selected"));
                fieldValue = selectedOptions.AsString();
            }

            context.Append(label).Append(":").Append(fieldValue).Append("\r\n");
        }
    }

    return context.ToString();
}

/// <summary>
/// Maps the raw project-type text from the detail page to one of the fixed category names.
/// </summary>
private static string MapProjectCategory(string oType)
{
    string category = string.Empty;

    if (oType.Contains("房建"))
    {
        category = "房建及工业民用建筑";
    }
    else if (oType.Contains("市政"))
    {
        category = "市政工程";
    }
    else if (oType.Contains("园林绿化"))
    {
        category = "园林绿化工程";
    }
    else if (oType.Contains("装饰") || oType.Contains("装修"))
    {
        category = "装饰装修工程";
    }
    else if (oType.Contains("电力"))
    {
        category = "电力工程";
    }
    else if (oType.Contains("水利"))
    {
        category = "水利工程";
    }

    // NOTE(review): this check is a separate "if", not part of the chain above, so
    // "环保" overrides any earlier match. Kept as-is; confirm this is intended.
    if (oType.Contains("环保"))
    {
        category = "环保工程";
    }

    return category;
}
/// <summary>
/// Returns the list of qualifications. The list is read from the file at
/// <c>ToolFile.WebQualPath</c>; when that yields nothing it is rebuilt by paging
/// through the remote qualification listing, then sorted by <c>QualCode</c> and
/// written back to the same file.
/// </summary>
/// <returns>
/// The cached list when it is non-empty; otherwise the freshly downloaded list
/// (which may be empty if the site could not be read).
/// </returns>
protected List<QualInfo> GetQual()
{
    List<QualInfo> quals = ToolFile.Deserialize<QualInfo>(ToolFile.WebQualPath);
    if (quals != null && quals.Count >= 1)
    {
        return quals;
    }

    quals = new List<QualInfo>();

    // Must match the "$pgsz" value posted below.
    const int pageSize = 10;
    int pageInt = 1;
    int totalPage = 0;
    string url = "http://jzsc.mohurd.gov.cn/asite/qualapt/aptData?apt_type=";
    string html = string.Empty;

    try
    {
        html = ToolWeb.GetHtmlByUrl(url);
    }
    catch (Exception ex)
    {
        // Best effort: continue with empty HTML so an empty list is produced.
        Logger.Error(ex.ToString());
    }

    Parser parser = new Parser(new Lexer(html));
    NodeList pageNode = parser.ExtractAllNodesThatMatch(
        new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "clearfix")));
    if (pageNode != null && pageNode.Count > 0)
    {
        try
        {
            // NOTE(review): as written this replaces a comma with itself; the first
            // argument may originally have been a different character - confirm.
            string temp = pageNode.AsString().Replace(",", ",");
            string page = temp.GetRegexBegEnd("total", ",").GetReplace("\":");
            totalPage = int.Parse(page);

            // Ceiling division: "total / 10 + 1" asked for one page too many whenever
            // the total was an exact multiple of the page size. The first page is
            // always processed, hence the lower bound of 1.
            pageInt = Math.Max(1, (totalPage + pageSize - 1) / pageSize);
        }
        catch
        {
            // Best effort: if the total cannot be parsed, only the first page is read.
        }
    }

    // One serializer is enough for all rows.
    JavaScriptSerializer serializer = new JavaScriptSerializer();

    for (int i = 1; i <= pageInt; i++)
    {
        bool pageLoaded = true;

        if (i > 1)
        {
            NameValueCollection nvc = ToolWeb.GetNameValueCollection(
                new string[] { "$total", "$reload", "$pg", "$pgsz" },
                new string[] { totalPage.ToString(), "0", i.ToString(), pageSize.ToString() });
            try
            {
                html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
            }
            catch (Exception ex)
            {
                // Skip parsing for this page: "html" still holds the previous page,
                // and re-parsing it would add every one of its rows a second time.
                pageLoaded = false;
                Logger.Error(i);
                Logger.Error(ex.ToString());
            }
        }

        if (pageLoaded)
        {
            parser = new Parser(new Lexer(html));
            NodeList listNode = parser.ExtractAllNodesThatMatch(
                new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "table_box")));

            // The "as" cast yields null for a non-table node; guard it before use.
            TableTag table = (listNode != null && listNode.Count > 0) ? listNode[0] as TableTag : null;
            if (table != null)
            {
                // The last row of the table is not read (loop bound is RowCount - 1).
                for (int j = 0; j < table.RowCount - 1; j++)
                {
                    TableRow tr = table.Rows[j];
                    string rowHtml = tr.ToHtml();
                    parser = new Parser(new Lexer(rowHtml));
                    try
                    {
                        // The row's first input element carries a JSON object in its value attribute.
                        NodeList input = parser.ExtractAllNodesThatMatch(new TagNameFilter("input"));
                        InputTag tag = input[0] as InputTag;
                        string json = tag.GetAttribute("value");
                        Dictionary<string, object> smsTypeJson = (Dictionary<string, object>)serializer.DeserializeObject(json);

                        QualInfo info = new QualInfo();
                        info.QualCode = Convert.ToString(smsTypeJson["apt_code"]);
                        info.QualName = Convert.ToString(smsTypeJson["apt_scope"]);
                        quals.Add(info);
                    }
                    catch (Exception ex)
                    {
                        // Rows that cannot be read are logged (page number, row HTML,
                        // and now the exception itself) and skipped.
                        Logger.Error(i);
                        Logger.Error(rowHtml);
                        Logger.Error(ex.ToString());
                    }
                }
            }
        }

        // Pause one second between pages.
        Thread.Sleep(1000 * 1);
    }

    quals = quals.OrderBy(x => x.QualCode).ToList();
    ToolFile.Serialize<QualInfo>(quals, ToolFile.WebQualPath);
    return quals;
}