Exemplo n.º 1
0
        /// <summary>
        /// Checks whether the given page contains the verification-code input box.
        /// </summary>
        /// <param name="html">Raw HTML of the page to inspect; null or empty yields <c>false</c>.</param>
        /// <returns>
        /// <c>true</c> when the first <c>input</c> element with <c>id="code_input"</c> has the
        /// placeholder text "请输入验证码"; otherwise <c>false</c>.
        /// </returns>
        protected bool IsVailCode(string html)
        {
            if (string.IsNullOrEmpty(html))
            {
                return(false);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("input"), new HasAttributeFilter("id", "code_input")));

            if (nodeList == null || nodeList.Count <= 0)
            {
                return(false);
            }

            // "as" yields null when the matched node is not an InputTag; treat that as
            // "no verification box" instead of failing with a NullReferenceException.
            InputTag input = nodeList[0] as InputTag;
            if (input == null)
            {
                return(false);
            }

            return(input.GetAttribute("placeholder") == "请输入验证码");
        }
Exemplo n.º 2
0
        /// <summary>
        /// Reads the <c>__EVENTVALIDATION</c> value from an ASP.NET page.
        /// </summary>
        /// <param name="parser">Parser for the page; it is reset before the search.</param>
        /// <returns>
        /// The <c>value</c> attribute of the first <c>input</c> named <c>__EVENTVALIDATION</c>,
        /// or an empty string when no such input is found.
        /// </returns>
        public static string GetAspNetEventValidation(Parser parser)
        {
            parser.Reset();

            AndFilter eventValidationFilter = new AndFilter(
                new TagNameFilter("input"),
                new HasAttributeFilter("name", "__EVENTVALIDATION"));
            NodeList matches = parser.ExtractAllNodesThatMatch(eventValidationFilter);

            // Nothing matched: report the empty default.
            if (matches == null || matches.Count <= 0)
            {
                return string.Empty;
            }

            InputTag hiddenField = (InputTag)matches[0];
            return hiddenField.GetAttribute("value");
        }
Exemplo n.º 3
0
        /// <summary>
        /// Reads the <c>__VIEWSTATE</c> value from an ASP.NET page.
        /// </summary>
        /// <param name="parser">Parser for the page; it is reset before the search.</param>
        /// <returns>
        /// The <c>value</c> attribute of the first <c>input</c> named <c>__VIEWSTATE</c>,
        /// or an empty string when no such input is found.
        /// </returns>
        public static string GetAspNetViewState(Parser parser)
        {
            parser.Reset();

            AndFilter viewStateFilter = new AndFilter(
                new TagNameFilter("input"),
                new HasAttributeFilter("name", "__VIEWSTATE"));
            NodeList matches = parser.ExtractAllNodesThatMatch(viewStateFilter);

            // Nothing matched: report the empty default.
            if (matches == null || matches.Count <= 0)
            {
                return string.Empty;
            }

            InputTag hiddenField = (InputTag)matches[0];
            return hiddenField.GetAttribute("value");
        }
Exemplo n.º 4
0
        /// <summary>
        /// Crawls the paged tender list at <c>SiteUrl</c>, opens the detail page linked from every
        /// list row and turns each one into an <c>InviteInfo</c> record.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl returns as soon as <c>MaxCount</c> records have been collected.
        /// </param>
        /// <returns>
        /// The records collected so far; an empty list when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <InviteInfo>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception)
            {
                // Without the first list page there is nothing to crawl.
                return(list);
            }

            // The pager text presumably carries the page count between "项" and "页" — confirm against the site.
            Parser   parser  = new Parser(new Lexer(html));
            NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("class", "input-group-addon")));

            if (tdNodes != null && tdNodes.Count > 0)
            {
                try
                {
                    string reTemp   = tdNodes.AsString().GetRegexBegEnd("共", "项");
                    string pageTemp = tdNodes.AsString().GetRegexBegEnd("项", "页").GetReplace("共,项,页," + reTemp + ",,");
                    pageInt = int.Parse(pageTemp);
                }
                catch (Exception)
                {
                    // Best effort: when the pager cannot be parsed only the first page is crawled.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // The "pi" query parameter is zero-based, hence i - 1.
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl + "?pi=" + (i - 1), Encoding.UTF8);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "inside_table")));
                if (nodeList == null || nodeList.Count <= 0)
                {
                    continue;
                }

                TableTag table = (TableTag)nodeList[0];

                // Row 0 is skipped (header row).
                for (int j = 1; j < table.RowCount; j++)
                {
                    string   code = string.Empty, buildUnit = string.Empty, prjName = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty, bidType = string.Empty, specType = string.Empty, beginDate = string.Empty, endDate = string.Empty, remark = string.Empty, InfoUrl = string.Empty, msgType = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;
                    TableRow tr = table.Rows[j];

                    // Columns 1..3 are read below; skip malformed rows instead of failing the whole crawl.
                    if (tr.ColumnCount < 4)
                    {
                        continue;
                    }

                    prjName   = tr.Columns[1].ToPlainTextString().Trim();
                    buildUnit = tr.Columns[2].ToPlainTextString().Trim();
                    beginDate = tr.Columns[3].ToPlainTextString().Trim();
                    InfoUrl   = "http://www.bajsjy.com/" + tr.Columns[1].GetATagHref();

                    string htmldetail = string.Empty;
                    try
                    {
                        // Header cells are rewritten to <td> so they show up as regular table columns.
                        htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).Replace("<th", "<td").Replace("</th>", "</td>").Replace("&nbsp;", "");
                    }
                    catch (Exception)
                    {
                        continue;
                    }

                    Parser   parserdetail   = new Parser(new Lexer(htmldetail));
                    NodeList nodeDetailList = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "inside_table")));
                    if (nodeDetailList == null || nodeDetailList.Count <= 0)
                    {
                        continue;
                    }

                    HtmlTxt = nodeDetailList.AsHtml();
                    TableTag tabledetail = (TableTag)nodeDetailList[0];

                    for (int r = 0; r < tabledetail.RowCount; r++)
                    {
                        TableRow trdetail = tabledetail.Rows[r];

                        // Cells are consumed as label/value pairs. Requiring c + 1 < ColumnCount keeps the
                        // value index in range when a row has an odd number of cells (the trailing cell
                        // has no partner and is ignored); rows with fewer than two cells contribute nothing.
                        for (int c = 0; c + 1 < trdetail.ColumnCount; c += 2)
                        {
                            string tr1 = trdetail.Columns[c].ToPlainTextString().Trim();
                            string tr2 = trdetail.Columns[c + 1].ToPlainTextString().Trim();

                            NodeList inptList = trdetail.Columns[c + 1].SearchFor(typeof(InputTag), true);
                            NodeList selList  = trdetail.Columns[c + 1].SearchFor(typeof(SelectTag), true);

                            if (inptList != null && inptList.Count > 0)
                            {
                                if (inptList.Count > 1)
                                {
                                    // Several inputs in one cell: take the value of the checked one
                                    // (the last checked input wins).
                                    for (int inp = 0; inp < inptList.Count; inp++)
                                    {
                                        InputTag inputTage = (InputTag)inptList[inp];
                                        if (inputTage.GetAttribute("checked") == "checked")
                                        {
                                            tr2 = inputTage.GetAttribute("value");
                                        }
                                    }
                                }
                                else
                                {
                                    InputTag inputTage = (InputTag)inptList[0];
                                    tr2 = inputTage.GetAttribute("value");
                                }
                            }

                            if (selList != null && selList.Count > 0)
                            {
                                // A select overrides the cell text with its selected option(s).
                                SelectTag selTag = (SelectTag)selList[0];
                                NodeList  opList = new NodeList();
                                selTag.CollectInto(opList, new HasAttributeFilter("selected", "selected"));
                                tr2 = opList.AsString();
                            }

                            inviteCtx += tr1 + ":" + tr2 + "\r\n";
                        }
                    }

                    prjAddress = Regex.Match(inviteCtx, @"工程地址:[^\r\n]+\r\n").Value.Replace("工程地址:", "").Trim();
                    string oType = Regex.Match(inviteCtx, @"工程类型:[^\r\n]+\r\n").Value.Replace("工程类型:", "").Trim();

                    if (oType.Contains("房建"))
                    {
                        otherType = "房建及工业民用建筑";
                    }
                    else if (oType.Contains("市政"))
                    {
                        otherType = "市政工程";
                    }
                    else if (oType.Contains("园林绿化"))
                    {
                        otherType = "园林绿化工程";
                    }
                    else if (oType.Contains("装饰") || oType.Contains("装修"))
                    {
                        otherType = "装饰装修工程";
                    }
                    else if (oType.Contains("电力"))
                    {
                        otherType = "电力工程";
                    }
                    else if (oType.Contains("水利"))
                    {
                        otherType = "水利工程";
                    }
                    // NOTE(review): this is a standalone "if", so "环保" overrides any category chosen
                    // above. Kept as in the original; confirm whether "else if" was intended.
                    if (oType.Contains("环保"))
                    {
                        otherType = "环保工程";
                    }

                    msgType  = "深圳市建设工程交易中心宝安分中心";
                    specType = "建设工程";
                    bidType  = ToolHtml.GetInviteTypes(prjName);
                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳宝安区工程", "宝安区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, bidType, specType, otherType, InfoUrl, HtmlTxt);
                    list.Add(info);

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Returns the qualification list, read from the local cache file when it has entries and
        /// otherwise crawled page by page from the remote site and written back to the cache.
        /// </summary>
        /// <returns>
        /// The cached list when it is non-empty; otherwise the freshly crawled list ordered by
        /// <c>QualCode</c> (possibly empty when nothing could be downloaded or parsed).
        /// </returns>
        protected List <QualInfo> GetQual()
        {
            const int pageSize = 10;

            List <QualInfo> quals = ToolFile.Deserialize <QualInfo>(ToolFile.WebQualPath);

            if (quals != null && quals.Count >= 1)
            {
                return(quals);
            }

            quals = new List <QualInfo>();
            int    pageInt   = 1;
            int    totalPage = 0;
            string url       = "http://jzsc.mohurd.gov.cn/asite/qualapt/aptData?apt_type=";
            string html      = string.Empty;
            try
            {
                html = ToolWeb.GetHtmlByUrl(url);
            }
            catch
            {
                // Best effort: an empty first page simply yields no entries below.
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "clearfix")));
            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // NOTE(review): as written this Replace swaps a comma for the same comma;
                    // presumably one side was a full-width comma originally — confirm.
                    string temp = pageNode.AsString().Replace(",", ",");
                    string page = temp.GetRegexBegEnd("total", ",").GetReplace("\":");
                    totalPage = int.Parse(page);

                    // "total" is posted back as $total next to $pgsz, so it is treated as the entry
                    // count. Ceiling division avoids requesting an extra page when the total is an
                    // exact multiple of the page size; the first page is always processed.
                    pageInt = Math.Max(1, (totalPage + pageSize - 1) / pageSize);
                }
                catch
                {
                    // Best effort: when the pager cannot be parsed only the first page is read.
                }
            }

            JavaScriptSerializer serializer = new JavaScriptSerializer();

            for (int i = 1; i <= pageInt; i++)
            {
                bool pageLoaded = true;
                if (i > 1)
                {
                    NameValueCollection nvc = ToolWeb.GetNameValueCollection(new string[] {
                        "$total",
                        "$reload",
                        "$pg",
                        "$pgsz"
                    },
                                                                             new string[] {
                        totalPage.ToString(),
                        "0",
                        i.ToString(),
                        pageSize.ToString()
                    });
                    try
                    {
                        html = ToolWeb.GetHtmlByUrl(url, nvc, Encoding.UTF8);
                    }
                    catch
                    {
                        // Skip this page: "html" still holds the previous page, and parsing it
                        // again would add that page's entries a second time.
                        pageLoaded = false;
                        Logger.Error(i);
                    }
                }

                if (pageLoaded)
                {
                    parser = new Parser(new Lexer(html));
                    NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "table_box")));
                    if (listNode != null && listNode.Count > 0)
                    {
                        TableTag table = listNode[0] as TableTag;
                        if (table != null)
                        {
                            // NOTE(review): the last row is not read — presumably a pager/footer row; confirm.
                            for (int j = 0; j < table.RowCount - 1; j++)
                            {
                                TableRow tr = table.Rows[j];
                                try
                                {
                                    // Each row carries its data as JSON in the value of its first <input>.
                                    Parser   rowParser = new Parser(new Lexer(tr.ToHtml()));
                                    NodeList input     = rowParser.ExtractAllNodesThatMatch(new TagNameFilter("input"));
                                    InputTag tag       = input[0] as InputTag;
                                    string   json      = tag.GetAttribute("value");
                                    Dictionary <string, object> smsTypeJson = (Dictionary <string, object>)serializer.DeserializeObject(json);
                                    QualInfo info = new QualInfo();
                                    info.QualCode = Convert.ToString(smsTypeJson["apt_code"]);
                                    info.QualName = Convert.ToString(smsTypeJson["apt_scope"]);
                                    quals.Add(info);
                                }
                                catch (Exception)
                                {
                                    // Rows without a usable input (e.g. header rows) end up here;
                                    // log the page number and the row markup, then move on.
                                    Logger.Error(i);
                                    Logger.Error(tr.ToHtml());
                                }
                            }
                        }
                    }
                }

                // Throttle requests to the remote site.
                Thread.Sleep(1000 * 1);
            }

            quals = quals.OrderBy(x => x.QualCode).ToList();
            ToolFile.Serialize <QualInfo>(quals, ToolFile.WebQualPath);

            return(quals);
        }