コード例 #1
0
        /// <summary>
        /// Appends a textual dump of a table row's cells to <c>parseResult</c>: the non-empty TH cells when
        /// the row has headers, otherwise the non-empty TD cells found among the row's child nodes.
        /// </summary>
        /// <param name="node">Node passed to <c>getTagRow</c> to obtain the table row.</param>
        private void parserTR(INode node)
        {
            hParser.TableRow tagTR = getTagRow(node);
            if (tagTR == null)
            {
                return;
            }

            // Read each collection once instead of re-evaluating the property on every loop iteration.
            var headers     = tagTR.Headers;
            int headerCount = headers == null ? 0 : headers.Count();

            if (headerCount > 0)
            {
                for (int i = 0; i < headerCount; i++)
                {
                    // "as" yields null for an element that is not a TableHeader; skip it rather than dereference null.
                    var header = headers[i] as hParser.TableHeader; // th

                    if (header != null && header.TagName == "TH" && !string.IsNullOrEmpty(header.StringText))
                    {
                        parseResult += header.TagName + ":\r\nStringText:" + header.StringText + " ChildrenHTML:" + header.ChildrenHTML
                                       + " StartPosition:" + header.StartPosition.ToString() + " EndPosition:" + header.EndPosition.ToString() + "\r\n";
                    }
                }
                // Rows with headers never fall through to the TD dump.
                return;
            }

            // The TD cells are child nodes of the row.
            var children = tagTR.ChildrenAsNodeArray;
            if (children == null)
            {
                return;
            }

            int childCount = children.Count();
            for (int i = 0; i < childCount; i++)
            {
                var colum = children[i] as hParser.TableColumn; // td

                if (colum != null && colum.TagName == "TD" && !string.IsNullOrEmpty(colum.StringText) && colum.StringText != "\n")
                {
                    parseResult += colum.TagName + ":\r\nStringText:" + colum.StringText + " ChildrenHTML:" + colum.ChildrenHTML
                                   + " StartPosition:" + colum.StartPosition.ToString() + " EndPosition:" + colum.EndPosition.ToString() + "\r\n";
                }
            }
        }
コード例 #2
0
		/// <summary>
		/// Initializes a new <see cref="RowData"/> from a table row tag and the table object it is associated with.
		/// </summary>
		/// <param name="trow">Row tag forwarded to the base constructor and then to <c>ConvertFromTag</c>.</param>
		/// <param name="obTable">Table object stored in <c>m_Table</c>.</param>
		public RowData(TableRow trow, TableData obTable)
			: base(trow)
		{
			// Both fields are assigned before the tag is converted.
			m_Table   = obTable;
			m_Columns = new ColumnDataCollection();

			ConvertFromTag(trow);
		}
コード例 #3
0
        /// <summary>
        /// Crawls the listing page at <c>SiteUrl</c>. For every data row of the "GridView1" table it posts back
        /// the target taken from the link in the row's ninth column, then calls <c>Save</c> for the values
        /// 1 to 14 (7 is skipped) with the HTML returned by that post-back.
        /// When more than 100 records were counted in <c>sqlcount</c>, records created before today are
        /// flagged with IsNew='0'.
        /// </summary>
        /// <param name="crawlAll">Forwarded unchanged to <c>Save</c>.</param>
        /// <returns>The list instance that was passed to <c>Save</c>.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new ArrayList();
            string html = string.Empty;

            try
            {
                html = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception ex)
            {
                Logger.Error(ex.ToString());
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "GridView1"), new TagNameFilter("table")));

            TableTag table = (nodeList != null && nodeList.Count > 0) ? nodeList[0] as TableTag : null;
            if (table != null)
            {
                // The listing HTML never changes inside the loop, so the ASP.NET state fields are read once.
                string view = ToolWeb.GetAspNetViewState(html);
                string even = ToolWeb.GetAspNetEventValidation(html);

                // Row 0 is skipped (loop starts at 1).
                for (int i = 1; i < table.RowCount; i++)
                {
                    Winista.Text.HtmlParser.Tags.TableRow tr = table.Rows[i];

                    // Skip rows that do not have a ninth column or carry no link there,
                    // instead of letting one malformed row abort the whole crawl.
                    if (tr.ColumnCount <= 8)
                    {
                        continue;
                    }
                    NodeList links = tr.Columns[8].SearchFor(typeof(ATag), true);
                    ATag     alink = (links != null && links.Count > 0) ? links[0] as ATag : null;
                    if (alink == null || alink.Link == null)
                    {
                        continue;
                    }

                    string alin             = alink.Link.Replace("__doPostBack('", "").Replace("','')", "");
                    NameValueCollection nvc = ToolWeb.GetNameValueCollection(new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "GridViewPaging1$txtGridViewPagingForwardTo", "__VIEWSTATEENCRYPTED", "__EVENTVALIDATION" },
                                                                             new string[] { alin, "", view, "1", "", even });
                    string cookies = string.Empty;

                    // Declared per row so that a failed request cannot leak the previous row's HTML into Save.
                    string bidhtml = string.Empty;
                    try
                    {
                        bidhtml = ToolWeb.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8, ref cookies);
                    }
                    catch (Exception ex)
                    {
                        Logger.Error(ex.ToString());
                    }

                    for (int l = 1; l <= 14; l++)
                    {
                        if (l == 7)
                        {
                            continue;
                        }
                        Save(l, bidhtml, list, crawlAll);
                    }
                }
            }
            if (sqlcount > 100)
            {
                string sql = string.Format("update CorpCreditjd set IsNew='0' where CreateTime<'{0}'", DateTime.Now.ToString("yyyy-MM-dd"));
                ToolDb.ExecuteSql(sql);
            }
            return(list);
        }
コード例 #4
0
        /// <summary>
        /// Crawls the paged listing at <c>SiteUrl</c>. For every data row of the "dgConstBid" table it downloads
        /// the detail page referenced by the link in the third column, extracts the project fields from the
        /// detail page's "Table8" table and adds the resulting <c>BaseProject</c> to the returned list.
        /// </summary>
        /// <param name="crawlAll">When false, crawling stops once the list holds <c>MaxCount</c> entries.</param>
        /// <returns>
        /// The collected projects. The list is empty when the first page cannot be downloaded or the page
        /// count cannot be parsed.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            string htl             = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    page            = 1;
            string eventValidation = string.Empty;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.UTF8, ref cookiestr);
            }
            catch (Exception)
            {
                return(list);
            }
            Parser   parser  = new Parser(new Lexer(htl));
            NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "PageNumControl1_lbltotal")));

            if (tdNodes != null && tdNodes.Count > 0)
            {
                // An unparsable page total ends the crawl with whatever has been collected (nothing yet).
                if (!int.TryParse(tdNodes[0].ToPlainTextString().Trim(), out page))
                {
                    return(list);
                }
            }
            for (int i = 1; i <= page; i++)
            {
                if (i > 1)
                {
                    // Later pages are requested by posting the "next page" button with the
                    // state fields taken from the page currently held in htl.
                    viewState       = this.ToolWebSite.GetAspNetViewState(htl);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(htl);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__LASTFOCUS",
                        "__VIEWSTATE",
                        "txtPrj_ID",
                        "txtPrj_Name", "Chk_Query", "Radiobuttonlist1", "PageNumControl1$gotopage", "PageNumControl1$NEXTpage", "__EVENTVALIDATION",
                    }, new string[] {
                        string.Empty,
                        string.Empty,
                        string.Empty,
                        viewState,
                        string.Empty,
                        string.Empty,
                        "0", "0", "", "下一页", eventValidation
                    });
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                    }
                    catch (Exception)
                    {
                        continue;
                    }
                }
                parser = new Parser(new Lexer(htl));
                NodeList tableNodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "dgConstBid")));
                TableTag table         = (tableNodeList != null && tableNodeList.Count > 0) ? tableNodeList[0] as TableTag : null;
                if (table == null)
                {
                    continue;
                }

                // Row 0 is skipped (loop starts at 1).
                for (int j = 1; j < table.RowCount; j++)
                {
                    string pInfoSource = string.Empty, pBeginDate = string.Empty, pBuilTime = string.Empty, pEndDate = string.Empty,
                           pConstUnit = string.Empty, pSuperUnit = string.Empty, pDesignUnit = string.Empty, pProspUnit = string.Empty,
                           pInviteArea = string.Empty, pBuildArea = string.Empty, pPrjClass = string.Empty, pProClassLevel = string.Empty,
                           pChargeDept = string.Empty, pPrjAddress = string.Empty, pPrjCode = string.Empty, pCreatetime = string.Empty;
                    Winista.Text.HtmlParser.Tags.TableRow tr = table.Rows[j];

                    // Columns 2..4 are read below; skip rows that are too short or have no detail link
                    // rather than aborting the crawl with an index or null-reference error.
                    if (tr.ColumnCount < 5)
                    {
                        continue;
                    }
                    NodeList links = tr.Columns[2].SearchFor(typeof(ATag), true);
                    ATag     aTag  = (links != null && links.Count > 0) ? links[0] as ATag : null;
                    if (aTag == null || aTag.Link == null)
                    {
                        continue;
                    }

                    string PrjName    = tr.Columns[3].ToPlainTextString().Trim();
                    string pBuildUnit = tr.Columns[4].ToPlainTextString().Trim();
                    string link       = aTag.Link.Replace("GoDetail('", "").Replace("')", "").Replace(";", "");
                    string pUrl       = "http://www.cb.gov.cn/sgxk/Details.aspx?NID=" + link;// +"&xxlxbh=&PRJ_TYPE=0";
                    string htmldetail = string.Empty;
                    try
                    {
                        htmldetail = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(pUrl), Encoding.UTF8).Replace("<br>", "\r\n");
                    }
                    catch (Exception)
                    {
                        continue;
                    }
                    Parser   parserdetail = new Parser(new Lexer(htmldetail));
                    NodeList dtnode       = parserdetail.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "Table8")));
                    TableTag tableTwo     = (dtnode != null && dtnode.Count > 0) ? dtnode[0] as TableTag : null;
                    if (tableTwo == null)
                    {
                        continue;
                    }

                    // ctx: all cell texts with spaces removed, one table row per line.
                    // dateStr: text of the most recently visited cell, spaces preserved.
                    // NOTE(review): dateStr is overwritten per cell, so the date lookups below only see the
                    // last cell of the table — confirm that this is intended.
                    StringBuilder ctxBuilder = new StringBuilder();
                    string        dateStr    = string.Empty;
                    for (int k = 0; k < tableTwo.RowCount; k++)
                    {
                        Winista.Text.HtmlParser.Tags.TableRow trTwo = tableTwo.Rows[k];
                        for (int z = 0; z < trTwo.ColumnCount; z++)
                        {
                            // Convert the cell once and derive both variants from the same text.
                            string cellText = trTwo.Columns[z].ToPlainTextString().Replace("\t", "").Replace("<br>", "\r\n").Replace("&nbsp;", "").Replace("<br/>", "\r\n");
                            dateStr = cellText.Trim();
                            ctxBuilder.Append(cellText.Replace(" ", "").Trim());
                        }
                        ctxBuilder.Append("\r\n");
                    }
                    string ctx = ctxBuilder.ToString();

                    pInfoSource = dtnode.AsString().Replace("&nbsp;", "").Replace("\t", "").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Trim();

                    pPrjCode = ctx.GetRegex("工程序号");

                    // NOTE(review): the two colon alternatives in the patterns and Replace calls below are
                    // identical as written; one was possibly meant to be a full-width colon — confirm.
                    Regex regPrjAddr = new Regex(@"(工程地点|工程地址)(:|:)[^\r\n]+\r\n");
                    pPrjAddress = regPrjAddr.Match(ctx).Value.Replace("工程地址", "").Replace("工程地点", "").Replace(":", "").Replace(":", "").Trim();

                    Regex regChargeDept = new Regex(@"主管部门(:|:)[^\r\n]+\r\n");
                    pChargeDept = regChargeDept.Match(ctx).Value.Replace("主管部门", "").Replace(":", "").Replace(":", "").Trim();

                    Regex regProClassLevel = new Regex(@"工程类别等级(:|:)[^\r\n]+\r\n");
                    pProClassLevel = regProClassLevel.Match(ctx).Value.Replace("工程类别等级", "").Replace(":", "").Replace(":", "").Trim();

                    Regex regPrjClass = new Regex(@"(工程类型|工程类别)(:|:)[^\r\n]+\r\n");
                    pPrjClass = regPrjClass.Match(ctx).Value.Replace("工程类别", "").Replace("工程类型", "").Replace(":", "").Replace(":", "").Trim();

                    Regex regInviteArea = new Regex(@"(招标面积|本次招标面积)(:|:)[^\r\n]+\r\n");
                    pInviteArea = regInviteArea.Match(ctx).Value.Replace("本次招标面积", "").Replace("招标面积", "").Replace(":", "").Replace(":", "").Trim();

                    Regex regBuildArea = new Regex(@"建筑总面积(:|:)[^\r\n]+\r\n");
                    pBuildArea = regBuildArea.Match(ctx).Value.Replace("建筑总面积", "").Replace(":", "").Replace(":", "").Trim();

                    // A lone "/" in a unit field is stored as an empty value.
                    pConstUnit = ctx.GetRegex("施工单位");
                    if (pConstUnit == "/")
                    {
                        pConstUnit = string.Empty;
                    }
                    pSuperUnit = ctx.GetRegex("监理单位");
                    if (pSuperUnit == "/")
                    {
                        pSuperUnit = string.Empty;
                    }
                    pDesignUnit = ctx.GetRegex("设计单位");
                    if (pDesignUnit == "/")
                    {
                        pDesignUnit = string.Empty;
                    }
                    pProspUnit = ctx.GetRegex("勘察单位");
                    if (pProspUnit == "/")
                    {
                        pProspUnit = string.Empty;
                    }

                    pBeginDate = dateStr.GetRegex("计划开工日期").GetDateRegex();
                    pEndDate   = dateStr.GetRegex("计划竣工日期").GetDateRegex();

                    BaseProject info = ToolDb.GenBaseProject("广东省", pUrl, "深圳市龙岗区", pInfoSource, pBuilTime, pBeginDate, pEndDate, pConstUnit, pSuperUnit, pDesignUnit, pProspUnit, pInviteArea,
                                                             pBuildArea, pPrjClass, pProClassLevel, pChargeDept, pPrjAddress, pBuildUnit, pPrjCode, PrjName, pCreatetime, "深圳市龙岗区住房和建设局");

                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
コード例 #5
0
ファイル: CreditAppraiseSZ.cs プロジェクト: SHNXJMG/Small
        /// <summary>
        /// Collects the "GoodAction" records from the paged "GridView1" table at <c>SiteUrl</c> and appends
        /// one <c>CreditAppraise</c> per data row to <paramref name="list"/>.
        /// </summary>
        /// <param name="crawlAll">When false, the method returns once <c>MaxCount</c> records were added by this call.</param>
        /// <param name="list">List that receives the generated records; the same instance is returned.</param>
        /// <returns><paramref name="list"/>, with the records found appended.</returns>
        private IList GetLianghao(bool crawlAll, IList list)
        {
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;
            int    pageInt         = 1;
            int    pageCount       = 1;
            int    count           = 0;

            try
            {
                html = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception ex)
            {
                Logger.Error(ex);
            }
            Parser parser = new Parser(new Lexer(html));

            viewState       = ToolWeb.GetAspNetViewState(parser);
            eventValidation = ToolWeb.GetAspNetEventValidation(parser);
            NameValueCollection nvc = ToolWeb.GetNameValueCollection(new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "__VIEWSTATEENCRYPTED",
                                                                                    "__EVENTVALIDATION", "queryWhereAction", "queryWhereType", "queryWhere", "txtquery", "GridViewPaging1$txtGridViewPagingForwardTo" },
                                                                     new string[] { "GoodAction", "", viewState, "", eventValidation, "GoodAction", "", "", "", "1" });

            try
            {
                html = ToolWeb.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8);
            }
            catch (Exception ex)
            {
                // Best effort: keep working with the HTML of the initial request, but leave a trace.
                Logger.Error(ex);
            }
            parser = new Parser(new Lexer(html));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "GridViewPaging1_lblGridViewPagingDesc")));

            if (nodeList != null && nodeList.Count > 0)
            {
                // The paging label contains the total as "共N页"; pageInt stays 1 when it cannot be read.
                Regex reg = new Regex(@"共\d+页");
                int   parsedPages;
                if (int.TryParse(reg.Match(nodeList.AsString()).Value.Trim(new char[] { '共', '页' }), out parsedPages))
                {
                    pageInt = parsedPages;
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                // Throttle: pause for 300 seconds each time the counter passes 10, then restart it.
                pageCount++;
                if (pageCount > 10)
                {
                    pageCount = 1;
                    Thread.Sleep(300 * 1000);
                }
                if (i > 1)
                {
                    viewState       = ToolWeb.GetAspNetViewState(html);
                    eventValidation = ToolWeb.GetAspNetEventValidation(html);
                    NameValueCollection nvc1 = ToolWeb.GetNameValueCollection(new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "__VIEWSTATEENCRYPTED",
                                                                                             "__EVENTVALIDATION", "queryWhereAction", "queryWhereType", "queryWhere", "txtquery", "GridViewPaging1$txtGridViewPagingForwardTo", "GridViewPaging1$btnForwardToPage" },
                                                                              new string[] { "", "", viewState, "", eventValidation, "GoodAction", "", "", "", i.ToString(), "GO" });
                    try
                    {
                        html = ToolWeb.GetHtmlByUrl(SiteUrl, nvc1, Encoding.UTF8);
                    }
                    catch (Exception ex)
                    {
                        // html still holds the previous page; parsing it again would add every record twice.
                        Logger.Error(ex);
                        continue;
                    }
                }
                parser = new Parser(new Lexer(html));
                NodeList dtList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "GridView1")));
                TableTag table  = (dtList != null && dtList.Count > 0) ? dtList[0] as TableTag : null;
                if (table == null)
                {
                    continue;
                }

                // Row 0 is skipped (loop starts at 1).
                for (int j = 1; j < table.RowCount; j++)
                {
                    Winista.Text.HtmlParser.Tags.TableRow tr = table.Rows[j];

                    // Columns 0..5 are read below; skip rows that do not have them.
                    if (tr.ColumnCount < 6)
                    {
                        continue;
                    }

                    string corpName       = tr.Columns[0].ToPlainTextString().Trim();
                    string projectName    = tr.Columns[1].ToPlainTextString().Trim();
                    string targetCode     = tr.Columns[2].ToPlainTextString().Trim();
                    string targetDesc     = tr.Columns[3].ToPlainTextString().Trim();
                    string targetClass    = tr.Columns[4].ToPlainTextString().Trim();
                    string actionDateTime = tr.Columns[5].ToPlainTextString().Trim();
                    string actionType     = "良好行为";
                    string province       = "广东省";
                    string city           = "深圳市";
                    string infoSource     = "深圳市住房和建设局";
                    string url            = SiteUrl;

                    // Descriptions longer than 200 bytes in the default encoding are dropped entirely.
                    if (Encoding.Default.GetByteCount(targetDesc) > 200)
                    {
                        targetDesc = string.Empty;
                    }
                    CreditAppraise info = ToolDb.GenCreditAppraise(corpName, projectName, targetCode, targetDesc, targetClass, actionDateTime, actionType, province, city, infoSource, url);
                    list.Add(info);
                    count++;
                    if (!crawlAll && count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
コード例 #6
0
        /// <summary>
        /// Queries the score page once per class value ("A" .. "C-") for the category selected by
        /// <paramref name="l"/>, walks every result page and stores one <c>CorpCreditjd</c> per data row of
        /// the "GridView1" table through <c>ToolDb.SaveEntity</c>. The fields <c>count</c> and
        /// <c>sqlcount</c> are incremented for every stored row.
        /// </summary>
        /// <param name="l">Value posted as "DropDownList2"; values above 13 are sent as "999999999".</param>
        /// <param name="bidhtml">HTML stored with every generated record.</param>
        /// <param name="list">Not used by this method.</param>
        /// <param name="crawlAll">Not used by this method.</param>
        private void Save(int l, string bidhtml, IList list, bool crawlAll)
        {
            string Url         = "http://61.144.226.2:8008/JDScore.aspx?clearPaging=true&guid=450845";
            string htl         = string.Empty;
            string cookiedtstr = string.Empty;

            try
            {
                htl = ToolWeb.GetHtmlByUrl(Url, Encoding.UTF8, ref cookiedtstr);
            }
            catch (Exception ex)
            {
                Logger.Error(ex.ToString());
            }

            string[] classLen = new string[] { "A", "A-", "B", "B-", "C", "C-" };
            string   ddlindex = l > 13 ? "999999999" : l.ToString();

            // Both patterns are loop-invariant, so the Regex objects are built once.
            Regex regPages  = new Regex(@"共\d+页");
            Regex regNumber = new Regex(@"\d{1,}[\.]?\d{0,}");

            for (int n = 0; n < classLen.Length; n++)
            {
                int    pageInt           = 1;
                string viewState         = ToolWeb.GetAspNetViewState(htl);
                string eventValidation   = ToolWeb.GetAspNetEventValidation(htl);
                string strcookie         = string.Empty;
                NameValueCollection nvc3 = ToolWeb.GetNameValueCollection(new string[] { "__EVENTTARGET", "__EVENTARGUMENT",
                                                                                         "__LASTFOCUS", "__VIEWSTATE", "txtCorpName", "DropDownList1", "DropDownList2", "hiddenIsFirst", "GridViewPaging1$txtGridViewPagingForwardTo", "GridViewPaging1$btnForwardToPage", "__VIEWSTATEENCRYPTED", "__EVENTVALIDATION" },
                                                                          new string[] { "", "", "", viewState, "", classLen[n], ddlindex, "false", "1", "Go", "", eventValidation });
                try
                {
                    htl = ToolWeb.GetHtmlByUrl(Url, nvc3, Encoding.UTF8, ref strcookie);
                }
                catch (Exception ex)
                {
                    // htl still holds the previous response; saving its rows under this class would be wrong.
                    Logger.Error(ex.ToString());
                    continue;
                }
                Parser   parser = new Parser(new Lexer(htl));
                NodeList dtList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "GridViewPaging1_lblGridViewPagingDesc")));

                if (dtList != null && dtList.Count > 0)
                {
                    // The paging label contains the total as "共N页"; pageInt stays 1 when it cannot be read.
                    int parsedPages;
                    if (int.TryParse(regPages.Match(dtList.AsString()).Value.Trim(new char[] { '共', '页' }), out parsedPages))
                    {
                        pageInt = parsedPages;
                    }
                }
                for (int k = 1; k <= pageInt; k++)
                {
                    if (k > 1)
                    {
                        string viewState1        = ToolWeb.GetAspNetViewState(htl);
                        string eventValidation1  = ToolWeb.GetAspNetEventValidation(htl);
                        NameValueCollection nvc4 = ToolWeb.GetNameValueCollection(new string[] { "__EVENTTARGET", "__EVENTARGUMENT",
                                                                                                 "__LASTFOCUS", "__VIEWSTATE", "txtCorpName", "DropDownList1", "DropDownList2", "hiddenIsFirst", "GridViewPaging1$txtGridViewPagingForwardTo", "GridViewPaging1$btnForwardToPage", "__VIEWSTATEENCRYPTED", "__EVENTVALIDATION" },
                                                                                  new string[] { "", "", "", viewState1, "", classLen[n], ddlindex, "false", k.ToString(), "Go", "", eventValidation1 });
                        try
                        {
                            htl = ToolWeb.GetHtmlByUrl(Url, nvc4, Encoding.UTF8, ref strcookie);
                        }
                        catch (Exception ex)
                        {
                            // Skip the page: re-parsing the previous page would store and count its rows again.
                            Logger.Error(ex.ToString());
                            continue;
                        }
                    }
                    string beg = string.Empty, end = string.Empty, avg = string.Empty, type = string.Empty;

                    // Display text of the selected class and category options.
                    Parser   parserCtx = new Parser(new Lexer(htl));
                    NodeList ctxNode   = parserCtx.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("id", "DropDownList1")));
                    string   classlv   = ctxNode.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("option"), new HasAttributeFilter("value", classLen[n])), true).AsString().Replace("&nbsp;", "");

                    Parser   parserCtx2 = new Parser(new Lexer(htl));
                    NodeList ctxNode2   = parserCtx2.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("id", "DropDownList2")));
                    string   thtype     = ctxNode2.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("option"), new HasAttributeFilter("value", ddlindex)), true).AsString().Replace("&nbsp;", "");

                    // "GridView2": every data row overwrites the values, so the last row wins.
                    Parser   dtparser = new Parser(new Lexer(htl));
                    NodeList delList  = dtparser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "GridView2")));
                    TableTag tab      = (delList != null && delList.Count > 0) ? delList[0] as TableTag : null;
                    if (tab != null)
                    {
                        for (int e = 1; e < tab.RowCount; e++)
                        {
                            Winista.Text.HtmlParser.Tags.TableRow trdate = tab.Rows[e];
                            if (trdate.ColumnCount < 4)
                            {
                                continue;
                            }
                            type = trdate.Columns[0].ToPlainTextString().Trim();
                            beg  = trdate.Columns[1].ToPlainTextString().Trim();
                            end  = trdate.Columns[2].ToPlainTextString().Trim();
                            avg  = regNumber.Match(trdate.Columns[3].ToPlainTextString()).Value;
                        }
                    }

                    Parser   par        = new Parser(new Lexer(htl));
                    NodeList conList    = par.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "GridView1")));
                    TableTag tabContent = (conList != null && conList.Count > 0) ? conList[0] as TableTag : null;
                    if (tabContent == null)
                    {
                        continue;
                    }

                    for (int f = 1; f < tabContent.RowCount; f++)
                    {
                        Winista.Text.HtmlParser.Tags.TableRow dr = tabContent.Rows[f];

                        // The narrow layout reads columns up to index 6; shorter rows cannot be mapped.
                        if (dr.ColumnCount < 7)
                        {
                            continue;
                        }

                        // Rows with more than 7 columns carry one extra column after the name,
                        // which shifts every following field by one.
                        int shift = dr.ColumnCount > 7 ? 1 : 0;

                        string corpName     = dr.Columns[1].ToPlainTextString().Trim();
                        string good         = regNumber.Match(dr.Columns[2 + shift].ToPlainTextString().Trim()).Value;
                        string bad          = regNumber.Match(dr.Columns[3 + shift].ToPlainTextString().Trim()).Value;
                        string ranking      = dr.Columns[4 + shift].ToPlainTextString().Trim();
                        string categoryRank = dr.Columns[5 + shift].ToPlainTextString().Trim();
                        string realScore    = regNumber.Match(dr.Columns[6 + shift].ToPlainTextString().Trim()).Value;

                        CorpCreditjd info = ToolDb.GenCorpCreditJD(corpName, type, classlv, thtype, ranking, categoryRank, beg, end, realScore, "广东省", "深圳市", "深圳市住房和建设局", Url, bidhtml, avg, good, bad);
                        ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate);
                        count++;
                        sqlcount++;

                        // Throttle: pause for 120 seconds after more than 200 stored rows, then restart the counter.
                        if (count > 200)
                        {
                            count = 0;
                            Thread.Sleep(120000);
                        }
                    }
                }
            }
        }
コード例 #7
0
ファイル: CreditActionSZ.cs プロジェクト: SHNXJMG/Small
        /// <summary>
        /// Collects the "GoodAction" records from the paged "GridView1" table at <c>SiteUrl</c> and stores
        /// one <c>CreditAction</c> per data row through <c>ToolDb.SaveEntity</c>.
        /// </summary>
        /// <param name="crawlAll">Not used by this method.</param>
        private void GetLianghao(bool crawlAll)
        {
            string html            = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;
            int    pageInt         = 1;

            try
            {
                html = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception ex)
            {
                Logger.Error(ex);
            }
            Parser parser = new Parser(new Lexer(html));

            viewState       = ToolWeb.GetAspNetViewState(parser);
            eventValidation = ToolWeb.GetAspNetEventValidation(parser);
            NameValueCollection nvc = ToolWeb.GetNameValueCollection(new string[] { "ScriptManager1", "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE",
                                                                                    "txtName", "ddlType", "GridViewPaging1$txtGridViewPagingForwardTo", "__VIEWSTATEENCRYPTED", "__EVENTVALIDATION", "btnOK" },
                                                                     new string[] { "UpdatePanel1|btnOK", "", "", viewState, "", "GoodAction", "1", "", eventValidation, "查询" });

            try
            {
                html = ToolWeb.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
            }
            catch (Exception ex)
            {
                // Best effort: keep working with the HTML of the initial request, but leave a trace.
                Logger.Error(ex);
            }
            parser = new Parser(new Lexer(html));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "GridViewPaging1_lblGridViewPagingDesc")));

            if (nodeList != null && nodeList.Count > 0)
            {
                // The paging label contains the total as "共N页"; pageInt stays 1 when it cannot be read.
                Regex reg = new Regex(@"共\d+页");
                int   parsedPages;
                if (int.TryParse(reg.Match(nodeList.AsString()).Value.Trim(new char[] { '共', '页' }), out parsedPages))
                {
                    pageInt = parsedPages;
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    viewState       = ToolWeb.GetAspNetViewState(html);
                    eventValidation = ToolWeb.GetAspNetEventValidation(html);

                    // The paging form must be the one that is posted. Previously its result was discarded
                    // and the initial query form (page "1") was sent again for every page.
                    NameValueCollection pageNvc = ToolWeb.GetNameValueCollection(new string[] { "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE", "__VIEWSTATEENCRYPTED",
                                                                                                "__EVENTVALIDATION", "txtName", "ddlType", "GridViewPaging1$txtGridViewPagingForwardTo", "GridViewPaging1$btnForwardToPage" },
                                                                                 new string[] { "", "", viewState, "", eventValidation, "", "GoodAction", i.ToString(), "GO" });
                    try
                    {
                        html = ToolWeb.GetHtmlByUrl(SiteUrl, pageNvc, Encoding.UTF8);
                    }
                    catch (Exception ex)
                    {
                        // html still holds the previous page; skip instead of storing its rows again.
                        Logger.Error(ex);
                        continue;
                    }
                }
                parser = new Parser(new Lexer(html));
                NodeList dtList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "GridView1")));
                TableTag table  = (dtList != null && dtList.Count > 0) ? dtList[0] as TableTag : null;
                if (table == null)
                {
                    continue;
                }

                // Row 0 is skipped (loop starts at 1).
                for (int j = 1; j < table.RowCount; j++)
                {
                    Winista.Text.HtmlParser.Tags.TableRow tr = table.Rows[j];

                    // Columns up to index 11 are read below; skip rows that do not have them.
                    if (tr.ColumnCount < 12)
                    {
                        continue;
                    }

                    string corpCode       = tr.Columns[1].ToPlainTextString().Trim();
                    string corpName       = tr.Columns[2].ToPlainTextString().Trim();
                    string targetCode     = tr.Columns[4].ToPlainTextString().Trim();
                    string targetDesc     = tr.Columns[5].ToPlainTextString().Trim();
                    string targetClass    = tr.Columns[6].ToPlainTextString().Trim();
                    string targetLevel    = tr.Columns[7].ToPlainTextString().Trim();
                    string targetUnit     = tr.Columns[8].ToPlainTextString().Trim();
                    string docNo          = tr.Columns[9].ToPlainTextString().Trim();
                    string beginDateTime  = tr.Columns[10].ToPlainTextString().Trim();
                    string actionDateTime = tr.Columns[11].ToPlainTextString().Trim();
                    string actionType     = "良好行为";
                    string prjName        = string.Empty;

                    CreditAction info = ToolDb.GenCreditAction(corpCode, corpName, targetCode, targetDesc, targetClass, targetLevel, targetUnit,
                                                               docNo, beginDateTime, actionDateTime, actionType, "广东省", "深圳市", "深圳市住房和建设局", SiteUrl, prjName);
                    ToolDb.SaveEntity(info, this.ExistCompareFields);
                }
            }
        }