Exemple #1
0
        /// <summary>
        /// Collects the files linked from an HTML fragment and stores them as attachments of a project.
        /// </summary>
        /// <remarks>
        /// Every &lt;a&gt; tag found in <paramref name="htmltxt"/> is handed to
        /// <c>ToolHtml.GetBaseAttach</c>; links for which that call throws or returns null are skipped.
        /// Nothing is written (and, on update, nothing is deleted) unless at least one attachment
        /// was produced.
        /// </remarks>
        /// <param name="info">Project the attachments belong to. Its <c>PrjName</c> is the fallback
        /// attachment name and its <c>Id</c> is the owner id when <paramref name="isUpdate"/> is false.</param>
        /// <param name="htmltxt">HTML fragment scanned for anchors. Null, empty, or text without
        /// "http" results in no work.</param>
        /// <param name="result">Owner id used when <paramref name="isUpdate"/> is true; also the key
        /// whose existing <c>BaseAttach</c> rows and files are removed before re-inserting.</param>
        /// <param name="isUpdate">True to replace the attachments stored under <paramref name="result"/>;
        /// false to add attachments under <c>info.Id</c>.</param>
        private void SaveAttach(BidProject info, string htmltxt, string result, bool isUpdate)
        {
            List<BaseAttach> list = new List<BaseAttach>();

            // Guard against a null fragment; the cheap "http" test avoids parsing HTML without links.
            if (!string.IsNullOrEmpty(htmltxt) && htmltxt.Contains("http"))
            {
                Parser   parser = new Parser(new Lexer(htmltxt));
                NodeList aNode  = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (aNode != null && aNode.Count > 0)
                {
                    for (int j = 0; j < aNode.Count; j++)
                    {
                        ATag aTag = aNode[j].GetATag();
                        if (aTag == null)
                        {
                            // Node could not be resolved to an anchor; nothing to download.
                            continue;
                        }

                        string attachName = aTag.LinkText;
                        // Project helper; called here to drop backslash-escaped quotes from the link.
                        string aurl = aTag.Link.GetReplace("\\\"", "");
                        if (string.IsNullOrWhiteSpace(attachName))
                        {
                            attachName = info.PrjName;
                        }

                        try
                        {
                            string url = System.Web.HttpUtility.UrlDecode(aurl);

                            BaseAttach entity = null;
                            if (isUpdate)
                            {
                                entity = ToolHtml.GetBaseAttach(url, attachName, result, "SiteManage\\Files\\Attach\\");
                            }
                            else
                            {
                                entity = ToolHtml.GetBaseAttach(url, attachName, info.Id, "SiteManage\\Files\\Attach\\");
                            }
                            if (entity != null)
                            {
                                list.Add(entity);
                            }
                        }
                        catch
                        {
                            // Best effort: a link that cannot be fetched must not abort the remaining links.
                        }
                    }
                }
            }

            if (list.Count == 0)
            {
                return;
            }

            if (isUpdate)
            {
                // Replace semantics: remove the previously stored files and rows first.
                // NOTE(review): result is spliced into the SQL text; presumably it is a database-generated id - confirm.
                string delSql = string.Format("delete from BaseAttach where SourceID='{0}'", result);
                ToolFile.Delete(result);
                ToolDb.ExecuteSql(delSql);
            }
            foreach (BaseAttach attach in list)
            {
                ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
            }
        }
Exemple #2
0
        /// <summary>
        /// Downloads a JSON listing from <c>SiteUrl + MaxCount</c>, and for every entry of its
        /// "rows" array stores a <c>BidProject</c> (when not already present) and passes the
        /// entry's detail page to <c>SaveExpert</c>.
        /// </summary>
        /// <param name="crawlAll">Not used by this implementation.</param>
        /// <returns>
        /// Null when the listing cannot be downloaded; otherwise a list that this method never
        /// adds to (all results are persisted through <c>ToolDb</c> / <c>SaveExpert</c>).
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new ArrayList();
            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + this.MaxCount);
            }
            catch { return(null); }

            if (string.IsNullOrEmpty(html))
            {
                return(list);
            }

            // The payload may be wrapped in extra text; keep only the outermost {...} span.
            int startIndex = html.IndexOf('{');
            int endIndex   = html.LastIndexOf('}');
            if (startIndex < 0 || endIndex < startIndex)
            {
                // No JSON object in the response: nothing to crawl.
                return(list);
            }
            html = html.Substring(startIndex, (endIndex + 1) - startIndex);

            JavaScriptSerializer        serializer  = new JavaScriptSerializer();
            Dictionary <string, object> smsTypeJson = serializer.DeserializeObject(html) as Dictionary <string, object>;

            object rowsValue;
            if (smsTypeJson == null || !smsTypeJson.TryGetValue("rows", out rowsValue))
            {
                return(list);
            }
            object[] objvalues = rowsValue as object[];
            if (objvalues == null)
            {
                return(list);
            }

            foreach (object objValue in objvalues)
            {
                Dictionary <string, object> dic = objValue as Dictionary <string, object>;
                if (dic == null)
                {
                    continue;
                }

                // Rows without a project number or name cannot be identified; skip them.
                object rawNo, rawName;
                if (!dic.TryGetValue("bdBH", out rawNo) || !dic.TryGetValue("bdName", out rawName))
                {
                    continue;
                }
                string bPrjno   = Convert.ToString(rawNo);
                string bPrjname = Convert.ToString(rawName);

                // The listing supplies none of these; they are passed through empty.
                string bExpertendtime = string.Empty, bBaseprice = string.Empty, bBiddate = string.Empty,
                       bBuildunit = string.Empty, bBidmethod = string.Empty, bRemark = string.Empty;

                string bInfourl = "https://www.szjsjy.com.cn:8001/jyw-ba/jyxx/queryOldOTDataDetail.do?type=6&id=" + bPrjno;
                string htmldtl  = string.Empty;
                try
                {
                    htmldtl = this.ToolWebSite.GetHtmlByUrl(bInfourl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                }
                catch { continue; } // detail page unavailable: skip this entry, keep crawling

                // Region literals: Guangdong Province / Shenzhen / Bao'an District.
                BidProject info = ToolDb.GenExpertProject("广东省", "深圳市", "宝安区", bPrjno, bPrjname, bExpertendtime, bBaseprice, bBiddate, bBuildunit, bBidmethod, bRemark, bInfourl);

                // The values come from the remote site; double single quotes so a name such as
                // "O'Neil" neither breaks nor alters the statement.
                string safePrjNo   = (Convert.ToString(info.PrjNo) ?? string.Empty).Replace("'", "''");
                string safePrjName = (Convert.ToString(info.PrjName) ?? string.Empty).Replace("'", "''");
                string sql         = string.Format("select Id from BidProject where 1=1 and PrjNo='{0}' and PrjName='{1}'", safePrjNo, safePrjName);
                string result      = Convert.ToString(ToolDb.ExecuteScalar(sql));

                if (!string.IsNullOrEmpty(result))
                {
                    // Project already stored: refresh its expert data under the existing id.
                    SaveExpert(result, bInfourl, htmldtl, true);
                }
                else if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                {
                    SaveExpert(info.Id, bInfourl, htmldtl, false);
                }
            }
            return(list);
        }
Exemple #3
0
        /// <summary>
        /// Walks the paged report listing that starts at <c>SiteUrl</c>, and for every list item
        /// stores a <c>BidProject</c> built from the item's title and detail page, plus the file
        /// linked from the detail page (when one is found) as a <c>BaseAttach</c>.
        /// </summary>
        /// <remarks>
        /// Page 1 is <c>SiteUrl</c>; page <c>i</c> (i &gt; 1) is <c>pingbiaobaogao_{i-1}.htm</c>.
        /// Pages and detail pages that fail to download are skipped.
        /// </remarks>
        /// <param name="crawlAll">When false, the crawl stops once <c>MaxCount</c> entries have been processed.</param>
        /// <returns>A list that this method never adds to; results are persisted through <c>ToolDb</c>.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list     = new ArrayList();
            string html     = string.Empty;
            int    sqlCount = 0;
            int    pageInt  = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch (Exception)
            {
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "cn6")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The page count is the text between the first "(" and the following "," of the pager cell.
                    string temp = pageNode.AsString().Replace("(", "kdxx").GetRegexBegEnd("kdxx", ",");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Pager not understood: fall back to crawling the first page only.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.gdzbtb.gov.cn/pbbgbd/pingbiaobaogao_" + (i - 1).ToString() + ".htm", Encoding.Default);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "position2")), true), new TagNameFilter("li")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < nodeList.Count; j++)
                {
                    // Not available on this site; passed through empty.
                    string bPrjno = string.Empty, bBidresultendtime = string.Empty, bBaseprice = string.Empty,
                           bBuildunit = string.Empty, bBidmethod = string.Empty, bRemark = string.Empty;
                    string bCity = string.Empty;

                    string bPrjname = nodeList[j].GetATagValue("title");
                    if (bPrjname.Contains("广东省"))
                    {
                        bCity    = "广州市区";
                        bPrjname = bPrjname.Replace("[", "").Replace("]-", "").Replace("]", "").Replace("广东省", "");
                    }
                    else
                    {
                        // The bracketed title prefix is taken as the region name.
                        string temp = bPrjname.Replace("[", "kdxx").Replace("]", "xxdk").GetRegexBegEnd("kdxx", "xxdk");
                        bPrjname = bPrjname.Replace("[", "").Replace("]-", "").Replace("]", "");
                        if (!string.IsNullOrEmpty(temp))
                        {
                            // string.Replace throws on an empty search value, so only strip a real prefix.
                            bPrjname = bPrjname.Replace(temp, "");
                            bCity    = temp + "区";
                        }
                    }

                    string bInfourl = "http://www.gdzbtb.gov.cn/pbbgbd/" + nodeList[j].GetATagHref().Replace("../", "").Replace("./", "");
                    string htldtl   = string.Empty;
                    try
                    {
                        htldtl = this.ToolWebSite.GetHtmlByUrl(bInfourl, Encoding.Default);
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellSpacing", "1")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string bBiddate = dtlNode.AsHtml().GetDateRegex();
                    if (string.IsNullOrEmpty(bBiddate))
                    {
                        bBiddate = DateTime.Now.ToString("yyyy-MM-dd");
                    }

                    // The attachment anchor is taken from the span that starts at the script marker
                    // and ends at the first closing anchor AFTER that marker. Searching from the
                    // marker keeps the span length positive when other anchors precede it.
                    string attachUrl  = string.Empty;
                    string attachName = string.Empty;
                    int    len1       = htldtl.IndexOf("$(\"#pbbg_shongti\")");
                    int    len2       = len1 >= 0 ? htldtl.IndexOf("</a>", len1) : -1;
                    if (len1 >= 0 && len2 > len1)
                    {
                        string aurl = htldtl.Substring(len1, len2 - len1) + "</a>";
                        parser = new Parser(new Lexer(aurl));
                        NodeList atagNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        if (atagNode != null && atagNode.Count > 0)
                        {
                            ATag aTag = atagNode.GetATag();
                            if (aTag != null)
                            {
                                attachUrl  = aTag.Link;
                                attachName = aTag.LinkText;
                            }
                        }
                    }
                    if (string.IsNullOrEmpty(attachName))
                    {
                        attachName = bPrjname;
                    }

                    BidProject info = ToolDb.GenResultProject("广东省", bCity, "", bPrjno, bPrjname, bBidresultendtime, bBaseprice, bBiddate, bBuildunit, bBidmethod, bRemark, bInfourl);
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(list);
                    }
                    sqlCount++;

                    // The URL is scraped from the site; double single quotes before splicing it into SQL.
                    string safeInfoUrl = (Convert.ToString(info.InfoUrl) ?? string.Empty).Replace("'", "''");
                    string sql         = string.Format("select Id from BidProject where 1=1 and InfoUrl='{0}'", safeInfoUrl);
                    string result      = Convert.ToString(ToolDb.ExecuteScalar(sql));
                    bool   exists      = !string.IsNullOrEmpty(result);

                    bool saved = exists
                        ? ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate)
                        : ToolDb.SaveEntity(info, this.ExistCompareFields);
                    if (!saved || string.IsNullOrEmpty(attachUrl))
                    {
                        continue;
                    }

                    // Attachments live under a yyyyMM folder derived from the bid date.
                    DateTime bidDate;
                    string   fileUrl = DateTime.TryParse(bBiddate, out bidDate)
                        ? bidDate.ToString("yyyyMM")
                        : DateTime.Now.ToString("yyyyMM");
                    string     alink  = "http://www.gdzbtb.gov.cn/pbbgbd/" + fileUrl + "/" + attachUrl.Replace("../", "").Replace("./", "");
                    BaseAttach attach = null;
                    try
                    {
                        // NOTE(review): for an existing project the attachment is created under info.Id while
                        // the old rows are deleted by the stored id (result) - confirm both refer to the same record.
                        attach = ToolHtml.GetBaseAttach(alink, attachName, info.Id, "SiteManage\\Files\\Attach\\");
                        if (attach == null)
                        {
                            attach = ToolHtml.GetBaseAttachByUrl(alink, attachName, info.Id, "SiteManage\\Files\\Attach\\");
                        }
                    }
                    catch
                    {
                        // Best effort: a failed download leaves the project without an attachment.
                    }
                    if (attach == null)
                    {
                        continue;
                    }

                    if (exists)
                    {
                        string sqlDelete = string.Format("delete from BaseAttach where SourceId='{0}'", result);
                        ToolDb.ExecuteSql(sqlDelete);
                    }
                    ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                }
            }
            return(list);
        }
Exemple #4
0
        /// <summary>
        /// Walks the paged ASP.NET grid at <c>SiteUrl</c> and, for every data row, either fills in
        /// the missing <c>ExpertEndTime</c> of an already stored <c>BidProject</c> or stores a new
        /// one, then passes the row's expert detail page to <c>AddExpert</c>.
        /// </summary>
        /// <remarks>
        /// Pages after the first are requested by replaying the grid's postback with the
        /// view state taken from the previously loaded page. A failed postback ends the crawl.
        /// </remarks>
        /// <param name="crawlAll">When false, the crawl stops once <c>list.Count</c> reaches <c>MaxCount</c>.</param>
        /// <returns>
        /// Null when the first page cannot be downloaded; otherwise a list that this method never
        /// adds to (results are persisted through <c>ToolDb</c> / <c>AddExpert</c>).
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            string htl             = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;
            int    pageInt         = 1;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch { return(null); }
            Parser   parser = new Parser(new Lexer(htl));
            NodeList dList  = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("cellSpacing", "2"), new TagNameFilter("table")));

            if (dList != null && dList.Count > 0)
            {
                string pageString = dList.AsString();
                Regex  regexPage  = new Regex(@"共[^页]+页,");
                Match  pageMatch  = regexPage.Match(pageString);
                try
                {
                    pageInt = int.Parse(pageMatch.Value.Replace("共", "").Replace("页,", "").Replace(" ", ""));
                }
                catch
                {
                    // Pager text not understood: crawl the first page only.
                }
            }

            for (int j = 1; j <= pageInt; j++)
            {
                if (j > 1)
                {
                    viewState       = this.ToolWebSite.GetAspNetViewState(htl);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(htl);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__VIEWSTATE",
                        "__VIEWSTATEENCRYPTED",
                        "__EVENTVALIDATION",
                        "ctl00$Header$drpSearchType",
                        "ctl00$Header$txtGcxm",
                        "ctl00$Content$hdnId",
                        "ctl00$Content$hdnOperate",
                        "ctl00$hdnPageCount"
                    }, new string[] {
                        "ctl00$Content$GridView1",
                        "Page$" + j.ToString(),
                        viewState,
                        "",
                        eventValidation,
                        "0",
                        string.Empty,
                        string.Empty,
                        string.Empty,
                        pageInt.ToString()
                    });
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8);
                    }
                    catch { return(list); }
                }

                parser = new Parser(new Lexer(htl));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_Content_GridView1"), new TagNameFilter("table")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }
                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // Row 0 is skipped in both cases. With more than one page the last row is skipped
                // as well - presumably it is the grid's pager row (TODO confirm).
                bool multiPage = pageInt > 1;
                int  rowLimit  = multiPage ? table.RowCount - 1 : table.RowCount;

                // Anchors are matched to rows by position: data row i uses anchor i - 1.
                NodeList anchors = table.SearchFor(typeof(ATag), true);

                for (int i = 1; i < rowLimit; i++)
                {
                    // Not available on this site; passed through empty.
                    string bBaseprice = string.Empty, bBiddate = string.Empty, bBuildunit = string.Empty,
                           bBidmethod = string.Empty, bRemark = string.Empty;

                    TableRow tr             = table.Rows[i];
                    string   bPrjno         = tr.Columns[1].ToPlainTextString().Trim();
                    string   bPrjname       = tr.Columns[2].ToPlainTextString().Trim();
                    string   bExpertendtime = tr.Columns[3].ToPlainTextString().Trim();
                    string   bInfourl       = SiteUrl;

                    ATag aTag = (anchors != null && i - 1 < anchors.Count) ? anchors[i - 1] as ATag : null;
                    if (aTag == null)
                    {
                        // No detail link for this row: it cannot be processed.
                        continue;
                    }
                    string aLink = BuildExpertDetailQuery(aTag.Link);
                    string url   = "http://www.szjsjy.com.cn/BusinessInfo/PWMDGSViewForm.aspx?GCBH=" + aLink;

                    // Region literals: Guangdong Province / Shenzhen.
                    BidProject info = ToolDb.GenExpertProject("广东省", "深圳市", "", bPrjno, bPrjname, bExpertendtime, bBaseprice, bBiddate, bBuildunit, bBidmethod, bRemark, bInfourl);

                    // Scraped values: double single quotes before splicing them into SQL.
                    string safePrjNo   = (Convert.ToString(info.PrjNo) ?? string.Empty).Replace("'", "''");
                    string safePrjName = (Convert.ToString(info.PrjName) ?? string.Empty).Replace("'", "''");

                    string    sql = string.Format("select * from BidProject where 1=1 and PrjNo='{0}' and PrjName='{1}'", safePrjNo, safePrjName);
                    DataTable dt  = ToolDb.GetDbData(sql);
                    if (dt != null && dt.Rows.Count > 0)
                    {
                        // Known project: only act when its ExpertEndTime has not been filled in yet.
                        sql = string.Format("select * from BidProject where 1=1 and PrjNo='{0}' and PrjName='{1}' and ExpertEndTime is null", safePrjNo, safePrjName);
                        DataTable dtinfo = ToolDb.GetDbData(sql);
                        if (dtinfo != null && dtinfo.Rows.Count > 0)
                        {
                            string id     = Convert.ToString(dtinfo.Rows[0]["Id"]);
                            string strSql = string.Format("update BidProject set ExpertEndTime='{0}' where Id='{1}'", info.ExpertEndTime, id);
                            if (ToolDb.ExecuteSql(strSql) > 0)
                            {
                                string htmldetail = FetchExpertDetailHtml(url);
                                if (!string.IsNullOrEmpty(htmldetail))
                                {
                                    AddExpert(htmldetail, url, id);
                                }
                            }
                        }
                    }
                    else
                    {
                        // NOTE(review): the multi-page path saves with no compare fields while the
                        // single-page path uses ExistCompareFields; kept as found - confirm intent.
                        if (multiPage)
                        {
                            ToolDb.SaveEntity(info, "");
                        }
                        else
                        {
                            ToolDb.SaveEntity(info, this.ExistCompareFields);
                        }

                        string htmldetail = FetchExpertDetailHtml(url);
                        if (!string.IsNullOrEmpty(htmldetail))
                        {
                            AddExpert(htmldetail, url, info.Id);
                        }
                    }

                    // NOTE(review): nothing is ever added to list, so this limit only triggers when MaxCount <= 0.
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }

        /// <summary>
        /// Turns the grid's <c>viewPwmd('no', 'name')</c> link into the query-string tail
        /// <c>no&amp;gcmc=%xx%xx...</c>, where the name is percent-encoded byte by byte as UTF-8.
        /// A link without a comma is returned with only the <c>viewPwmd('</c> / <c>')</c> wrapper removed.
        /// </summary>
        private static string BuildExpertDetailQuery(string rawLink)
        {
            string   aLink = rawLink.Replace("viewPwmd('", "").Replace("')", "");
            string[] link  = aLink.Split(',');
            if (link.Length <= 1)
            {
                return aLink;
            }

            string a1    = link[0].Replace(" ", "").Replace("'", "");
            string a2    = link[1].Replace(" ", "").Replace("'", "");
            byte[] byStr = Encoding.UTF8.GetBytes(a2); // the platform default would be Encoding.Default.GetBytes(str)

            StringBuilder sb = new StringBuilder(byStr.Length * 3);
            for (int d = 0; d < byStr.Length; d++)
            {
                // Always two hex digits: a percent-escape such as "%a" (for byte 0x0a) is not valid.
                sb.Append('%').Append(byStr[d].ToString("x2"));
            }
            return a1 + "&gcmc=" + sb.ToString();
        }

        /// <summary>
        /// Downloads an expert detail page as UTF-8; returns an empty string when the request throws.
        /// </summary>
        private string FetchExpertDetailHtml(string url)
        {
            try
            {
                return this.ToolWebSite.GetHtmlByUrl(url, Encoding.UTF8);
            }
            catch
            {
                // Best effort: a missing detail page must not stop the crawl.
                return string.Empty;
            }
        }
Exemple #5
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            string htl             = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    pageInt         = 1;
            string eventValidation = string.Empty;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch { return(null); }
            Parser   parser   = new Parser(new Lexer(htl));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("cellspacing", "2"), new TagNameFilter("table")));

            if (pageList != null && pageList.Count > 0)
            {
                string pageString = pageList.AsString();
                Regex  regexPage  = new Regex(@"共[^页]+页,");
                Match  pageMatch  = regexPage.Match(pageString);
                try
                {
                    pageInt = int.Parse(pageMatch.Value.Replace("共", "").Replace("页,", "").Replace(" ", ""));
                }
                catch { pageInt = 1; }
            }
            for (int j = 1; j <= pageInt; j++)
            {
                if (j > 1)
                {
                    viewState       = this.ToolWebSite.GetAspNetViewState(htl);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(htl);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__VIEWSTATE",
                        "__VIEWSTATEENCRYPTED",
                        "__EVENTVALIDATION",
                        "ctl00$Header$drpSearchType",
                        "ctl00$Header$txtQymc",
                        "ctl00$Content$hdnOperate",
                        "ctl00$hdnPageCount"
                    }, new string[] {
                        "ctl00$Content$GridView1",
                        "Page$" + j.ToString(),
                        viewState,
                        "",
                        eventValidation,
                        "0",
                        string.Empty,
                        string.Empty,
                        pageInt.ToString()
                    });
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8);
                    }
                    catch { }
                }
                parser = new Parser(new Lexer(htl));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_Content_GridView1"), new TagNameFilter("table")));
                if (nodeList != null && nodeList.Count > 0)
                {
                    TableTag table = nodeList[0] as TableTag;
                    if (pageInt > 1)
                    {
                        for (int i = 1; i < table.RowCount - 1; i++)
                        {
                            string bProv = string.Empty, bCity = string.Empty, bArea = string.Empty, bPrjno = string.Empty,
                                   bPrjname = string.Empty, bBidresultendtime = string.Empty,
                                   bBaseprice = string.Empty, bBiddate = string.Empty, bBuildunit = string.Empty, bBidmethod = string.Empty,
                                   bRemark = string.Empty, bInfourl = string.Empty;
                            TableRow tr = table.Rows[i] as TableRow;
                            bPrjno            = tr.Columns[1].ToPlainTextString();
                            bPrjname          = tr.Columns[2].ToPlainTextString();
                            bBidresultendtime = tr.Columns[3].ToPlainTextString();
                            ATag   aTag  = table.SearchFor(typeof(ATag), true)[i - 1] as ATag;
                            string aLink = "http://www.szjsjy.com.cn/BusinessInfo/" + aTag.Link;
                            bInfourl = aLink;
                            BidProject info = ToolDb.GenResultProject("广东省", "深圳市", "", bPrjno, bPrjname, bBidresultendtime, bBaseprice, bBiddate, bBuildunit, bBidmethod, bRemark, bInfourl);

                            string sql = string.Format("select Id from BidProject where 1=1 and PrjNo='{0}' and PrjName='{1}'", info.PrjNo, info.PrjName);
                            object obj = ToolDb.ExecuteScalar(sql);
                            //判断是否存在该条记录
                            if (obj != null && !string.IsNullOrEmpty(obj.ToString()))
                            {
                                sql = string.Format("select Id from BidProject where 1=1 and PrjNo='{0}' and PrjName='{1}' and BidResultEndTime is null", info.PrjNo, info.PrjName);
                                object dtinfo = ToolDb.ExecuteScalar(sql);
                                if (dtinfo != null && !string.IsNullOrEmpty(dtinfo.ToString()))
                                {
                                    string id     = dtinfo.ToString();
                                    string strSql = string.Format("update BidProject set BidResultEndTime='{0}' where Id='{1}'", info.BidResultEndTime, id);
                                    int    result = ToolDb.ExecuteSql(strSql);
                                    if (result > 0)
                                    {
                                        SaveAttach(aLink, id);
                                    }
                                }
                                else
                                {
                                    SaveAttach(aLink, obj.ToString());
                                }
                            }
                            else
                            {
                                ToolDb.SaveEntity(info, "");
                                SaveAttach(aLink, info.Id);
                            }
                        }
                    }
                    else
                    {
                        for (int i = 1; i < table.RowCount; i++)
                        {
                            string bProv = string.Empty, bCity = string.Empty, bArea = string.Empty, bPrjno = string.Empty,
                                   bPrjname = string.Empty, bBidresultendtime = string.Empty,
                                   bBaseprice = string.Empty, bBiddate = string.Empty, bBuildunit = string.Empty, bBidmethod = string.Empty,
                                   bRemark = string.Empty, bInfourl = string.Empty;
                            TableRow tr = table.Rows[i] as TableRow;
                            bPrjno            = tr.Columns[1].ToPlainTextString();
                            bPrjname          = tr.Columns[2].ToPlainTextString();
                            bBidresultendtime = tr.Columns[3].ToPlainTextString();
                            bInfourl          = SiteUrl;
                            ATag       aTag  = table.SearchFor(typeof(ATag), true)[i - 1] as ATag;
                            string     aLink = "http://www.szjsjy.com.cn/BusinessInfo/" + aTag.Link;
                            BidProject info  = ToolDb.GenResultProject("广东省", "深圳市", "", bPrjno, bPrjname, bBidresultendtime, bBaseprice, bBiddate, bBuildunit, bBidmethod, bRemark, bInfourl);

                            string sql = string.Format("select Id from BidProject where 1=1 and PrjNo='{0}' and PrjName='{1}'", info.PrjNo, info.PrjName);
                            object obj = ToolDb.ExecuteScalar(sql);
                            if (obj != null && !string.IsNullOrEmpty(obj.ToString()))
                            {
                                sql = string.Format("select Id from BidProject where 1=1 and PrjNo='{0}' and PrjName='{1}' and BidResultEndTime is null", info.PrjNo, info.PrjName);
                                object dtinfo = ToolDb.ExecuteScalar(sql);
                                if (dtinfo != null && !string.IsNullOrEmpty(dtinfo.ToString()))
                                {
                                    string id     = dtinfo.ToString();
                                    string strSql = string.Format("update BidProject set BidResultEndTime='{0}' where Id='{1}'", info.BidResultEndTime, id);
                                    int    result = ToolDb.ExecuteSql(strSql);
                                    if (result > 0)
                                    {
                                        SaveAttach(aLink, id);
                                    }
                                }
                                else
                                {
                                    SaveAttach(aLink, obj.ToString());
                                }
                            }
                            else
                            {
                                ToolDb.SaveEntity(info, "");
                                SaveAttach(aLink, info.Id);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Exemple #6
0
        /// <summary>
        /// Walks every page of the grid at <c>SiteUrl</c>, builds a <c>BidProject</c> from each data row via
        /// <c>ToolDb.GenExpertProject</c>, saves it through <c>ToolDb.SaveEntity</c> and, when that call
        /// returns true, passes the row's detail page to <c>AddExpert</c>.
        /// </summary>
        /// <param name="crawlAll">Not read by this implementation.</param>
        /// <returns>
        /// The result list (this method never adds to it) once all pages were visited; <c>null</c> when the
        /// first page cannot be downloaded or once more than <c>MaxCount</c> rows have been handled.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list     = new ArrayList();
            string html     = string.Empty;
            int    sqlCount = 0;
            int    pageInt  = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch { return(null); }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("cellspacing", "2"), new TagNameFilter("table")));

            if (pageList != null && pageList.Count > 0)
            {
                // The pager text carries the page total between "共" and "页,"; strip those markers
                // and any spaces, then parse what is left. Anything unparsable means a single page.
                Match  pageMatch = Regex.Match(pageList.AsString(), @"共[^页]+页,");
                string pageText  = pageMatch.Value.Replace("共", "").Replace("页,", "").Replace(" ", "");
                if (!int.TryParse(pageText, out pageInt))
                {
                    pageInt = 1;
                }
            }

            for (int j = 1; j <= pageInt; j++)
            {
                if (j > 1)
                {
                    // Pages after the first are requested by replaying the GridView postback,
                    // using the hidden fields of the most recently downloaded page.
                    string viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    string eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__VIEWSTATE",
                        "__VIEWSTATEENCRYPTED",
                        "__EVENTVALIDATION",
                        "ctl00$Header$drpSearchType",
                        "ctl00$Header$txtGcxm",
                        "ctl00$Content$hdnOperate",
                        "ctl00$hdnPageCount"
                    }, new string[] {
                        "ctl00$Content$GridView1",
                        "Page$" + j.ToString(),
                        viewState,
                        "",
                        eventValidation,
                        "0",
                        string.Empty,
                        string.Empty,
                        pageInt.ToString()
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8);
                    }
                    catch
                    {
                        // The postback failed, so html still holds the previous page. Parsing it again
                        // would re-fetch every detail page and count the same rows twice; skip this page.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_Content_GridView1")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = nodeList[0] as TableTag;
                int      rows  = table.RowCount;
                if (pageInt > 1)
                {
                    // NOTE(review): presumably the last row is the pager when the grid spans several pages — confirm.
                    rows = rows - 1;
                }

                // Row 0 is skipped (loop starts at 1).
                for (int i = 1; i < rows; i++)
                {
                    string bPrjno = string.Empty, bPrjname = string.Empty, bExpertendtime = string.Empty,
                           bBaseprice = string.Empty, bBiddate = string.Empty, bBuildunit = string.Empty,
                           bBidmethod = string.Empty, bRemark = string.Empty, bInfourl = string.Empty;

                    TableRow tr = table.Rows[i] as TableRow;
                    bPrjno         = tr.Columns[1].ToPlainTextString();
                    bPrjname       = tr.Columns[2].ToPlainTextString();
                    bExpertendtime = tr.Columns[3].ToPlainTextString();

                    // Two values are cut out of the link's href and become the GCBH / GCMC query parameters.
                    string aLink = tr.Columns[4].GetATagHref().GetRegexBegEnd("'", ",").Replace("'", "").Replace(",", "");
                    string bLink = tr.Columns[4].GetATagHref().GetRegexBegEnd(",'", "'").Replace("'", "").Replace(",", "");

                    bInfourl = "http://www.szjsjy.com.cn/BusinessInfo/PBYQZJGSViewForm.aspx?GCBH=" + aLink + "&GCMC=" + this.ToolWebSite.UrlEncode(bLink);
                    string htmlDtl = string.Empty;
                    try
                    {
                        htmlDtl = this.ToolWebSite.GetHtmlByUrl(bInfourl, Encoding.UTF8);
                    }
                    catch
                    {
                        // Best effort: the row is still saved, with an empty detail page.
                    }

                    BidProject info = ToolDb.GenExpertProject("广东省", "深圳市", "", bPrjno, bPrjname, bExpertendtime, bBaseprice, bBiddate, bBuildunit, bBidmethod, bRemark, bInfourl);
                    sqlCount++;
                    if (sqlCount > this.MaxCount)
                    {
                        return(null);
                    }
                    if (ToolDb.SaveEntity(info, ExistCompareFields, true, this.ExistsHtlCtx, null))
                    {
                        AddExpert(htmlDtl, bInfourl, info.Id);
                    }
                }
            }
            return(list);
        }
Exemple #7
0
        /// <summary>
        /// Walks every page of the result grid at <c>SiteUrl</c>, builds a <c>BidProject</c> from each row via
        /// <c>ToolDb.GenResultProject</c>, saves it through <c>ToolDb.SaveEntity</c> and, when that call
        /// returns true, downloads the files listed on the row's detail page.
        /// </summary>
        /// <param name="crawlAll">Not read by this implementation.</param>
        /// <returns>
        /// The result list (this method never adds to it) once all pages were visited; <c>null</c> when the
        /// first page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new ArrayList();
            string htl       = string.Empty;
            string cookiestr = string.Empty;
            int    pageInt   = 1;

            try
            {
                htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch { return(null); }

            Parser   parser   = new Parser(new Lexer(htl));
            NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "ctl00_cph_context_ZsjyjgsList2_GridViewPaging1_PagingDescTd")));

            if (nodeList != null && nodeList.Count > 0)
            {
                try
                {
                    // The page total is taken from the last comma-separated part of the paging
                    // description, with "共" and "页" stripped.
                    string   pagestr = nodeList[0].ToPlainTextString().Trim();
                    string[] page    = pagestr.Split(',');
                    pageInt = int.Parse(page[page.Length - 1].Replace("共", "").Replace("页", ""));
                }
                catch { pageInt = 1; }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Pages after the first are requested by replaying the "go to page" postback,
                    // using the hidden fields of the most recently downloaded page.
                    string viewState       = this.ToolWebSite.GetAspNetViewState(htl);
                    string eventValidation = this.ToolWebSite.GetAspNetEventValidation(htl);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                        new string[] {
                            "ctl00$ScriptManager1",
                            "__EVENTTARGET", "__EVENTARGUMENT", "__VIEWSTATE",
                            "ctl00$cph_context$ZsjyjgsList2$ddlSearch", "ctl00$cph_context$ZsjyjgsList2$txtTitle",
                            "ctl00$cph_context$ZsjyjgsList2$txtStartTime", "ctl00$cph_context$ZsjyjgsList2$txtEndTime",
                            "ctl00$cph_context$ZsjyjgsList2$GridViewPaging1$txtGridViewPagingForwardTo",
                            "__VIEWSTATEENCRYPTED", "__EVENTVALIDATION", "ctl00$cph_context$ZsjyjgsList2$GridViewPaging1$btnForwardToPage"
                        },
                        new string[] {
                            "ctl00$cph_context$ZsjyjgsList2$UpdatePanel2|ctl00$cph_context$ZsjyjgsList2$GridViewPaging1$btnForwardToPage",
                            "", "", viewState, "xxbt", "", "", "", i.ToString(), "", eventValidation, "GO"
                        });
                    try
                    {
                        htl = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                    }
                    catch
                    {
                        // The postback failed, so htl still holds the previous page. Parsing it again
                        // would only reprocess rows that were already handled; skip this page.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(htl));
                NodeList dtList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_cph_context_ZsjyjgsList2_GridView1")));
                if (dtList == null || dtList.Count == 0)
                {
                    continue;
                }

                TableTag table = dtList[0] as TableTag;
                // Row 0 is skipped (loop starts at 1).
                for (int j = 1; j < table.RowCount; j++)
                {
                    string bPrjno = string.Empty, bPrjname = string.Empty, bBidresultendtime = string.Empty,
                           bBaseprice = string.Empty, bBiddate = string.Empty, bBuildunit = string.Empty,
                           bBidmethod = string.Empty, bRemark = string.Empty, bInfourl = string.Empty;

                    TableRow tr = table.Rows[j];
                    bPrjname   = tr.Columns[2].ToPlainTextString().Trim();
                    bBuildunit = tr.Columns[3].ToPlainTextString().Trim();
                    bBiddate   = tr.Columns[4].ToPlainTextString().Trim();
                    ATag aTag = tr.Columns[2].SearchFor(typeof(ATag), true)[0] as ATag;
                    bInfourl = "http://jyzx.cb.gov.cn/LGjyzxWeb/SiteManage/" + aTag.Link;

                    BidProject info = ToolDb.GenResultProject("广东省", "深圳市", "龙岗区", bPrjno, bPrjname, bBidresultendtime, bBaseprice, bBiddate, bBuildunit, bBidmethod, bRemark, bInfourl);

                    // The values come from a scraped page, so single quotes are doubled before they are
                    // placed inside the SQL string literals; a raw quote would otherwise break the statement.
                    // NOTE(review): assumes GenResultProject does not already escape quotes — confirm.
                    string safePrjNo   = (info.PrjNo ?? string.Empty).Replace("'", "''");
                    string safePrjName = (info.PrjName ?? string.Empty).Replace("'", "''");
                    string sql         = string.Format("select Id from BidProject where 1=1 and PrjNo='{0}' and PrjName='{1}'", safePrjNo, safePrjName);
                    string result      = Convert.ToString(ToolDb.ExecuteScalar(sql));

                    // Start downloading attachments once the entity has been saved. When a matching
                    // row was already present, its old BaseAttach rows are replaced.
                    if (ToolDb.SaveEntity(info, this.ExistCompareFields))
                    {
                        SaveDetailAttachments(info, bInfourl, string.IsNullOrEmpty(result) ? null : result);
                    }
                }
            }
            return(list);
        }

        /// <summary>
        /// Downloads the files linked from the attachment grid of a detail page and stores one
        /// <c>BaseAttach</c> per file that is larger than 1024 bytes.
        /// </summary>
        /// <param name="info">Project whose <c>Id</c> the new attachment rows are created with.</param>
        /// <param name="detailUrl">URL of the detail page that hosts the attachment grid.</param>
        /// <param name="replaceSourceId">
        /// When not empty and the page has an attachment grid, all <c>BaseAttach</c> rows with this
        /// <c>SourceId</c> are deleted before the downloads start; pass <c>null</c> to delete nothing.
        /// </param>
        private void SaveDetailAttachments(BidProject info, string detailUrl, string replaceSourceId)
        {
            string htltxt = string.Empty;
            try
            {
                htltxt = this.ToolWebSite.GetHtmlByUrl(detailUrl, Encoding.UTF8);
            }
            catch
            {
                // Best effort: an unreachable detail page is parsed as an empty document below.
            }

            Parser   par      = new Parser(new Lexer(htltxt));
            NodeList fileList = par.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_cph_context_AccessoriesControl1_GridView1")));
            if (fileList == null || fileList.Count == 0)
            {
                return;
            }

            if (!string.IsNullOrEmpty(replaceSourceId))
            {
                string sqlDelete = string.Format("delete from BaseAttach where SourceId='{0}'", replaceSourceId);
                ToolDb.ExecuteSql(sqlDelete);
            }

            TableTag tab = fileList[0] as TableTag;
            // Row 0 is skipped (loop starts at 1).
            for (int k = 1; k < tab.RowCount; k++)
            {
                TableRow dr    = tab.Rows[k];
                NodeList links = dr.Columns[1].SearchFor(typeof(ATag), true);
                if (links == null || links.Count == 0)
                {
                    // No download link in this row, nothing to fetch.
                    continue;
                }
                ATag aLink = links[0] as ATag;
                if (aLink == null)
                {
                    continue;
                }

                // One clock reading, so year and month cannot come from different instants.
                DateTime now       = DateTime.Now;
                string   data      = now.Year.ToString() + now.Month.ToString() + "\\";
                string   annexName = ToolDb.NewGuid;

                // Keep everything from the first '.' of the link text as the file suffix; a link text
                // without any '.' yields no suffix instead of an out-of-range Substring call.
                string linkText = aLink.LinkText;
                int    index    = linkText.IndexOf('.');
                string suffix   = index >= 0 ? linkText.Substring(index) : string.Empty;
                string fileName = annexName + suffix;

                FilesClass file = new FilesClass();
                file.strUrl      = "http://jyzx.cb.gov.cn/LGjyzxWeb/" + aLink.Link.Replace("../", "");
                file.strFileName = fileName;
                file.strFile     = data;

                long size = file.DownLoadFile();
                if (size > 1024)
                {
                    BaseAttach baseInfo = ToolDb.GenBaseAttach(annexName, linkText, info.Id, data + fileName, size.ToString(), "");
                    ToolDb.SaveEntity(baseInfo, "");
                }
            }
        }
Exemple #8
0
        /// <summary>
        /// Downloads the JSON list at <c>SiteUrl + (MaxCount + 20)</c> and, for every entry of every array
        /// in it, fetches the entry's detail data and passes it to <c>SaveExpert</c> or
        /// <c>SaveExpertJson</c>, for projects already stored as well as for newly saved ones.
        /// </summary>
        /// <param name="crawlAll">Not read by this implementation.</param>
        /// <returns>
        /// The result list (this method never adds to it) after all entries were handled; <c>null</c> when
        /// the list cannot be downloaded or the response contains no <c>{...}</c> section.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new List <BidProject>();
            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + (MaxCount + 20));
            }
            catch { return(null); }

            // Only the span from the first '{' to the last '}' is deserialized. A response without
            // such a span is treated like a failed download instead of letting Substring throw.
            int startIndex = string.IsNullOrEmpty(html) ? -1 : html.IndexOf('{');
            int endIndex   = string.IsNullOrEmpty(html) ? -1 : html.LastIndexOf('}');
            if (startIndex < 0 || endIndex < startIndex)
            {
                return(null);
            }
            html = html.Substring(startIndex, (endIndex + 1) - startIndex);

            JavaScriptSerializer        serializer  = new JavaScriptSerializer();
            Dictionary <string, object> smsTypeJson = (Dictionary <string, object>)serializer.DeserializeObject(html);

            foreach (KeyValuePair <string, object> obj in smsTypeJson)
            {
                if (obj.Key == "total")
                {
                    continue;
                }

                // Only array-valued members carry entries; any other member is skipped rather than
                // aborting the crawl with an invalid cast.
                object[] array = obj.Value as object[];
                if (array == null)
                {
                    continue;
                }

                foreach (object arrValue in array)
                {
                    string bPrjno = string.Empty, bPrjname = string.Empty, bExpertendtime = string.Empty,
                           bBaseprice = string.Empty, bBiddate = string.Empty, bBuildunit = string.Empty,
                           bBidmethod = string.Empty, bRemark = string.Empty, bInfourl = string.Empty;

                    Dictionary <string, object> dic = (Dictionary <string, object>)arrValue;
                    bPrjno   = Convert.ToString(dic["bdBH"]);
                    bPrjname = Convert.ToString(dic["bdName"]);
                    bInfourl = "https://www.szjsjy.com.cn:8001/jyw/queryOldDataDetail.do?type=6&id=" + bPrjno;

                    bool   isJson  = false;
                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(bInfourl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                    }
                    catch
                    {
                        // Best effort: an empty result falls through to the second endpoint below.
                    }

                    if (string.IsNullOrEmpty(htmldtl))
                    {
                        // Fall back to the guid-based endpoint. The key lookup sits inside the try so
                        // an entry without "pwBdGuid" is skipped like any other failed fetch.
                        try
                        {
                            bInfourl = "https://www.szjsjy.com.cn:8001/jyw/queryPWInfoByGuid.do?guid=" + dic["pwBdGuid"];
                            htmldtl  = this.ToolWebSite.GetHtmlByUrl(bInfourl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                        }
                        catch { continue; }
                        isJson = true;
                    }

                    BidProject info = ToolDb.GenExpertProject("广东省", "深圳市", "", bPrjno, bPrjname, bExpertendtime, bBaseprice, bBiddate, bBuildunit, bBidmethod, bRemark, bInfourl);

                    // The values come from a remote response, so single quotes are doubled before they are
                    // placed inside the SQL string literals; a raw quote would otherwise break the statement.
                    // NOTE(review): assumes GenExpertProject does not already escape quotes — confirm.
                    string safePrjNo   = (info.PrjNo ?? string.Empty).Replace("'", "''");
                    string safePrjName = (info.PrjName ?? string.Empty).Replace("'", "''");
                    string sql         = string.Format("select Id from BidProject where 1=1 and PrjNo='{0}' and PrjName='{1}'", safePrjNo, safePrjName);
                    string result      = Convert.ToString(ToolDb.ExecuteScalar(sql));

                    if (!string.IsNullOrEmpty(result))
                    {
                        // A project with this number and name already exists: save against its Id.
                        if (!isJson)
                        {
                            SaveExpert(result, bInfourl, htmldtl, true);
                        }
                        else
                        {
                            SaveExpertJson(htmldtl, result, bInfourl, true);
                        }
                    }
                    else
                    {
                        if (ToolDb.SaveEntity(info, this.ExistCompareFields))
                        {
                            if (!isJson)
                            {
                                SaveExpert(info.Id, bInfourl, htmldtl, false);
                            }
                            else
                            {
                                SaveExpertJson(htmldtl, info.Id, bInfourl, false);
                            }
                        }
                    }
                }
            }
            return(list);
        }
        /// <summary>
        /// Downloads the JSON list at <c>SiteUrl + (MaxCount + 20)</c> and, for every entry of every array
        /// in it, fetches the entry's attachment data, builds a <c>BidProject</c> via
        /// <c>ToolDb.GenResultProject</c> and passes both to <c>SaveAttach</c>, for projects already
        /// stored as well as for newly saved ones.
        /// </summary>
        /// <param name="crawlAll">Not read by this implementation.</param>
        /// <returns>
        /// The result list (this method never adds to it) after all entries were handled; <c>null</c> when
        /// the list cannot be downloaded or the response contains no <c>{...}</c> section.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new List <BidProject>();
            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + (MaxCount + 20));
            }
            catch { return(null); }

            // Only the span from the first '{' to the last '}' is deserialized. A response without
            // such a span is treated like a failed download instead of letting Substring throw.
            int startIndex = string.IsNullOrEmpty(html) ? -1 : html.IndexOf('{');
            int endIndex   = string.IsNullOrEmpty(html) ? -1 : html.LastIndexOf('}');
            if (startIndex < 0 || endIndex < startIndex)
            {
                return(null);
            }
            html = html.Substring(startIndex, (endIndex + 1) - startIndex);

            JavaScriptSerializer        serializer  = new JavaScriptSerializer();
            Dictionary <string, object> smsTypeJson = (Dictionary <string, object>)serializer.DeserializeObject(html);

            foreach (KeyValuePair <string, object> obj in smsTypeJson)
            {
                if (obj.Key == "total")
                {
                    continue;
                }

                // Only array-valued members carry entries; any other member is skipped rather than
                // aborting the crawl with an invalid cast.
                object[] array = obj.Value as object[];
                if (array == null)
                {
                    continue;
                }

                foreach (object arrValue in array)
                {
                    string bPrjno = string.Empty, bPrjname = string.Empty, bBidresultendtime = string.Empty,
                           bBaseprice = string.Empty, bBiddate = string.Empty, bBuildunit = string.Empty,
                           bBidmethod = string.Empty, bRemark = string.Empty;

                    Dictionary <string, object> dic = (Dictionary <string, object>)arrValue;
                    bPrjno   = Convert.ToString(dic["bdBH"]);
                    bPrjname = Convert.ToString(dic["bdName"]);
                    string saveUrl = Convert.ToString(dic["detailUrl"]);

                    string htmldtl = string.Empty;
                    try
                    {
                        // Step 1: post the two guids to look up the record.
                        NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] { "guid", "ggbdguid" }, new string[] {
                            dic["guid"].ToString(), dic["tpfaGgBdGuid"].ToString()
                        });
                        htmldtl = this.ToolWebSite.GetHtmlByUrl("https://www.szjsjy.com.cn:8001/jyw/queryPBById.do", nvc);

                        // Step 2: read the attachment group guid from the record's "vo" object and
                        // request the files of that group.
                        JavaScriptSerializer        attachSerializer = new JavaScriptSerializer();
                        Dictionary <string, object> attachJson       = (Dictionary <string, object>)attachSerializer.DeserializeObject(htmldtl);
                        Dictionary <string, object> kbJiLu           = attachJson["vo"] as Dictionary <string, object>;
                        string attachId = Convert.ToString(kbJiLu["attachFileGuid"]);
                        htmldtl = this.ToolWebSite.GetHtmlByUrl("https://www.szjsjy.com.cn:8001/jyw/filegroup/queryByGroupGuidZS.do?groupGuid=" + attachId).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                    }
                    catch
                    {
                        // Any failure above falls back to the old-data detail endpoint.
                        try
                        {
                            string url = "https://www.szjsjy.com.cn:8001/jyw/queryOldDataDetail.do?id=" + bPrjno + "&type=7";
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(url);
                        }
                        catch
                        {
                            // Best effort: htmldtl keeps whatever value it had when the first attempt failed.
                        }
                    }

                    BidProject info = ToolDb.GenResultProject("广东省", "深圳市", "", bPrjno, bPrjname, bBidresultendtime, bBaseprice, bBiddate, bBuildunit, bBidmethod, bRemark, saveUrl);

                    // The values come from a remote response, so single quotes are doubled before they are
                    // placed inside the SQL string literals; a raw quote would otherwise break the statement.
                    // NOTE(review): assumes GenResultProject does not already escape quotes — confirm.
                    string safePrjNo   = (info.PrjNo ?? string.Empty).Replace("'", "''");
                    string safePrjName = (info.PrjName ?? string.Empty).Replace("'", "''");
                    string sql         = string.Format("select Id from BidProject where 1=1 and PrjNo='{0}' and PrjName='{1}'", safePrjNo, safePrjName);
                    string result      = Convert.ToString(ToolDb.ExecuteScalar(sql));

                    if (!string.IsNullOrEmpty(result))
                    {
                        // A project with this number and name already exists: save against its Id.
                        SaveAttach(info, htmldtl, result, true);
                    }
                    else
                    {
                        try
                        {
                            if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                            {
                                SaveAttach(info, htmldtl, result, false);
                            }
                        }
                        catch (Exception)
                        {
                            // Best effort: a failure while saving one entry must not stop the remaining entries.
                        }
                    }
                }
            }
            return(list);
        }
Exemple #10
0
        /// <summary>
        /// Extracts attachment references from <paramref name="htmltxt"/>, resolves each one to a
        /// <c>BaseAttach</c> through <c>ToolHtml</c>, and saves the resulting records.
        /// </summary>
        /// <remarks>
        /// Text containing "http" is parsed as HTML and every anchor tag is treated as a candidate
        /// attachment. Any other text is passed to <c>ToolHtml.JsonToDataTable</c> and each row's
        /// <c>attachGuid</c> column is turned into a download URL. A failure on a single attachment
        /// is traced and skipped so the remaining ones are still processed. When at least one
        /// attachment was resolved and <paramref name="isUpdate"/> is true, the existing records
        /// for <paramref name="result"/> are deleted before the new ones are saved.
        /// </remarks>
        /// <param name="info">Project the attachments belong to; its name is the fallback attachment name.</param>
        /// <param name="htmltxt">HTML fragment or JSON text describing the attachments. Null or empty is a no-op.</param>
        /// <param name="result">Id of the already stored project; used as the source id when updating.</param>
        /// <param name="isUpdate">True to attach to <paramref name="result"/>; false to attach to <c>info.Id</c>.</param>
        private void SaveAttach(BidProject info, string htmltxt, string result, bool isUpdate)
        {
            // Nothing to scan; also avoids a NullReferenceException on htmltxt.Contains below.
            if (string.IsNullOrEmpty(htmltxt))
            {
                return;
            }

            const string siteRoot   = "https://www.szjsjy.com.cn:8001/";
            const string attachPath = "SiteManage\\Files\\Attach\\";

            // The original chose between result and info.Id at every call site; decide once.
            string            sourceId = isUpdate ? result : info.Id;
            List <BaseAttach> list     = new List <BaseAttach>();

            if (htmltxt.Contains("http"))
            {
                Parser   parser    = new Parser(new Lexer(htmltxt));
                NodeList aNode     = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                int      linkCount = aNode == null ? 0 : aNode.Count;

                for (int j = 0; j < linkCount; j++)
                {
                    ATag aTag = aNode[j].GetATag();
                    if (aTag == null || aTag.Link == null)
                    {
                        // No usable anchor for this node: skip it instead of crashing the whole save.
                        continue;
                    }

                    string attachName = aTag.LinkText;
                    if (string.IsNullOrWhiteSpace(attachName))
                    {
                        attachName = info.PrjName;
                    }

                    // Links without "http" are treated as site-relative. The comparison is ordinal
                    // and case-insensitive so it does not depend on the current culture.
                    string aurl = aTag.Link.GetReplace("\\");
                    if (aTag.Link.IndexOf("http", StringComparison.OrdinalIgnoreCase) < 0)
                    {
                        aurl = siteRoot + aurl;
                    }

                    try
                    {
                        string[] urls = System.Web.HttpUtility.UrlDecode(aurl).Split('&');
                        if (urls.Length < 3)
                        {
                            // The reorder below needs three '&'-separated parts. The original reached
                            // the same outcome through a swallowed IndexOutOfRangeException.
                            continue;
                        }

                        // Swap the second and third parts; anything after the third is dropped,
                        // exactly as before.
                        string     url    = (urls[0] + "&" + urls[2] + "&" + urls[1]).Replace("\"", "");
                        BaseAttach entity = ToolHtml.GetBaseAttach(url, attachName, sourceId, attachPath);
                        if (entity != null)
                        {
                            list.Add(entity);
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: one bad attachment must not prevent the others from being saved.
                        System.Diagnostics.Trace.TraceWarning("SaveAttach: skipped attachment '{0}': {1}", aurl, ex.Message);
                    }
                }
            }
            else
            {
                System.Data.DataTable dtlDtl = ToolHtml.JsonToDataTable(htmltxt);

                if (dtlDtl != null)
                {
                    foreach (System.Data.DataRow row in dtlDtl.Rows)
                    {
                        string attachName = Convert.ToString(row["attachName"]);
                        if (string.IsNullOrWhiteSpace(attachName))
                        {
                            attachName = info.PrjName;
                        }

                        string url = siteRoot + "file/downloadFile?fileId=" + Convert.ToString(row["attachGuid"]);
                        try
                        {
                            BaseAttach entity = ToolHtml.GetBaseAttachByUrl(url, attachName, sourceId, attachPath);
                            if (entity != null)
                            {
                                list.Add(entity);
                            }
                        }
                        catch (Exception ex)
                        {
                            // Best effort: one bad attachment must not prevent the others from being saved.
                            System.Diagnostics.Trace.TraceWarning("SaveAttach: skipped attachment '{0}': {1}", url, ex.Message);
                        }
                    }
                }
            }

            if (list.Count == 0)
            {
                return;
            }

            if (isUpdate)
            {
                // Replace rather than merge: remove what is stored for this project first.
                string delSql = string.Format("delete from BaseAttach where SourceID='{0}'", result);
                ToolFile.Delete(result);
                ToolDb.ExecuteSql(delSql);
            }

            foreach (BaseAttach attach in list)
            {
                ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
            }
        }
Exemple #11
0
        /// <summary>
        /// Crawls every page of the result grid at <c>SiteUrl</c>, saves each row as a
        /// <c>BidProject</c> and, for newly saved rows, saves the attachments linked from the
        /// row's detail page.
        /// </summary>
        /// <remarks>
        /// Pages after the first are requested by posting the ASP.NET view state back to the same
        /// URL. A page that cannot be fetched is skipped. Entities are written directly through
        /// <c>ToolDb</c>; nothing is added to the returned list inside this method.
        /// </remarks>
        /// <param name="crawlAll">Not read by this implementation.</param>
        /// <returns>
        /// An empty list on normal completion, or <c>null</c> when the first page cannot be
        /// fetched or more than <c>MaxCount</c> rows have been processed.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new ArrayList();
            string html            = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    sqlCount        = 0;
            int    pageInt         = 1;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch { return(null); }
            Parser parser = new Parser(new Lexer(html));

            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("cellspacing", "2"), new TagNameFilter("table")));

            if (pageList != null && pageList.Count > 0)
            {
                // The pager text carries the total page count; fall back to a single page when
                // it is missing or not a number.
                Match  pageMatch = Regex.Match(pageList.AsString(), @"共[^页]+页,");
                string pageText  = pageMatch.Value.Replace("共", "").Replace("页,", "").Replace(" ", "");
                int    parsedPages;
                pageInt = int.TryParse(pageText, out parsedPages) ? parsedPages : 1;
            }

            for (int j = 1; j <= pageInt; j++)
            {
                if (j > 1)
                {
                    viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__VIEWSTATE",
                        "__EVENTVALIDATION",
                        "ctl00$Header$drpSearchType",
                        "ctl00$Header$txtQymc",
                        "ctl00$Content$hdnOperate",
                        "ctl00$hdnPageCount"
                    }, new string[] {
                        "ctl00$Content$GridView1",
                        "Page$" + j.ToString(),
                        viewState,
                        eventValidation,
                        "0",
                        string.Empty,
                        string.Empty,
                        pageInt.ToString()
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                    }
                    catch (Exception ex)
                    {
                        // html still holds the previous page here. Skip this page so the same
                        // rows are not parsed and saved a second time.
                        System.Diagnostics.Trace.TraceWarning("ExecuteCrawl: page {0} could not be fetched: {1}", j, ex.Message);
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_Content_GridView1")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                int rows = table.RowCount;
                if (pageInt > 1)
                {
                    // With more than one page the last row is not read as data.
                    // NOTE(review): presumably the pager row — confirm against the site markup.
                    rows = rows - 1;
                }

                // Row 0 is skipped; data rows start at index 1.
                for (int i = 1; i < rows; i++)
                {
                    string bPrjno = string.Empty, bPrjname = string.Empty, bBidresultendtime = string.Empty,
                           bBaseprice = string.Empty, bBiddate = string.Empty, bBuildunit = string.Empty, bBidmethod = string.Empty,
                           bRemark = string.Empty, bInfourl = string.Empty;

                    TableRow tr = table.Rows[i] as TableRow;
                    if (tr == null)
                    {
                        continue;
                    }

                    bPrjno            = tr.Columns[1].ToPlainTextString();
                    bPrjname          = tr.Columns[2].ToPlainTextString();
                    bBidresultendtime = tr.Columns[3].ToPlainTextString();
                    bInfourl          = "http://www.szjsjy.com.cn/BusinessInfo/" + tr.Columns[4].GetATagHref();

                    string htmlDtl = string.Empty;
                    try
                    {
                        htmlDtl = this.ToolWebSite.GetHtmlByUrl(bInfourl, Encoding.UTF8);
                    }
                    catch (Exception ex)
                    {
                        // Best effort: the row is still saved, only its attachments are lost.
                        System.Diagnostics.Trace.TraceWarning("ExecuteCrawl: detail page '{0}' could not be fetched: {1}", bInfourl, ex.Message);
                    }

                    BidProject info = ToolDb.GenResultProject("广东省", "深圳市", "", bPrjno, bPrjname, bBidresultendtime, bBaseprice, bBiddate, bBuildunit, bBidmethod, bRemark, bInfourl);
                    sqlCount++;
                    if (sqlCount > this.MaxCount)
                    {
                        return(null);
                    }

                    if (!ToolDb.SaveEntity(info, ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx, null))
                    {
                        continue;
                    }

                    Parser   dtparser = new Parser(new Lexer(htmlDtl));
                    NodeList dtList   = dtparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_ContentPlaceHolder1_GridView1"), new TagNameFilter("table")));
                    if (dtList == null || dtList.Count == 0)
                    {
                        continue;
                    }

                    TableTag dttable = dtList[0] as TableTag;
                    if (dttable == null)
                    {
                        continue;
                    }

                    // Collect the anchors once instead of re-searching the table for every row.
                    NodeList links = dttable.SearchFor(typeof(ATag), true);
                    if (links == null)
                    {
                        continue;
                    }

                    // Row t is paired with anchor t - 1; stop when either runs out so a table with
                    // fewer anchors than rows cannot index past the end of the anchor list.
                    for (int t = 1; t < dttable.RowCount && t - 1 < links.Count; t++)
                    {
                        ATag file = links[t - 1] as ATag;
                        if (file == null || !file.IsAtagAttach())
                        {
                            continue;
                        }

                        string     url    = "http://www.szjsjy.com.cn/" + file.Link.Replace("../", "").Replace("./", "");
                        BaseAttach entity = ToolHtml.GetBaseAttach(url, file.LinkText, info.Id, "SiteManage\\Files\\Attach\\");
                        if (entity != null)
                        {
                            ToolDb.SaveEntity(entity, string.Empty);
                        }
                    }
                }
            }
            return(list);
        }