Ejemplo n.º 1
0
        /// <summary>
        /// Crawls the paged list at <c>SiteUrl</c>, fetches the detail page of every list row and stores it
        /// as a <c>NotifyInfo</c>, followed by the images and attachment links found in the detail content.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl ends as soon as <c>MaxCount</c> entries have been processed.
        /// </param>
        /// <returns>Always <c>null</c>; results are persisted through <c>ToolDb</c> instead of being returned.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            int    pageInt = 1, sqlCount = 0;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch (Exception)
            {
                // Without the first list page there is nothing to crawl.
                return(null);
            }

            // Determine the page count from the last link inside the "scott" pager div.
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "scott")), true), new TagNameFilter("a")));

            if (pageList != null && pageList.Count > 0)
            {
                try
                {
                    // The parentheses are swapped for plain markers before the value between them is extracted.
                    string temp = pageList[pageList.Count - 1].GetATagValue().Replace("(", "kdxx").Replace(")", "xxdk").GetRegexBegEnd("kdxx", "xxdk");
                    pageInt = Convert.ToInt32(temp);
                }
                catch { pageInt = 1; }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // Later pages are requested by posting back three values read from the current page plus the page number.
                        string typeId           = ToolHtml.GetHtmlInputValue(html, "typeId");
                        string boardId          = ToolHtml.GetHtmlInputValue(html, "boardId");
                        string totalRows        = ToolHtml.GetHtmlInputValue(html, "totalRows");
                        NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                            "typeId", "boardId", "totalRows", "pageNO"
                        }, new string[] {
                            typeId, boardId, totalRows, i.ToString()
                        });
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.Default);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "lefttable")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    // The matched node is not a table tag; skip this page instead of failing on a null reference.
                    continue;
                }

                // The first and the last row are skipped (presumably header and pager rows; confirm against the site).
                for (int j = 1; j < table.RowCount - 1; j++)
                {
                    string infoScorce = string.Empty;

                    TableRow tr          = table.Rows[j];
                    string   infoType    = "政策法规";
                    string   headName    = tr.Columns[1].ToNodePlainString();
                    string   releaseTime = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    string   infoUrl     = tr.Columns[1].GetATagHref();
                    string   htldtl      = string.Empty;
                    try
                    {
                        htldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htldtl));
                    NodeList dtlList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "context_div")));
                    if (dtlList == null || dtlList.Count == 0)
                    {
                        continue;
                    }

                    string ctxHtml = dtlList.AsHtml();
                    string infoCtx = ctxHtml.ToCtxString();
                    string msgType = MsgTypeCosnt.ZhongShanMsgType;
                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "中山市区", string.Empty, infoCtx, infoType);
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(null);
                    }

                    sqlCount++;
                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields))
                    {
                        continue;
                    }

                    // Images embedded in the content are stored as attachments of the saved entry.
                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList imgList = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                    if (imgList != null && imgList.Count > 0)
                    {
                        for (int img = 0; img < imgList.Count; img++)
                        {
                            ImageTag imgTag = imgList[img] as ImageTag;
                            if (imgTag == null)
                            {
                                continue;
                            }
                            try
                            {
                                BaseAttach baseInfo = ToolHtml.GetBaseAttachByUrl(imgTag.GetAttribute("src"), headName, info.Id);
                                if (baseInfo != null)
                                {
                                    ToolDb.SaveEntity(baseInfo, string.Empty);
                                }
                            }
                            catch
                            {
                                // Best effort, same as the link attachments below: a failing image must not abort the crawl.
                            }
                        }
                    }

                    // Links that IsAtagAttach() accepts are stored as attachments as well.
                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList attachList = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (attachList != null && attachList.Count > 0)
                    {
                        for (int a = 0; a < attachList.Count; a++)
                        {
                            ATag aTag = attachList[a] as ATag;
                            if (aTag == null || !aTag.IsAtagAttach())
                            {
                                continue;
                            }
                            try
                            {
                                BaseAttach obj = ToolHtml.GetBaseAttachByUrl(aTag.Link, aTag.LinkText, info.Id);
                                if (obj != null)
                                {
                                    ToolDb.SaveEntity(obj, string.Empty);
                                }
                            }
                            catch
                            {
                                // Best effort: a single failing attachment is skipped.
                            }
                        }
                    }
                }
            }
            return(null);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Walks the paged notice grid (<c>GridView1</c>) at <c>SiteUrl</c> and stores every notice as a
        /// <c>NotifyInfo</c>, together with the images and download links contained in its detail page.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the method returns as soon as the processed count reaches <c>MaxCount</c>.
        /// </param>
        /// <returns>Always <c>null</c>.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            int    totalPages = 1;
            int    handled    = 0;
            string listHtml   = string.Empty;

            try
            {
                listHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch
            {
                return(null);
            }

            // The page count is read from the href of the last link inside the AspNetPager1 div.
            Parser   parser     = new Parser(new Lexer(listHtml));
            NodeList pagerLinks = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "AspNetPager1")), true), new TagNameFilter("a")));
            if (pagerLinks != null && pagerLinks.Count > 0)
            {
                try
                {
                    string lastPage = pagerLinks[pagerLinks.Count - 1].GetATagHref().GetRegexBegEnd(",'", "'");
                    totalPages = int.Parse(lastPage);
                }
                catch { }
            }

            for (int page = 1; page <= totalPages; page++)
            {
                if (page > 1)
                {
                    // Pages after the first are requested by posting the pager form back to the same URL.
                    string   viewState       = this.ToolWebSite.GetAspNetViewState(listHtml);
                    string   eventValidation = this.ToolWebSite.GetAspNetEventValidation(listHtml);
                    string[] fieldNames      =
                    {
                        "__VIEWSTATE",
                        "__VIEWSTATEGENERATOR",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__EVENTVALIDATION",
                        "TBKey",
                        "AspNetPager1_input"
                    };
                    string[] fieldValues =
                    {
                        viewState,
                        "E997B95C",
                        "AspNetPager1",
                        page.ToString(),
                        eventValidation,
                        "",
                        (page - 1).ToString()
                    };
                    NameValueCollection form = this.ToolWebSite.GetNameValueCollection(fieldNames, fieldValues);
                    try
                    {
                        listHtml = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, form);
                    }
                    catch
                    {
                        continue;
                    }
                }

                parser = new Parser(new Lexer(listHtml));
                NodeList gridNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "GridView1")));
                if (gridNodes == null || gridNodes.Count == 0)
                {
                    continue;
                }

                TableTag grid = gridNodes[0] as TableTag;
                // Row 0 is skipped (presumably the header row; confirm against the site).
                for (int rowIndex = 1; rowIndex < grid.RowCount; rowIndex++)
                {
                    TableRow row        = grid.Rows[rowIndex];
                    ATag     titleLink  = row.Columns[1].GetATag();
                    string   infoScorce = string.Empty;
                    string   headName   = titleLink.LinkText;
                    string   infoType   = "通知公告";
                    string   published  = row.Columns[2].ToPlainTextString().GetDateRegex();
                    string   infoUrl    = "http://www.sdzb.gov.cn/" + titleLink.Link;

                    string detailHtml = string.Empty;
                    try
                    {
                        detailHtml = this.ToolWebSite.GetHtmlByUrl(infoUrl).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(detailHtml));
                    NodeList contentNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "96%")));
                    if (contentNodes == null || contentNodes.Count == 0)
                    {
                        continue;
                    }

                    string ctxHtml = contentNodes.AsHtml();
                    string infoCtx = ctxHtml.ToCtxString();
                    string msgType = "山东省建设工程招标投标管理办公室";

                    // Prefix image sources with the site host inside the stored HTML and remember them for download.
                    List <string> imageLinks = new List <string>();
                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList imageNodes = parser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                    if (imageNodes != null && imageNodes.Count > 0)
                    {
                        for (int p = 0; p < imageNodes.Count; p++)
                        {
                            ImageTag image    = imageNodes[p] as ImageTag;
                            string   absolute = "http://www.sdzb.gov.cn" + image.ImageURL.GetReplace("../,./");
                            ctxHtml = ctxHtml.GetReplace(image.ImageURL, absolute);
                            imageLinks.Add(absolute);
                        }
                    }

                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, published, infoScorce, msgType, infoUrl, ctxHtml, "山东省", "山东省及地市", "", infoCtx, infoType);
                    handled++;
                    if (!crawlAll && handled >= this.MaxCount)
                    {
                        return(null);
                    }
                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                    {
                        continue;
                    }

                    // Store the collected images as attachments; each failure is ignored individually.
                    foreach (string imageLink in imageLinks)
                    {
                        try
                        {
                            BaseAttach imageAttach = ToolHtml.GetBaseAttachByUrl(imageLink, headName, info.Id);
                            if (imageAttach != null)
                            {
                                ToolDb.SaveEntity(imageAttach, "SourceID,AttachServerPath");
                            }
                        }
                        catch { }
                    }

                    // Store links that contain "download" or that IsAtagAttach() accepts.
                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList anchorNodes = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (anchorNodes == null || anchorNodes.Count == 0)
                    {
                        continue;
                    }
                    for (int k = 0; k < anchorNodes.Count; k++)
                    {
                        ATag anchor = anchorNodes[k] as ATag;
                        if (!anchor.Link.ToLower().Contains("download") && !anchor.IsAtagAttach())
                        {
                            continue;
                        }

                        string fileLink = anchor.Link.ToLower().Contains("http")
                            ? anchor.Link
                            : "http://www.sdzb.gov.cn" + anchor.Link.GetReplace("../,./");
                        // Links longer than 500 bytes are skipped.
                        if (Encoding.Default.GetByteCount(fileLink) > 500)
                        {
                            continue;
                        }
                        try
                        {
                            BaseAttach fileAttach = ToolHtml.GetBaseAttachByUrl(fileLink, anchor.LinkText, info.Id);
                            if (fileAttach != null)
                            {
                                ToolDb.SaveEntity(fileAttach, "SourceID,AttachServerPath");
                            }
                        }
                        catch { }
                    }
                }
            }
            return(null);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Crawls the paged report list (first page at <c>SiteUrl</c>, later pages at
        /// <c>pingbiaobaogao_N.htm</c>), stores every entry as a <c>BidProject</c> and saves the
        /// attachment referenced by the detail page, if any.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl ends as soon as <c>MaxCount</c> entries have been processed.
        /// </param>
        /// <returns>An empty list; results are persisted through <c>ToolDb</c>, nothing is ever added to the list.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list     = new ArrayList();
            string html     = string.Empty;
            int    sqlCount = 0;
            int    pageInt  = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch (Exception)
            {
                // Without the first list page there is nothing to crawl.
                return(list);
            }

            // The page count is the value between "(" and "," in the text of the "cn6" cell.
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "cn6")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().Replace("(", "kdxx").GetRegexBegEnd("kdxx", ",");
                    pageInt = int.Parse(temp);
                }
                catch { }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // Page i is served by the file numbered i - 1.
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.gdzbtb.gov.cn/pbbgbd/pingbiaobaogao_" + (i - 1).ToString() + ".htm", Encoding.Default);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "position2")), true), new TagNameFilter("li")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < nodeList.Count; j++)
                {
                    string bCity = string.Empty, bPrjno = string.Empty, bBidresultendtime = string.Empty,
                           bBaseprice = string.Empty, bBuildunit = string.Empty, bBidmethod = string.Empty, bRemark = string.Empty;

                    // The title has the form "[region]-name"; the bracketed part selects the city.
                    string bPrjname = nodeList[j].GetATagValue("title");
                    if (bPrjname.Contains("广东省"))
                    {
                        bCity    = "广州市区";
                        bPrjname = bPrjname.Replace("[", "").Replace("]-", "").Replace("]", "").Replace("广东省", "");
                    }
                    else
                    {
                        string temp = bPrjname.Replace("[", "kdxx").Replace("]", "xxdk").GetRegexBegEnd("kdxx", "xxdk");
                        bPrjname = bPrjname.Replace("[", "").Replace("]-", "").Replace("]", "");
                        // string.Replace throws on an empty search value, so a title without a
                        // bracketed region keeps its text and leaves the city empty.
                        if (!string.IsNullOrEmpty(temp))
                        {
                            bPrjname = bPrjname.Replace(temp, "");
                            bCity    = temp + "区";
                        }
                    }

                    string bInfourl = "http://www.gdzbtb.gov.cn/pbbgbd/" + nodeList[j].GetATagHref().Replace("../", "").Replace("./", "");
                    string htldtl   = string.Empty;
                    try
                    {
                        htldtl = this.ToolWebSite.GetHtmlByUrl(bInfourl, Encoding.Default);
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellSpacing", "1")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string bBiddate = dtlNode.AsHtml().GetDateRegex();
                    if (string.IsNullOrEmpty(bBiddate))
                    {
                        bBiddate = DateTime.Now.ToString("yyyy-MM-dd");
                    }

                    // The attachment link is cut out of the page text: from the "#pbbg_shongti" marker
                    // up to the first "</a>" that FOLLOWS it. Searching from the marker (not from the
                    // start of the page) keeps the Substring length non-negative.
                    string attachUrl  = string.Empty;
                    string attachName = string.Empty;
                    int    markerPos  = htldtl.IndexOf("$(\"#pbbg_shongti\")", StringComparison.Ordinal);
                    int    anchorEnd  = markerPos >= 0 ? htldtl.IndexOf("</a>", markerPos, StringComparison.Ordinal) : -1;
                    if (markerPos >= 0 && anchorEnd >= 0)
                    {
                        string aurl = htldtl.Substring(markerPos, anchorEnd - markerPos) + "</a>";
                        parser = new Parser(new Lexer(aurl));
                        NodeList atagNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                        if (atagNode != null && atagNode.Count > 0)
                        {
                            ATag aTag = atagNode.GetATag();
                            if (aTag != null)
                            {
                                attachUrl  = aTag.Link;
                                attachName = aTag.LinkText;
                            }
                        }
                    }
                    if (string.IsNullOrEmpty(attachName))
                    {
                        attachName = bPrjname;
                    }

                    BidProject info = ToolDb.GenResultProject("广东省", bCity, "", bPrjno, bPrjname, bBidresultendtime, bBaseprice, bBiddate, bBuildunit, bBidmethod, bRemark, bInfourl);
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(list);
                    }
                    sqlCount++;

                    // The URL comes from a remote page, so single quotes are doubled before it is embedded in SQL.
                    // NOTE(review): switch to a parameterized query if ToolDb offers one.
                    string safeUrl       = (info.InfoUrl ?? string.Empty).Replace("'", "''");
                    string sql           = string.Format("select Id from BidProject where 1=1 and InfoUrl='{0}'", safeUrl);
                    string result        = Convert.ToString(ToolDb.ExecuteScalar(sql));
                    bool   alreadyStored = !string.IsNullOrEmpty(result);

                    // Existing rows are saved with the update flag, new rows without it.
                    bool saved = alreadyStored
                        ? ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate)
                        : ToolDb.SaveEntity(info, this.ExistCompareFields);
                    if (!saved || string.IsNullOrEmpty(attachUrl))
                    {
                        continue;
                    }

                    // Attachments live in a yyyyMM folder derived from the bid date (current month as fallback).
                    DateTime bidDate;
                    string   fileUrl = DateTime.TryParse(bBiddate, out bidDate)
                        ? bidDate.ToString("yyyyMM")
                        : DateTime.Now.ToString("yyyyMM");
                    string     alink  = "http://www.gdzbtb.gov.cn/pbbgbd/" + fileUrl + "/" + attachUrl.Replace("../", "").Replace("./", "");
                    BaseAttach attach = null;
                    try
                    {
                        attach = ToolHtml.GetBaseAttach(alink, attachName, info.Id, "SiteManage\\Files\\Attach\\");
                        if (attach == null)
                        {
                            attach = ToolHtml.GetBaseAttachByUrl(alink, attachName, info.Id, "SiteManage\\Files\\Attach\\");
                        }
                    }
                    catch
                    {
                        // Best effort: a failed download leaves the project stored without its attachment.
                    }
                    if (attach != null)
                    {
                        if (alreadyStored)
                        {
                            // Replace the attachments recorded for the previously stored project.
                            string sqlDelete = string.Format("delete from BaseAttach where SourceId='{0}'", result);
                            ToolDb.ExecuteSql(sqlDelete);
                        }
                        ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Crawls the paged announcement list at <c>SiteUrl</c> (later pages via <c>?page=N</c>), stores every
        /// announcement as a <c>NotifyInfo</c> and saves the attachment links found in its content.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, the crawl ends as soon as the processed count reaches <c>MaxCount</c>.
        /// </param>
        /// <returns>An empty list; results are persisted through <c>ToolDb</c>, nothing is ever added to the list.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new List <NotifyInfo>();
            int    pageInt = 1, sqlCount = 0;
            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch { return(list); }

            // The page count is the value between "/" and "页" in the text of the "style1" table.
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "style1")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode[0].ToNodePlainString().GetRegexBegEnd("/", "页");
                    pageInt = int.Parse(temp);
                }
                catch { }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "?page=" + i, Encoding.Default);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "99%")), true), new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "100%"))));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    // Each list entry is its own table; the title link is taken from its first row.
                    TableTag entryTable = listNode[j] as TableTag;
                    if (entryTable == null || entryTable.RowCount == 0)
                    {
                        continue;
                    }
                    ATag aTag = entryTable.Rows[0].GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }

                    string infoScorce = string.Empty;
                    string headName   = aTag.LinkText;
                    if (Encoding.Default.GetByteCount(headName) > 200)
                    {
                        // Titles over 200 bytes are cut to at most 99 characters (presumably a column limit;
                        // confirm). Under a wider encoding 99 characters can still exceed 200 bytes, and the
                        // title can be shorter than 99 characters, hence the clamp and the trimming loop.
                        headName = headName.Substring(0, Math.Min(99, headName.Length));
                        while (headName.Length > 0 && Encoding.Default.GetByteCount(headName) > 200)
                        {
                            headName = headName.Substring(0, headName.Length - 1);
                        }
                    }

                    string infoUrl = "http://www.hnsztb.com.cn/gsgg/" + aTag.Link;
                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "800")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }
                    TableTag table = dtlNode[0] as TableTag;
                    if (table == null)
                    {
                        continue;
                    }

                    // The second row is used as the content when present, otherwise the whole table.
                    string ctxHtml = table.RowCount > 1 ? table.Rows[1].ToHtml() : table.ToHtml();
                    string infoCtx = ctxHtml.ToCtxString();

                    // Try the date formats in turn until one is found in the content.
                    string releaseTime = infoCtx.GetDateRegex();
                    if (string.IsNullOrEmpty(releaseTime))
                    {
                        releaseTime = infoCtx.GetDateRegex("yyyy年MM月dd日");
                    }
                    if (string.IsNullOrEmpty(releaseTime))
                    {
                        releaseTime = infoCtx.GetDateRegex("yyyy/MM/dd");
                    }
                    if (string.IsNullOrEmpty(releaseTime))
                    {
                        releaseTime = infoCtx.GetChinaTime();
                    }

                    string     msgType  = "河南省建设工程招标投标协会";
                    string     infoType = "通知公告";
                    NotifyInfo info     = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "河南省", "河南省及地市", string.Empty, infoCtx, infoType);
                    sqlCount++;
                    // The limit applies only when NOT crawling everything (the original condition was inverted),
                    // and the early exit returns the list like every other exit of this method.
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(list);
                    }
                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode == null || aNode.Count == 0)
                    {
                        continue;
                    }
                    for (int k = 0; k < aNode.Count; k++)
                    {
                        ATag a = aNode[k].GetATag();
                        if (a == null || !a.IsAtagAttach())
                        {
                            continue;
                        }

                        // Absolute links are used as they are; relative ones are prefixed with the site root.
                        // The link must come from the attachment anchor (a), not from the list row's title anchor.
                        string link = a.Link.ToLower().Contains("http")
                            ? a.Link
                            : "http://www.hnsztb.com.cn/" + a.Link;
                        try
                        {
                            BaseAttach entity = ToolHtml.GetBaseAttach(link, a.LinkText, info.Id);
                            if (entity == null)
                            {
                                entity = ToolHtml.GetBaseAttachByUrl(link, a.LinkText, info.Id);
                            }
                            if (entity != null)
                            {
                                ToolDb.SaveEntity(entity, string.Empty);
                            }
                        }
                        catch
                        {
                            // Best effort: a single failing attachment is skipped.
                        }
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Crawls the paginated notice list at <c>SiteUrl</c>: for every list entry it downloads the
        /// detail page, saves the notice through <c>ToolDb</c>, and then saves each link inside the
        /// notice content whose href contains "download" as an attachment of that notice.
        /// </summary>
        /// <param name="crawlAll">When false, the crawl stops as soon as <c>MaxCount</c> notices have been counted.</param>
        /// <returns>Always <c>null</c>; nothing is accumulated for the caller.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            int    pageInt = 1, sqlCount = 0;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch { return(null); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "pagination")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The page count is taken from the pager text between "/共" and "页".
                    string temp = pageNode[pageNode.Count - 1].ToNodePlainString().GetRegexBegEnd("/共", "页");
                    pageInt = int.Parse(temp);
                }
                catch { } // pager could not be parsed: keep the default of a single page
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Pages after the first are requested with the page index as request data.
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "pageindex",
                        "X-Requested-With"
                    }, new string[] {
                        i.ToString(),
                        "XMLHttpRequest"
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "left_picinfo_text")), true), new TagNameFilter("li")));
                if (listNode != null && listNode.Count > 0)
                {
                    for (int j = 0; j < listNode.Count; j++)
                    {
                        string headName = string.Empty, releaseTime = string.Empty, infoScorce = string.Empty, msgType = string.Empty, infoUrl = string.Empty, ctxHtml = string.Empty, infoCtx = string.Empty, infoType = string.Empty;
                        INode  node = listNode[j];
                        ATag   aTag = node.GetATag();
                        if (aTag == null)
                        {
                            // A list item without a link has no detail page; skip it instead of
                            // failing the whole crawl on a null dereference.
                            continue;
                        }
                        headName    = aTag.LinkText;
                        infoType    = "通知公告";
                        releaseTime = node.ToPlainTextString().GetDateRegex("yyyy年MM月dd日");
                        infoUrl     = "http://www.zzjs.com.cn" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "clear")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            ctxHtml = dtlNode[0].ToHtml();
                            infoCtx = ctxHtml.ToCtxString();
                            msgType = "郑州市城乡建设委员会";
                            NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "河南省", "河南省及地市", "郑州市", infoCtx, infoType);
                            sqlCount++;
                            if (!crawlAll && sqlCount >= this.MaxCount)
                            {
                                return(null);
                            }
                            if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                            {
                                // Attachments are only collected when SaveEntity reports success.
                                parser = new Parser(new Lexer(ctxHtml));
                                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                                if (aNode != null && aNode.Count > 0)
                                {
                                    for (int k = 0; k < aNode.Count; k++)
                                    {
                                        ATag a = aNode[k] as ATag;
                                        if (a == null || string.IsNullOrEmpty(a.Link))
                                        {
                                            // Not an anchor tag, or an anchor without an href: nothing to download.
                                            continue;
                                        }
                                        if (a.Link.ToLower().Contains("download"))
                                        {
                                            string link = string.Empty;
                                            if (a.Link.ToLower().Contains("http"))
                                            {
                                                link = a.Link;
                                            }
                                            else
                                            {
                                                link = "http://www.zzjs.com.cn" + a.Link.GetReplace("../,./");
                                            }
                                            // Links longer than 500 bytes are skipped.
                                            if (Encoding.Default.GetByteCount(link) > 500)
                                            {
                                                continue;
                                            }
                                            try
                                            {
                                                BaseAttach attach = ToolHtml.GetBaseAttachByUrl(link, a.LinkText, info.Id);
                                                if (attach != null)
                                                {
                                                    ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                                                }
                                            }
                                            catch { } // best effort: a failed attachment must not stop the crawl
                                        }
                                    }
                                }
                            }
                        }
                        else
                        {
                            Logger.Error("无内容");
                        }
                    }
                }
            }
            return(null);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Walks every page of the listing at <c>SiteUrl</c>, turns each data row of the table with
        /// id "bean" into a <c>CorpWarning</c>, saves it, and downloads the linked document as its attachment.
        /// </summary>
        /// <param name="crawlAll">When false, the crawl returns once the running counter reaches <c>MaxCount</c>.</param>
        /// <returns>The result list created at the start; this method never adds entries to it.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list          = new List <CorpWarning>();
            int    pageInt       = 1;
            int    sqlCount      = 1;
            int    rowsSincePause = 1;
            string pageHtml      = string.Empty;

            try
            {
                pageHtml = ToolWeb.GetHtmlByUrl(SiteUrl, Encoding.Default);
            }
            catch (Exception)
            {
                return(list);
            }

            // The page count is read from the "page=" query value of the link with id "lx".
            NodeList pagerLinks = new Parser(new Lexer(pageHtml)).ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("id", "lx")));
            if (pagerLinks != null && pagerLinks.Count > 0)
            {
                try
                {
                    pageInt = int.Parse(pagerLinks.GetATagHref().GetRegexBegEnd("page=", "&"));
                }
                catch
                {
                    // Pager could not be read: fall back to a single page.
                }
            }

            for (int page = 1; page <= pageInt; page++)
            {
                if (page > 1)
                {
                    try
                    {
                        pageHtml = ToolWeb.GetHtmlByUrl(this.SiteUrl + "&page=" + page.ToString(), Encoding.Default);
                    }
                    catch
                    {
                        continue;
                    }
                }

                NodeList tables = new Parser(new Lexer(pageHtml)).ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "bean")));
                if (tables == null || tables.Count == 0)
                {
                    continue;
                }

                TableTag table = tables[0] as TableTag;
                // Row 0 is skipped (iteration starts at 1).
                for (int rowIndex = 1; rowIndex < table.RowCount; rowIndex++)
                {
                    TableRow row = table.Rows[rowIndex];

                    string code           = row.Columns[1].ToPlainTextString().GetATag().LinkText;
                    string warningName    = row.Columns[2].ToNodePlainString();
                    string deliveryDate   = row.Columns[3].ToPlainTextString().GetDateRegex();
                    string warningType    = row.Columns[4].ToNodePlainString();
                    string punishmentType = row.Columns[5].ToNodePlainString();

                    // The download URL ends with a value pulled out of the cell's onclick attribute.
                    string documentName = row.Columns[1].GetATagValue("onclick").Replace("'", "lxl").GetRegexBegEnd("lxl", "lxl");
                    string infoUrl      = "http://61.144.226.2:8001/web/cxda/xzcfAction.do?method=downLoadXzcfjdRemote&xzcfjdname=" + documentName;
                    string msgType      = "深圳市住房和建设局";

                    // Fields this listing does not provide are passed as empty strings.
                    CorpWarning info = ToolDb.GenCorpWarning(
                        "广东省", "深圳市区", "",
                        code, warningName, deliveryDate, warningType, punishmentType,
                        string.Empty,   // prjNumber
                        string.Empty,   // totalScore
                        string.Empty,   // resultScore
                        string.Empty,   // corpType
                        string.Empty,   // publicEndDate
                        string.Empty,   // warningEndDate
                        string.Empty,   // prjName
                        string.Empty,   // badInfo
                        msgType,
                        string.Empty);  // color

                    sqlCount++;
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(list);
                    }

                    if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                    {
                        BaseAttach attach = null;
                        try
                        {
                            attach = ToolHtml.GetBaseAttachByUrl(infoUrl, code, info.Id, "SiteManage\\Files\\Attach\\");
                        }
                        catch { } // download failures are ignored; the warning itself is already saved
                        if (attach != null)
                        {
                            ToolDb.SaveEntity(attach, "");
                        }
                    }

                    // Pause for 480000 ms each time the row counter reaches 200, then restart the counter.
                    rowsSincePause++;
                    if (rowsSincePause >= 200)
                    {
                        rowsSincePause = 1;
                        Thread.Sleep(480000);
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Crawls bid-result records from the JSON list returned by <c>SiteUrl + MaxCount</c>.
        /// For each record it first requests the queryOldOTDataDetail page; when that comes back empty
        /// it falls back to the queryZbgs JSON endpoint and fills the values into the HTML template
        /// fetched from the record's <c>detailUrl</c>. The resulting <c>BidInfo</c> is saved through
        /// <c>ToolDb</c>, followed by its attachments.
        /// </summary>
        /// <param name="crawlAll">When false, crawling stops once <c>MaxCount</c> records have been processed.</param>
        /// <returns>
        /// <c>null</c> when the list cannot be fetched or parsed, or when the <c>MaxCount</c> limit is hit;
        /// otherwise the result list created at the start, which this method never adds entries to.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            int   sqlCount = 0;
            IList list     = new List <BidInfo>();

            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + this.MaxCount);
            }
            catch { return(null); }

            // Only the outermost "{ ... }" span of the response is parsed. Without one there is
            // nothing to parse, so treat it like a failed fetch instead of letting Substring throw.
            if (string.IsNullOrEmpty(html))
            {
                return(null);
            }
            int startIndex = html.IndexOf("{");
            int endIndex   = html.LastIndexOf("}");
            if (startIndex < 0 || endIndex < startIndex)
            {
                return(null);
            }

            html = html.Substring(startIndex, (endIndex + 1) - startIndex);
            JavaScriptSerializer        serializer  = new JavaScriptSerializer();
            Dictionary <string, object> smsTypeJson = null;
            try
            {
                smsTypeJson = serializer.DeserializeObject(html) as Dictionary <string, object>;
            }
            catch { return(null); }
            if (smsTypeJson == null)
            {
                return(null);
            }

            foreach (KeyValuePair <string, object> obj in smsTypeJson)
            {
                if (obj.Key == "total")
                {
                    continue;
                }
                // Skip any entry whose value is not an array rather than aborting on a bad cast.
                object[] array = obj.Value as object[];
                if (array == null)
                {
                    continue;
                }
                foreach (object arrValue in array)
                {
                    string prjName = string.Empty,
                                buildUnit = string.Empty, bidUnit = string.Empty,
                                bidMoney = string.Empty, code = string.Empty,
                                beginDate = string.Empty,
                                endDate = string.Empty, bidType = string.Empty,
                                specType = string.Empty, InfoUrl = string.Empty,
                                msgType = string.Empty, bidCtx = string.Empty,
                                prjMgr = string.Empty, otherType = string.Empty, HtmlTxt = string.Empty;


                    Dictionary <string, object> dic = arrValue as Dictionary <string, object>;
                    if (dic == null)
                    {
                        continue;
                    }
                    code      = Convert.ToString(dic["bdBH"]);
                    prjName   = Convert.ToString(dic["bdName"]);
                    beginDate = Convert.ToString(dic["fabuTime2"]);
                    string saveUrl = Convert.ToString(dic["detailUrl"]);
                    InfoUrl = "https://www.szjsjy.com.cn:8001/jyw-lg/jyxx/queryOldOTDataDetail.do?type=4&id=" + dic["bdBH"];

                    List <Dictionary <string, object> > listAttachs = new List <Dictionary <string, object> >();
                    bool isJson = false;
                    try
                    {
                        HtmlTxt = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"");
                        if (string.IsNullOrEmpty(HtmlTxt))
                        {
                            // Empty detail page: take the data from the JSON endpoint instead.
                            isJson = true;
                            string url     = "https://www.szjsjy.com.cn:8001/jyw-lg/jyxx/queryZbgs.do?guid=" + dic["dbZhongBiaoJieGuoGuid"] + "&ggGuid=&bdGuid=";
                            string htmldtl = this.ToolWebSite.GetHtmlByUrl(url);

                            Dictionary <string, object> dtlJsons = (Dictionary <string, object>)serializer.DeserializeObject(htmldtl);

                            buildUnit = Convert.ToString(dtlJsons["zbrAndLht"]);
                            bidUnit   = Convert.ToString(dtlJsons["tbrName"]);
                            bidMoney  = Convert.ToString(dtlJsons["zhongBiaoJE"]);
                            try
                            {
                                bidMoney = (decimal.Parse(bidMoney) / 1000000).ToString();
                            }
                            catch { } // not a number: keep the raw text
                            prjMgr = Convert.ToString(dtlJsons["xiangMuJiLi"]);

                            // Nested objects are optional; each stays null when it is missing.
                            Dictionary <string, object> bd = null;
                            Dictionary <string, object> gc = null;
                            Dictionary <string, object> xm = null;
                            try
                            {
                                bd = dtlJsons["bd"] as Dictionary <string, object>;
                            }
                            catch { }
                            try
                            {
                                gc = bd["gc"] as Dictionary <string, object>;
                            }
                            catch { }
                            try
                            {
                                xm = bd["xm"] as Dictionary <string, object>;
                            }
                            catch { }


                            string   htl      = this.ToolWebSite.GetHtmlByUrl(saveUrl);
                            Parser   parser   = new Parser(new Lexer(htl));
                            NodeList nodelist = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "right_bg")));
                            if (nodelist != null && nodelist.Count > 0)
                            {
                                // Fill each empty placeholder span of the template with its value.
                                // Every substitution is independent: a missing key or null nested
                                // object only leaves that one placeholder empty.
                                HtmlTxt = nodelist.AsHtml();
                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"gcBH\"></span>", "<span id=\"gcBH\">" + code + "</span>");
                                }
                                catch { }
                                try
                                {
                                    // The replacement keeps the placeholder's own id (gcName).
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"gcName\"></span>", "<span id=\"gcName\">" + gc["gcName"] + "</span>");
                                }
                                catch { }
                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"bdName\"></span>", "<span id=\"bdName\">" + prjName + "</span>");
                                }
                                catch { }
                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"xmBH\"></span>", "<span id=\"xmBH\">" + xm["xm_BH"] + "</span>");
                                }
                                catch { }
                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"xmName\"></span>", "<span id=\"xmName\">" + xm["xm_Name"] + "</span>");
                                }
                                catch { }
                                try
                                {
                                    long zbgsStartTime = Convert.ToInt64(dtlJsons["zbgsStartTime"]);
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"zbgsStartTime\"></span>", "<span id=\"zbgsStartTime\">" + ToolHtml.GetDateTimeByLong(zbgsStartTime) + "</span>");
                                }
                                catch { }
                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"zbRName\"></span>", "<span id=\"zbRName\">" + gc["zbRName"] + "</span>");
                                }
                                catch { }
                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"zbdlJG\"></span>", "<span id=\"zbdlJG\">" + gc["creatorName"] + "</span>");
                                }
                                catch { }
                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"tbrName\"></span>", "<span id=\"tbrName\">" + dtlJsons["tbrName"] + "</span>");
                                }
                                catch { }
                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"zhongBiaoJE\"></span>", "<span id=\"zhongBiaoJE\">" + bidMoney + "万元</span>");
                                }
                                catch { }
                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"zhongBiaoGQ\"></span>", "<span id=\"zhongBiaoGQ\">" + dtlJsons["zhongBiaoGQ"] + "</span>");
                                }
                                catch { }
                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"xiangMuJiLi\"></span>", "<span id=\"xiangMuJiLi\">" + prjMgr + "</span>");
                                }
                                catch { }
                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"ziGeDengJi\"></span>", "<span id=\"ziGeDengJi\">" + dtlJsons["ziGeDengJi"] + "</span>");
                                }
                                catch { }

                                try
                                {
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"ziGeZhengShu\"></span>", "<span id=\"ziGeZhengShu\">" + dtlJsons["ziGeZhengShu"] + "</span>");
                                }
                                catch { }
                                try
                                {
                                    string zanding = string.IsNullOrWhiteSpace(Convert.ToString(dtlJsons["isZanDingJinE"])) ? "否" : "是";
                                    HtmlTxt = HtmlTxt.GetReplace("<span id=\"isZanDingJinE\"></span>", "<span id=\"isZanDingJinE\">" + zanding + "</span>");
                                }
                                catch { }
                            }
                            try
                            {
                                // Collect the attachment descriptors; they are downloaded after the record is saved.
                                string fileUrl  = "https://www.szjsjy.com.cn:8001/jyw-lg/jyxx/filegroup/queryByGroupGuidZS.do?groupGuid=" + dtlJsons["ztbFileGroupGuid"];
                                string fileJson = this.ToolWebSite.GetHtmlByUrl(fileUrl);
                                Dictionary <string, object> fileDic = (Dictionary <string, object>)serializer.DeserializeObject(fileJson);
                                object[] objFile = fileDic["rows"] as object[];

                                foreach (object file in objFile)
                                {
                                    Dictionary <string, object> attach = file as Dictionary <string, object>;
                                    listAttachs.Add(attach);
                                }
                            }
                            catch { }
                        }
                    }
                    catch { continue; }
                    bidCtx = HtmlTxt.Replace("<br />", "\r\n").ToCtxString();

                    if (!isJson)
                    {
                        // HTML detail page: the fields are extracted from the plain text.
                        buildUnit = bidCtx.GetBuildRegex();
                        bidUnit   = bidCtx.GetBidRegex();
                        bidMoney  = bidCtx.GetMoneyRegex();
                        prjMgr    = bidCtx.GetMgrRegex();

                        if (string.IsNullOrEmpty(prjMgr))
                        {
                            prjMgr = bidCtx.GetRegex("项目负责");
                        }
                    }
                    msgType  = "深圳市建设工程交易中心龙岗分中心";
                    specType = "建设工程";
                    bidType  = ToolHtml.GetInviteTypes(prjName);
                    prjName  = ToolDb.GetPrjName(prjName);
                    BidInfo info = ToolDb.GenBidInfo("广东省", "深圳龙岗区工程", "龙岗区", string.Empty, code, prjName, buildUnit, beginDate, bidUnit, beginDate, endDate, bidCtx, string.Empty, msgType, bidType, specType, otherType, bidMoney, saveUrl, prjMgr, HtmlTxt);

                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(null);
                    }

                    sqlCount++;
                    if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
                    {
                        if (!isJson)
                        {
                            // HTML detail page: attachments are the links inside the saved content.
                            Parser   parser   = new Parser(new Lexer(HtmlTxt));
                            NodeList fileNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                            if (fileNode != null && fileNode.Count > 0)
                            {
                                for (int f = 0; f < fileNode.Count; f++)
                                {
                                    ATag tag = fileNode[f] as ATag;
                                    if (tag == null)
                                    {
                                        // Not an anchor tag: nothing to download.
                                        continue;
                                    }
                                    if (tag.IsAtagAttach() || tag.Link.ToLower().Contains("downloadfile"))
                                    {
                                        try
                                        {
                                            BaseAttach attach = null;
                                            string     link   = string.Empty;
                                            if (tag.Link.ToLower().Contains("http"))
                                            {
                                                link = tag.Link;
                                                if (link.StartsWith("\\"))
                                                {
                                                    // When the link starts with a backslash IndexOf is 0,
                                                    // so this Substring returns the whole link unchanged.
                                                    link = link.Substring(link.IndexOf("\\"), link.Length - link.IndexOf("\\"));
                                                }
                                                if (link.EndsWith("//"))
                                                {
                                                    link = link.Remove(link.LastIndexOf("//"));
                                                }
                                                link = link.GetReplace("\\", "");
                                            }
                                            else
                                            {
                                                link = "https://www.szjsjy.com.cn:8001/" + tag.Link;
                                            }
                                            attach = ToolHtml.GetBaseAttachByUrl(link, tag.LinkText, info.Id, "SiteManage\\Files\\InviteAttach\\");
                                            if (attach != null)
                                            {
                                                ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                                            }
                                        }
                                        catch { continue; }
                                    }
                                }
                            }
                        }
                        else if (listAttachs.Count > 0)
                        {
                            // JSON detail: download each attachment by the id collected earlier.
                            foreach (Dictionary <string, object> attach in listAttachs)
                            {
                                BaseAttach attachBase = null;
                                try
                                {
                                    string attachName = Convert.ToString(attach["attachName"]);
                                    string attachId   = Convert.ToString(attach["attachGuid"]);
                                    string link       = "https://www.szjsjy.com.cn:8001/file/downloadFile?fileId=" + attachId;

                                    attachBase = ToolHtml.GetBaseAttach(link, attachName, info.Id, "SiteManage\\Files\\InviteAttach\\");
                                    if (attachBase != null)
                                    {
                                        ToolDb.SaveEntity(attachBase, "SourceID,AttachServerPath");
                                    }
                                }
                                catch { } // best effort: one failed attachment must not stop the others
                            }
                        }
                    }
                }
            }
            return(list);
        }
Ejemplo n.º 8
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            string html = string.Empty;
            string cookiestr = string.Empty;
            string viewState = string.Empty;
            int    pageInt = 1, sqlCount = 0;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
            }
            catch
            {
                return(null);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "MoreInfoList1_Pager")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("总页数:", "当前");
                    pageInt = int.Parse(temp);
                }
                catch { }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__VIEWSTATE",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__EVENTVALIDATION"
                    }, new string[] {
                        viewState,
                        "MoreInfoList1$Pager",
                        i.ToString(),
                        eventValidation
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "MoreInfoList1_DataGrid1")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    for (int j = 0; j < table.RowCount; j++)
                    {
                        string headName = string.Empty, releaseTime = string.Empty, infoScorce = string.Empty, msgType = string.Empty, infoUrl = string.Empty, ctxHtml = string.Empty, infoCtx = string.Empty, infoType = string.Empty;

                        TableRow tr   = table.Rows[j];
                        ATag     aTag = tr.Columns[1].GetATag();
                        infoType    = "通知公告";
                        headName    = aTag.GetAttribute("title");
                        releaseTime = tr.Columns[2].ToPlainTextString().GetDateRegex();

                        infoUrl = "http://www.lnzb.cn" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.UTF8).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "tblInfo")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            ctxHtml = dtlNode.AsHtml();
                            infoCtx = ctxHtml.ToCtxString();
                            msgType = "辽宁省建设厅招标投标管理处";
                            NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "辽宁省", "辽宁省及地市", "", infoCtx, infoType);
                            sqlCount++;
                            if (!crawlAll && sqlCount >= this.MaxCount)
                            {
                                return(null);
                            }
                            if (ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                            {
                                parser = new Parser(new Lexer(ctxHtml));
                                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                                if (aNode != null && aNode.Count > 0)
                                {
                                    for (int k = 0; k < aNode.Count; k++)
                                    {
                                        ATag a = aNode[k] as ATag;
                                        if (a.Link.ToLower().Contains("readattachfile") || a.IsAtagAttach())
                                        {
                                            string link = string.Empty;
                                            if (a.Link.ToLower().Contains("http"))
                                            {
                                                link = a.Link;
                                            }
                                            else
                                            {
                                                link = "http://www.lnzb.cn" + a.Link.GetReplace("../,./");
                                            }
                                            if (Encoding.Default.GetByteCount(link) > 500)
                                            {
                                                continue;
                                            }
                                            try
                                            {
                                                BaseAttach attach = ToolHtml.GetBaseAttachByUrl(link, a.LinkText, info.Id);
                                                if (attach != null)
                                                {
                                                    ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                                                }
                                            }
                                            catch { }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
            return(null);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Crawls the paged tender-invitation grid at <c>SiteUrl</c> (records are tagged
        /// "深圳市建设工程交易中心龙岗分中心"), downloads each announcement's detail page and
        /// persists the resulting <c>InviteInfo</c> and its attachment links through
        /// <c>ToolDb.SaveEntity</c>.
        /// </summary>
        /// <param name="crawlAll">When false, crawling stops once <c>MaxCount</c> records have been processed.</param>
        /// <returns>
        /// The result list. This method never adds to it; records are saved directly as a
        /// side effect, so the list is always empty.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list     = new List <InviteInfo>();
            int    sqlCount = 0;
            int    pageInt  = 1;
            string html     = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception)
            {
                // Without the first list page there is nothing to crawl.
                return list;
            }

            // Read the page count from the pager description cell; keep 1 if it cannot be parsed.
            Parser   parser     = new Parser(new Lexer(html));
            NodeList sNode      = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_cph_context_InfoList2_GridViewPaging1_PagingDescTd"), new TagNameFilter("td")));
            string   pageString = (sNode != null ? sNode.AsString() : null) ?? string.Empty;
            Match    pageMatch  = new Regex(@",共[^页]+页").Match(pageString);
            int      parsedPages;
            if (int.TryParse(pageMatch.Value.Replace(",共", "").Replace("页", "").Trim(), out parsedPages))
            {
                pageInt = parsedPages;
            }

            // Built once instead of once per row.
            Regex regPrjAdd    = new Regex(@"(工程地点|工程地址):[^\r\n]+[\r\n]{1}");
            Regex regOtherType = new Regex(@"(工程类型):[^\r\n]+[\r\n]{1}");

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Pages after the first are requested by posting the pager's "go to page" form
                    // together with the view state of the page fetched previously.
                    try
                    {
                        string viewState = this.ToolWebSite.GetAspNetViewState(html);
                        NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                            new string[] { "ctl00$ScriptManager1",
                                           "__EVENTTARGET",
                                           "__EVENTARGUMENT",
                                           "__LASTFOCUS",
                                           "__VIEWSTATE",
                                           "ctl00$cph_context$InfoList2$ddlProjectType",
                                           "ctl00$cph_context$InfoList2$ddlSearch",
                                           "ctl00$cph_context$InfoList2$txtProjectName",
                                           "ctl00$cph_context$InfoList2$GridViewPaging1$txtGridViewPagingForwardTo",
                                           "__VIEWSTATEENCRYPTED",
                                           "ctl00$cph_context$InfoList2$GridViewPaging1$btnForwardToPage" },
                            new string[] {
                            "ctl00$cph_context$InfoList2$update1|ctl00$cph_context$InfoList2$GridViewPaging1$btnForwardToPage",
                            string.Empty,
                            string.Empty,
                            string.Empty,
                            viewState,
                            string.Empty,
                            "gcbh",
                            string.Empty,
                            i.ToString(),
                            "",
                            "GO"
                        });
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_cph_context_InfoList2_GridView1"), new TagNameFilter("table")));
                if (nodeList == null || nodeList.Count == 0)
                {
                    continue;
                }

                TableTag table = nodeList[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // Row 0 is skipped (the loop has always started at 1).
                for (int j = 1; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j] as TableRow;
                    // Columns 1..6 are read below; skip rows that do not have them
                    // instead of letting an index error abort the whole crawl.
                    if (tr == null || tr.ColumnCount < 7)
                    {
                        continue;
                    }

                    string code      = tr.Columns[1].ToPlainTextString().Trim();
                    string prjName   = tr.Columns[2].ToPlainTextString().Trim();
                    string buildUnit = tr.Columns[3].ToPlainTextString().Trim();
                    string InvType   = tr.Columns[4].ToPlainTextString().Trim();
                    string beginDate = tr.Columns[5].ToPlainTextString().Trim();
                    string endDate   = tr.Columns[6].ToPlainTextString().Trim();
                    string remark    = string.Empty;

                    NodeList anchors = tr.Columns[2].SearchFor(typeof(ATag), true);
                    if (anchors == null || anchors.Count == 0)
                    {
                        continue;
                    }
                    ATag aTag = anchors[0] as ATag;
                    if (aTag == null)
                    {
                        continue;
                    }

                    // The href is a javascript call; strip the wrapper to get the relative path.
                    string InfoUrl = "http://61.144.224.189:8001/LGjyzxWeb/SiteManage/" + aTag.Link.Replace("openNewWindowByMenu(\"", "").Replace("\")", "");

                    string htmldetail;
                    string HtmlTxt;
                    try
                    {
                        htmldetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).Replace("&nbsp;", "").Trim();
                        // Raw HTML of the content span is taken before line breaks are rewritten.
                        Parser   dtlparserHTML = new Parser(new Lexer(htmldetail));
                        NodeList dtnodeHTML    = dtlparserHTML.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_cph_context_lblContent"), new TagNameFilter("span")));
                        HtmlTxt    = dtnodeHTML.AsHtml();
                        htmldetail = htmldetail.Replace("</br>", "\r\n").Replace("<br>", "\r\n");
                    }
                    catch (Exception) { continue; }

                    // Plain-text content comes from the same span after <br> was turned into CRLF.
                    Parser   dtlparser = new Parser(new Lexer(htmldetail));
                    NodeList dtnode    = dtlparser.ExtractAllNodesThatMatch(new AndFilter(new HasAttributeFilter("id", "ctl00_cph_context_lblContent"), new TagNameFilter("span")));
                    string   inviteCtx = dtnode.AsString().Replace("\r\r\n", "\r\n");

                    string prjAddress = regPrjAdd.Match(inviteCtx).Value.Replace("工程地点:", "").Replace("工程地址:", "").Trim();
                    string msgType    = "深圳市建设工程交易中心龙岗分中心";
                    string specType   = "建设工程";
                    string inviteType = ToolHtml.GetInviteTypes(InvType);

                    // NOTE(review): otherType is derived here but never handed to GenInviteInfo
                    // (an empty string is passed after specType) — confirm whether that is intended.
                    string otherType = string.Empty;
                    string oType     = regOtherType.Match(inviteCtx).Value.Replace("工程类型:", "").Trim();
                    if (oType.Contains("房建"))
                    {
                        otherType = "房建及工业民用建筑";
                    }
                    if (oType.Contains("市政"))
                    {
                        otherType = "市政工程";
                    }
                    if (oType.Contains("园林绿化"))
                    {
                        otherType = "园林绿化工程";
                    }
                    if (oType.Contains("装饰装修"))
                    {
                        otherType = "装饰装修工程";
                    }
                    if (oType.Contains("电力"))
                    {
                        otherType = "电力工程";
                    }
                    if (oType.Contains("水利"))
                    {
                        otherType = "水利工程";
                    }
                    if (oType.Contains("环保"))
                    {
                        otherType = "环保工程";
                    }

                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳龙岗区工程", "龙岗区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, string.Empty, InfoUrl, HtmlTxt);
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        // Return the same (empty) list as every other exit path rather than null.
                        return list;
                    }

                    sqlCount++;
                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
                    {
                        continue;
                    }

                    // Record was saved: collect attachment links from the detail page.
                    dtlparser.Reset();
                    NodeList fileNode = dtlparser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (fileNode == null)
                    {
                        continue;
                    }
                    // Start at 0 so the first anchor is examined too; IsAtagAttach filters non-attachments.
                    for (int f = 0; f < fileNode.Count; f++)
                    {
                        ATag tag = fileNode[f] as ATag;
                        if (tag == null || !tag.IsAtagAttach())
                        {
                            continue;
                        }
                        try
                        {
                            string     url    = "http://61.144.224.189:8001/LGjyzxWeb/" + tag.Link.Replace("../", "");
                            BaseAttach attach = ToolHtml.GetBaseAttachByUrl(url, tag.LinkText, info.Id, "SiteManage\\Files\\InviteAttach\\");
                            if (attach != null)
                            {
                                ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                            }
                        }
                        catch
                        {
                            // Best effort: one bad attachment must not stop the others.
                        }
                    }
                }
            }
            return list;
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Crawls the paged procurement list at <c>SiteUrl</c> (records are tagged "深圳政府采购"),
        /// downloads each announcement's detail page from www.szzfcg.cn and persists the
        /// resulting <c>InviteInfo</c> and its attachment links through <c>ToolDb.SaveEntity</c>.
        /// </summary>
        /// <param name="crawlAll">When false, crawling stops once <c>MaxCount</c> records have been processed.</param>
        /// <returns>
        /// The result list. This method never adds to it; records are saved directly as a
        /// side effect, so the list is always empty.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new List <InviteInfo>();
            int    sqlCount  = 0;               // records processed so far
            int    count     = 0;               // records saved since the last throttle pause
            int    pageInt   = 1;
            string html      = string.Empty;
            string cookiestr = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.ToolWebSite.UrlEncode(SiteUrl), Encoding.UTF8);
            }
            catch
            {
                // Without the first list page there is nothing to crawl.
                return list;
            }

            // The page count is the number of options in the page selector.
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("name", "__ec_pages")));
            if (pageNode != null && pageNode.Count > 0)
            {
                SelectTag selectTag = pageNode[0] as SelectTag;
                if (selectTag != null && selectTag.OptionTags != null)
                {
                    pageInt = selectTag.OptionTags.Length;
                }
            }

            // Built once instead of once per row.
            Regex regexLink = new Regex(@"id=[^-]+");
            Regex regCtx    = new Regex(@"[\r\n]+");
            Regex regcode   = new Regex(@"(招标编号|项目编号)(:|:)([0-9]|[A-Za-z]|[-])+");
            Regex regexHtml = new Regex(@"<script[^<]*</script>|<\?xml[^/]*/>|<style[^<]*</style>|<xml[^<]*</xml>");

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] { "ec_i", "topicChrList_20070702_crd", "topicChrList_20070702_f_a", "topicChrList_20070702_p", "topicChrList_20070702_s_name", "id", "method", "__ec_pages", "topicChrList_20070702_rd", "topicChrList_20070702_f_name", "topicChrList_20070702_f_ldate" }, new string[] { "topicChrList_20070702", "20", string.Empty, i.ToString(), string.Empty, "1660", "view", (i - 1).ToString(), "20", string.Empty, string.Empty });
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                    }
                    catch
                    {
                        // Skip this page: falling through would re-parse the previous
                        // page's HTML and process its rows a second time.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList tdNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "topicChrList_20070702_table")));
                if (tdNodes == null || tdNodes.Count == 0)
                {
                    continue;
                }

                TableTag table = tdNodes[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // Rows 0..2 are skipped (the loop has always started at 3).
                for (int t = 3; t < table.RowCount; t++)
                {
                    TableRow tr = table.Rows[t];
                    // Columns 2..4 are read below; skip rows that do not have them
                    // instead of letting an index error abort the whole crawl.
                    if (tr == null || tr.ColumnCount < 5)
                    {
                        continue;
                    }

                    string buildUnit  = string.Empty, endDate = string.Empty, remark = string.Empty;
                    string prjName    = tr.Columns[2].ToPlainTextString().Trim().ToRegString();
                    string inviteType = tr.Columns[3].ToPlainTextString().Trim();
                    string beginDate  = tr.Columns[4].ToPlainTextString().Trim();

                    NodeList anchors = tr.Columns[2].SearchFor(typeof(ATag), true);
                    if (anchors == null || anchors.Count == 0)
                    {
                        continue;
                    }
                    ATag aTag = anchors[0] as ATag;
                    if (aTag == null)
                    {
                        continue;
                    }

                    string InfoUrl = "http://www.szzfcg.cn/portal/documentView.do?method=view&" + regexLink.Match(aTag.Link).Value;

                    string htmldetail;
                    string HtmlTxt;
                    try
                    {
                        // Fetch the detail page once and derive both representations from it.
                        string rawDetail = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).Replace("&nbsp;", "");
                        Parser   dtlparserHTML = new Parser(new Lexer(rawDetail.Trim()));
                        NodeList dtnodeHTML    = dtlparserHTML.ExtractAllNodesThatMatch(new TagNameFilter("body"));
                        HtmlTxt    = dtnodeHTML.AsHtml();
                        htmldetail = rawDetail.Replace("</br>", "\r\n").Replace("<br>", "\r\n");
                    }
                    catch (Exception)
                    {
                        // Skip the row rather than saving a record with no content.
                        continue;
                    }

                    Parser   dtlparser = new Parser(new Lexer(htmldetail));
                    NodeList dtnode    = dtlparser.ExtractAllNodesThatMatch(new TagNameFilter("body"));
                    string   inviteCtx = dtnode.AsString().Replace(" ", "").Replace("\t", "").Trim("\r\n".ToCharArray()).Replace("&ldquo;", "“").Replace("&rdquo;", "”").Replace("双击鼠标自动滚屏[打印此页][关闭此页]", "");
                    inviteCtx = System.Web.HttpUtility.HtmlDecode(inviteCtx);
                    inviteCtx = regCtx.Replace(inviteCtx, "\r\n");
                    string code = regcode.Match(inviteCtx).Value.Replace("招标编号", "").Replace("项目编号", "").Replace(":", "").Replace(":", "").Trim();

                    // Fallback 1: take the body text/HTML without any clean-up.
                    if (string.IsNullOrEmpty(inviteCtx) || string.IsNullOrEmpty(HtmlTxt))
                    {
                        Parser   bodyParser = new Parser(new Lexer(htmldetail));
                        NodeList ctxList    = bodyParser.ExtractAllNodesThatMatch(new TagNameFilter("body"));
                        inviteCtx = ctxList.AsString();
                        HtmlTxt   = ctxList.AsHtml();
                    }
                    // Fallback 2: strip script/style/xml blocks and tags with regular expressions.
                    if (string.IsNullOrEmpty(inviteCtx) || string.IsNullOrEmpty(HtmlTxt))
                    {
                        HtmlTxt   = regexHtml.Replace(htmldetail, "");
                        inviteCtx = Regex.Replace(HtmlTxt, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase).Replace(" ", "").Replace("\t", "");
                    }

                    string msgType    = "深圳政府采购";
                    string specType   = "政府采购";
                    string prjAddress = "深圳市";
                    if (inviteType.Contains("160"))
                    {
                        inviteType = ToolHtml.GetInviteTypes(prjName);
                    }

                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳政府采购", "", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, string.Empty, InfoUrl, HtmlTxt);
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        // Return the same (empty) list as every other exit path rather than null.
                        return list;
                    }
                    sqlCount++;
                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
                    {
                        continue;
                    }

                    count++;
                    Parser   fileParser = new Parser(new Lexer(htmldetail));
                    NodeList fileNode   = fileParser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (fileNode != null)
                    {
                        for (int f = 0; f < fileNode.Count; f++)
                        {
                            ATag tag = fileNode[f] as ATag;
                            if (tag == null || !tag.IsAtagAttach())
                            {
                                continue;
                            }
                            try
                            {
                                // Links containing ".com" or ".cn" are treated as absolute; others are site-relative.
                                string href       = tag.Link.Replace("&amp;", "&");
                                string lowered    = tag.Link.ToLower();
                                bool   isAbsolute = lowered.Contains(".com") || lowered.Contains(".cn");
                                string url        = isAbsolute ? href : "http://www.szzfcg.cn" + href;

                                BaseAttach attach = ToolHtml.GetBaseAttachByUrl(url, tag.LinkText, info.Id, "SiteManage\\Files\\InviteAttach\\");
                                if (attach != null)
                                {
                                    ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                                }
                            }
                            catch
                            {
                                // Best effort: one bad attachment must not stop the others.
                            }
                        }
                    }

                    // Pause for 300 seconds after every 10 saved records.
                    if (count >= 10)
                    {
                        count = 0;
                        Thread.Sleep(1000 * 300);
                    }
                }
            }
            return list;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Crawls a JSON feed at <c>SiteUrl + MaxCount</c> (records are tagged
        /// "深圳市建设工程交易中心龙岗分中心"), downloads each announcement's detail HTML from
        /// www.szjsjy.com.cn and persists the resulting <c>InviteInfo</c> and its attachment
        /// links through <c>ToolDb.SaveEntity</c>.
        /// </summary>
        /// <param name="crawlAll">When false, crawling stops once <c>MaxCount</c> records have been processed.</param>
        /// <returns>
        /// The result list. This method never adds to it; records are saved directly as a
        /// side effect, so the list is always empty.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list     = new List <InviteInfo>();
            int    sqlCount = 0;
            string html     = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + this.MaxCount);
            }
            catch
            {
                // Same (empty) list as the normal exit path rather than null.
                return list;
            }
            if (string.IsNullOrEmpty(html))
            {
                return list;
            }

            // Keep only the outermost {...}; anything around it is discarded.
            int startIndex = html.IndexOf("{");
            int endIndex   = html.LastIndexOf("}");
            if (startIndex < 0 || endIndex < startIndex)
            {
                // No JSON object in the response: nothing to crawl.
                return list;
            }
            html = html.Substring(startIndex, (endIndex + 1) - startIndex);

            Dictionary <string, object> smsTypeJson;
            try
            {
                JavaScriptSerializer serializer = new JavaScriptSerializer();
                smsTypeJson = serializer.DeserializeObject(html) as Dictionary <string, object>;
            }
            catch (Exception)
            {
                // Malformed JSON: nothing to crawl.
                return list;
            }
            if (smsTypeJson == null)
            {
                return list;
            }

            foreach (KeyValuePair <string, object> obj in smsTypeJson)
            {
                if (obj.Key == "total")
                {
                    continue;
                }
                // Only array-valued entries carry records; skip anything else instead of failing the cast.
                object[] array = obj.Value as object[];
                if (array == null)
                {
                    continue;
                }

                foreach (object arrValue in array)
                {
                    Dictionary <string, object> dic = arrValue as Dictionary <string, object>;
                    if (dic == null)
                    {
                        continue;
                    }

                    // Missing keys yield null here (empty string after conversion) instead of
                    // a KeyNotFoundException that would abort the whole crawl.
                    object gcBH, gcName, ggStartTime2, detailUrl, ggGuid;
                    dic.TryGetValue("gcBH", out gcBH);
                    dic.TryGetValue("gcName", out gcName);
                    dic.TryGetValue("ggStartTime2", out ggStartTime2);
                    dic.TryGetValue("detailUrl", out detailUrl);
                    dic.TryGetValue("ggGuid", out ggGuid);

                    string buildUnit = string.Empty, prjAddress = string.Empty, inviteCtx = string.Empty,
                           inviteType = string.Empty, specType = string.Empty, endDate = string.Empty,
                           remark = string.Empty, msgType = string.Empty, HtmlTxt = string.Empty;

                    string code      = Convert.ToString(gcBH);
                    string prjName   = Convert.ToString(gcName);
                    string beginDate = Convert.ToString(ggStartTime2);
                    // saveUrl is what gets stored on the record; InfoUrl is only used to download the detail.
                    string saveUrl   = Convert.ToString(detailUrl);
                    string InfoUrl   = "https://www.szjsjy.com.cn:8001/jyw-lg/jyxx/queryOldOTDataDetail.do?type=1&id=" + gcBH;

                    try
                    {
                        HtmlTxt = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString().GetReplace("\\t,\\r,\\n,\"");

                        if (string.IsNullOrWhiteSpace(HtmlTxt))
                        {
                            // Fall back to the showGongGao endpoint, which returns JSON with an "html" field.
                            string url     = "https://www.szjsjy.com.cn:8001/jyw-lg/jyxx/showGongGao.do?ggGuid=" + ggGuid;
                            string htmldtl = this.ToolWebSite.GetHtmlByUrl(url);

                            JavaScriptSerializer        Newserializer = new JavaScriptSerializer();
                            Dictionary <string, object> newTypeJson   = (Dictionary <string, object>)Newserializer.DeserializeObject(htmldtl);
                            HtmlTxt = Convert.ToString(newTypeJson["html"]);
                        }
                    }
                    catch (Exception) { continue; }

                    inviteCtx  = HtmlTxt.Replace("</span>", "\r\n").Replace("<br />", "\r\n").ToCtxString();
                    prjAddress = inviteCtx.GetAddressRegex();
                    buildUnit  = inviteCtx.GetBuildRegex();
                    if (string.IsNullOrEmpty(code))
                    {
                        code = inviteCtx.GetCodeRegex();
                    }
                    msgType    = "深圳市建设工程交易中心龙岗分中心";
                    specType   = "建设工程";
                    inviteType = ToolHtml.GetInviteTypes(prjName);

                    InviteInfo info = ToolDb.GenInviteInfo("广东省", "深圳龙岗区工程", "龙岗区", string.Empty, code, prjName, prjAddress, buildUnit, beginDate, endDate, inviteCtx, remark, msgType, inviteType, specType, string.Empty, saveUrl, HtmlTxt);

                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return list;
                    }

                    sqlCount++;
                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate, this.ExistsHtlCtx))
                    {
                        continue;
                    }

                    // Record was saved: collect attachment links from the detail HTML.
                    Parser   parser   = new Parser(new Lexer(HtmlTxt));
                    NodeList fileNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (fileNode == null)
                    {
                        continue;
                    }
                    for (int f = 0; f < fileNode.Count; f++)
                    {
                        ATag tag = fileNode[f] as ATag;
                        if (tag == null)
                        {
                            continue;
                        }
                        if (!(tag.IsAtagAttach() || tag.Link.ToLower().Contains("downloadfile")))
                        {
                            continue;
                        }
                        try
                        {
                            string link;
                            if (tag.Link.ToLower().Contains("http"))
                            {
                                link = tag.Link;
                                // The former StartsWith("\\") / Substring(IndexOf("\\"), ...) step returned
                                // the string unchanged, so it was dropped; backslashes are handled by
                                // the GetReplace call below.
                                if (link.EndsWith("//"))
                                {
                                    link = link.Remove(link.LastIndexOf("//"));
                                }
                                link = link.GetReplace("\\", "");
                            }
                            else
                            {
                                link = "https://www.szjsjy.com.cn:8001/" + tag.Link;
                            }

                            BaseAttach attach = ToolHtml.GetBaseAttachByUrl(link, tag.LinkText, info.Id, "SiteManage\\Files\\InviteAttach\\");
                            if (attach != null)
                            {
                                ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                            }
                        }
                        catch
                        {
                            // Best effort: one bad attachment must not stop the others.
                        }
                    }
                }
            }
            return list;
        }
Ejemplo n.º 12
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            //取得页码
            int    pageInt = 1, sqlCount = 0;
            string html            = string.Empty;
            string viewState       = string.Empty;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, Encoding.UTF8);
            }
            catch (Exception ex)
            {
                return(null);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "ctl00_cph_context_GridViewPaingTwo1_lblGridViewPagingDesc")));

            if (pageList != null && pageList.Count > 0)
            {
                try
                {
                    string temp = pageList.AsString();
                    pageInt = Convert.ToInt32(temp.GetRegexBegEnd("共", "页"));
                }
                catch { pageInt = 1; }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        viewState       = this.ToolWebSite.GetAspNetViewState(html);
                        eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                        NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                            new string[] {
                            "__VIEWSTATE",
                            "__EVENTVALIDATION",
                            "ctl00$cph_context$GridViewPaingTwo1$txtGridViewPagingForwardTo",
                            "ctl00$cph_context$GridViewPaingTwo1$btnForwardToPage"
                        },
                            new string[] {
                            viewState, eventValidation, i.ToString(), "GO"
                        }
                            );
                        html = this.ToolWebSite.GetHtmlByUrl(SiteUrl, nvc, Encoding.UTF8);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList nodeList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_cph_context_GridView1")));
                if (nodeList != null && nodeList.Count > 0)
                {
                    TableTag table = nodeList[0] as TableTag;
                    for (int j = 1; j < table.RowCount; j++)
                    {
                        string headName = string.Empty, releaseTime = string.Empty, infoScorce = string.Empty, msgType = string.Empty,
                               infoUrl = string.Empty, ctxHtml = string.Empty, infoCtx = string.Empty, infoType = string.Empty;

                        TableRow tr = table.Rows[j];
                        headName    = tr.Columns[1].ToNodePlainString();
                        releaseTime = tr.Columns[2].ToNodePlainString();
                        infoType    = "政策法规";
                        infoUrl     = "http://www.dgzb.com.cn/DGJYWEB/SiteManage/" + tr.Columns[1].GetATagHref();
                        string htldtl = string.Empty;
                        try
                        {
                            htldtl = ToolHtml.GetHtmlByUrl(infoUrl, Encoding.UTF8).GetJsString();
                        }
                        catch { }
                        parser = new Parser(new Lexer(htldtl));
                        NodeList noList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "ctl00_cph_context_span_MetContent")));
                        if (noList != null && noList.Count > 0)
                        {
                            ctxHtml    = noList.AsHtml().Replace("<br/>", "\r\n").Replace("<BR/>", "");
                            infoCtx    = noList.AsString().Replace(" ", "").Replace("&nbsp;", "").Replace("\t\t", "\t").Replace("\t\t", "\t");
                            infoCtx    = Regex.Replace(infoCtx, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase).Replace(" ", "").Replace("\t", "").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n").Replace("\r\n\r\n", "\r\n");
                            msgType    = MsgTypeCosnt.DongGuanMsgType;
                            infoScorce = infoScorce.Replace("&nbsp;", "");
                            NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "广东省", "东莞市区", string.Empty, infoCtx, infoType);

                            if (!crawlAll && sqlCount >= this.MaxCount)
                            {
                                return(null);
                            }
                            else
                            {
                                sqlCount++;
                                if (ToolDb.SaveEntity(info, this.ExistCompareFields))
                                {
                                    parser = new Parser(new Lexer(htldtl));
                                    NodeList attachList = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "ctl00_cph_context_DownLoadFiles1_GridView1")));
                                    if (attachList != null && attachList.Count > 0)
                                    {
                                        TableTag tabTag = attachList[0] as TableTag;
                                        for (int k = 1; k < tabTag.RowCount; k++)
                                        {
                                            TableRow dr = tabTag.Rows[k];
                                            try
                                            {
                                                string     attName  = string.IsNullOrEmpty(dr.Columns[1].ToNodePlainString()) ? headName : dr.Columns[1].ToNodePlainString();
                                                BaseAttach baseInfo = ToolHtml.GetBaseAttachByUrl("http://www.dgzb.com.cn/DGJYWEB/SiteManage/" + dr.Columns[1].GetATagHref(), attName, info.Id);
                                                if (baseInfo != null)
                                                {
                                                    ToolDb.SaveEntity(baseInfo, string.Empty);
                                                }
                                            }
                                            catch { }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
            return(null);
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Collects the attachments referenced by <paramref name="htmltxt"/> and stores them
        /// through <c>ToolDb.SaveEntity</c>.
        /// </summary>
        /// <remarks>
        /// Two input shapes are handled:
        /// text containing "http" is parsed as HTML and every anchor is treated as an attachment link;
        /// any other text is handed to <c>ToolHtml.JsonToDataTable</c> and each row's
        /// <c>attachName</c> / <c>attachGuid</c> columns are read.
        /// Downloading is best-effort: a failure on one attachment skips that attachment only.
        /// When <paramref name="isUpdate"/> is true and at least one attachment was collected,
        /// the rows and files previously stored under <paramref name="result"/> are removed first.
        /// </remarks>
        /// <param name="info">Project the attachments belong to; its <c>PrjName</c> is the fallback attachment name and its <c>Id</c> the source id for new records.</param>
        /// <param name="htmltxt">HTML fragment or JSON text describing the attachments. Null or empty means nothing to do.</param>
        /// <param name="result">Source id used instead of <c>info.Id</c> when <paramref name="isUpdate"/> is true.</param>
        /// <param name="isUpdate">True to replace the attachments already stored under <paramref name="result"/>.</param>
        private void SaveAttach(BidProject info, string htmltxt, string result, bool isUpdate)
        {
            // Nothing to scan; previously a null value crashed on htmltxt.Contains below.
            if (string.IsNullOrEmpty(htmltxt))
            {
                return;
            }

            List <BaseAttach> list = new List <BaseAttach>();

            if (htmltxt.Contains("http"))
            {
                Parser   parser = new Parser(new Lexer(htmltxt));
                NodeList aNode  = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (aNode != null && aNode.Count > 0)
                {
                    for (int j = 0; j < aNode.Count; j++)
                    {
                        ATag aTag = aNode[j].GetATag();
                        if (aTag == null || aTag.Link == null)
                        {
                            // An anchor without a usable link must not abort the whole batch.
                            continue;
                        }

                        string attachName = aTag.LinkText;
                        if (string.IsNullOrWhiteSpace(attachName))
                        {
                            attachName = info.PrjName;
                        }

                        // Relative links are resolved against the site root.
                        string aurl;
                        if (aTag.Link.IndexOf("http", StringComparison.OrdinalIgnoreCase) < 0)
                        {
                            aurl = "https://www.szjsjy.com.cn:8001/" + aTag.Link.GetReplace("\\");
                        }
                        else
                        {
                            aurl = aTag.Link.GetReplace("\\");
                        }

                        try
                        {
                            string[] urls = System.Web.HttpUtility.UrlDecode(aurl).Split('&');
                            if (urls.Length < 3)
                            {
                                // Links with fewer than three '&'-separated segments were already
                                // dropped (via a swallowed IndexOutOfRangeException); now explicit.
                                continue;
                            }

                            // The second and third segments are swapped; any further segments are discarded.
                            string url = urls[0] + "&" + urls[2] + "&" + urls[1];

                            BaseAttach entity = null;
                            if (isUpdate)
                            {
                                entity = ToolHtml.GetBaseAttach(url.Replace("\"", ""), attachName, result, "SiteManage\\Files\\Attach\\");
                            }
                            else
                            {
                                entity = ToolHtml.GetBaseAttach(url.Replace("\"", ""), attachName, info.Id, "SiteManage\\Files\\Attach\\");
                            }
                            if (entity != null)
                            {
                                list.Add(entity);
                            }
                        }
                        catch
                        {
                            // Best-effort: a failed download skips this attachment only.
                        }
                    }
                }
            }
            else
            {
                System.Data.DataTable dtlDtl = ToolHtml.JsonToDataTable(htmltxt);

                if (dtlDtl != null && dtlDtl.Rows.Count > 0)
                {
                    for (int i = 0; i < dtlDtl.Rows.Count; i++)
                    {
                        System.Data.DataRow row = dtlDtl.Rows[i];
                        string attachName       = Convert.ToString(row["attachName"]);
                        if (string.IsNullOrWhiteSpace(attachName))
                        {
                            attachName = info.PrjName;
                        }
                        string attachGuid = Convert.ToString(row["attachGuid"]);
                        string url        = "https://www.szjsjy.com.cn:8001/file/downloadFile?fileId=" + attachGuid;
                        try
                        {
                            BaseAttach entity = null;
                            if (isUpdate)
                            {
                                entity = ToolHtml.GetBaseAttachByUrl(url, attachName, result, "SiteManage\\Files\\Attach\\");
                            }
                            else
                            {
                                entity = ToolHtml.GetBaseAttachByUrl(url, attachName, info.Id, "SiteManage\\Files\\Attach\\");
                            }
                            if (entity != null)
                            {
                                list.Add(entity);
                            }
                        }
                        catch
                        {
                            // Best-effort: a failed download skips this attachment only.
                        }
                    }
                }
            }

            if (list.Count == 0)
            {
                // Nothing was collected, so existing attachments (if any) are left untouched.
                return;
            }

            if (isUpdate)
            {
                // Double any single quote so the id cannot terminate the SQL string literal.
                string safeId = (result ?? string.Empty).Replace("'", "''");
                string delSql = string.Format("delete from BaseAttach where SourceID='{0}'", safeId);
                ToolFile.Delete(result);
                ToolDb.ExecuteSql(delSql);
            }
            foreach (BaseAttach attach in list)
            {
                ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
            }
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Crawls the notice list that starts at <c>SiteUrl</c>, follows every list page,
        /// downloads each notice's detail page and saves the notice plus its attachments
        /// through <c>ToolDb.SaveEntity</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, crawling stops as soon as the running notice counter reaches <c>MaxCount</c>.
        /// </param>
        /// <returns>Always <c>null</c>; entities are persisted directly instead of being returned.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            int    pageInt = 1, sqlCount = 0;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch { return(null); }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "huifont")));

            if (pageNode != null && pageNode.Count > 0)
            {
                // The page count is whatever follows the first '/' in the pager cell text.
                // If it is not a number the crawl stays on a single page.
                string pagerText = pageNode.AsString() ?? string.Empty;
                int    parsedPages;
                if (int.TryParse(pagerText.Substring(pagerText.IndexOf('/') + 1), out parsedPages))
                {
                    pageInt = parsedPages;
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.hebggzy.cn/024/024002/" + i + ".html", Encoding.UTF8);
                    }
                    catch { continue; } // a list page that fails to load is skipped
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("li"), new HasAttributeFilter("class", "right-text-li")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    INode node = listNode[j];
                    ATag  aTag = node.GetATag();
                    if (aTag == null)
                    {
                        // A list item without a link cannot be followed; skip it instead of crashing the crawl.
                        continue;
                    }

                    string infoScorce  = string.Empty;
                    string infoType    = "通知公告";
                    string headName    = aTag.GetAttribute("title");
                    string releaseTime = node.ToPlainTextString().GetDateRegex();
                    string infoUrl     = "http://www.hebggzy.cn" + aTag.Link;

                    string htmldtl;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(infoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch { continue; } // a detail page that fails to load is skipped

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "article-main")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    string ctxHtml = dtlNode.AsHtml();
                    string infoCtx = ctxHtml.ToCtxString();
                    string msgType = "河北省公共资源交易中心";
                    NotifyInfo info = ToolDb.GenNotifyInfo(headName, releaseTime, infoScorce, msgType, infoUrl, ctxHtml, "河北省", "河北省及地市", "", infoCtx, infoType);

                    // NOTE(review): the counter is incremented before the check, so the notice that
                    // reaches MaxCount is not saved. Kept as-is; confirm whether that is intended.
                    sqlCount++;
                    if (!crawlAll && sqlCount >= this.MaxCount)
                    {
                        return(null);
                    }

                    if (!ToolDb.SaveEntity(info, this.ExistCompareFields, this.ExistsUpdate))
                    {
                        continue;
                    }

                    // Attachments are looked up only after the notice itself was saved.
                    parser = new Parser(new Lexer(ctxHtml));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode == null || aNode.Count == 0)
                    {
                        continue;
                    }

                    for (int k = 0; k < aNode.Count; k++)
                    {
                        ATag a = aNode[k] as ATag;
                        if (a == null || a.Link == null)
                        {
                            continue;
                        }

                        string lowerLink = a.Link.ToLower();
                        if (!(lowerLink.Contains("download") || a.IsAtagAttach()))
                        {
                            continue;
                        }

                        string link;
                        if (lowerLink.Contains("http"))
                        {
                            link = a.Link;
                        }
                        else
                        {
                            link = "http://www.hebggzy.cn/" + a.Link.GetReplace("../,./");
                        }

                        // Links longer than 500 bytes are skipped.
                        // NOTE(review): presumably a storage column limit; confirm.
                        if (Encoding.Default.GetByteCount(link) > 500)
                        {
                            continue;
                        }

                        try
                        {
                            BaseAttach attach = ToolHtml.GetBaseAttachByUrl(link, a.LinkText, info.Id);
                            if (attach != null)
                            {
                                ToolDb.SaveEntity(attach, "SourceID,AttachServerPath");
                            }
                        }
                        catch
                        {
                            // Best-effort: a failed attachment download does not stop the crawl.
                        }
                    }
                }
            }
            return(null);
        }