Esempio n. 1
0
        /// <summary>
        /// Assembles an <see cref="ItemPlan"/> from the flat fields carried by an
        /// <see cref="ItemPlanRequest"/>.
        /// </summary>
        /// <param name="request">Request supplying the planned/performed descriptions, the evidence path and the plan id.</param>
        /// <returns>The item after <c>Deliver</c> has been called with the planned action, the performed action and <c>request.PlanId</c>.</returns>
        public static ItemPlan ConvertToItemPlan(ItemPlanRequest request)
        {
            // Planned action: only needs its description.
            var planned = new AccionPlaneada();
            planned.Deliver(request.AccionPlaneada_Descripcion);

            // Performed action: needs its description plus the evidence object.
            var proof = new Evidencia();
            proof.Deliver(request.AccionRealizada_evidencia_Ruta);

            var performed = new AccionRealizada();
            performed.Deliver(request.AccionRealizada_Descripcion, proof);

            var result = new ItemPlan();
            result.Deliver(planned, performed, request.PlanId);
            return result;
        }
Esempio n. 2
0
        /// <summary>
        /// Runs the <c>CanDeliver</c> checks for every part of an item plan against the values in
        /// <paramref name="request"/> and returns all reported messages in one list.
        /// </summary>
        /// <param name="request">Update request whose fields are checked.</param>
        /// <returns>
        /// The concatenated results of the four <c>CanDeliver</c> calls, in call order
        /// (planned action, evidence, performed action, item).
        /// </returns>
        public static List<string> CanConvertToItemPlan(ItemPlanUpdateRequest request)
        {
            var planned   = new AccionPlaneada();
            var performed = new AccionRealizada();
            var proof     = new Evidencia();
            var candidate = new ItemPlan();

            // NOTE(review): the objects handed to the later checks are freshly constructed;
            // no Deliver call populates them here -- confirm CanDeliver expects that.
            var problems = new List<string>();
            problems.AddRange(planned.CanDeliver(request.AccionPlaneada_Descripcion));
            problems.AddRange(proof.CanDeliver(request.AccionRealizada_evidencia_Ruta));
            problems.AddRange(performed.CanDeliver(request.AccionRealizada_Descripcion, proof));
            problems.AddRange(candidate.CanDeliver(planned, performed));
            return problems;
        }
Esempio n. 3
0
        /// <summary>
        /// Creates a <see cref="PlanAccion"/> fixture holding a single <see cref="ItemPlan"/>
        /// built from fixed sample values, attached to the activity produced by
        /// <c>ActividadMother.CreateActividad()</c>.
        /// </summary>
        /// <returns>A plan with exactly one item.</returns>
        public static PlanAccion CreatePlanAccion()
        {
            var activity = ActividadMother.CreateActividad();

            var planned = new AccionPlaneada();
            planned.Deliver("Se describe lo planeado");

            var proof = new Evidencia();
            proof.Deliver("loquesea/dir");

            var performed = new AccionRealizada();
            performed.Deliver("Se describe lo realizado", proof);

            var entry = new ItemPlan();
            entry.Deliver(planned, performed, 0);

            return new PlanAccion(new List<ItemPlan> { entry }, activity);
        }
Esempio n. 4
0
        /// <summary>
        /// Crawls the paged "GridView1" listing at <c>SiteUrl</c>, opens each row's detail page and
        /// builds one <see cref="ItemPlan"/> for every detail page that contains a table with
        /// <c>width="682"</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>
        /// The collected items, or <c>null</c> when the first listing page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<ItemPlan>();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch { return(null); }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("id", "GridView1_ctl21_labCountInfo")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("/", "每");
                    pageInt = int.Parse(temp);
                }
                catch { } // best effort: keep the single-page default when the pager text cannot be parsed
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Later pages are requested by posting the "next" link of the grid, using the
                    // hidden form fields taken from the page fetched last.
                    string viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    string eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "__LASTFOCUS",
                        "__VIEWSTATE",
                        "__VIEWSTATEGENERATOR",
                        "__EVENTVALIDATION"
                    }, new string[] {
                        "GridView1$ctl21$lbNext",
                        "", "",
                        viewState,
                        "44ED84FE",
                        eventValidation
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "GridView1")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // The first and the last row are not read as data rows.
                for (int j = 1; j < table.RowCount - 1; j++)
                {
                    string ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty,
                           BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty,
                           PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty,
                           ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty,
                           MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty,
                           PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty,
                           ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty,
                           MsgType = string.Empty, Area = string.Empty;

                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 3)
                    {
                        // Columns 0..2 are read below; skip rows that do not have them.
                        continue;
                    }

                    ATag aTag = tr.Columns[0].GetATag();
                    if (aTag == null)
                    {
                        // Without a link there is no detail page to follow.
                        continue;
                    }

                    // Prefer the title attribute of the first <div> in the cell; fall back to the
                    // link text when there is no div or its title is empty.
                    string   title     = null;
                    Parser   divParser = new Parser(new Lexer(tr.Columns[0].ToHtml()));
                    NodeList divNode   = divParser.ExtractAllNodesThatMatch(new TagNameFilter("div"));
                    if (divNode != null && divNode.Count > 0)
                    {
                        Div div = divNode[0] as Div;
                        if (div != null)
                        {
                            title = div.GetAttribute("title");
                        }
                    }
                    ItemName = string.IsNullOrEmpty(title) ? aTag.LinkText : title;

                    Area     = tr.Columns[1].ToNodePlainString();
                    PlanDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    InfoUrl  = "http://222.168.7.143:8888/er/AttachManage/ProjectPublic/" + aTag.Link.Replace("../", "");

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch { continue; }

                    Parser   dtlParser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode   = dtlParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "682")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    CtxHtml = dtlNode[0].ToHtml();
                    ItemCtx = CtxHtml.ToCtxString();

                    // Flatten every matching table after the first into "label:value" lines.
                    // The very first cell is skipped, which shifts the label/value parity in row 0.
                    StringBuilder ctxBuilder = new StringBuilder();
                    for (int q = 1; q < dtlNode.Count; q++)
                    {
                        TableTag tag = dtlNode[q] as TableTag;
                        if (tag == null)
                        {
                            continue;
                        }
                        for (int r = 0; r < tag.RowCount; r++)
                        {
                            for (int c = 0; c < tag.Rows[r].ColumnCount; c++)
                            {
                                if (r == 0 && c == 0)
                                {
                                    continue;
                                }
                                bool evenColumn = (c + 1) % 2 == 0;
                                bool endsLine   = (r == 0) ? !evenColumn : evenColumn;
                                ctxBuilder.Append(tag.Rows[r].Columns[c].ToNodePlainString().GetReplace(":,:"));
                                ctxBuilder.Append(endsLine ? "\r\n" : ":");
                            }
                        }
                    }
                    string ctx = ctxBuilder.ToString();

                    ItemCode     = ctx.GetCodeRegex();
                    ItemContent  = ctx.GetRegex("建设内容", true, 500);
                    ApprovalCode = ctx.GetRegex("文号");
                    ApprovalDate = ctx.GetRegex("批复时间");
                    ItemAddress  = ctx.GetAddressRegex();
                    PlanType     = "项目公开";
                    MsgType      = "吉林省发展和改革委员会";

                    ItemPlan info = ToolDb.GenItemPlan("吉林省", "吉林省及地市", Area, ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);

                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Esempio n. 5
0
        /// <summary>
        /// Crawls the "mytable" listing pages under <c>http://www.hbfgw.gov.cn/hqfw/xmgg/xmkzgg/</c>,
        /// opens each row's detail page and builds one <see cref="ItemPlan"/> for every detail page
        /// that contains the <c>appendixDiv</c> element. Images found in that element are registered
        /// in <c>AttachList</c> as attachments of the item.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>
        /// The collected items, or <c>null</c> when the first listing page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<ItemPlan>();
            string html    = string.Empty;
            int    pageInt = 27; // number of listing pages is hard-coded, not read from the site

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch { return(null); }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.hbfgw.gov.cn/hqfw/xmgg/xmkzgg/index_" + (i - 1).ToString() + ".shtml");
                    }
                    catch { continue; }
                }

                Parser   parser   = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "mytable")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // Row 0 is not read as a data row.
                for (int j = 1; j < table.RowCount; j++)
                {
                    string ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty,
                           BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty,
                           PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty,
                           ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty,
                           MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty,
                           PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty,
                           ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty,
                           MsgType = string.Empty;

                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 4)
                    {
                        // Columns 0..3 are read below; skip rows that do not have them.
                        continue;
                    }

                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        // Without a link there is no detail page to follow.
                        continue;
                    }

                    ItemCode     = tr.Columns[0].ToNodePlainString().GetReplace("('无')").GetReplace("('", "kdxx").GetReplace("')", "xxdk").GetRegexBegEnd("kdxx", "xxdk");
                    ItemName     = aTag.LinkText;
                    ApprovalUnit = tr.Columns[2].ToNodePlainString();
                    PlanDate     = tr.Columns[3].ToPlainTextString().GetDateRegex();
                    InfoUrl      = "http://www.hbfgw.gov.cn/hqfw/xmgg/xmkzgg/" + aTag.Link.GetReplace("../,./");

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch { continue; }

                    Parser   dtlParser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode   = dtlParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "appendixDiv")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    // A non-empty <h1> on the detail page overrides the name taken from the listing.
                    Parser   headingParser = new Parser(new Lexer(htmldtl));
                    NodeList hNode         = headingParser.ExtractAllNodesThatMatch(new TagNameFilter("h1"));
                    if (hNode != null && hNode.Count > 0)
                    {
                        string heading = hNode[0].ToNodePlainString();
                        if (!string.IsNullOrEmpty(heading))
                        {
                            ItemName = heading;
                        }
                    }
                    ItemName = ItemName.GetReplace("省发改委批复,省发改委核准");
                    CtxHtml  = dtlNode.AsHtml().Replace("none", "block");
                    ItemCtx  = CtxHtml.ToCtxString();

                    // Rewrite image sources relative to the detail page's directory and remember
                    // each rewritten URL as an attachment.
                    string       imgUrl = InfoUrl.Substring(0, InfoUrl.LastIndexOf("/"));
                    List<string> attach = new List<string>();
                    Parser       imgParser = new Parser(new Lexer(CtxHtml));
                    NodeList     imgNode   = imgParser.ExtractAllNodesThatMatch(new TagNameFilter("img"));
                    if (imgNode != null)
                    {
                        for (int p = 0; p < imgNode.Count; p++)
                        {
                            ImageTag img = imgNode[p] as ImageTag;
                            if (img == null || string.IsNullOrEmpty(img.ImageURL))
                            {
                                // Nothing to rewrite for a node without a usable source.
                                continue;
                            }
                            string link = imgUrl + "/" + img.ImageURL.GetReplace("../,./");
                            CtxHtml = CtxHtml.GetReplace(img.ImageURL, link);
                            attach.Add(link);
                        }
                    }

                    PlanType = "项目核准信息";
                    MsgType  = "湖北省发展和改革委员会";

                    ItemPlan info = ToolDb.GenItemPlan("湖北省", "湖北省及地市", "", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);
                    list.Add(info);

                    foreach (string attachUrl in attach)
                    {
                        BaseAttach entity = ToolDb.GenBaseAttach(ItemName, info.Id, attachUrl);
                        base.AttachList.Add(entity);
                    }

                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Esempio n. 6
0
        /// <summary>
        /// Crawls the paged "list" table at <c>SiteUrl</c>, opens each row's detail page under
        /// <c>http://www.ahpc.gov.cn/zwgk/</c> and builds one <see cref="ItemPlan"/> for every detail
        /// page that contains a <c>div</c> with class <c>content</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>
        /// The collected items, or <c>null</c> when the first listing page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<ItemPlan>();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch { return(null); }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "page")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("/", "页");
                    pageInt = int.Parse(temp);
                }
                catch { } // best effort: keep the single-page default when the pager text cannot be parsed
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "?page=" + i);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "list")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // The first row and the last two rows are not read as data rows.
                for (int j = 1; j < table.RowCount - 2; j++)
                {
                    string ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty,
                           BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty,
                           PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty,
                           ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty,
                           MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty,
                           PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty,
                           ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty,
                           MsgType = string.Empty;

                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 3)
                    {
                        // Columns 0..2 are read below; skip rows that do not have them.
                        continue;
                    }

                    ATag aTag = tr.Columns[0].GetATag();
                    if (aTag == null)
                    {
                        // Without a link there is no detail page to follow.
                        continue;
                    }

                    // Use the cell's title attribute; fall back to the link text when it is missing.
                    string title = tr.Columns[0].GetAttribute("title");
                    ItemName = string.IsNullOrEmpty(title) ? aTag.LinkText : title;
                    ItemCode = tr.Columns[1].ToNodePlainString();
                    PlanDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    InfoUrl  = "http://www.ahpc.gov.cn/zwgk/" + aTag.Link.GetReplace("../,./");

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch { continue; }

                    Parser   dtlParser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode   = dtlParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "content")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    CtxHtml      = dtlNode.AsHtml();
                    ItemCtx      = CtxHtml.ToLower().GetReplace("</p>,<br/>,<br>", "\r\n").ToCtxString();
                    BuildUnit    = ItemCtx.GetBuildRegex();
                    ItemAddress  = ItemCtx.GetAddressRegex();
                    ItemContent  = ItemCtx.GetRegex("内容", true, 1000);
                    InvestSource = ItemCtx.GetRegex("资金来源", true, 40);
                    TotalInvest  = ItemCtx.GetRegexBegEnd("投资", "万元").GetChina();
                    MsgUnit      = "社会发展处";
                    ApprovalUnit = ItemCtx.GetRegex("主办处室");
                    PlanType     = "项目公示";
                    MsgType      = "安徽省发展和改革委员会";

                    ItemPlan info = ToolDb.GenItemPlan("安徽省", "安徽省及地市", "", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Esempio n. 7
0
        /// <summary>
        /// Crawls the paged "m_TAB" listing reached through <c>SiteUrl</c>, opens each row's detail
        /// page on <c>http://www.scdrc.gov.cn</c> and builds one <see cref="ItemPlan"/> per detail page.
        /// Attachment links found in the detail content are registered in <c>AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>
        /// The collected items, or <c>null</c> when the first listing page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<ItemPlan>();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "-1");
            }
            catch { return(null); }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "m_COUNT")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode[0].ToNodePlainString().GetRegexBegEnd("/", ")");
                    pageInt = int.Parse(temp);
                }
                catch { } // best effort: keep the single-page default when the pager text cannot be parsed
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // Later pages are addressed by an offset that grows by 24 per page.
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + ((i - 1) * 24));
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "m_TAB")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    string ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty,
                           BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty,
                           PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty,
                           ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty,
                           MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty,
                           PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty,
                           ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty,
                           MsgType = string.Empty;

                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 3)
                    {
                        // Columns 1 and 2 are read below; skip rows that do not have them.
                        continue;
                    }

                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }

                    ItemName = tr.Columns[1].ToNodePlainString();
                    if (ItemName.Contains("..."))
                    {
                        // The visible text is truncated; take the name from the link's title
                        // attribute instead. The attribute was previously read but never assigned.
                        string fullTitle = aTag.GetAttribute("title");
                        if (!string.IsNullOrEmpty(fullTitle))
                        {
                            ItemName = fullTitle;
                        }
                    }

                    // NOTE(review): if GetDateRegex finds nothing this presumably yields just "20" -- confirm.
                    PlanDate = "20" + tr.Columns[2].ToPlainTextString().GetDateRegex("yy-MM-dd");
                    InfoUrl  = "http://www.scdrc.gov.cn" + aTag.Link;

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    // When the page has an m_FRAME iframe, the "_1.htm" variant of the link is
                    // fetched and used as the detail page instead.
                    Parser   framePars = new Parser(new Lexer(htmldtl));
                    NodeList frameNode = framePars.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("iframe"), new HasAttributeFilter("id", "m_FRAME")));
                    if (frameNode != null && frameNode.Count > 0)
                    {
                        try
                        {
                            InfoUrl = "http://www.scdrc.gov.cn" + aTag.Link.GetReplace(".htm", "_1.htm");
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }
                    }

                    Parser   dtlParser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode   = dtlParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "m_TEXT")));
                    if (dtlNode == null || dtlNode.Count < 1)
                    {
                        // No m_TEXT cell: fall back to the whole <body>.
                        dtlParser.Reset();
                        dtlNode = dtlParser.ExtractAllNodesThatMatch(new TagNameFilter("body"));
                    }
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    CtxHtml = dtlNode.AsHtml();

                    // If the content holds a table, flatten its first table into "label:value"
                    // lines (odd columns are labels, even columns end the line); otherwise use
                    // the plain text of the content.
                    Parser   tablePars = new Parser(new Lexer(CtxHtml));
                    NodeList tableNode = tablePars.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                    if (tableNode != null && tableNode.Count > 0)
                    {
                        TableTag tag = tableNode[0] as TableTag;
                        if (tag != null)
                        {
                            StringBuilder ctxBuilder = new StringBuilder();
                            for (int r = 0; r < tag.RowCount; r++)
                            {
                                for (int c = 0; c < tag.Rows[r].ColumnCount; c++)
                                {
                                    bool endsLine = (c + 1) % 2 == 0;
                                    ctxBuilder.Append(tag.Rows[r].Columns[c].ToNodePlainString().GetReplace(":,:"));
                                    ctxBuilder.Append(endsLine ? "\r\n" : ":");
                                }
                            }
                            ItemCtx = ctxBuilder.ToString();
                        }
                    }
                    else
                    {
                        ItemCtx = CtxHtml.ToCtxString();
                    }

                    ItemContent   = ItemCtx.GetRegex("内容", true, 1000);
                    ApprovalUnit  = ItemCtx.GetRegex("批复单位");
                    ApprovalDate  = ItemCtx.GetRegex("批复日期,批复时间");
                    ApprovalCode  = ItemCtx.GetRegex("批复文号(备案号)");
                    TotalInvest   = ItemCtx.GetRegex("总投资").GetMoney();
                    PlanBeginDate = ItemCtx.GetRegex("开工时间");
                    ItemAddress   = ItemCtx.GetRegex("所属地区");
                    PlanType      = ItemCtx.GetRegex("项目类型");
                    MsgType       = "四川省发展和改革委员会";
                    ItemName      = ItemName.GetReplace("四川省发展和改革委员会");

                    ItemPlan info = ToolDb.GenItemPlan("四川省", "四川省及地市", "", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);

                    // Register every attachment link found in the detail content.
                    Parser   linkParser = new Parser(new Lexer(CtxHtml));
                    NodeList aNode      = linkParser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }

                            string link;
                            if (a.Link.ToLower().Contains("http"))
                            {
                                link = a.Link;
                            }
                            else
                            {
                                link = "http://www.scdrc.gov.cn/dir1111/" + a.Link;
                            }
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }

                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }

            return(list);
        }
Esempio n. 8
0
        /// <summary>
        /// Crawls the paged project list at <c>SiteUrl</c>, downloads each linked detail page and
        /// turns it into an <see cref="ItemPlan"/>. Attachment links found inside the detail
        /// content are added to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>
        /// The collected items; an empty list when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <ItemPlan>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch
            {
                // The first list page is mandatory; without it there is nothing to crawl.
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "page")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The page count is the text between "/" and "页" in the pager div.
                    string temp = pageNode.AsString().GetRegexBegEnd("/", "页").GetReplace("(");
                    int    parsedPages;
                    if (int.TryParse(temp, out parsedPages))
                    {
                        pageInt = parsedPages;
                    }
                }
                catch
                {
                    // Best effort: if the text helpers fail, fall back to crawling a single page.
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "/p/" + i + ".html");
                    }
                    catch { continue; } // skip a page that cannot be downloaded
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "list")), true), new TagNameFilter("li")));
                if (listNode == null || listNode.Count < 1)
                {
                    continue;
                }
                for (int j = 0; j < listNode.Count; j++)
                {
                    // Most fields stay empty for this site; they are still passed to GenItemPlan.
                    string ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty, BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty, PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty, ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty, MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty, PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty, ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty, MsgType = string.Empty;

                    INode node = listNode[j];
                    ATag  aTag = node.GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }
                    ItemName = aTag.GetAttribute("title").GetReplace("甘肃省发展和改革委员会");
                    PlanDate = node.ToPlainTextString().GetDateRegex();
                    InfoUrl  = "http://www.gsdrc.gov.cn" + aTag.Link;
                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch { continue; } // skip an item whose detail page cannot be downloaded
                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "content")));
                    if (dtlNode == null || dtlNode.Count < 1)
                    {
                        continue;
                    }
                    CtxHtml     = dtlNode.AsHtml();
                    ItemCtx     = CtxHtml.ToCtxString();
                    TotalInvest = ItemCtx.GetRegexBegEnd("总投资", "万元");
                    ItemCode    = ItemCtx.GetRegex("项目编码");

                    PlanType = "项目审批与核准";
                    MsgType  = "甘肃省发展和改革委员会";
                    ItemPlan info = ToolDb.GenItemPlan("甘肃省", "甘肃省及地市", "", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);

                    list.Add(info);

                    // Collect attachment links from the detail content.
                    parser = new Parser(new Lexer(CtxHtml));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }
                            // Only a link that starts with a scheme is absolute; "http" appearing
                            // later in the href (e.g. in a query string) still needs the site prefix.
                            string link;
                            if (a.Link.TrimStart().StartsWith("http", System.StringComparison.OrdinalIgnoreCase))
                            {
                                link = a.Link;
                            }
                            else
                            {
                                link = "http://www.gsdrc.gov.cn/" + a.Link.GetReplace("../,./");
                            }
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Esempio n. 9
0
        /// <summary>
        /// Requests the project list from the lg.gov.cn <c>dataproxy.jsp</c> endpoint in a single
        /// call, wraps the returned rows in a table, then downloads each linked detail page and
        /// turns it into an <see cref="ItemPlan"/>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>
        /// The collected items; an empty list when the list request or its unpacking fails.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new List <ItemPlan>();
            string html = string.Empty;

            try
            {
                string postUrl = string.Empty;
                if (this.MaxCount > 50)
                {
                    // NOTE(review): 181 looks like the total record count at the time of writing - confirm.
                    postUrl = "http://www.lg.gov.cn/module/jslib/jquery/jpage/dataproxy.jsp?startrecord=1&endrecord=181&perpage=181";
                }
                else
                {
                    postUrl = "http://www.lg.gov.cn/module/jslib/jquery/jpage/dataproxy.jsp?startrecord=1&endrecord=" + this.MaxCount + "&perpage=" + this.MaxCount;
                }
                NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                    "appid",
                    "webid",
                    "path",
                    "col",
                    "columnid",
                    "sourceContentType",
                    "unitid",
                    "webname",
                    "permissiontype"
                }, new string[] {
                    "1",
                    "1",
                    "/",
                    "1",
                    "6802",
                    "1",
                    "9393",
                    "龙岗政府在线",
                    "0"
                });
                html = this.ToolWebSite.GetHtmlByUrl(postUrl, nvc);

                // The response carries the rows as a quoted array ['...','...']; the brackets are
                // swapped for the markers kdxx/xxdk so the regex can cut out the payload, and the
                // element separators are dropped so the rows can be wrapped in one <table>.
                Regex  reg = new Regex("(?<=(kdxx))[.\\s\\S]*?(?=(xxdk))", RegexOptions.Multiline | RegexOptions.Singleline);
                string c   = reg.Match(html.Replace("['", "kdxx").Replace("']", "xxdk")).Value.Replace("kdxx", "").Replace("xxdk", "").Replace("','", "");
                html = "<table>" + c + "</table>";
            }
            catch
            {
                // Without the list there is nothing to crawl.
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList listNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));

            if (listNode == null || listNode.Count < 1)
            {
                return(list);
            }

            TableTag table = listNode[0] as TableTag;
            if (table == null)
            {
                return(list);
            }
            for (int j = 0; j < table.RowCount; j++)
            {
                TableRow tr = table.Rows[j];
                // Most fields stay empty for this site; they are still passed to GenItemPlan.
                string   ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty, BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty, PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty, ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty, MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty, PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty, ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty, MsgType = string.Empty;

                // A row needs a title cell and a date cell; skip anything else instead of
                // letting one malformed row abort the whole crawl.
                if (tr.ColumnCount < 2)
                {
                    continue;
                }
                ATag aTag = tr.Columns[0].GetATag();
                if (aTag == null)
                {
                    continue;
                }
                ItemName = aTag.GetAttribute("title");
                PlanDate = tr.Columns[1].ToPlainTextString().GetDateRegex();
                InfoUrl  = aTag.Link;
                string htmldtl = string.Empty;
                try
                {
                    htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8);
                }
                catch
                {
                    // skip an item whose detail page cannot be downloaded
                    continue;
                }
                parser = new Parser(new Lexer(htmldtl));
                NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "zoom")));
                if (dtlNode == null || dtlNode.Count < 1)
                {
                    continue;
                }
                CtxHtml  = dtlNode.AsHtml();
                ItemCtx  = CtxHtml.ToCtxString();
                PlanType = "项目核准信息";
                MsgType  = "深圳市龙岗区发改局";

                ItemPlan info = ToolDb.GenItemPlan("广东省", "深圳市区", "龙岗区", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);

                list.Add(info);
                if (!crawlAll && list.Count >= this.MaxCount)
                {
                    return(list);
                }
            }

            return(list);
        }
Esempio n. 10
0
        /// <summary>
        /// Crawls the zjdpc.gov.cn project list: posts the column form to <c>SiteUrl</c> for the
        /// first page, reads the page count from the <c>totalPage</c> assignment in the response,
        /// fetches the following pages from <c>dataproxy.jsp</c>, and turns every linked detail
        /// page into an <see cref="ItemPlan"/>. Attachment links found inside the detail content
        /// are added to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>The collected items (possibly empty).</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <ItemPlan>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                    "appid",
                    "webid",
                    "path",
                    "columnid",
                    "sourceContentType",
                    "unitid",
                    "webname",
                    "permissiontype"
                },
                                                                                  new string[] {
                    "1",
                    "1",
                    "/",
                    "808",
                    "1",
                    "620",
                    "浙江省发展和改革委员会",
                    "0"
                });
                // A single request is enough: an earlier duplicate POST whose result was
                // immediately overwritten by this call has been removed.
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc);
            }
            catch
            {
                // Best effort: with an empty first page the loop below simply finds no rows.
            }

            try
            {
                string temp = html.GetRegexBegEnd("totalPage", ";").GetReplace("=");
                int    parsedPages;
                if (int.TryParse(temp, out parsedPages))
                {
                    pageInt = parsedPages;
                }
            }
            catch
            {
                // Best effort: if the text helpers fail, fall back to crawling a single page.
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "col",
                        "appid",
                        "webid",
                        "path",
                        "columnid",
                        "sourceContentType",
                        "unitid",
                        "webname",
                        "permissiontype"
                    },
                                                                                      new string[] {
                        "1",
                        "1",
                        "1",
                        "/",
                        "808",
                        "1",
                        "620",
                        "浙江省发展和改革委员会",
                        "0"
                    });
                    try
                    {
                        // NOTE(review): each request asks for a 45-record window while perpage is 15
                        // and the loop bound is totalPage - confirm the windows line up with the site's paging.
                        int endrecord   = i * 45;
                        int startrecord = 45 * i - 44;
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.zjdpc.gov.cn/module/jslib/jquery/jpage/dataproxy.jsp?perpage=15&endrecord=" + endrecord + "&startrecord=" + startrecord, nvc);
                    }
                    catch { continue; } // skip a page that cannot be downloaded
                }

                Parser   parser   = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                if (listNode == null || listNode.Count < 1)
                {
                    continue;
                }
                for (int j = 0; j < listNode.Count; j++)
                {
                    // Most fields stay empty for this site; they are still passed to GenItemPlan.
                    string ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty, BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty, PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty, ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty, MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty, PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty, ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty, MsgType = string.Empty;

                    // Every matched table is expected to hold one item in its first row
                    // (link in column 1, date in column 2); skip tables that do not fit so a
                    // single odd table cannot abort the whole crawl.
                    TableTag itemTable = listNode[j] as TableTag;
                    if (itemTable == null || itemTable.RowCount < 1)
                    {
                        continue;
                    }
                    TableRow tr = itemTable.Rows[0];
                    if (tr.ColumnCount < 3)
                    {
                        continue;
                    }
                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }
                    ItemName = aTag.GetAttribute("title").GetReplace("省发改委,\\,'");
                    PlanDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                    InfoUrl  = "http://www.zjdpc.gov.cn" + aTag.Link.GetReplace("\\,'");
                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch { continue; } // skip an item whose detail page cannot be downloaded
                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "zoom")));
                    if (dtlNode == null || dtlNode.Count < 1)
                    {
                        continue;
                    }
                    CtxHtml     = dtlNode.AsHtml();
                    ItemCtx     = CtxHtml.ToCtxString().GetReplace("begin-->,&ldquo;,&rdquo;,end-->");
                    TotalInvest = ItemCtx.GetRegexBegEnd("总投资", "万元");
                    MsgType     = "浙江省公共资源交易中心";
                    PlanType    = "项目审批信息";
                    ItemPlan info = ToolDb.GenItemPlan("浙江省", "浙江省及地市", "", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);
                    list.Add(info);

                    // Collect attachment links from the detail content.
                    parser = new Parser(new Lexer(CtxHtml));
                    NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                    if (aNode != null && aNode.Count > 0)
                    {
                        for (int k = 0; k < aNode.Count; k++)
                        {
                            ATag a = aNode[k] as ATag;
                            if (a == null || !a.IsAtagAttach())
                            {
                                continue;
                            }
                            // Only a link that starts with a scheme is absolute; "http" appearing
                            // later in the href (e.g. in a query string) still needs the site prefix.
                            string link;
                            if (a.Link.TrimStart().StartsWith("http", System.StringComparison.OrdinalIgnoreCase))
                            {
                                link = a.Link;
                            }
                            else
                            {
                                link = "http://www.zjdpc.gov.cn/" + a.Link;
                            }
                            BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                            base.AttachList.Add(attach);
                        }
                    }
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Esempio n. 11
0
        /// <summary>
        /// Crawls the single-page project list at <c>SiteUrl</c> (the last table with id
        /// <c>table25</c>), downloads each linked detail page and turns it into an
        /// <see cref="ItemPlan"/>. Attachment links found inside the detail content are added
        /// to <c>base.AttachList</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>
        /// The collected items; an empty list when the list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list = new List <ItemPlan>();
            string html = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch
            {
                // Without the list page there is nothing to crawl.
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "table25")));

            if (listNode == null || listNode.Count < 1)
            {
                return(list);
            }

            // When several tables share the id, the last match is the one that is read.
            TableTag table = listNode[listNode.Count - 1] as TableTag;
            if (table == null)
            {
                return(list);
            }

            // Row 0 is skipped (presumably a header row - confirm against the page).
            for (int j = 1; j < table.RowCount; j++)
            {
                TableRow tr = table.Rows[j];
                // Most fields stay empty for this site; they are still passed to GenItemPlan.
                string   ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty, BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty, PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty, ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty, MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty, PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty, ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty, MsgType = string.Empty;

                // Skip rows without a link or without cells instead of letting one malformed
                // row abort the whole crawl.
                ATag aTag = tr.GetATag();
                if (aTag == null || tr.ColumnCount < 1)
                {
                    continue;
                }
                ItemName = aTag.LinkText.ToNodeString().GetReplace("   , ");
                ItemCode = tr.Columns[0].ToNodePlainString().GetRegexBegEnd("【", "】").GetReplace("项目编号:");
                InfoUrl  = "http://www.ztzl.qhfgw.gov.cn/xmjcb/xmxxgk/" + aTag.Link.GetReplace("../,./");
                string htmldtl = string.Empty;
                try
                {
                    htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                }
                catch { continue; } // skip an item whose detail page cannot be downloaded

                parser = new Parser(new Lexer(htmldtl));
                NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "table143")));
                if (dtlNode == null || dtlNode.Count < 1)
                {
                    continue;
                }
                CtxHtml     = dtlNode.AsHtml();
                // Paragraph and row ends become line breaks before the markup is converted to text.
                ItemCtx     = CtxHtml.Replace("</p>", "\r\n").Replace("</tr>", "\r\n").ToCtxString();
                TotalInvest = ItemCtx.GetRegexBegEnd("总投资", "万元");
                PlanDate    = ItemCtx.GetDateRegex();
                PlanType    = "项目信息";
                MsgType     = "青海省发展和改革委员会";
                ItemPlan info = ToolDb.GenItemPlan("青海省", "青海省及地市", "", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);
                list.Add(info);

                // Collect attachment links from the detail content.
                parser = new Parser(new Lexer(CtxHtml));
                NodeList aNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("a"));
                if (aNode != null && aNode.Count > 0)
                {
                    for (int k = 0; k < aNode.Count; k++)
                    {
                        ATag a = aNode[k] as ATag;
                        if (a == null || !a.IsAtagAttach())
                        {
                            continue;
                        }
                        // Only a link that starts with a scheme is absolute; "http" appearing
                        // later in the href (e.g. in a query string) still needs the site prefix.
                        string link;
                        if (a.Link.TrimStart().StartsWith("http", System.StringComparison.OrdinalIgnoreCase))
                        {
                            link = a.Link;
                        }
                        else
                        {
                            link = "http://www.ztzl.qhfgw.gov.cn/" + a.Link.GetReplace("../,./");
                        }
                        // Links longer than 500 bytes are dropped
                        // (presumably a storage column limit - confirm).
                        if (Encoding.Default.GetByteCount(link) > 500)
                        {
                            continue;
                        }
                        BaseAttach attach = ToolDb.GenBaseAttach(a.LinkText, info.Id, link);
                        base.AttachList.Add(attach);
                    }
                }
                if (!crawlAll && list.Count >= this.MaxCount)
                {
                    return(list);
                }
            }
            return(list);
        }
Esempio n. 12
0
        /// <summary>
        /// Crawls the gdtz.gov.cn project list once per entry returned by <c>GetCityList()</c>:
        /// the entry's URL is requested first (sharing a cookie string with the later requests),
        /// then the paged list at <c>SiteUrl</c> is walked and every linked detail page is
        /// turned into an <see cref="ItemPlan"/>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, at most <c>MaxCount</c> items are collected per city; the limit
        /// is counted separately for each city, not for the whole result.
        /// </param>
        /// <returns>
        /// The items of all cities; an empty list when <c>GetCityList()</c> yields nothing.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new List <ItemPlan>();
            Dictionary <string, string> dic = GetCityList();

            if (dic == null || dic.Count < 1)
            {
                return(list);
            }

            foreach (KeyValuePair <string, string> cityEntry in dic)
            {
                string key              = cityEntry.Key;
                string html             = string.Empty;
                string cookiestr        = string.Empty;
                int    pageInt          = 1, sqlCount = 0;
                bool   cityLimitReached = false; // set once MaxCount items were collected for this city
                try
                {
                    // The city URL is requested only for its cookies; the list itself comes from SiteUrl.
                    this.ToolWebSite.GetHtmlByUrl(cityEntry.Value, Encoding.UTF8, ref cookiestr);
                    html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8, ref cookiestr);
                }
                catch
                {
                    // No list page for this city: nothing to parse, move on to the next one.
                    continue;
                }
                Parser   parser   = new Parser(new Lexer(html));
                NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "badoo")), true), new TagNameFilter("a")));
                if (pageNode != null && pageNode.Count > 0)
                {
                    try
                    {
                        // The last pager link is "javascript:jumpPage(N)"; strip everything but N.
                        string pageText = pageNode[pageNode.Count - 1].GetATag().Link.Replace("javascript", "").Replace("jumpPage(", "").Replace(")", "");
                        int    parsedPages;
                        if (int.TryParse(pageText, out parsedPages))
                        {
                            pageInt = parsedPages;
                        }
                    }
                    catch
                    {
                        // Best effort: if the pager link is missing, fall back to a single page.
                    }
                }
                for (int i = 1; i <= pageInt && !cityLimitReached; i++)
                {
                    if (i > 1)
                    {
                        NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] { "param.name", "param.proofCode", "page.pageNo", "page.orderBy", "page.order" }, new string[] {
                            "", "", i.ToString(), "", ""
                        });
                        try
                        {
                            html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8, ref cookiestr);
                        }
                        catch { continue; } // skip a page that cannot be downloaded
                    }
                    parser = new Parser(new Lexer(html));
                    NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "hytab")));
                    if (listNode == null || listNode.Count < 1)
                    {
                        continue;
                    }
                    TableTag table = listNode[0] as TableTag;
                    if (table == null)
                    {
                        continue;
                    }
                    // Row 0 is skipped (presumably a header row - confirm against the page).
                    for (int j = 1; j < table.RowCount; j++)
                    {
                        string   ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty, BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty, PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty, ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty, MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty, PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty, ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty, MsgType = string.Empty;
                        TableRow tr = table.Rows[j];
                        // Columns 0 and 2 are read below; skip shorter rows instead of letting
                        // one malformed row abort the crawl of every city.
                        if (tr.ColumnCount < 3)
                        {
                            continue;
                        }
                        ItemName = tr.Columns[0].ToNodePlainString();
                        PlanDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                        InfoUrl  = "http://www.gdtz.gov.cn" + tr.Columns[0].GetATagHref();
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                        }
                        catch { continue; } // skip an item whose detail page cannot be downloaded
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "xmgknr")));
                        if (dtlNode == null || dtlNode.Count < 1)
                        {
                            continue;
                        }
                        CtxHtml = dtlNode.AsHtml();
                        parser  = new Parser(new Lexer(CtxHtml));
                        NodeList tableNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                        if (tableNode != null && tableNode.Count > 0)
                        {
                            // Flatten the detail table into "label:value" lines, starting at row 1
                            // and stopping at the first row that has fewer than two cells.
                            TableTag tab = tableNode[0] as TableTag;
                            for (int k = 1; k < tab.RowCount; k++)
                            {
                                TableRow dr = tab.Rows[k];
                                if (dr.ColumnCount < 2)
                                {
                                    break;
                                }
                                try
                                {
                                    ItemCtx += dr.Columns[0].ToNodePlainString() + ":";
                                    ItemCtx += dr.Columns[1].ToNodePlainString() + "\r\n";
                                }
                                catch (Exception ex) {
                                    Logger.Error(InfoUrl + ItemName + key + i);
                                    Logger.Error(ex);
                                }
                            }
                        }
                        else
                        {
                            ItemCtx = CtxHtml.ToCtxString();
                        }
                        ApprovalCode = ItemCtx.GetRegex("备案项目编号");
                        ItemAddress  = ItemCtx.GetRegex("项目所在地");
                        TotalInvest  = ItemCtx.GetRegex("项目总投资").Replace("万元", "").Replace("万", "");
                        ItemContent  = ItemCtx.GetRegex("项目规模及内容");
                        ApprovalUnit = ItemCtx.GetRegex("备案机关");
                        ApprovalDate = ItemCtx.GetRegex("复核通过日期");
                        // The period is split into begin/end only when it has exactly one '-'.
                        string   temp     = ItemCtx.GetRegex("项目起止年限");
                        string[] tempPlan = temp.Split('-');
                        if (tempPlan.Length == 2)
                        {
                            PlanBeginDate = tempPlan[0];
                            PlanEndDate   = tempPlan[1];
                        }
                        PlanType = "项目公开";
                        MsgType  = "广东省发展和改革委员会";
                        // Entries whose key contains "顺德" are filed under "佛山市区".
                        string city = key;
                        if (key.Contains("顺德"))
                        {
                            city = "佛山市区";
                        }

                        ItemPlan info = ToolDb.GenItemPlan("广东省", city, "", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);
                        list.Add(info);
                        sqlCount++;
                        if (!crawlAll && sqlCount >= this.MaxCount)
                        {
                            // Enough items for this city: leave the row loop; the flag ends the page loop.
                            cityLimitReached = true;
                            break;
                        }
                    }
                }
            }
            return(list);
        }
Esempio n. 13
0
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list            = new List <ItemPlan>();
            string html            = string.Empty;
            string cookiestr       = string.Empty;
            string viewState       = string.Empty;
            int    pageInt         = 1;
            string eventValidation = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch
            {
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "arContent")));

            if (pageNode != null && pageNode.Count > 0)
            {
                TableTag pageTable = pageNode[0] as TableTag;
                string   temp      = pageTable.Rows[pageTable.RowCount - 1].ToNodePlainString().Replace("createPageHTML", "").Replace("0,", "").Replace("(", "").Replace(")", "").Replace("index", "").Replace("htm", "").Replace(",", "").Replace("\"", "").Replace(";", "").Trim();
                try
                {
                    pageInt = int.Parse(temp);
                }
                catch { }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "/index_" + (i - 1).ToString() + ".htm", Encoding.Default);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "arContent")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    for (int j = 1; j < table.RowCount - 1; j++)
                    {
                        TableRow tr = table.Rows[j];
                        string   ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty, BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty, PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty, ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty, MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty, PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty, ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty, MsgType = string.Empty;

                        ATag aTag = tr.Columns[1].GetATag();
                        ItemName = aTag.GetAttribute("title");
                        ItemCode = tr.Columns[2].ToNodePlainString();
                        PlanDate = tr.Columns[3].ToPlainTextString().GetDateRegex();

                        InfoUrl = this.SiteUrl + aTag.Link.Replace("../", "").Replace("./", "");
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "detail")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            CtxHtml = dtlNode.AsHtml();
                            ItemCtx = CtxHtml.ToCtxString();
                            string ctx = string.Empty;
                            parser = new Parser(new Lexer(CtxHtml));
                            NodeList dtlTable = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "100%")));
                            if (dtlTable != null && dtlTable.Count > 0)
                            {
                                TableTag tableTag = dtlTable[0] as TableTag;
                                for (int k = 0; k < tableTag.RowCount; k++)
                                {
                                    for (int c = 0; c < tableTag.Rows[k].ColumnCount; c++)
                                    {
                                        if (c % 2 == 0)
                                        {
                                            ctx += tableTag.Rows[k].Columns[c].ToNodePlainString().Replace(":", "").Replace(":", "") + ":";
                                        }
                                        else
                                        {
                                            ctx += tableTag.Rows[k].Columns[c].ToNodePlainString() + "\r\n";
                                        }
                                    }
                                }
                            }

                            MsgUnit = ctx.GetRegex("发布单位");
                            if (string.IsNullOrEmpty(MsgUnit))
                            {
                                MsgUnit = "发改委";
                            }
                            PlanType = "项目审批信息";
                            MsgType  = "深圳市发展和改革委员会";

                            ItemPlan info = ToolDb.GenItemPlan("广东省", "深圳市区", "", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);

                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Esempio n. 14
0
        /// <summary>
        /// Crawls three project-approval listing sections of www.baoan.gov.cn and builds one
        /// <see cref="ItemPlan"/> for every listing row whose detail page contains the expected table.
        /// </summary>
        /// <param name="crawlAll">
        /// <c>true</c> to visit every page of every section; <c>false</c> to return as soon as the
        /// number of items taken from the section being crawled reaches <c>MaxCount</c>.
        /// </param>
        /// <returns>The items collected so far; never <c>null</c>.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            List<string> urlList = new List<string>();
            urlList.Add("http://www.baoan.gov.cn/ztlm/gcjszt/gcjs/xmsp/xusp/kxxyj/");
            urlList.Add("http://www.baoan.gov.cn/ztlm/gcjszt/gcjs/xmsp/xusp/cbsj/");
            urlList.Add("http://www.baoan.gov.cn/ztlm/gcjszt/gcjs/xmsp/xusp/hjyxpj/");
            IList list = new List<ItemPlan>();

            foreach (string url in urlList)
            {
                int    count   = 0;
                int    pageInt = 1;
                string html    = string.Empty;
                try
                {
                    html = this.ToolWebSite.GetHtmlByUrl(url, Encoding.UTF8);
                }
                catch
                {
                    // One unreachable section must not discard the sections that follow it.
                    continue;
                }

                Parser   parser   = new Parser(new Lexer(html));
                NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "fenye")));
                if (pageNode != null && pageNode.Count > 0)
                {
                    // Strip the createPageHTML(...) call syntax and its other arguments from the pager
                    // text; if what remains is not a plain integer the default of one page is kept.
                    string temp = pageNode.AsString().Replace("createPageHTML", "").Replace("0,", "").Replace("(", "").Replace(")", "").Replace("index", "").Replace("html", "").Replace(",", "").Replace("\"", "").Replace(";", "").Trim();
                    int parsedPages;
                    if (int.TryParse(temp, out parsedPages))
                    {
                        pageInt = parsedPages;
                    }
                }

                // Every section URL ends with '/', so trim it before appending "/index_N.html"
                // to avoid requesting a path that contains "//".
                string pageBase = url.TrimEnd('/');
                for (int i = 1; i <= pageInt; i++)
                {
                    if (i > 1)
                    {
                        // Page 1 is the section URL itself; page i (i > 1) lives at index_{i-1}.html.
                        try
                        {
                            html = this.ToolWebSite.GetHtmlByUrl(pageBase + "/index_" + (i - 1).ToString() + ".html", Encoding.UTF8);
                        }
                        catch { continue; }
                    }

                    parser = new Parser(new Lexer(html));
                    NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "97%")));
                    if (listNode == null || listNode.Count == 0)
                    {
                        continue;
                    }

                    TableTag table = listNode[0] as TableTag;
                    if (table == null)
                    {
                        continue;
                    }

                    // Row 0 is skipped (presumably a header row -- confirm against the live page).
                    for (int j = 1; j < table.RowCount; j++)
                    {
                        TableRow tr = table.Rows[j];
                        if (tr.ColumnCount < 4)
                        {
                            // Columns 1..3 are read below; a shorter row cannot be a data row.
                            continue;
                        }

                        string ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty,
                               BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty,
                               PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty,
                               ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty,
                               MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty,
                               PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty,
                               ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty,
                               MsgType = string.Empty;

                        ItemName = tr.Columns[1].ToNodePlainString();
                        ItemCode = tr.Columns[2].ToNodePlainString();
                        PlanDate = tr.Columns[3].ToPlainTextString().GetDateRegex();

                        // Detail links are relative to the section URL.
                        InfoUrl = url + tr.Columns[1].GetATagHref().Replace("../", "").Replace("./", "");
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8);
                        }
                        catch { continue; }

                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "900")));
                        if (dtlNode == null || dtlNode.Count == 0)
                        {
                            continue;
                        }

                        TableTag dtlTable = dtlNode[0] as TableTag;
                        if (dtlTable == null)
                        {
                            continue;
                        }

                        CtxHtml = dtlNode.AsHtml();

                        // Flatten the detail table into "label:value" lines so the fields can be
                        // looked up by label below. Row 0 is skipped, as in the listing table.
                        for (int k = 1; k < dtlTable.RowCount; k++)
                        {
                            TableRow dtlRow = dtlTable.Rows[k];
                            if (dtlRow.ColumnCount < 2)
                            {
                                continue;
                            }
                            ItemCtx += dtlRow.Columns[0].ToNodePlainString() + ":";
                            ItemCtx += dtlRow.Columns[1].ToNodePlainString() + "\r\n";
                        }

                        BuildUnit    = ItemCtx.GetRegex("建设单位");
                        ApprovalCode = ItemCtx.GetRegex("审批文号");
                        ApprovalUnit = ItemCtx.GetRegex("审批单位");
                        ApprovalDate = ItemCtx.GetRegex("审批时间").Replace(".", "-");

                        PlanType = "项目审批信息";
                        MsgType  = "深圳市宝安区发改局";

                        ItemPlan info = ToolDb.GenItemPlan("广东省", "深圳市区", "宝安区", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);
                        count++;
                        list.Add(info);

                        // NOTE(review): count is reset for every section, yet reaching MaxCount returns
                        // from the whole method, so later sections are not visited in that case --
                        // confirm this is intended.
                        if (!crawlAll && count >= this.MaxCount)
                        {
                            return list;
                        }
                    }
                }
            }
            return list;
        }
Esempio n. 15
0
        /// <summary>
        /// Crawls the hebpi.net project list, which is served as JSON, and builds one
        /// <see cref="ItemPlan"/> per listed project whose detail page contains the expected block.
        /// </summary>
        /// <param name="crawlAll">
        /// <c>true</c> to process every page; <c>false</c> to return as soon as <c>MaxCount</c>
        /// items have been collected.
        /// </param>
        /// <returns>The items collected so far; never <c>null</c>.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<ItemPlan>();
            string html    = string.Empty;
            int    pageInt = 5; // fixed number of list pages to request

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch
            {
                return list;
            }

            // The serializer holds no per-page state here, so one instance serves all pages.
            JavaScriptSerializer serializer = new JavaScriptSerializer();
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://hebpi.net/portal/ShowMoreProjectAction.do?method=YsProListPage&page=" + i + "&rp=20");
                    }
                    catch { continue; }
                }

                // A response that is not a JSON object (e.g. an error page) must only cost this
                // page, not the items already collected.
                Dictionary<string, object> smsTypeJson;
                try
                {
                    smsTypeJson = serializer.DeserializeObject(html) as Dictionary<string, object>;
                }
                catch (ArgumentException) { continue; }
                catch (InvalidOperationException) { continue; }
                if (smsTypeJson == null)
                {
                    continue;
                }

                foreach (KeyValuePair<string, object> obj in smsTypeJson)
                {
                    if (obj.Key == "total" || obj.Key == "ROWNUM_")
                    {
                        continue;
                    }

                    // Only array-valued members carry project rows; skip anything else.
                    object[] array = obj.Value as object[];
                    if (array == null)
                    {
                        continue;
                    }

                    foreach (object arrValue in array)
                    {
                        Dictionary<string, object> dic = arrValue as Dictionary<string, object>;
                        if (dic == null)
                        {
                            continue;
                        }

                        // The detail URL cannot be built without the ID, so such rows are skipped;
                        // a missing name or date simply yields an empty string.
                        object rawId, rawName, rawDate;
                        if (!dic.TryGetValue("ID", out rawId))
                        {
                            continue;
                        }
                        dic.TryGetValue("CUTNAME", out rawName);
                        dic.TryGetValue("DD", out rawDate);

                        string ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty,
                               BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty,
                               PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty,
                               ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty,
                               MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty,
                               PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty,
                               ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty,
                               MsgType = string.Empty;

                        ItemName = Convert.ToString(rawName);
                        PlanDate = Convert.ToString(rawDate);
                        InfoUrl  = "http://hebpi.net:80/portal/ShowMoreProjectAction.do?method=detail&id=" + Convert.ToString(rawId);

                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }

                        Parser   parser  = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "neirongleft")));
                        if (dtlNode == null || dtlNode.Count == 0)
                        {
                            continue;
                        }

                        CtxHtml     = dtlNode.AsHtml();
                        ItemCtx     = CtxHtml.ToCtxString();
                        TotalInvest = ItemCtx.GetRegexBegEnd("总投资", "万元");
                        PlanType    = "项目审批信息";
                        MsgType     = "河北省发展和改革委员会";

                        ItemPlan info = ToolDb.GenItemPlan("河北省", "河北省及地市", "", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);

                        list.Add(info);
                        if (!crawlAll && list.Count >= this.MaxCount)
                        {
                            return list;
                        }
                    }
                }
            }
            return list;
        }
Esempio n. 16
0
        /// <summary>
        /// Crawls the Hainan development-and-reform project-approval listing and builds one
        /// <see cref="ItemPlan"/> per list entry whose detail page contains the expected tables.
        /// </summary>
        /// <param name="crawlAll">
        /// <c>true</c> to process every page; <c>false</c> to return as soon as <c>MaxCount</c>
        /// items have been collected.
        /// </param>
        /// <returns>
        /// The items collected so far. An empty list (not <c>null</c>) is returned when the first
        /// page cannot be fetched.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<ItemPlan>();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch
            {
                // Return the empty list rather than null so callers can always enumerate the result.
                return list;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("dt"), new HasAttributeFilter("class", "ny_my")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // NOTE(review): as written, Replace("(", "(") is a no-op; one of the two arguments
                    // was possibly a full-width parenthesis originally -- confirm against the repository.
                    string temp = pageNode.AsString().Replace("(", "(").GetRegexBegEnd("(", ",");
                    int parsedPages;
                    if (int.TryParse(temp, out parsedPages))
                    {
                        pageInt = parsedPages;
                    }
                }
                catch
                {
                    // Best effort: if the pager text cannot be read, keep the default of one page.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Page 1 is SiteUrl itself; page i (i > 1) lives at index_{i-1}.html.
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "index_" + (i - 1) + ".html");
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("dt"), new HasAttributeFilter("class", "ny_news")), true), new TagNameFilter("li")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    INode node = listNode[j];
                    ATag  aTag = node.GetATag();
                    if (aTag == null)
                    {
                        // A list entry without a link has no detail page to follow.
                        continue;
                    }

                    string ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty,
                           BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty,
                           PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty,
                           ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty,
                           MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty,
                           PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty,
                           ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty,
                           MsgType = string.Empty;

                    ItemName = aTag.LinkText;
                    PlanDate = node.ToPlainTextString().GetDateRegex();

                    // Links that already contain "http" are used as-is; anything else is treated as
                    // relative to the listing directory.
                    if (aTag.Link.IndexOf("http", StringComparison.OrdinalIgnoreCase) >= 0)
                    {
                        InfoUrl = aTag.Link;
                    }
                    else
                    {
                        InfoUrl = "http://plan.hainan.gov.cn/fzggzl/xmsp/" + aTag.Link.GetReplace("../,./");
                    }

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "1000")));

                    // The first two matching tables are both used, so at least two are required.
                    if (dtlNode == null || dtlNode.Count <= 1)
                    {
                        continue;
                    }

                    CtxHtml = dtlNode[0].ToHtml() + dtlNode[1].ToHtml();
                    ItemCtx = CtxHtml.ToCtxString();

                    ApprovalUnit = ItemCtx.GetRegex("发文机构");
                    ItemCode     = ItemCtx.GetRegex("索引号");
                    ApprovalCode = ItemCtx.GetRegex("文号");
                    ApprovalDate = ItemCtx.GetDateRegex("yyyy年MM月dd日");
                    PlanType     = "项目审批信息";
                    MsgType      = "海南省发展和改革委员会";

                    ItemPlan info = ToolDb.GenItemPlan("海南省", "海南省及地市", "", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);
                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }
            return list;
        }
Esempio n. 17
0
        /// <summary>
        /// Crawls the www.gdzbtb.gov.cn project-approval listing and builds one
        /// <see cref="ItemPlan"/> per list entry whose detail page contains the expected table.
        /// </summary>
        /// <param name="crawlAll">
        /// <c>true</c> to process every page; <c>false</c> to return as soon as <c>MaxCount</c>
        /// items have been collected.
        /// </param>
        /// <returns>
        /// The items collected so far. An empty list (not <c>null</c>) is returned when the first
        /// page cannot be fetched.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<ItemPlan>();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch
            {
                // Return the empty list rather than null so callers can always enumerate the result.
                return list;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "cn6")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // Strip the createPageHTML(...) call syntax and its other arguments from the pager
                    // text; if what remains is not a plain integer the default of one page is kept.
                    string temp = pageNode.AsString().Replace("createPageHTML", "").Replace(" 0,", "").Replace("(", "").Replace(")", "").Replace("index", "").Replace("htm", "").Replace(",", "").Replace("\"", "").Replace(";", "").Trim();
                    int parsedPages;
                    if (int.TryParse(temp, out parsedPages))
                    {
                        pageInt = parsedPages;
                    }
                }
                catch
                {
                    // Best effort: if the pager text cannot be read, keep the default of one page.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Page 1 is SiteUrl itself; page i (i > 1) lives at index_{i-1}.htm.
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.gdzbtb.gov.cn/zbsxhz/index_" + (i - 1).ToString() + ".htm", Encoding.Default);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "position2")), true), new TagNameFilter("li")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    ATag aTag = listNode[j].GetATag();
                    if (aTag == null)
                    {
                        // A list entry without a link has no detail page to follow.
                        continue;
                    }

                    string ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty,
                           BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty,
                           PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty,
                           ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty,
                           MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty,
                           PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty,
                           ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty,
                           MsgType = string.Empty;

                    // A missing title attribute would otherwise make the Replace calls below throw.
                    ItemName = aTag.GetAttribute("title") ?? string.Empty;
                    InfoUrl  = "http://www.gdzbtb.gov.cn/zbsxhz/" + aTag.Link.Replace("../", "").Replace("./", "");

                    // The title is expected to look like "[city]-name": pull out the bracketed prefix
                    // (via temporary markers) and then drop it from the name.
                    string tempCity = ItemName.Replace("[", "kdxx").Replace("]", "xxdk").GetRegexBegEnd("kdxx", "xxdk");
                    ItemName = ItemName.Replace("[" + tempCity + "]-", "");

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "100%")));

                    // The fourth matching table is the one that is read, so at least four are required.
                    if (dtlNode == null || dtlNode.Count <= 3)
                    {
                        continue;
                    }

                    TableTag table = dtlNode[3] as TableTag;
                    if (table == null)
                    {
                        continue;
                    }
                    CtxHtml = dtlNode.AsHtml();

                    // Flatten the detail table into "label:value" lines so the fields can be looked
                    // up by label below; rows with fewer than two cells carry no pair and are skipped.
                    for (int k = 0; k < table.RowCount; k++)
                    {
                        TableRow row = table.Rows[k];
                        if (row.ColumnCount < 2)
                        {
                            continue;
                        }
                        ItemCtx += row.Columns[0].ToNodePlainString() + ":";
                        ItemCtx += row.Columns[1].ToNodePlainString() + "\r\n";
                    }

                    // Date fallbacks: labelled approval date, then any date in the text, then today.
                    PlanDate = ItemCtx.GetRegex("批复日期").GetDateRegex();
                    if (string.IsNullOrEmpty(PlanDate))
                    {
                        PlanDate = ItemCtx.GetDateRegex();
                    }
                    if (string.IsNullOrEmpty(PlanDate))
                    {
                        PlanDate = DateTime.Now.ToString("yyyy-MM-dd");
                    }

                    ItemCode     = ItemCtx.GetRegex("项目编码").Replace(" ", "");
                    BuildUnit    = ItemCtx.GetRegex("项目单位");
                    ApprovalUnit = ItemCtx.GetRegex("核准部门");
                    ApprovalDate = PlanDate;
                    ApprovalCode = ItemCtx.GetRegex("批复文号");
                    ItemContent  = ItemCtx.GetRegex("规模及内容", true, 1000);

                    // Entries tagged with the province name are filed under the provincial capital.
                    string city = tempCity == "广东" ? "广州市区" : tempCity + "市区";

                    PlanType = "项目核准信息";
                    MsgType  = "广东省招标投标监管网";

                    ItemPlan info = ToolDb.GenItemPlan("广东省", city, "", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);

                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }
            return list;
        }
Esempio n. 18
0
        /// <summary>
        /// Crawls the www.bjpc.gov.cn project listing and builds one <see cref="ItemPlan"/> per
        /// list entry whose detail page contains the expected table.
        /// </summary>
        /// <param name="crawlAll">
        /// <c>true</c> to process every page; <c>false</c> to return as soon as <c>MaxCount</c>
        /// items have been collected.
        /// </param>
        /// <returns>
        /// The items collected so far. An empty list (not <c>null</c>) is returned when the first
        /// page cannot be fetched.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<ItemPlan>();
            string html    = string.Empty;
            int    pageInt = 24; // fallback page count, used when the pager cannot be parsed

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch
            {
                // Return the empty list rather than null so callers can always enumerate the result.
                return list;
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "z_12")), true), new TagNameFilter("a")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The href of the last pager link is reduced to its number by removing
                    // "index_" and ".htm" (presumably what GetReplace does with the comma-separated
                    // list -- confirm against its definition).
                    string temp = pageNode[pageNode.Count - 1].GetATagHref();
                    int parsedPages;
                    if (int.TryParse(temp.GetReplace("index_,.htm"), out parsedPages))
                    {
                        pageInt = parsedPages;
                    }
                }
                catch
                {
                    // Best effort: if the pager link cannot be read, keep the fallback page count.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // Page 1 is SiteUrl itself; page i (i > 1) lives at index_{i-1}.htm.
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "index_" + (i - 1) + ".htm", Encoding.Default);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("valign", "top")), true), new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "730"))));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    // Each list entry is its own small table; the link sits in the second cell of
                    // its first row. Entries that do not have that shape are skipped.
                    TableTag entryTable = listNode[j] as TableTag;
                    if (entryTable == null || entryTable.RowCount == 0)
                    {
                        continue;
                    }
                    TableRow tr = entryTable.Rows[0];
                    if (tr.ColumnCount < 2)
                    {
                        continue;
                    }
                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }

                    string ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty,
                           BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty,
                           PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty,
                           ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty,
                           MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty,
                           PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty,
                           ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty,
                           MsgType = string.Empty;

                    ItemName = aTag.LinkText;
                    PlanDate = listNode[j].ToPlainTextString().GetDateRegex();
                    InfoUrl  = "http://www.bjpc.gov.cn/gcjs/" + aTag.Link.GetReplace("../,./");

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellpadding", "2")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        continue;
                    }

                    TableTag dtlTable = dtlNode[0] as TableTag;
                    if (dtlTable == null)
                    {
                        continue;
                    }
                    CtxHtml = dtlNode.AsHtml();

                    // Flatten the detail table into "label:value" lines: cells at even positions are
                    // treated as labels, cells at odd positions as the value that ends the line.
                    for (int r = 0; r < dtlTable.RowCount; r++)
                    {
                        TableRow dtlRow = dtlTable.Rows[r];
                        for (int c = 0; c < dtlRow.ColumnCount; c++)
                        {
                            if ((c + 1) % 2 == 0)
                            {
                                ItemCtx += dtlRow.Columns[c].ToNodePlainString() + "\r\n";
                            }
                            else
                            {
                                ItemCtx += dtlRow.Columns[c].ToNodePlainString().GetReplace(":,:") + ":";
                            }
                        }
                    }

                    ItemContent  = ItemCtx.GetRegex("内容摘要", true, 500);
                    ItemCode     = ApprovalCode = ItemCtx.GetRegex("审批文号");
                    ApprovalUnit = ItemCtx.GetRegex("批复单位");
                    ApprovalDate = ItemCtx.GetRegex("批复时间").GetDateRegex();
                    TotalInvest  = ItemCtx.GetRegexBegEnd("总投资", "万元").GetChina();

                    // A ".." in the link text is taken to mean the listing shortened the name, so
                    // the name from the detail page is preferred when it is available.
                    if (ItemName.Contains(".."))
                    {
                        string temp = ItemCtx.GetRegex("项目名称");
                        ItemName = string.IsNullOrEmpty(temp) ? ItemName : temp;
                    }

                    PlanType = "项目信息";
                    MsgType  = "北京市发展和改革委员会";
                    ItemPlan info = ToolDb.GenItemPlan("北京市", "北京市区", "", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);

                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return list;
                    }
                }
            }
            return list;
        }
Esempio n. 19
0
        /// <summary>
        /// Crawls the project-approval listing (first page at <c>SiteUrl</c>, further pages at
        /// <c>http://www.nxdrc.gov.cn/zfxxgk/zfxxgkml/index{n}.htm</c>) and builds one
        /// <see cref="ItemPlan"/> for every listed row whose detail page contains a
        /// <c>div</c> with <c>id="main3"</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl stops as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>
        /// The collected items, or <c>null</c> when the first listing page cannot be downloaded
        /// (the null return is kept so existing callers see the same failure signal).
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <ItemPlan>();
            string html    = string.Empty;
            // Fallback page count used when the pager text cannot be parsed.
            int    pageInt = 100;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch { return(null); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "list_sort")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("分", "页");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: an unreadable pager leaves the fallback page count in place.
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.nxdrc.gov.cn/zfxxgk/zfxxgkml/index" + (i - 1).ToString() + ".htm", Encoding.Default);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "list_v01")), true), new TagNameFilter("table")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    if (table == null)
                    {
                        continue;
                    }
                    // Row 0 is skipped (presumably the table header).
                    for (int j = 1; j < table.RowCount; j++)
                    {
                        string   ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty, BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty, PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty, ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty, MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty, PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty, ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty, MsgType = string.Empty;
                        TableRow tr = table.Rows[j];
                        // Columns 0..2 are read below; skip malformed rows instead of
                        // throwing and losing everything collected so far.
                        if (tr.ColumnCount < 3)
                        {
                            continue;
                        }
                        ItemCode = tr.Columns[0].ToNodePlainString();
                        ATag aTag = tr.Columns[1].GetATag();
                        if (aTag == null)
                        {
                            // No link in the row: there is no detail page to fetch.
                            continue;
                        }
                        ItemName = aTag.LinkText.GetReplace("自治区发展改革委批准,自治区发展改革委批复,自治区发改委");

                        PlanDate = tr.Columns[2].ToPlainTextString().GetDateRegex();
                        InfoUrl  = "http://www.nxdrc.gov.cn/zfxxgk/zfxxgkml/" + aTag.Link.GetReplace("../,./");
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "main3")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            CtxHtml     = dtlNode.AsHtml();
                            ItemCtx     = CtxHtml.ToCtxString();
                            TotalInvest = ItemCtx.GetRegexBegEnd("总投资", "万元");
                            parser      = new Parser(new Lexer(CtxHtml));
                            // NOTE(review): the filter below is built with an empty first argument
                            // and "id" as the second; the arguments may be swapped or incomplete.
                            // Left unchanged - confirm against the live page markup.
                            NodeList conNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("", "id")));
                            if (conNode != null && conNode.Count > 0)
                            {
                                ItemContent = conNode[0].ToNodePlainString();
                                // Content longer than 2000 bytes (default encoding) is dropped entirely.
                                if (Encoding.Default.GetByteCount(ItemContent) > 2000)
                                {
                                    ItemContent = "";
                                }
                            }

                            PlanType = "项目审批信息";
                            MsgType  = "宁夏回族自治区发展和改革委员会";

                            ItemPlan info = ToolDb.GenItemPlan("宁夏回族自治区", "宁夏回族自治区及地市", "", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);
                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Esempio n. 20
0
        /// <summary>
        /// Crawls the project-approval listing at <c>SiteUrl</c> (further pages are requested by
        /// appending <c>&amp;dqy={page}</c>) and builds one <see cref="ItemPlan"/> for every row
        /// whose detail page under <c>http://www.shdrc.gov.cn/</c> contains a
        /// <c>td</c> with <c>class="maintitle2"</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl stops as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>
        /// The collected items, or <c>null</c> when the first listing page cannot be downloaded
        /// (the null return is kept so existing callers see the same failure signal).
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <ItemPlan>();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch { return(null); }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("select"), new HasAttributeFilter("class", "form")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The page count is taken from the value of the last <option> of the pager select.
                    SelectTag tag  = pageNode[0] as SelectTag;
                    string    temp = tag.OptionTags[tag.OptionTags.Length - 1].Value;
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: an unreadable pager means only the first page is crawled.
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&dqy=" + i, Encoding.Default);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "700")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    if (table == null)
                    {
                        continue;
                    }
                    // The first row and the last two rows of the outer table are not data rows.
                    for (int j = 1; j < table.RowCount - 2; j++)
                    {
                        string   ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty, BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty, PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty, ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty, MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty, PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty, ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty, MsgType = string.Empty;

                        // Each data row wraps a nested table whose first row holds the cells.
                        // Skip rows that do not have that shape instead of aborting the crawl.
                        TableTag rowTable = table.Rows[j].GetTableTag();
                        if (rowTable == null || rowTable.RowCount == 0)
                        {
                            continue;
                        }
                        TableRow tr = rowTable.Rows[0];
                        if (tr.ColumnCount < 4)
                        {
                            continue;
                        }

                        ATag aTag = tr.Columns[1].GetATag();
                        if (aTag == null)
                        {
                            // No link in the row: there is no detail page to fetch.
                            continue;
                        }
                        ItemName     = aTag.GetAttribute("title");
                        ItemCode     = tr.Columns[0].ToNodePlainString();
                        ApprovalCode = tr.Columns[2].ToNodePlainString();
                        // The listing shows a two-digit year; prefix the century only when a
                        // date was actually found, otherwise PlanDate would become the bogus "20".
                        string shortDate = tr.Columns[3].ToPlainTextString().GetDateRegex("yy-MM-dd");
                        PlanDate     = string.IsNullOrEmpty(shortDate) ? string.Empty : "20" + shortDate;
                        InfoUrl      = "http://www.shdrc.gov.cn/" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "maintitle2")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            CtxHtml = dtlNode.AsHtml();
                            ItemCtx = CtxHtml.GetReplace("<!--", "<span>").GetReplace("-->", "<span>").ToCtxString().GetReplace("begin,end,-->,<--");
                            parser  = new Parser(new Lexer(CtxHtml));
                            NodeList tableNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "text3")));
                            if (tableNode != null && tableNode.Count > 0)
                            {
                                // Flatten the detail table into "label:value\r\n" lines: cells at even
                                // indexes are followed by ":", cells at odd indexes end the line.
                                string   ctx = string.Empty;
                                TableTag tag = tableNode[0] as TableTag;
                                for (int r = 0; tag != null && r < tag.RowCount; r++)
                                {
                                    for (int c = 0; c < tag.Rows[r].ColumnCount; c++)
                                    {
                                        if ((c + 1) % 2 == 0)
                                        {
                                            ctx += tag.Rows[r].Columns[c].ToNodePlainString().GetReplace("begin,end").ToCtxString() + "\r\n";
                                        }
                                        else
                                        {
                                            ctx += tag.Rows[r].Columns[c].ToNodePlainString().GetReplace("begin,end").ToCtxString() + ":";
                                        }
                                    }
                                }
                                // The code from the detail page wins over the one from the listing when present.
                                string code = ctx.GetRegex("项目编码");
                                ItemCode     = code == "" ? ItemCode : code;
                                ItemContent  = ctx.GetRegex("内容", true, 500);
                                ApprovalUnit = ctx.GetRegex("批复机关");
                                ApprovalDate = ctx.GetRegex("批复时间").GetDateRegex();
                            }
                            MsgUnit  = "上海市发展和改革委员会";
                            PlanType = "项目审批信息";
                            MsgType  = "上海市发展和改革委员会";
                            ItemPlan info = ToolDb.GenItemPlan("上海市", "上海市区", "", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);

                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Esempio n. 21
0
        /// <summary>
        /// Crawls the listing at <c>SiteUrl</c> (further pages are requested by posting the
        /// ASP.NET view state back with <c>Webpager1</c> as event target) and builds one
        /// <see cref="ItemPlan"/> for every row whose detail page contains a
        /// <c>div</c> with <c>id="contents"</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl stops as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>
        /// The collected items; an empty list when the first listing page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list      = new List <ItemPlan>();
            string html      = string.Empty;
            string viewState = string.Empty;
            int    pageInt   = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "Webpager1")));

            if (pageNode != null && pageNode.Count > 0)
            {
                string temp = pageNode.AsString().GetRegexBegEnd("共", "页");
                try
                {
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: an unreadable pager means only the first page is crawled.
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // The view state is read from the most recently downloaded page and posted
                    // back together with the target page number.
                    viewState = this.ToolWebSite.GetAspNetViewState(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[] {
                        "__VIEWSTATE",
                        "__EVENTTARGET",
                        "__EVENTARGUMENT",
                        "deptKey",
                        "key",
                        "Webpager1_input"
                    }, new string[] {
                        viewState,
                        "Webpager1",
                        i.ToString(),
                        "", "",
                        (i - 1).ToString()
                    });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellSpacing", "1")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    if (table == null)
                    {
                        continue;
                    }
                    for (int j = 0; j < table.RowCount; j++)
                    {
                        TableRow tr = table.Rows[j];
                        string   ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty, BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty, PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty, ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty, MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty, PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty, ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty, MsgType = string.Empty;

                        // Columns 0..2 are read below; skip malformed rows instead of
                        // throwing and losing everything collected so far.
                        if (tr.ColumnCount < 3)
                        {
                            continue;
                        }

                        MsgUnit  = tr.Columns[2].ToNodePlainString();
                        PlanDate = tr.Columns[1].ToPlainTextString().GetDateRegex("yyyy/MM/dd");
                        ATag aTag = tr.Columns[0].GetATag();
                        if (aTag == null)
                        {
                            // No link in the row: there is no detail page to fetch.
                            continue;
                        }
                        ItemName = aTag.LinkText.ToNodeString().GetReplace("   , ");

                        InfoUrl = "http://xxgk.sd.gov.cn/GovInfoOpen/InfoOpenDir/InfoOpenDirTwo/" + aTag.Link;
                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                        }
                        catch { continue; }

                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "contents")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            CtxHtml = dtlNode.AsHtml();
                            ItemCtx = CtxHtml.Replace("</p>", "\r\n").Replace("</tr>", "\r\n").ToCtxString();
                            parser  = new Parser(new Lexer(CtxHtml));
                            NodeList tableNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "90%")));
                            if (tableNode != null && tableNode.Count > 0)
                            {
                                // Flatten the metadata table into "label:value\r\n" lines: cells at even
                                // indexes are followed by ":", cells at odd indexes end the line.
                                string   ctx      = string.Empty;
                                TableTag tableTag = tableNode[0] as TableTag;
                                for (int r = 0; tableTag != null && r < tableTag.RowCount; r++)
                                {
                                    for (int c = 0; c < tableTag.Rows[r].ColumnCount; c++)
                                    {
                                        string temp = tableTag.Rows[r].Columns[c].ToNodePlainString();
                                        if ((c + 1) % 2 == 0)
                                        {
                                            ctx += temp.GetReplace(":,:") + "\r\n";
                                        }
                                        else
                                        {
                                            ctx += temp.GetReplace(":,:") + ":";
                                        }
                                    }
                                }
                                ItemCode = ctx.GetRegex("索引号");
                            }

                            // Re-scan the same detail HTML for the table nested under div#zoom.
                            parser.Reset();
                            tableNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "zoom")), true), new TagNameFilter("table")));
                            if (tableNode != null && tableNode.Count > 0)
                            {
                                // Row 0 supplies the labels and row 1 the values, paired column by column.
                                string   ctx      = string.Empty;
                                TableTag tableTag = tableNode[0] as TableTag;
                                // An empty table has no label row; treat it as zero columns
                                // rather than indexing Rows[0] and throwing.
                                int labelColumns = (tableTag != null && tableTag.RowCount > 0) ? tableTag.Rows[0].ColumnCount : 0;
                                for (int c = 0; c < labelColumns; c++)
                                {
                                    try
                                    {
                                        ctx += tableTag.Rows[0].Columns[c].ToNodePlainString() + ":";
                                        ctx += tableTag.Rows[1].Columns[c].ToNodePlainString() + "\r\n";
                                    }
                                    catch
                                    {
                                        // Best effort: a missing value row/cell leaves the label without a value.
                                    }
                                }
                                ApprovalCode = ctx.GetRegex("批准文号");
                                ApprovalUnit = ctx.GetRegex("项目申请人");
                                ApprovalDate = ctx.GetRegex("批准时间");
                                ItemContent  = ctx.GetRegex("主要建设内容", true, 500);
                            }


                            PlanType = "项目信息";
                            MsgType  = "山东省发展和改革委员会";
                            ItemPlan info = ToolDb.GenItemPlan("山东省", "山东省及地市", "", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);

                            list.Add(info);

                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Esempio n. 22
0
        /// <summary>
        /// Crawls the listing at <c>SiteUrl</c> (further pages are requested by appending
        /// <c>&amp;page={n}</c>) and builds one <see cref="ItemPlan"/> for every list entry whose
        /// detail page under <c>http://www.lndp.gov.cn/</c> contains a <c>td</c> with
        /// <c>height="200"</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl stops as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>
        /// The collected items, or <c>null</c> when the first listing page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <ItemPlan>();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch
            {
                return(null);
            }

            // Page count: text of the first <strong> element with the "current page" prefix removed.
            try
            {
                pageInt = int.Parse(html.GetRegexBegEnd("<strong>", "</strong>").GetReplace("<fontcolor=red>1</font>/"));
            }
            catch { }

            for (int pageNo = 1; pageNo <= pageInt; pageNo++)
            {
                if (pageNo > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&page=" + pageNo, Encoding.Default);
                    }
                    catch { continue; }
                }

                Parser   parser  = new Parser(new Lexer(html));
                NodeList entries = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellspacing", "5")), true), new TagNameFilter("li")));
                if (entries == null || entries.Count <= 0)
                {
                    continue;
                }

                for (int k = 0; k < entries.Count; k++)
                {
                    ATag aTag = entries[k].GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }

                    string ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty, BuildUnit = string.Empty, BuildNature = string.Empty;
                    string TotalInvest = string.Empty, PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty;
                    string ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty, MsgUnit = string.Empty;
                    string PlanDate = string.Empty, PlanType = string.Empty, PlanBeginDate = string.Empty, PlanEndDate = string.Empty;
                    string CtxHtml = string.Empty, ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty, MsgType = string.Empty;

                    ItemName = aTag.LinkText.GetReplace("省发展改革委、,省发展改革委,&nbsp;");
                    PlanDate = entries[k].ToPlainTextString().GetDateRegex();
                    InfoUrl  = "http://www.lndp.gov.cn/" + aTag.Link;

                    string detailHtml = string.Empty;
                    try
                    {
                        detailHtml = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(detailHtml));
                    NodeList detailNodes = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("height", "200")));
                    if (detailNodes == null || detailNodes.Count <= 0)
                    {
                        continue;
                    }

                    CtxHtml = detailNodes.AsHtml();
                    parser  = new Parser(new Lexer(CtxHtml));
                    NodeList tables = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                    if (tables != null && tables.Count > 0)
                    {
                        // Flatten the first table into "label:value\r\n" lines: cells at even
                        // indexes are followed by ":", cells at odd indexes end the line.
                        TableTag detailTable = tables[0] as TableTag;
                        for (int rowIdx = 0; rowIdx < detailTable.RowCount; rowIdx++)
                        {
                            for (int colIdx = 0; colIdx < detailTable.Rows[rowIdx].ColumnCount; colIdx++)
                            {
                                string cellText  = detailTable.Rows[rowIdx].Columns[colIdx].ToNodePlainString();
                                string separator = (colIdx % 2 == 1) ? "\r\n" : ":";
                                ItemCtx += cellText.GetReplace(":,:") + separator;
                            }
                        }
                    }

                    ItemContent  = ItemCtx.GetRegex("内容概述", true, 500);
                    TotalInvest  = ItemCtx.GetRegexBegEnd("总投资", "万元").GetChina();
                    ApprovalCode = ItemCtx.GetCodeRegex();
                    ItemCode     = ApprovalCode;
                    if (string.IsNullOrEmpty(ItemCode))
                    {
                        // No code pattern found: fall back to the labelled number field.
                        ApprovalCode = ItemCtx.GetRegex("编  号");
                        ItemCode     = ApprovalCode;
                    }
                    PlanType = "项目信息";
                    MsgType  = "辽宁省发展和改革委员会";

                    ItemPlan info = ToolDb.GenItemPlan("辽宁省", "辽宁省及地市", "", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);
                    list.Add(info);

                    bool limitReached = list.Count >= this.MaxCount;
                    if (!crawlAll && limitReached)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Esempio n. 23
0
        /// <summary>
        /// Crawls the listing at <c>SiteUrl</c> (further pages at
        /// <c>SiteUrl + "/index_{n}.shtml"</c>) and builds one <see cref="ItemPlan"/> for every
        /// row whose detail page under <c>http://www.yantian.gov.cn</c> contains a
        /// <c>div</c> with <c>id="content"</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When false, the crawl stops as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>
        /// The collected items; an empty list when the first listing page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <ItemPlan>();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return(list);
            }
            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "right")), true), new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("align", "right"))));

            if (pageNode != null && pageNode.Count > 0)
            {
                string temp = pageNode.AsString().GetRegexBegEnd("共", "页");
                try
                {
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: an unreadable pager means only the first page is crawled.
                }
            }
            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "/index_" + (i - 1).ToNodeString() + ".shtml", Encoding.UTF8);
                    }
                    catch { continue; }
                }
                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "100%")));
                if (listNode != null && listNode.Count > 0)
                {
                    TableTag table = listNode[0] as TableTag;
                    if (table == null)
                    {
                        continue;
                    }
                    for (int j = 0; j < table.RowCount; j++)
                    {
                        TableRow tr = table.Rows[j];
                        string   ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty, BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty, PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty, ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty, MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty, PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty, ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty, MsgType = string.Empty;

                        // Column 1 carries both the link and the date; skip malformed rows
                        // instead of throwing and losing everything collected so far.
                        if (tr.ColumnCount < 2)
                        {
                            continue;
                        }
                        ATag aTag = tr.Columns[1].GetATag();
                        if (aTag == null)
                        {
                            // No link in the row: there is no detail page to fetch.
                            continue;
                        }
                        // A missing title attribute is treated as an empty title so the
                        // Replace fallback below cannot dereference null.
                        string tempName = aTag.GetAttribute("title") ?? string.Empty;
                        PlanDate = tr.Columns[1].ToPlainTextString().GetDateRegex();
                        // Name extraction, in order of preference: text inside the quotation-mark
                        // entities, text between the two fixed phrases, then the title with the
                        // leading phrase and closing-quote entity stripped.
                        ItemName = tempName.GetRegexBegEnd("&ldquo;", "&rdquo;");
                        if (string.IsNullOrEmpty(ItemName))
                        {
                            ItemName = tempName.GetRegexBegEnd("关于下达", "政府投资项目");
                        }
                        if (string.IsNullOrEmpty(ItemName))
                        {
                            ItemName = tempName.Replace("关于下达", "").Replace("&rdquo;", "");
                        }

                        InfoUrl = "http://www.yantian.gov.cn" + aTag.Link;

                        string htmldtl = string.Empty;
                        try
                        {
                            htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8);
                        }
                        catch { continue; }
                        parser = new Parser(new Lexer(htmldtl));
                        NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "content")));
                        if (dtlNode != null && dtlNode.Count > 0)
                        {
                            CtxHtml = dtlNode.AsHtml();
                            ItemCtx = CtxHtml.ToCtxString();
                            parser  = new Parser(new Lexer(CtxHtml));
                            NodeList pNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("p"));
                            if (pNode != null && pNode.Count > 0)
                            {
                                // The first paragraph (colons removed) is used as the build unit.
                                BuildUnit = pNode[0].ToNodePlainString().Replace(":", "").Replace(":", "");
                            }
                            // Each amount is tried with progressively different label wordings.
                            TotalInvest = ItemCtx.GetRegexBegEnd("项目总投资", "万元");
                            if (string.IsNullOrEmpty(TotalInvest))
                            {
                                TotalInvest = ItemCtx.GetRegexBegEnd("项目总投资共", "万元");
                            }
                            IssuedPlan = ItemCtx.GetRegexBegEnd("本次下达资金", "万元");
                            if (string.IsNullOrEmpty(IssuedPlan))
                            {
                                IssuedPlan = ItemCtx.GetRegexBegEnd("下达资金", "万元");
                            }
                            if (string.IsNullOrEmpty(IssuedPlan))
                            {
                                IssuedPlan = ItemCtx.GetRegexBegEnd("本次下达前期费用", "万元");
                            }

                            PlanType = "项目审批信息";
                            MsgType  = "深圳市盐田区";

                            ItemPlan info = ToolDb.GenItemPlan("广东省", "深圳市区", "盐田区", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);

                            list.Add(info);
                            if (!crawlAll && list.Count >= this.MaxCount)
                            {
                                return(list);
                            }
                        }
                    }
                }
            }
            return(list);
        }
Esempio n. 24
0
        /// <summary>
        /// Crawls the paged project list under <c>SiteUrl</c> and builds one <c>ItemPlan</c>
        /// for every list entry whose detail page contains a <c>div</c> with
        /// <c>id="contenter"</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>
        /// The collected items, or <c>null</c> when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<ItemPlan>();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch
            {
                // Kept as-is: a failed first download is reported as null, not as an empty list.
                return(null);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "pager")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // Strip the "createPageHTML" text and its punctuation so that only the page count is left.
                    string temp = pageNode.AsString().Replace("createPageHTML", "").Replace(" 0,", "").Replace("(", "").Replace(")", "").Replace("index", "").Replace("html", "").Replace(",", "").Replace("\"", "").Replace(";", "").Trim();
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: an unparsable pager leaves pageInt at 1, so only the first page is crawled.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // Page i is requested as index_(i-1).html; the first page is SiteUrl itself.
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "index_" + (i - 1).ToString() + ".html");
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "listul")), true), new TagNameFilter("li")));
                if (listNode == null || listNode.Count < 1)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    string ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty,
                           BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty,
                           PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty,
                           ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty,
                           MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty,
                           PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty,
                           ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty,
                           MsgType = string.Empty;

                    ATag aTag = listNode[j].GetATag();
                    if (aTag == null)
                    {
                        // A list entry without a link cannot be followed; skip it instead of
                        // aborting the whole crawl with a NullReferenceException.
                        continue;
                    }

                    ItemName = aTag.GetAttribute("title");
                    InfoUrl  = "http://www.szft.gov.cn/" + aTag.Link.Replace("../", "").Replace("./", "");

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch
                    {
                        continue;
                    }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "contenter")));
                    if (dtlNode == null || dtlNode.Count < 1)
                    {
                        continue;
                    }

                    CtxHtml = dtlNode.AsHtml();
                    ItemCtx = CtxHtml.ToCtxString();

                    // Publication date: labelled field first, then any date in the text, then today.
                    PlanDate = ItemCtx.GetRegex("信息发布日期").GetDateRegex();
                    if (string.IsNullOrEmpty(PlanDate))
                    {
                        PlanDate = ItemCtx.GetDateRegex();
                    }
                    if (string.IsNullOrEmpty(PlanDate))
                    {
                        PlanDate = DateTime.Now.ToString("yyyy-MM-dd");
                    }

                    ItemCode = ItemCtx.GetRegex("项目编码").Replace(" ", "");

                    parser = new Parser(new Lexer(CtxHtml));
                    NodeList tableNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                    if (tableNode != null && tableNode.Count > 0)
                    {
                        TableTag table = tableNode[0] as TableTag;
                        if (table != null && table.RowCount >= 2)
                        {
                            TableRow tr  = table.Rows[0];
                            TableRow trC = table.Rows[1];

                            // Turn the first two rows into "header:value" lines. Only columns present
                            // in BOTH rows are paired, so a shorter value row no longer throws.
                            int pairCount = Math.Min(tr.ColumnCount, trC.ColumnCount);
                            System.Text.StringBuilder pairs = new System.Text.StringBuilder();
                            for (int k = 0; k < pairCount; k++)
                            {
                                pairs.Append(tr.Columns[k].ToNodePlainString()).Append(":");
                                pairs.Append(trC.Columns[k].ToNodePlainString()).Append("\r\n");
                            }
                            string ctx = pairs.ToString();

                            if (string.IsNullOrEmpty(ItemCode))
                            {
                                ItemCode = ctx.GetRegex("序号");
                            }
                            BuildUnit    = ctx.GetRegex("建设单位");
                            BuildNature  = ctx.GetRegex("建设性质");
                            TotalInvest  = ctx.GetRegex("总投资(万元),总投资");
                            PlanInvest   = ctx.GetRegex("本期计划(万元),本期计划");
                            IssuedPlan   = ctx.GetRegex("累计已下达计划(万元),累计已下达计划");
                            InvestSource = ctx.GetRegex("资金来源");
                            ItemContent  = ctx.GetRegex("主要建设内容,建设内容");
                            if (string.IsNullOrEmpty(ItemContent) && trC.ColumnCount > 0)
                            {
                                // Fall back to the last cell of the value row.
                                ItemContent = trC.Columns[trC.ColumnCount - 1].ToNodePlainString();
                            }
                        }
                    }

                    PlanType = "项目审批信息";
                    MsgType  = "深圳市福田区发展和改革局";

                    ItemPlan info = ToolDb.GenItemPlan("广东省", "深圳市区", "福田区", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);

                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Esempio n. 25
0
        /// <summary>
        /// Crawls the <c>gvlist</c> grid at <c>SiteUrl</c> page by page (later pages are
        /// requested by posting the ASP.NET form fields back to the same URL) and builds one
        /// <c>ItemPlan</c> for every row whose detail page contains a <c>td</c> with
        /// <c>class="stylecontent"</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>The collected items; empty when the first page cannot be downloaded.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<ItemPlan>();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return(list);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "gvlist")), true), new TagNameFilter("table")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The page count is read from the last cell of the first row of the table nested in gvlist.
                    TableTag pagerTable = pageNode[0] as TableTag;
                    string   temp       = pagerTable.Rows[0].Columns[pagerTable.Rows[0].ColumnCount - 1].ToNodePlainString();
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: any failure here leaves pageInt at 1, so only the first page is crawled.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    // The post-back reuses the hidden form state of the most recently downloaded page.
                    string viewState       = this.ToolWebSite.GetAspNetViewState(html);
                    string eventValidation = this.ToolWebSite.GetAspNetEventValidation(html);
                    NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(
                        new string[] {
                            "__EVENTTARGET",
                            "__EVENTARGUMENT",
                            "__VIEWSTATE",
                            "__VIEWSTATEGENERATOR",
                            "__EVENTVALIDATION",
                            "ddlDepartment",
                            "txtstartDate",
                            "txtendDate",
                            "keyword",
                        },
                        new string[] {
                            "gvlist",
                            "Page$" + i,
                            viewState,
                            "D5D8AE3D",
                            eventValidation,
                            "发展财政局",
                            string.Empty,
                            string.Empty,
                            string.Empty
                        });
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, nvc, Encoding.UTF8);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("id", "gvlist")));
                if (listNode == null || listNode.Count < 1)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                int num = table.RowCount;
                if (i > 1)
                {
                    // On pages after the first the last row is excluded
                    // (presumably a pager row; confirm against the live page).
                    num--;
                }

                // Row 0 is skipped (treated as the header row).
                for (int j = 1; j < num; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 3)
                    {
                        // Cells 0..2 are read below; a shorter row (e.g. a spanning row) is
                        // skipped instead of aborting the whole crawl with an index error.
                        continue;
                    }

                    string ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty,
                           BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty,
                           PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty,
                           ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty,
                           MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty,
                           PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty,
                           ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty,
                           MsgType = string.Empty;

                    ItemCode = tr.Columns[0].ToNodePlainString();
                    ItemName = tr.Columns[1].ToNodePlainString();
                    PlanDate = tr.Columns[2].ToNodePlainString();
                    InfoUrl  = "http://other.sznews.com/pinshanproject/" + tr.Columns[1].GetATagHref();

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8).GetJsString();
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("class", "stylecontent")));
                    if (dtlNode == null || dtlNode.Count < 1)
                    {
                        continue;
                    }

                    CtxHtml     = dtlNode.AsHtml();
                    ItemCtx     = CtxHtml.ToCtxString();
                    TotalInvest = ItemCtx.GetRegexBegEnd("总投资", "万元").GetChina();
                    IssuedPlan  = ItemCtx.GetRegexBegEnd("本次下达", "万元").GetChina();
                    PlanType    = "项目核准信息";
                    MsgType     = "深圳市坪山区发改局";

                    ItemPlan info = ToolDb.GenItemPlan("广东省", "深圳市区", "坪山区", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);

                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Esempio n. 26
0
        /// <summary>
        /// Crawls the paged project list under <c>SiteUrl</c> and builds one <c>ItemPlan</c>
        /// for every list row whose detail page contains a <c>div</c> with
        /// <c>id="artibody"</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>
        /// The collected items, or <c>null</c> when the first list page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<ItemPlan>();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch
            {
                // Kept as-is: a failed first download is reported as null, not as an empty list.
                return(null);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "tdfont")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // Drop the "function createPageHTML" text first so that the remaining
                    // "createPageHTML" occurrence is the one the page count is taken from.
                    string temp = pageNode.AsString().Replace("function createPageHTML", "").GetRegexBegEnd("createPageHTML", ",").Replace("(", "");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: an unparsable pager leaves pageInt at 1, so only the first page is crawled.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        // Page i is requested as index_(i-1).htm; the first page is SiteUrl itself.
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "index_" + (i - 1) + ".htm", Encoding.Default);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("cellspacing", "3")));
                if (listNode == null || listNode.Count < 1)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                for (int j = 0; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];

                    // Cells 1 and 2 are read below, so at least three cells are required.
                    // (The previous "< 2" guard let two-cell rows through to Columns[2].)
                    if (tr.ColumnCount < 3)
                    {
                        continue;
                    }

                    ATag aTag = tr.Columns[1].GetATag();
                    if (aTag == null)
                    {
                        // No link in the title cell: nothing to follow for this row.
                        continue;
                    }

                    string ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty,
                           BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty,
                           PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty,
                           ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty,
                           MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty,
                           PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty,
                           ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty,
                           MsgType = string.Empty;

                    ItemName = System.Web.HttpUtility.UrlDecode(aTag.GetAttribute("title"));
                    PlanDate = tr.Columns[2].ToPlainTextString().GetDateRegex();

                    // Links into these two areas are resolved against the site root; every other
                    // link is resolved against zdxm/zdxmxx/. The match is ordinal and
                    // case-insensitive so it does not depend on the current culture.
                    string link = aTag.Link;
                    bool rootRelative = link.IndexOf("departmentsite", StringComparison.OrdinalIgnoreCase) >= 0
                                        || link.IndexOf("fagaiing", StringComparison.OrdinalIgnoreCase) >= 0;
                    if (rootRelative)
                    {
                        InfoUrl = "http://www.jxdpc.gov.cn/" + link.Replace("../", "");
                    }
                    else
                    {
                        InfoUrl = "http://www.jxdpc.gov.cn/" + "zdxm/zdxmxx/" + link.Replace("./", "");
                    }

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.Default);
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "artibody")));
                    if (dtlNode == null || dtlNode.Count < 1)
                    {
                        continue;
                    }

                    CtxHtml = dtlNode.AsHtml();
                    ItemCtx = CtxHtml.ToCtxString();

                    PlanType = "建设工程";
                    MsgType  = "江西省发展和改革委员会";

                    ItemPlan info = ToolDb.GenItemPlan("江西省", "江西省及地市", "", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);

                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Esempio n. 27
0
        /// <summary>
        /// Crawls the paged <c>listTable</c> at <c>SiteUrl</c> (later pages via the
        /// <c>pageNo</c> query parameter) and builds one <c>ItemPlan</c> for every row whose
        /// detail page contains a <c>div</c> with <c>id="main2"</c>.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>The collected items; empty when the first page cannot be downloaded.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<ItemPlan>();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.UTF8);
            }
            catch
            {
                return(list);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "pageNavigate")));

            if (pageNode != null && pageNode.Count > 0)
            {
                string temp = pageNode.AsString().GetRegexBegEnd("/共", "页");
                try
                {
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: an unparsable pager leaves pageInt at 1, so only the first page is crawled.
                }
            }

            // Title prefixes tried in this order when deriving the project name from the link text.
            string[] namePrefixes = new string[] { "关于下达", "关于调整下达", "关于预安排", "关于追加", "关于" };

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl + "&pageNo=" + i, Encoding.UTF8);
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "listTable")));
                if (listNode == null || listNode.Count < 1)
                {
                    continue;
                }

                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // Row 0 is skipped (treated as the header row).
                for (int j = 1; j < table.RowCount; j++)
                {
                    TableRow tr = table.Rows[j];
                    if (tr.ColumnCount < 5)
                    {
                        // Cells 1..4 are read below; a shorter row is skipped instead of
                        // aborting the whole crawl with an index error.
                        continue;
                    }

                    ATag aTag = tr.Columns[2].GetATag();
                    if (aTag == null)
                    {
                        // No link in the title cell: nothing to follow for this row.
                        continue;
                    }

                    string ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty,
                           BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty,
                           PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty,
                           ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty,
                           MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty,
                           PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty,
                           ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty,
                           MsgType = string.Empty;

                    MsgUnit      = tr.Columns[3].ToNodePlainString();
                    ApprovalCode = tr.Columns[1].ToNodePlainString();
                    PlanDate     = tr.Columns[4].ToPlainTextString().GetDateRegex("yyyy/MM/dd");

                    // Project name: the text between the first matching prefix and "项目";
                    // the whole link text when no prefix yields a result.
                    string tempName = aTag.LinkText.Replace("\n", "").Replace("\t", "").Replace("\r", "").Trim();
                    for (int p = 0; p < namePrefixes.Length && string.IsNullOrEmpty(ItemName); p++)
                    {
                        ItemName = tempName.GetRegexBegEnd(namePrefixes[p], "项目");
                    }
                    if (string.IsNullOrEmpty(ItemName))
                    {
                        ItemName = tempName;
                    }

                    InfoUrl = "http://www.szlh.gov.cn" + aTag.Link;
                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl, Encoding.UTF8);
                    }
                    catch { continue; }

                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "main2")));
                    if (dtlNode == null || dtlNode.Count < 1)
                    {
                        continue;
                    }

                    CtxHtml = dtlNode.AsHtml();
                    ItemCtx = CtxHtml.Replace("</p>", "\r\n").Replace("</tr>", "\r\n").ToCtxString();

                    // First pass: extract the investment figures from the whole detail text.
                    TotalInvest = ItemCtx.GetRegexBegEnd("总投资控制在", "万元");
                    PlanInvest  = ItemCtx.GetRegexBegEnd("计划安排建设资金", "万元");
                    if (string.IsNullOrEmpty(TotalInvest))
                    {
                        TotalInvest = ItemCtx.GetRegexBegEnd("计划项目总投资", "万元").Replace("为", "");
                    }
                    if (string.IsNullOrEmpty(TotalInvest))
                    {
                        TotalInvest = ItemCtx.GetRegexBegEnd("计划共安排建设资金", "万元");
                    }
                    if (string.IsNullOrEmpty(TotalInvest))
                    {
                        TotalInvest = ItemCtx.GetRegexBegEnd("计划共安排投资", "万元");
                    }

                    // Second pass: retry on the text of the justified <div> blocks, filling in
                    // ONLY the figures that are still missing. A value found in the first pass
                    // is never overwritten, and the alternative patterns are applied to the
                    // same narrowed text (re-running them on ItemCtx could not find anything
                    // the first pass had not already found).
                    if (string.IsNullOrEmpty(TotalInvest) || string.IsNullOrEmpty(PlanInvest))
                    {
                        parser = new Parser(new Lexer(CtxHtml));
                        NodeList inNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("align", "justify")));
                        if (inNode != null && inNode.Count > 0)
                        {
                            string ctx = inNode.AsString();
                            if (string.IsNullOrEmpty(PlanInvest))
                            {
                                PlanInvest = ctx.GetRegexBegEnd("计划安排建设资金", "万元");
                            }
                            if (string.IsNullOrEmpty(TotalInvest))
                            {
                                TotalInvest = ctx.GetRegexBegEnd("总投资控制在", "万元");
                            }
                            if (string.IsNullOrEmpty(TotalInvest))
                            {
                                TotalInvest = ctx.GetRegexBegEnd("计划项目总投资", "万元").Replace("为", "");
                            }
                            if (string.IsNullOrEmpty(TotalInvest))
                            {
                                TotalInvest = ctx.GetRegexBegEnd("计划共安排建设资金", "万元");
                            }
                            if (string.IsNullOrEmpty(TotalInvest))
                            {
                                TotalInvest = ctx.GetRegexBegEnd("计划共安排投资", "万元");
                            }
                        }
                    }

                    // Content summary: last cell of the last row of the table_suoyin table, if present.
                    parser = new Parser(new Lexer(CtxHtml));
                    NodeList contentNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "table_suoyin")));
                    if (contentNode != null && contentNode.Count > 0)
                    {
                        TableTag dtlTable = contentNode[0] as TableTag;
                        if (dtlTable != null && dtlTable.RowCount > 0)
                        {
                            TableRow lastRow = dtlTable.Rows[dtlTable.RowCount - 1];
                            if (lastRow.ColumnCount > 0)
                            {
                                ItemContent = lastRow.Columns[lastRow.ColumnCount - 1].ToNodePlainString();
                            }
                        }
                    }

                    PlanType = "项目审批信息";
                    MsgType  = "深圳市罗湖区发改局";
                    ItemPlan info = ToolDb.GenItemPlan("广东省", "深圳市区", "罗湖区", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);

                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Esempio n. 28
0
        /// <summary>
        /// Crawls the paged notice list at <c>SiteUrl</c> and builds one <c>ItemPlan</c> for
        /// every list entry whose detail page contains a recognisable detail table. The
        /// table cells are flattened into "label:value" lines from which the individual
        /// fields are extracted.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> items have been collected.
        /// </param>
        /// <returns>The collected items; empty when the first page cannot be downloaded.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List<ItemPlan>();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch
            {
                // Without the first page there is nothing to parse; return the empty list
                // explicitly instead of silently carrying on with empty HTML.
                return(list);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "pagebar")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    string temp = pageNode.AsString().GetRegexBegEnd("/", "页");
                    pageInt = int.Parse(temp);
                }
                catch
                {
                    // Best effort: an unparsable pager leaves pageInt at 1, so only the first page is crawled.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.hljdpc.gov.cn/xzgs/index_" + i + ".jhtml");
                    }
                    catch { continue; }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "right-list")), true), new TagNameFilter("dl")));
                if (listNode == null || listNode.Count < 1)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    ATag aTag = listNode[j].GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }

                    string ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty,
                           BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty,
                           PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty,
                           ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty,
                           MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty,
                           PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty,
                           ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty,
                           MsgType = string.Empty;

                    ItemName = aTag.GetAttribute("title");
                    PlanDate = listNode[j].ToPlainTextString().GetDateRegex();
                    InfoUrl  = "http://www.hljdpc.gov.cn" + aTag.Link;

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch { continue; }

                    // Locate the detail table, trying progressively less specific selectors.
                    parser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "MsoNormalTable")));
                    if (dtlNode == null || dtlNode.Count < 1)
                    {
                        parser.Reset();
                        dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "700")));
                    }
                    if (dtlNode == null || dtlNode.Count < 1)
                    {
                        parser.Reset();
                        dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "590")));
                    }
                    if (dtlNode == null || dtlNode.Count < 1)
                    {
                        parser.Reset();
                        dtlNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "content")), true), new TagNameFilter("table")));
                    }
                    if (dtlNode == null || dtlNode.Count < 1)
                    {
                        continue;
                    }

                    TableTag table = dtlNode[0] as TableTag;
                    if (table == null)
                    {
                        // The first match is not a table tag; skip the entry rather than
                        // failing on table.RowCount.
                        continue;
                    }

                    CtxHtml = dtlNode.AsHtml();

                    // Flatten the table: cells at odd positions (1st, 3rd, ...) are followed by ":",
                    // cells at even positions by a line break, giving "label:value" lines.
                    System.Text.StringBuilder flat = new System.Text.StringBuilder();
                    for (int r = 0; r < table.RowCount; r++)
                    {
                        TableRow row = table.Rows[r];
                        for (int c = 0; c < row.ColumnCount; c++)
                        {
                            string cellText  = row.Columns[c].ToNodePlainString().GetReplace(":,:");
                            string separator = ((c + 1) % 2 == 0) ? "\r\n" : ":";
                            flat.Append(cellText).Append(separator);
                        }
                    }
                    ItemCtx = flat.ToString();

                    ItemCode     = ApprovalCode = ItemCtx.GetRegex("文件号");
                    ItemContent  = ItemCtx.GetRegex("主要内容", true, 500);
                    ApprovalDate = ItemCtx.GetRegex("生成日期").GetDateRegex("yyyy年MM月dd日");
                    MsgUnit      = ItemCtx.GetRegex("发布处室");
                    TotalInvest  = ItemCtx.GetRegexBegEnd("总投资", "万元").GetChina();

                    PlanType = "行政公示 ";
                    MsgType  = "黑龙江省发展和改革委员会";
                    ItemPlan info = ToolDb.GenItemPlan("黑龙江省", "黑龙江省及地市", "", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);

                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Esempio n. 29
0
        /// <summary>
        /// Crawls the paginated listing that starts at <c>SiteUrl</c> (follow-up pages are
        /// fetched from <c>http://www.hnfgw.gov.cn/xxgk/spgbsp/Index_N.html</c>) and builds one
        /// <c>ItemPlan</c> per list entry whose detail page contains a <c>div#PrintTxt</c> node.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> items have been collected;
        /// when <c>true</c>, every listed page is visited.
        /// </param>
        /// <returns>
        /// The collected items, or <c>null</c> when the landing page itself cannot be downloaded
        /// (existing contract, preserved for callers).
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <ItemPlan>();
            string html    = string.Empty;
            int    pageInt = 1;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch
            {
                // Without the landing page nothing can be crawled; callers receive null here.
                return(null);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "pagenav")), true), new TagNameFilter("a")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The last pager link points at the final page, e.g. "/xxgk/spgbsp/Index_12.html".
                    string temp = pageNode[pageNode.Count - 1].GetATagHref();
                    int    parsed;
                    if (int.TryParse(temp.GetReplace(new string[] { "/xxgk/spgbsp/Index_", ".html" }), out parsed))
                    {
                        pageInt = parsed;
                    }
                }
                catch
                {
                    // Best effort: if the pager cannot be read, only the first page is crawled.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.hnfgw.gov.cn/xxgk/spgbsp/Index_" + i + ".html");
                    }
                    catch
                    {
                        // A page that fails to download is skipped; the remaining pages are still tried.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("ul"), new HasAttributeFilter("class", "list14bline")), true), new TagNameFilter("li")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                for (int j = 0; j < listNode.Count; j++)
                {
                    ATag aTag = listNode[j].GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }

                    // Only ItemName, PlanDate, InfoUrl, CtxHtml, ItemCtx, TotalInvest, PlanType and
                    // MsgType are filled in below; every other field is stored as an empty string.
                    string ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty,
                           BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty,
                           PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty,
                           ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty,
                           MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty,
                           PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty,
                           ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty,
                           MsgType = string.Empty;

                    // A link without a title attribute must not abort the whole crawl:
                    // fall back to the visible link text instead of dereferencing null.
                    string title = aTag.GetAttribute("title");
                    if (title == null)
                    {
                        title = aTag.LinkText ?? string.Empty;
                    }

                    ItemName = title.Replace("我委批复", "");
                    PlanDate = listNode[j].ToPlainTextString().GetDateRegex();
                    InfoUrl  = "http://www.hnfgw.gov.cn" + aTag.Link;

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch
                    {
                        // Detail page unavailable: skip this entry only.
                        continue;
                    }

                    Parser   dtlParser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode   = dtlParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "PrintTxt")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        // No content container on the detail page: nothing to store for this entry.
                        continue;
                    }

                    CtxHtml     = dtlNode.AsHtml();
                    ItemCtx     = CtxHtml.ToCtxString();
                    // Presumably extracts the text between the two markers - confirm against GetRegexBegEnd.
                    TotalInvest = ItemCtx.GetRegexBegEnd("总投资", "万元").GetChina();

                    PlanType = "项目审批信息";
                    MsgType  = "湖南省发展和改革委员会";
                    ItemPlan info = ToolDb.GenItemPlan("湖南省", "湖南省及地市", "", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);

                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }
Esempio n. 30
0
        /// <summary>
        /// Crawls the paginated listing that starts at <c>SiteUrl</c> (follow-up pages are
        /// fetched from the <c>www.ynbidding.net/classlist.aspx</c> endpoint) and builds one
        /// <c>ItemPlan</c> per table row whose detail page contains a <c>td#Content</c> node.
        /// </summary>
        /// <param name="crawlAll">
        /// When <c>false</c>, crawling stops as soon as <c>MaxCount</c> items have been collected;
        /// when <c>true</c>, every listed page is visited.
        /// </param>
        /// <returns>
        /// The collected items; an empty list when the landing page cannot be downloaded.
        /// </returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList  list    = new List <ItemPlan>();
            int    pageInt = 1;
            string html    = string.Empty;

            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl);
            }
            catch
            {
                // Without the landing page nothing can be crawled.
                return(list);
            }

            Parser   parser   = new Parser(new Lexer(html));
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "pagebox")));

            if (pageNode != null && pageNode.Count > 0)
            {
                try
                {
                    // The pager text is expected to contain the page total between "共" and "页".
                    string temp = pageNode.AsString().GetRegexBegEnd("共", "页");
                    int    parsed;
                    if (int.TryParse(temp, out parsed))
                    {
                        pageInt = parsed;
                    }
                }
                catch
                {
                    // Best effort: if the pager cannot be read, only the first page is crawled.
                }
            }

            for (int i = 1; i <= pageInt; i++)
            {
                if (i > 1)
                {
                    try
                    {
                        html = this.ToolWebSite.GetHtmlByUrl("http://www.ynbidding.net/classlist.aspx?no-cache=0.04312339340010729&id=685790278180&id=://www.ynbidding.net/list&page=" + i + "&_=");
                    }
                    catch
                    {
                        // A page that fails to download is skipped; the remaining pages are still tried.
                        continue;
                    }
                }

                parser = new Parser(new Lexer(html));
                NodeList listNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("width", "100%")));
                if (listNode == null || listNode.Count == 0)
                {
                    continue;
                }

                // The first matching node is expected to be the listing table; if the page layout
                // differs and it is not a table tag, skip the page instead of dereferencing null.
                TableTag table = listNode[0] as TableTag;
                if (table == null)
                {
                    continue;
                }

                // Row 0 is skipped (presumably the header row - confirm against the live page).
                for (int j = 1; j < table.RowCount; j++)
                {
                    TableRow tr   = table.Rows[j];
                    ATag     aTag = tr.GetATag();
                    if (aTag == null)
                    {
                        continue;
                    }

                    // Only ItemName, PlanDate, InfoUrl, CtxHtml, ItemCtx, PlanType and MsgType are
                    // filled in below; every other field is stored as an empty string.
                    string ItemCode = string.Empty, ItemName = string.Empty, ItemAddress = string.Empty,
                           BuildUnit = string.Empty, BuildNature = string.Empty, TotalInvest = string.Empty,
                           PlanInvest = string.Empty, IssuedPlan = string.Empty, InvestSource = string.Empty,
                           ApprovalUnit = string.Empty, ApprovalDate = string.Empty, ApprovalCode = string.Empty,
                           MsgUnit = string.Empty, PlanDate = string.Empty, PlanType = string.Empty,
                           PlanBeginDate = string.Empty, PlanEndDate = string.Empty, CtxHtml = string.Empty,
                           ItemCtx = string.Empty, ItemContent = string.Empty, InfoUrl = string.Empty,
                           MsgType = string.Empty;

                    ItemName = aTag.LinkText;
                    PlanDate = tr.Columns[0].ToNodePlainString().GetDateRegex("yyyy/MM/dd");
                    InfoUrl  = "http://www.ynbidding.net" + aTag.Link;

                    string htmldtl = string.Empty;
                    try
                    {
                        htmldtl = this.ToolWebSite.GetHtmlByUrl(InfoUrl).GetJsString();
                    }
                    catch
                    {
                        // Detail page unavailable: skip this entry only.
                        continue;
                    }

                    Parser   dtlParser = new Parser(new Lexer(htmldtl));
                    NodeList dtlNode   = dtlParser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("td"), new HasAttributeFilter("id", "Content")));
                    if (dtlNode == null || dtlNode.Count == 0)
                    {
                        // No content cell on the detail page: nothing to store for this entry.
                        continue;
                    }

                    CtxHtml  = dtlNode.AsHtml();
                    ItemCtx  = CtxHtml.ToCtxString();
                    PlanType = "项目信息";
                    MsgType  = "云南省发展和改革委员会";
                    ItemPlan info = ToolDb.GenItemPlan("云南省", "云南省及地市", "", ItemCode, ItemName, ItemAddress, BuildUnit, BuildNature, TotalInvest, PlanInvest, IssuedPlan, InvestSource, ApprovalUnit, ApprovalDate, ApprovalCode, MsgUnit, PlanDate, PlanType, PlanBeginDate, PlanEndDate, CtxHtml, ItemCtx, ItemContent, MsgType, InfoUrl);

                    list.Add(info);
                    if (!crawlAll && list.Count >= this.MaxCount)
                    {
                        return(list);
                    }
                }
            }
            return(list);
        }