Пример #1
0
        /// <summary>
        /// Fetches the listing page for <paramref name="strLink"/>, picks out every entry whose
        /// title matches the pattern supplied by <c>page.GetValidKey</c>, loads that entry's
        /// detail page and appends the collected data to <paramref name="rList"/>.
        /// </summary>
        /// <param name="page">Helper used to download page content and to supply the title filter pattern.</param>
        /// <param name="strLink">Address of the listing page to scan.</param>
        /// <param name="rList">List that receives one <c>PackageInfo</c> per successfully parsed entry.</param>
        /// <remarks>
        /// Titles, times and detail links are matched independently over the whole page and then
        /// paired by position, so the i-th title is assumed to belong to the i-th time and link.
        /// Any exception raised while parsing a single entry is logged through
        /// <c>SaveInfo.WriteError</c> and that entry is skipped.
        /// </remarks>
        private void ReadInfo(GetPage page, string strLink, List <PackageInfo> rList)
        {
            const string strUrl     = "http://www.gdgpo.gov.cn";
            string       strContent = page.GetPageInfo(strLink);

            // NOTE(review): in pattern [5] the parentheses are regex metacharacters (a group, and
            // members of a negated character class), not literal parentheses - confirm that this
            // is what the page markup requires.
            string[] strRegs = { "title=\"[^\"]*",        // [0] project name
                                 "<em>.*<",               // [1] time
                                 page.GetValidKey,        // [2] filter applied to each matched title
                                 "/showNotice/id[^\"]*",  // [3] detail URL path (also stored as the index)
                                 "项目编号[^<]*</span>",      // [4] project number
                                 "预算金额(元)[^(span)]*span", // [5] price
                                 "受.*的委托",                // [6] school
            };

            Regex[] rUrl = new Regex[strRegs.Length];
            for (int i = 0; i < strRegs.Length; i++)
            {
                rUrl[i] = new Regex(strRegs[i]);
            }

            // Scan the listing page once per pattern instead of re-scanning it on every iteration.
            MatchCollection mList     = rUrl[0].Matches(strContent);
            MatchCollection timeList  = rUrl[1].Matches(strContent);
            MatchCollection indexList = rUrl[3].Matches(strContent);

            for (int i = 0; i < mList.Count; i++)
            {
                string strTitle = mList[i].Value;

                if (!rUrl[2].IsMatch(strTitle))
                {
                    // Title does not match the filter pattern: ignore this entry.
                    continue;
                }

                try
                {
                    PackageInfo p = new PackageInfo();

                    // Drop the leading 7 characters (title=") to keep only the title text.
                    p.Name = strTitle.Substring(7);

                    // Drop the leading 4 characters (<em>) and the trailing '<'.
                    string strTime = timeList[i].Value.Substring(4);
                    p.Time  = strTime.Substring(0, strTime.Length - 1);

                    p.Index = indexList[i].Value;
                    p.Url   = strUrl;

                    string strDetail = page.GetPageInfo(strUrl + p.Index);

                    if (strDetail.Length > 0)
                    {
                        // Each field skips a fixed-length label prefix and is cut at the next '<'.
                        // A missing field makes Substring throw, which is handled below.
                        string strNumber = rUrl[4].Match(strDetail).Value.Substring(5);
                        p.Number = strNumber.Substring(0, strNumber.IndexOf('<'));

                        string strPrice = rUrl[5].Match(strDetail).Value.Substring(8);
                        p.Price  = strPrice.Substring(0, strPrice.IndexOf('<'));

                        p.Seller = rUrl[6].Match(strDetail).Value;
                    }
                    rList.Add(p);
                }
                catch (Exception e)
                {
                    // Best effort: record the failure and continue with the next entry.
                    new SaveInfo().WriteError(e);
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Downloads the bill list page of www.gdedulscg.cn and builds one <c>PackageInfo</c>
        /// for every entry whose project-name block matches the pattern supplied by
        /// <c>page.GetValidKey</c>.
        /// </summary>
        /// <returns>
        /// The parsed entries. Each entry is inserted at the front of the list, so the result
        /// is in reverse page order.
        /// </returns>
        /// <remarks>
        /// Every column is matched independently over the whole page and then paired by
        /// position, so the i-th project name is assumed to belong to the i-th number, unit,
        /// price and time. Any exception raised while parsing a single entry is logged through
        /// <c>SaveInfo.WriteError</c> and that entry is skipped.
        /// </remarks>
        public List <PackageInfo> GetPackage()
        {
            const string strUrl     = "http://www.gdedulscg.cn/";
            GetPage      page       = new GetPage();
            string       strUrlLink = "http://www.gdedulscg.cn/home/bill/billlist";
            string       strContent = page.GetPageInfo(strUrlLink);

            string[]           strRegs = { "list_title_num_data.*</div>",         // [0] number
                                           "list_title_unit_data.*</div>",        // [1] school (purchasing unit)
                                           "<div.*list_title_theme_data.*</div>", // [2] project name
                                           "list_title_high_data.*</div>",        // [3] school's quoted price
                                           "list_title_time_data.*</div>",        // [4] publication time
                                           page.GetValidKey,                      // [5] filter applied to each project-name block
                                           "=\"[^\"]*\"",                         // [6] project name - the actual text
                                           ">.*</div>",                           // [7] number, price, school, time
                                           "see_info.*;",                         // [8] index
            };
            List <PackageInfo> rList = new List <PackageInfo>();

            Regex[] rUrl = new Regex[strRegs.Length];
            for (int i = 0; i < strRegs.Length; i++)
            {
                rUrl[i] = new Regex(strRegs[i]);
            }

            // Scan the page once per column instead of re-scanning it on every iteration.
            MatchCollection mList      = rUrl[2].Matches(strContent);
            MatchCollection numberList = rUrl[0].Matches(strContent);
            MatchCollection unitList   = rUrl[1].Matches(strContent);
            MatchCollection priceList  = rUrl[3].Matches(strContent);
            MatchCollection timeList   = rUrl[4].Matches(strContent);

            for (int i = 0; i < mList.Count; i++)
            {
                string strTheme = mList[i].Value;

                if (!rUrl[5].IsMatch(strTheme))
                {
                    // Project-name block does not match the filter pattern: ignore this entry.
                    continue;
                }

                try
                {
                    // NOTE(review): Substring(1) only strips the leading '>' of pattern [7];
                    // the matched text still ends with "</div>" - confirm consumers expect that.
                    rList.Insert(0, new PackageInfo()
                    {
                        Name   = rUrl[6].Matches(strTheme)[0].Value.Substring(2),
                        Number = rUrl[7].Matches(numberList[i].Value)[0].Value.Substring(1),
                        Index  = rUrl[8].Matches(strTheme)[0].Value.Substring(8),
                        Price  = rUrl[7].Matches(priceList[i].Value)[0].Value.Substring(1),
                        Seller = rUrl[7].Matches(unitList[i].Value)[0].Value.Substring(1),
                        Time   = rUrl[7].Matches(timeList[i].Value)[0].Value.Substring(1),
                        Url    = strUrl
                    });
                }
                catch (Exception e)
                {
                    // Best effort: record the failure and continue with the next entry.
                    new SaveInfo().WriteError(e);
                }
            }
            return(rList);
        }
Пример #3
0
        /// <summary>
        /// Collects package entries from the two www.gdgpo.gov.cn listing pages by running
        /// <c>ReadInfo</c> on each of them with a shared <c>GetPage</c> instance.
        /// </summary>
        /// <returns>The entries gathered from both listing pages, in the order they were read.</returns>
        public List <PackageInfo> GetPackage()
        {
            GetPage            fetcher  = new GetPage();
            List <PackageInfo> packages = new List <PackageInfo>();

            string[] listingUrls =
            {
                "http://www.gdgpo.gov.cn/queryMoreCityCountyInfoList2.do",
                "http://www.gdgpo.gov.cn/queryMoreCityCountyInfoList2/channelCode/00051.html"
            };

            // Both pages are parsed into the same result list.
            for (int n = 0; n < listingUrls.Length; n++)
            {
                this.ReadInfo(fetcher, listingUrls[n], packages);
            }

            return packages;
        }
Пример #4
0
        /// <summary>
        /// Queries the CCTV search page, locates the result carrying today's date, downloads
        /// the linked page and returns the text between the two marker strings used below.
        /// </summary>
        /// <returns>
        /// The extracted text; an empty string when the search result contains no entry for
        /// today's date; or the literal "内容解析错误" when the markers cannot be found in the
        /// expected order.
        /// </returns>
        private string GetNewsContent()
        {
            GetPage page       = new GetPage();
            string  strUrlLink = "https://search.cctv.com/ifsearch.php?page=1&qtext=%E6%96%B0%E9%97%BB%E8%81%94%E6%92%AD&sort=relevance&pageSize=20&type=video&vtime=-1&datepid=1&channel=&pageflag=0&qtext_str=%E6%96%B0%E9%97%BB%E8%81%94%E6%92%AD";
            string  strContent = page.GetPageInfo(strUrlLink);

            // Read the date once so both stamps agree even if the call straddles midnight, and
            // format with the invariant culture so the digits are always Gregorian.
            DateTime today = DateTime.Today;
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            string strRegex = today.ToString("yyyyMMdd", invariant) + ".*http[^(\")]*shtml.*" + today.ToString("yyyy-MM-dd", invariant);
            Regex  rDateUrl = new Regex(strRegex);

            strContent = rDateUrl.Match(strContent).Value;

            if (strContent.Length == 0)
            {
                // No search result for today.
                return(string.Empty);
            }

            // Pull the page address out of the matched fragment.
            strRegex = "http[^(\")]*shtml";
            Regex rUrl = new Regex(strRegex);

            strContent = rUrl.Match(strContent).Value;

            // The address is embedded with escaped slashes ("\/"); restore plain "/" before fetching.
            strContent = page.GetPageInfo(strContent.Replace("\\/", "/"));

            // The page body sometimes contains line breaks, which defeats a "<p>.*</p>" regex,
            // so the text is located by its start and end marker strings instead.
            int strStart = strContent.IndexOf("本期节目主要内容", StringComparison.Ordinal);
            int strEnd   = strContent.IndexOf("(《新闻联播》", StringComparison.Ordinal);

            // IndexOf reports "not found" as -1, so index 0 is a valid start position.
            if (strStart >= 0 && strEnd > strStart)
            {
                strContent = strContent.Substring(strStart, strEnd - strStart);
            }
            else
            {
                strContent = "内容解析错误";
            }

            return(strContent);
        }