Пример #1
0
        static void Main(string[] args)
        {
            string urlfx = string.Empty;
            string title = string.Empty;

            ArrayList keyword = keywords();
            DataTable dt      = DBZheng.getDataTable("SELECT *  FROM [dbo].[caijiset] where cishu>0 and state=1 order by id desc ");

            if (dt != null && dt.Rows.Count > 0)
            {
                //string str = "";
                //DataRow dr = dt.Rows[0];
                //string chinaname = dr["chinaname"].ToString();
                foreach (DataRow dr in dt.Rows)
                {
                    Caiji cj = new Caiji();
                    cj.id        = Convert.ToInt16(dr["id"]);
                    cj.chinaname = dr["chinaname"].ToString();

                    cj.classname = dr["classname"].ToString();
                    cj.url       = dr["url"].ToString();
                    cj.domain    = dr["domain"].ToString();
                    cj.startstr  = dr["startstr"].ToString();

                    cj.endstr      = dr["endstr"].ToString();
                    cj.configstart = dr["configstart"].ToString();
                    cj.configend   = dr["configend"].ToString();
                    cj.textstart   = dr["textstart"].ToString();
                    cj.textend     = dr["textend"].ToString();

                    cj.fwhaostart = dr["fwhaostart"].ToString();
                    cj.fwhaoend   = dr["fwhaoend"].ToString();

                    cj.cwdatestart = dr["cwdatestart"].ToString();
                    cj.cwdateend   = dr["cwdateend"].ToString();

                    cj.fwjigoustart = dr["fwjigoustart"].ToString();
                    cj.fwjigouend   = dr["fwjigouend"].ToString();

                    cj.fbriqistart = dr["fbriqistart"].ToString();
                    cj.fbriqiend   = dr["fbriqiend"].ToString();

                    cj.urlstart = dr["urlstart"].ToString();
                    cj.urlend   = dr["urlend"].ToString();

                    cj.titlestart = dr["titlestart"].ToString();
                    cj.titleend   = dr["titleend"].ToString();
                    try { cj.cityid = Convert.ToInt32(dr["cityid"]); }
                    catch { cj.cityid = 0; }

                    Console.WriteLine("采集网站:" + cj.chinaname);
                    Console.WriteLine("采集分类:" + cj.classname);
                    Console.WriteLine("采集网址:" + cj.url);
                    string Html = GetHtml(cj.url);
                    //Console.WriteLine(html);
                    //提取首页内容字符
                    if ((Html.IndexOf(cj.startstr.Trim()) < 0) || (Html.IndexOf(cj.endstr.Trim()) < 0))
                    {
                        string sql1   = @"update [dbo].[caijiset] set error=0 where id=" + cj.id + "";
                        int    count1 = DBZheng.getRowsCount(sql1);
                        if (count1 > 0)
                        {
                            return;
                        }
                    }
                    string Introduce = Html.Substring(Html.IndexOf(cj.startstr.Trim()));
                    Introduce = Introduce.Remove(Introduce.IndexOf(cj.endstr.Trim())).Trim();

                    ArrayList     al     = GetMatchesStr(Introduce, "<a[^>]*?>.*?</a>");
                    StringBuilder sb     = new StringBuilder();
                    int           i      = 0;
                    int           icount = 0;
                    int           cf     = 0;
                    foreach (object var in al)
                    {
                        string a = var.ToString().Replace("\"", "").Replace("'", "");
                        a = Regex.Replace(a, cj.urlstart, "", RegexOptions.IgnoreCase | RegexOptions.Multiline); //提取url 地址 开始
                        //string[] urlname = a.Split(' target=_blank>');
                        urlfx = cj.domain.Trim() + a.Substring(0, a.IndexOf(cj.urlend));                         //提取url 地址 结束
                        title = a.Substring(a.IndexOf(cj.titlestart) + cj.titlestart.Length);
                        title = title.Remove(title.IndexOf(cj.titleend.Trim())).Trim();
                        if (a.StartsWith("/"))
                        {
                            a = "" + cj.domain.Trim() + a;
                        }
                        if (!a.StartsWith("http://"))
                        {
                            a = "http://" + a;
                        }
                        else
                        {
                            a = "<a href=" + a;
                        }


                        sb.Append(a + "/r/n");
                        i++;
                        urlfx   = urlfx.Replace("amp;", "");
                        icount += GetHtmlfeixi(urlfx.Trim(), title, cj, keyword);
                    }
                    //Console.WriteLine(sb.ToString());//把提取到网址输出到一个textBox,每个链接占一行

                    Console.WriteLine("共提取" + al.Count.ToString() + "个链接,过滤关键词,采集有效数据:" + youxiao.ToString() + "条,过滤重复:" + chongfu.ToString() + "条,插入数据库成功" + icount + "条,");
                    chongfu = 0;
                    youxiao = 0;
                    if (icount > 0)
                    {
                        string sql = @"update [dbo].[caijiset] set cishu=0 where cishu=1 and id=" + cj.id + "; ";

                        sql += " update [dbo].[caijiset] set lasttime='" + DateTime.Now + "',lastnum='" + icount + "' where id=" + cj.id + "; ";
                        sql += " INSERT INTO [dbo].[caijinum] ([caijiid],[caijitime],[caijinum]) VALUES('" + cj.id + "','" + DateTime.Now + "','" + icount + "');";

                        int count = DBZheng.getRowsCount(sql);
                        //if (count > 0)
                        {
                            Console.WriteLine("采集配制id成功:" + cj.id);
                        }
                    }
                    System.Threading.Thread.Sleep(500);
                }
            }
            else
            {
                Console.WriteLine("没有查询到配制信息,请先配制政策采集信息! ");
            }
            Console.WriteLine("程序将在10秒后,自动退出! ");
            System.Threading.Thread.Sleep(10000);
        }
Пример #2
0
        public static int GetHtmlfeixi(string url, string title, Caiji cj, ArrayList keyword)
        {
            int    count = 0;
            string sql   = string.Empty;
            string Html  = GetHtml(url.Trim());

            try
            {
                Html = Html.ToString().Replace("\"", "").Replace("'", "");

                string xinxi = Html.Substring(Html.IndexOf(cj.configstart.Trim()));
                xinxi = xinxi.Remove(xinxi.IndexOf(cj.configend.Trim())).Trim();                                   //信息 //Response.Write( xinxi);

                string Introduce = Html.Substring(Html.IndexOf(cj.textstart.Trim()) + cj.textstart.Trim().Length); //全文内容
                Introduce = Introduce.Remove(Introduce.IndexOf(cj.textend.Trim())).Trim();                         //内容
                //关键词过滤
                bool   bkeyword = false;
                string keys     = "";
                //////取消关健过滤
                //////foreach (object k1 in keyword)
                //////{
                //////    if (Introduce.Contains(k1.ToString()) || title.Contains(k1.ToString()))
                //////    {
                //////        bkeyword = true;
                //////        keys = k1.ToString();
                //////        break;

                //////    }
                //////}
                //////if (bkeyword)
                //////
                {
                    youxiao++;
                    //string sql = @"INSERT INTO [dbo].[zhengce]
                    //               ([mingcheng]           ,[wenhao]           ,[faburiqi]           ,[fawendanwen]
                    //               ,[cengji]           ,[buwensheng]           ,[gongcheng]           ,[lingyu]
                    //               ,[yiju]           ,[mubiao]           ,[youxiaoqi]           ,[hangye]
                    //               ,[chanpin]            ,[zhengceqw]           ,[zcywdizhi]         ,[state]           ,[createdate]
                    //                          ,[userid])
                    //         VALUES
                    //               ('" + Common.strFilter(mingcheng.Text) + "','" + Common.strFilter(wenhao.Text) + "','" + Common.strFilter(faburiqi.Text) + "','" + Common.strFilter(fawendanwen.Text) + "','" +
                    //                     Common.strFilter(cengji.SelectedValue) + "','" + Common.strFilter(buwensheng.Text) + "','" + Common.strFilter(gongcheng.Text) + "','" + Common.strFilter(lingyu.Text) + "','" +
                    //                     Common.strFilter(yiju.Text) + "','" + Common.strFilter(mubiao.Text) + "','" + Common.strFilter(youxiaoqi.Text) + "','" + Common.strFilter(hangye.SelectedValue) + "','" +
                    //                     Common.strFilter(chanpin.Text) + "','" + Common.strFilter(content.Text) + "','" + Common.strFilter(zcywdizhi.Text) + "',1,'" + DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss")
                    //                     + "','" + Session["userid"] + "')";

                    string wenhao   = chuli(xinxi, cj.fwhaostart, cj.fwhaoend, cj.fwhaostart.Length);
                    string faburiqi = chuli(xinxi, cj.cwdatestart, cj.cwdateend, cj.cwdatestart.Length);
                    faburiqi = shijiancl(faburiqi, faburiqi.Length);
                    string fawendanwen = chuli(xinxi, cj.fwjigoustart, cj.fwjigouend, cj.fwjigoustart.Length);
                    string youxiaoqi   = chuli(xinxi, cj.fbriqistart, cj.fbriqiend, cj.fbriqistart.Length);
                    youxiaoqi = shijiancl(youxiaoqi, youxiaoqi.Length);
                    string sqlc   = "select * from zhengce where mingcheng='" + Common.strFilter(title) + "' and wenhao='" + Common.strFilter(wenhao) + "' ";
                    int    icount = DBZheng.getSelectRowsCount(sqlc);
                    if (icount == 0)//过滤重复数据
                    {
                        sql = @"INSERT INTO [dbo].[zhengce]
                                       ([mingcheng]          ,[wenhao]           ,[faburiqi]           ,[fawendanwen],[youxiaoqi]
                                        ,[zhengceqw]           ,[zcywdizhi]         ,[state]           ,[createdate]
                                                  ,[userid],[caiji],[keys],[buwensheng],[url])
                                 VALUES
                                       ('" + Common.strFilter(title) + "','" + Common.strFilter(wenhao) + "','" + Common.strFilter(faburiqi) + "','" + Common.strFilter(fawendanwen) + "','" + Common.strFilter(youxiaoqi)
                              + "','" + Common.strFilter(Introduce) + "','" + Common.strFilter(url) + "',1,'" + DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss")
                              + "','888',1,'" + Common.strFilter(keys) + "','" + Common.strFilter(cj.cityid.ToString()) + "','" + Common.strFilter(cj.url.ToString()) + "')";
                        count = DBZheng.getRowsCount(sql);
                    }
                    else
                    {
                        icount++;
                        chongfu++;
                    }
                }
            }
            catch { return(0); }
            if (count > 0)
            {
                return(1);
            }
            else
            {
                //数据插入错误处理
                string sql1 = "INSERT INTO[dbo].[Log]([tablename],[op],[sql],[update])VALUES('zhengce','INSERT','" + sql + "','" + DateTime.Now + "')";
                DBZheng.getRowsCount(sql1);
                return(0);
            }
        }