Exemple #1
0
        /// <summary>
        /// Loads the [Name] values of the rows in [dbo].[Setting] whose SettingID is '53'
        /// and whose state is 1 or NULL.
        /// </summary>
        /// <returns>
        /// An <see cref="ArrayList"/> holding each row's [Name] as a string, in row order.
        /// The list is empty when the query yields no table or no rows.
        /// </returns>
        private static ArrayList keywords()
        {
            ArrayList names = new ArrayList();
            string    sql   = @"SELECT   [ID]      ,[SettingID]      ,[Name]  FROM [dbo].[Setting] where [SettingID]='53' and (state=1 or state is null)";

            DataTable dt = DBZheng.getDataTable(sql);

            // Main() guards against a null table from the same helper, so do the same here
            // explicitly instead of relying on a catch-all to hide a NullReferenceException.
            if (dt == null)
            {
                return names;
            }

            foreach (DataRow dr in dt.Rows)
            {
                // A DBNull cell converts to an empty string, so no extra null handling is needed.
                names.Add(dr["Name"].ToString());
            }

            return names;
        }
Exemple #2
0
        /// <summary>
        /// Entry point. Reads every active collection configuration from [dbo].[caijiset]
        /// (cishu &gt; 0 and state = 1), downloads each configured page, cuts out the list
        /// region between the configured start/end markers, extracts every anchor in that
        /// region and passes each link to <c>GetHtmlfeixi</c>. Per-site results are printed
        /// to the console and written back to the database.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            ArrayList keyword = keywords();
            DataTable dt      = DBZheng.getDataTable("SELECT *  FROM [dbo].[caijiset] where cishu>0 and state=1 order by id desc ");

            if (dt != null && dt.Rows.Count > 0)
            {
                foreach (DataRow dr in dt.Rows)
                {
                    Caiji cj = BuildCaijiFromRow(dr);

                    Console.WriteLine("采集网站:" + cj.chinaname);
                    Console.WriteLine("采集分类:" + cj.classname);
                    Console.WriteLine("采集网址:" + cj.url);
                    string Html = GetHtml(cj.url);

                    // Cut out the list region of the page. When the markers are missing (or the
                    // page is empty) flag the configuration and move on to the next site; the
                    // previous code either crashed on Substring(-1) or aborted the whole run.
                    string Introduce;
                    if (!TrySliceBetweenMarkers(Html, cj.startstr.Trim(), cj.endstr.Trim(), out Introduce))
                    {
                        string sql1 = @"update [dbo].[caijiset] set error=0 where id=" + cj.id + "";
                        DBZheng.getRowsCount(sql1);
                        continue;
                    }

                    ArrayList al     = GetMatchesStr(Introduce, "<a[^>]*?>.*?</a>");
                    int       icount = 0;
                    foreach (object link in al)
                    {
                        string urlfx;
                        string title;
                        if (!TryExtractLinkParts(link.ToString(), cj, out urlfx, out title))
                        {
                            // The anchor does not match the configured delimiters; skip it
                            // rather than letting one odd link terminate the program.
                            continue;
                        }

                        // Turns an HTML-encoded "&amp;" in the query string back into "&".
                        urlfx   = urlfx.Replace("amp;", "");
                        icount += GetHtmlfeixi(urlfx.Trim(), title, cj, keyword);
                    }

                    // youxiao / chongfu are counters declared outside this method
                    // (presumably updated by GetHtmlfeixi - confirm); reset them per site.
                    Console.WriteLine("共提取" + al.Count.ToString() + "个链接,过滤关键词,采集有效数据:" + youxiao.ToString() + "条,过滤重复:" + chongfu.ToString() + "条,插入数据库成功" + icount + "条,");
                    chongfu = 0;
                    youxiao = 0;
                    if (icount > 0)
                    {
                        // Capture the timestamp once and format it as ISO 8601 so the SQL
                        // literal does not depend on the machine's current culture.
                        string now = DateTime.Now.ToString("yyyy-MM-dd'T'HH:mm:ss", System.Globalization.CultureInfo.InvariantCulture);

                        string sql = @"update [dbo].[caijiset] set cishu=0 where cishu=1 and id=" + cj.id + "; ";

                        sql += " update [dbo].[caijiset] set lasttime='" + now + "',lastnum='" + icount + "' where id=" + cj.id + "; ";
                        sql += " INSERT INTO [dbo].[caijinum] ([caijiid],[caijitime],[caijinum]) VALUES('" + cj.id + "','" + now + "','" + icount + "');";

                        DBZheng.getRowsCount(sql);
                        Console.WriteLine("采集配制id成功:" + cj.id);
                    }
                    System.Threading.Thread.Sleep(500);
                }
            }
            else
            {
                Console.WriteLine("没有查询到配制信息,请先配制政策采集信息! ");
            }
            Console.WriteLine("程序将在10秒后,自动退出! ");
            System.Threading.Thread.Sleep(10000);
        }

        /// <summary>
        /// Copies one [dbo].[caijiset] row into a new <c>Caiji</c> configuration object.
        /// </summary>
        /// <param name="dr">The configuration row to read.</param>
        /// <returns>The populated configuration.</returns>
        private static Caiji BuildCaijiFromRow(DataRow dr)
        {
            Caiji cj = new Caiji();

            // NOTE(review): Convert.ToInt16 throws OverflowException for ids above 32767;
            // kept as-is because the declared type of Caiji.id is not visible here.
            cj.id        = Convert.ToInt16(dr["id"]);
            cj.chinaname = dr["chinaname"].ToString();
            cj.classname = dr["classname"].ToString();
            cj.url       = dr["url"].ToString();
            cj.domain    = dr["domain"].ToString();

            cj.startstr    = dr["startstr"].ToString();
            cj.endstr      = dr["endstr"].ToString();
            cj.configstart = dr["configstart"].ToString();
            cj.configend   = dr["configend"].ToString();
            cj.textstart   = dr["textstart"].ToString();
            cj.textend     = dr["textend"].ToString();

            cj.fwhaostart   = dr["fwhaostart"].ToString();
            cj.fwhaoend     = dr["fwhaoend"].ToString();
            cj.cwdatestart  = dr["cwdatestart"].ToString();
            cj.cwdateend    = dr["cwdateend"].ToString();
            cj.fwjigoustart = dr["fwjigoustart"].ToString();
            cj.fwjigouend   = dr["fwjigouend"].ToString();
            cj.fbriqistart  = dr["fbriqistart"].ToString();
            cj.fbriqiend    = dr["fbriqiend"].ToString();

            cj.urlstart   = dr["urlstart"].ToString();
            cj.urlend     = dr["urlend"].ToString();
            cj.titlestart = dr["titlestart"].ToString();
            cj.titleend   = dr["titleend"].ToString();

            // cityid is best-effort: any conversion failure (for example a DBNull cell)
            // falls back to 0.
            try { cj.cityid = Convert.ToInt32(dr["cityid"]); }
            catch { cj.cityid = 0; }

            return cj;
        }

        /// <summary>
        /// Returns the part of <paramref name="html"/> that starts at the first occurrence of
        /// <paramref name="startMarker"/> (inclusive) and ends just before the first
        /// occurrence of <paramref name="endMarker"/> at or after that position, trimmed.
        /// </summary>
        /// <param name="html">The page text; may be null or empty.</param>
        /// <param name="startMarker">Text marking the start of the region.</param>
        /// <param name="endMarker">Text marking the end of the region.</param>
        /// <param name="region">The extracted region, or an empty string on failure.</param>
        /// <returns>True when both markers were found in order; otherwise false.</returns>
        private static bool TrySliceBetweenMarkers(string html, string startMarker, string endMarker, out string region)
        {
            region = string.Empty;
            if (string.IsNullOrEmpty(html))
            {
                return false;
            }

            int startIndex = html.IndexOf(startMarker, StringComparison.Ordinal);
            if (startIndex < 0)
            {
                return false;
            }

            // Search from the start marker so an end marker that only occurs earlier in the
            // page is treated as "not found" instead of producing a negative length.
            int endIndex = html.IndexOf(endMarker, startIndex, StringComparison.Ordinal);
            if (endIndex < 0)
            {
                return false;
            }

            region = html.Substring(startIndex, endIndex - startIndex).Trim();
            return true;
        }

        /// <summary>
        /// Extracts the target URL and the title from one anchor element using the
        /// delimiters configured in <paramref name="cj"/>.
        /// </summary>
        /// <param name="anchor">The raw anchor markup.</param>
        /// <param name="cj">The site configuration supplying urlstart/urlend/titlestart/titleend and domain.</param>
        /// <param name="url">The configured domain followed by the extracted link, or empty on failure.</param>
        /// <param name="title">The trimmed link title, or empty on failure.</param>
        /// <returns>True when every delimiter was found; otherwise false.</returns>
        private static bool TryExtractLinkParts(string anchor, Caiji cj, out string url, out string title)
        {
            url   = string.Empty;
            title = string.Empty;

            // Quotes are dropped first so the configured delimiters can be written without them.
            string a = anchor.Replace("\"", "").Replace("'", "");

            // urlstart is applied as a regular expression that strips everything before the link.
            a = Regex.Replace(a, cj.urlstart, "", RegexOptions.IgnoreCase | RegexOptions.Multiline);

            int urlEndIndex = a.IndexOf(cj.urlend, StringComparison.Ordinal);
            if (urlEndIndex < 0)
            {
                return false;
            }

            int titleStartIndex = a.IndexOf(cj.titlestart, StringComparison.Ordinal);
            if (titleStartIndex < 0)
            {
                return false;
            }

            string afterTitleStart = a.Substring(titleStartIndex + cj.titlestart.Length);
            int    titleEndIndex   = afterTitleStart.IndexOf(cj.titleend.Trim(), StringComparison.Ordinal);
            if (titleEndIndex < 0)
            {
                return false;
            }

            url   = cj.domain.Trim() + a.Substring(0, urlEndIndex);
            title = afterTitleStart.Remove(titleEndIndex).Trim();
            return true;
        }