Example #1
0
        /// <summary>
        /// Counts the rows returned by <c>select * from Content</c> and reports the
        /// running count to the caller, one callback per row.
        /// </summary>
        /// <param name="gettotaldelegate">
        /// Callback invoked once per row with the running total (1, 2, ... N).
        /// It is not invoked at all when the query returns no rows.
        /// </param>
        /// <returns>The number of rows in the returned table (0 when it is empty).</returns>
        public int GetTotal(GetTotalDelegate gettotaldelegate)
        {
            wznr_Servise wznr = new wznr_Servise();
            DataTable    dt   = wznr.GetOldbDataTable("select * from Content");

            // A plain loop already does nothing for an empty table, so no
            // separate "Count > 0" guard is required.
            int total = 0;
            for (int i = 0; i < dt.Rows.Count; i++)
            {
                total++;
                gettotaldelegate(total);
            }

            return total;
        }
Example #2
0
        /// <summary>
        /// Reads one page of up to 5000 (title, content) rows from the SQLite
        /// <c>content</c> table, replaces nothing in the source but adds a segmented
        /// title (<c>keys</c>) and a fixed <c>bztype</c> to every row, and bulk-inserts
        /// the page into <c>test3Table</c> through a table-valued parameter.
        /// </summary>
        /// <param name="getTotalRecordsDelegate">
        /// Progress callback. Called with the running row count as each row is picked
        /// up, and once more with the final count after the insert.
        /// </param>
        /// <param name="sw">Stopwatch that is running only while the bulk insert executes.</param>
        /// <param name="pagesize">
        /// Despite the name this is the zero-based page index: rows are read starting
        /// at offset <c>5000 * pagesize</c>.
        /// </param>
        public void MyThreadInsert(GetTotalDelegate getTotalRecordsDelegate, System.Diagnostics.Stopwatch sw, int pagesize)
        {
            int          totalRecords = 0;
            FenCiHelper  fch          = new FenCiHelper();
            wznr_Servise wznr         = new wznr_Servise();
            string       sqlitesql    = "SELECT  标题,内容 FROM  content ORDER BY id asc LIMIT 5000 OFFSET (5000*'" + pagesize + "')";

            // Query SQLite, then append the two extra columns the target table type expects.
            // The indices used below (2 = keys, 3 = bztype) rely on the query returning
            // exactly the two selected columns.
            DataTable dt = new SqliteHelper().GetQuery(sqlitesql);
            dt.Columns.AddRange(new DataColumn[] { new DataColumn("keys", typeof(string)), new DataColumn("bztype", typeof(string)) });

            for (int k = 0; k < dt.Rows.Count; k++)
            {
                DataRow row = dt.Rows[k];

                totalRecords++;
                getTotalRecordsDelegate(totalRecords);

                string title   = Convert.ToString(row[0]);
                string content = Convert.ToString(row[1]);

                // Word-segment the title; the result is stored as the row's keywords.
                title = fch.PanguFenCi(title);

                // Writing the converted value back stores the string form of the
                // content (Convert.ToString turns DBNull into an empty string).
                row[1] = content;
                row[2] = title;
                row[3] = 1;
            }

            string sql = "insert into test3Table (Title,Content,keys,bztype)" +
                         " SELECT  nc.Title,nc.Content,nc.keys,nc.bztype" +
                         " FROM @NewBulkTestTvp AS nc";

            // Stop the stopwatch even when the insert throws, so the caller's
            // timer is never left running.
            sw.Start();
            try
            {
                wznr.TableValuedToDB(dt, sql, "dbo.test3Udt");
            }
            finally
            {
                sw.Stop();
            }

            getTotalRecordsDelegate(totalRecords);
        }
Example #3
0
        /// <summary>
        /// Imports title/content rows into SQL Server, choosing between two paths
        /// based on <c>GetMaxID("ID", "Content") / 1000</c>.
        /// </summary>
        /// <remarks>
        /// When the quotient is 0 the whole table is read through
        /// <c>GetOldbDataTable</c>; every title is word-segmented and matched against
        /// <c>App_Data\sDict.txt</c> to build the category list, the content is
        /// segmented, and every regex match found in the original content is written
        /// to a second table (<c>test4Table</c>).
        /// Otherwise the data is read from SQLite in pages of 1000 rows; each page only
        /// gets a segmented title and a fixed <c>bztype</c> of 1 and is inserted into
        /// <c>test3Table</c>.
        /// </remarks>
        /// <param name="getTotalRecordsDelegate">
        /// Progress callback. Called with the running row count for each row and
        /// again after each bulk insert.
        /// </param>
        /// <param name="sw">Stopwatch that is running only while bulk inserts execute.</param>
        public void insertsqliteshuju(GetTotalDelegate getTotalRecordsDelegate, System.Diagnostics.Stopwatch sw)
        {
            int          totalRecords = 0;
            FenCiHelper  fch          = new FenCiHelper();
            wznr_Servise wznr         = new wznr_Servise();
            int          num          = new SqliteHelper().GetMaxID("ID", "Content");
            int          fullPages    = num / 1000;

            string sql = "insert into test3Table (Title,Content,keys,bztype)" +
                         " SELECT  nc.Title,nc.Content,nc.keys,nc.bztype" +
                         " FROM @NewBulkTestTvp AS nc";

            if (fullPages == 0)
            {
                DataTable dt = wznr.GetOldbDataTable("select 标题,内容 from Content");
                // Appended columns: index 2 = keys, index 3 = bztype.
                dt.Columns.AddRange(new DataColumn[] { new DataColumn("keys", typeof(string)), new DataColumn("bztype", typeof(string)) });
                DataTable newdt = wznr.GetTableSchema();

                // Load the keyword dictionary once instead of re-opening (and never
                // closing) the file for every row. The file is only touched when
                // there is at least one row to classify.
                string[][] dictEntries = new string[0][];
                if (dt.Rows.Count > 0)
                {
                    string   txtpath   = Directory.GetCurrentDirectory() + @"\App_Data\sDict.txt";
                    string[] dictLines = File.ReadAllLines(txtpath);
                    dictEntries = new string[dictLines.Length][];
                    for (int n = 0; n < dictLines.Length; n++)
                    {
                        dictEntries[n] = dictLines[n].Split(',');
                    }
                }

                for (int i = 0; i < dt.Rows.Count; i++)
                {
                    DataRow row = dt.Rows[i];

                    totalRecords++;
                    getTotalRecordsDelegate(totalRecords);

                    string title   = Convert.ToString(row[0]);
                    string content = Convert.ToString(row[1]);

                    // Word-segment the title; tokens are separated by '/'.
                    title = fch.PanguFenCi(title);
                    string[] arr = title.Split('/');

                    // Each dictionary line is "keyword,category". A line contributes
                    // its category at most once when its keyword equals any title token.
                    foreach (string[] arrtxt in dictEntries)
                    {
                        if (arrtxt.Length < 2)
                        {
                            // Blank or malformed line: there is no category to append.
                            continue;
                        }

                        if (Array.IndexOf(arr, arrtxt[0]) >= 0)
                        {
                            row[3] += arrtxt[1] + ",";
                        }
                    }

                    // Segment the content (the helper also emits synonyms, per the
                    // original author's note) and store the segmented title as keywords.
                    row[1] = fch.PanGuContentFenCi(content);
                    row[2] = title;

                    // One output row per regex match in the ORIGINAL (unsegmented) content.
                    foreach (Match m in Regex.Matches(content, @"<(\w+)>[^P]*[^<]*</(\w+)>"))
                    {
                        DataRow newdr = newdt.NewRow();
                        newdr["Title"]  = row[0];
                        newdr["p"]      = m.Value;
                        newdr["p1"]     = content;
                        newdr["keys"]   = title;
                        newdr["bztype"] = row[3];
                        newdt.Rows.Add(newdr);
                    }
                }

                string sqlp = "insert into test4Table (Title,p,p1,keys,bztype)" +
                              " SELECT  nc.Title,nc.p,nc.p1,nc.keys,nc.bztype" +
                              " FROM @NewBulkTestTvp AS nc";

                // Stop the stopwatch even when an insert throws.
                sw.Start();
                try
                {
                    wznr.TableValuedToDB(dt, sql, "dbo.test3Udt");
                    wznr.TableValuedToDB(newdt, sqlp, "dbo.test4Udt");
                }
                finally
                {
                    sw.Stop();
                }

                getTotalRecordsDelegate(totalRecords);
            }
            else
            {
                // One extra page picks up the remainder after the last full page.
                int pageCount = fullPages + 1;
                for (int i = 0; i < pageCount; i++)
                {
                    string sqlitesql = "SELECT  标题,内容 FROM  content ORDER BY id asc LIMIT 1000 OFFSET (1000*'" + i + "')";

                    DataTable dt = new SqliteHelper().GetQuery(sqlitesql);
                    // Appended columns: index 2 = keys, index 3 = bztype.
                    dt.Columns.AddRange(new DataColumn[] { new DataColumn("keys", typeof(string)), new DataColumn("bztype", typeof(string)) });

                    for (int k = 0; k < dt.Rows.Count; k++)
                    {
                        DataRow row = dt.Rows[k];

                        totalRecords++;
                        getTotalRecordsDelegate(totalRecords);

                        string title   = Convert.ToString(row[0]);
                        string content = Convert.ToString(row[1]);

                        title = fch.PanguFenCi(title);

                        // Content is written back in its string form only; it is
                        // not segmented on this path.
                        row[1] = content;
                        row[2] = title;
                        row[3] = 1;
                    }

                    // Stop the stopwatch even when the insert throws.
                    sw.Start();
                    try
                    {
                        wznr.TableValuedToDB(dt, sql, "dbo.test3Udt");
                    }
                    finally
                    {
                        sw.Stop();
                    }

                    getTotalRecordsDelegate(totalRecords);
                }
            }
        }
Example #4
0
        /// <summary>
        /// Imports every (title, content) row of the <c>Content</c> table into SQL
        /// Server: titles are word-segmented and categorised against
        /// <c>App_Data\sDict.txt</c>, contents are segmented, and every
        /// <c>&lt;p&gt;</c> element longer than 50 characters found in the original
        /// content is written to <c>test4Table</c>.
        /// </summary>
        /// <param name="getTotalRecordsDelegate">
        /// Progress callback. Called with the running row count after each row has
        /// been processed, and once more after the bulk inserts.
        /// </param>
        /// <param name="sw">Stopwatch that is running only while the bulk inserts execute.</param>
        public void fBatchImport(GetTotalDelegate getTotalRecordsDelegate, System.Diagnostics.Stopwatch sw)
        {
            int          totalRecords = 0;
            FenCiHelper  fch          = new FenCiHelper();
            wznr_Servise wznr         = new wznr_Servise();
            DataTable    dt           = wznr.GetOldbDataTable("select 标题,内容 from Content");

            // Appended columns: index 2 = keys, index 3 = bztype.
            dt.Columns.AddRange(new DataColumn[] { new DataColumn("keys", typeof(string)), new DataColumn("bztype", typeof(string)) });
            DataTable newdt = wznr.GetTableSchema();

            // Load the keyword dictionary once instead of re-opening (and never
            // closing) the file for every row. The file is only touched when there
            // is at least one row to classify.
            string[][] dictEntries = new string[0][];
            if (dt.Rows.Count > 0)
            {
                string   txtpath   = Directory.GetCurrentDirectory() + @"\App_Data\sDict.txt";
                string[] dictLines = File.ReadAllLines(txtpath);
                dictEntries = new string[dictLines.Length][];
                for (int n = 0; n < dictLines.Length; n++)
                {
                    dictEntries[n] = dictLines[n].Split(',');
                }
            }

            for (int i = 0; i < dt.Rows.Count; i++)
            {
                DataRow row = dt.Rows[i];

                string title   = Convert.ToString(row[0]);
                string content = Convert.ToString(row[1]);

                // Word-segment the title; tokens are separated by '/'.
                title = fch.PanguFenCi(title);
                string[] arr = title.Split('/');

                // Each dictionary line is "keyword,category". A line contributes its
                // category at most once when its keyword equals any title token.
                foreach (string[] arrtxt in dictEntries)
                {
                    if (arrtxt.Length < 2)
                    {
                        // Blank or malformed line: there is no category to append.
                        continue;
                    }

                    if (Array.IndexOf(arr, arrtxt[0]) >= 0)
                    {
                        row[3] += arrtxt[1] + ",";
                    }
                }

                // Segment the content (the helper also emits synonyms, per the
                // original author's note) and store the segmented title as keywords.
                row[1] = fch.PanGuContentFenCi(content);
                row[2] = title;

                // Case-insensitive match of <p ...>text</p> elements without nested
                // tags in the ORIGINAL content; short matches (<= 50 chars) are dropped.
                foreach (Match m in Regex.Matches(content, @"<p[^>]*>[^<]*</p>", RegexOptions.IgnoreCase))
                {
                    if (m.Value.Length <= 50)
                    {
                        continue;
                    }

                    DataRow newdr = newdt.NewRow();
                    newdr["Title"]  = row[0];
                    newdr["p"]      = m.Value;
                    newdr["p1"]     = content;
                    newdr["keys"]   = title;
                    newdr["bztype"] = row[3];
                    newdt.Rows.Add(newdr);
                }

                totalRecords++;
                getTotalRecordsDelegate(totalRecords);
            }

            string sql = "insert into test3Table (Title,Content,keys,bztype)" +
                         " SELECT  nc.Title,nc.Content,nc.keys,nc.bztype" +
                         " FROM @NewBulkTestTvp AS nc";
            string sqlp = "insert into test4Table (Title,p,p1,keys,bztype)" +
                          " SELECT  nc.Title,nc.p,nc.p1,nc.keys,nc.bztype" +
                          " FROM @NewBulkTestTvp AS nc";

            // Stop the stopwatch even when an insert throws.
            sw.Start();
            try
            {
                wznr.TableValuedToDB(dt, sql, "dbo.test3Udt");
                wznr.TableValuedToDB(newdt, sqlp, "dbo.test4Udt");
            }
            finally
            {
                sw.Stop();
            }

            getTotalRecordsDelegate(totalRecords);
        }
Example #5
0
        /// <summary>
        /// Variant of the batch import that also opens a Lucene index writer per row
        /// and logs progress to a rich text box. Added 2012-05-29 and marked as
        /// deprecated by its original author.
        /// </summary>
        /// <remarks>
        /// Titles are word-segmented and categorised against
        /// <c>App_Data\sDict.txt</c>, contents are segmented, and every regex match in
        /// the original content becomes a row for <c>test4Table</c>.
        /// </remarks>
        /// <param name="getTotalRecordsDelegate">
        /// Progress callback. Called with the running row count as each row is picked
        /// up, and once more after the bulk inserts.
        /// </param>
        /// <param name="sw">Stopwatch that is running only while the bulk inserts execute.</param>
        /// <param name="rich">Text box that receives one status line per processed row.</param>
        public void CreateIndexImport(GetTotalDelegate getTotalRecordsDelegate, System.Diagnostics.Stopwatch sw, System.Windows.Forms.RichTextBox rich)
        {
            int          totalRecords = 0;
            FenCiHelper  fch          = new FenCiHelper();
            wznr_Servise wznr         = new wznr_Servise();
            DataTable    dt           = wznr.GetOldbDataTable("select 标题,内容 from Content");

            // Appended columns: index 2 = keys, index 3 = bztype.
            dt.Columns.AddRange(new DataColumn[] { new DataColumn("keys", typeof(string)), new DataColumn("bztype", typeof(string)) });
            DataTable newdt = wznr.GetTableSchema();

            // Load the keyword dictionary once instead of re-opening (and never
            // closing) the file for every row. The file is only touched when there
            // is at least one row to classify.
            string[][] dictEntries = new string[0][];
            if (dt.Rows.Count > 0)
            {
                string   txtpath   = Directory.GetCurrentDirectory() + @"\App_Data\sDict.txt";
                string[] dictLines = File.ReadAllLines(txtpath);
                dictEntries = new string[dictLines.Length][];
                for (int n = 0; n < dictLines.Length; n++)
                {
                    dictEntries[n] = dictLines[n].Split(',');
                }
            }

            for (int i = 0; i < dt.Rows.Count; i++)
            {
                DataRow row = dt.Rows[i];

                totalRecords++;
                getTotalRecordsDelegate(totalRecords);

                string title   = Convert.ToString(row[0]);
                string content = Convert.ToString(row[1]);

                // Word-segment the title; tokens are separated by '/'.
                title = fch.PanguFenCi(title);
                string[] arr = title.Split('/');

                // Each dictionary line is "keyword,category". A line contributes its
                // category at most once when its keyword equals any title token.
                foreach (string[] arrtxt in dictEntries)
                {
                    if (arrtxt.Length < 2)
                    {
                        // Blank or malformed line: there is no category to append.
                        continue;
                    }

                    if (Array.IndexOf(arr, arrtxt[0]) >= 0)
                    {
                        row[3] += arrtxt[1] + ",";
                    }
                }

                // Segment the content (the helper also emits synonyms, per the
                // original author's note) and store the segmented title as keywords.
                row[1] = fch.PanGuContentFenCi(content);
                row[2] = title;

                // NOTE(review): a writer is opened for every row with the third
                // constructor argument set to true (presumably Lucene's "create"
                // flag - confirm), and no document is ever added because the
                // CreateIndex call is commented out. Kept as-is since the method
                // is marked deprecated.
                Analyzer           analyzer  = new PanGuAnalyzer();
                DirectoryInfo      dirInfo   = Directory.CreateDirectory(Config.INDEX_STORE_PATH);
                LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo);
                IndexWriter        writer    = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
                try
                {
                    // One output row per regex match in the ORIGINAL (unsegmented) content.
                    foreach (Match m in Regex.Matches(content, @"<(\w+)>[^P]*[^<]*</(\w+)>"))
                    {
                        DataRow newdr = newdt.NewRow();
                        newdr["Title"]  = row[0];
                        newdr["p"]      = m.Value;
                        newdr["p1"]     = content;
                        newdr["keys"]   = title;
                        newdr["bztype"] = row[3];
                        newdt.Rows.Add(newdr);
                        // CreateIndex(writer, dt.Rows[i][0].ToString(), m.Value);
                    }

                    writer.Optimize();
                }
                finally
                {
                    // Always close the writer so it is not left open when a row fails.
                    writer.Close();
                }

                rich.Text     += row[2].ToString() + "-----索引创建成功\n";
                rich.ForeColor = System.Drawing.Color.Green;
            }

            string sql = "insert into test3Table (Title,Content,keys,bztype)" +
                         " SELECT  nc.Title,nc.Content,nc.keys,nc.bztype" +
                         " FROM @NewBulkTestTvp AS nc";
            string sqlp = "insert into test4Table (Title,p,p1,keys,bztype)" +
                          " SELECT  nc.Title,nc.p,nc.p1,nc.keys,nc.bztype" +
                          " FROM @NewBulkTestTvp AS nc";

            // Stop the stopwatch even when an insert throws.
            sw.Start();
            try
            {
                wznr.TableValuedToDB(dt, sql, "dbo.test3Udt");
                wznr.TableValuedToDB(newdt, sqlp, "dbo.test4Udt");
            }
            finally
            {
                sw.Stop();
            }

            getTotalRecordsDelegate(totalRecords);
        }