Beispiel #1
0
        /// <summary>
        /// Worker routine: reads one 1000-row page of (标题, 内容) from the SQLite <c>content</c> table,
        /// segments every title with <c>FenCiHelper.PanguFenCi</c>, and hands the resulting table to
        /// <c>wznr_Servise.TableValuedToDB</c> for insertion into <c>test3Table</c>.
        /// Progress is reported through <c>threadStartEvent</c>, <c>threadEvent</c> and <c>threadEndEvent</c>.
        /// </summary>
        /// <param name="i">Zero-based page index; converted with <c>Convert.ToInt32(object)</c>.</param>
        public void runMethod(object i)
        {
            FenCiHelper  fch  = new FenCiHelper();
            wznr_Servise wznr = new wznr_Servise();

            // Convert the loosely typed argument to an integer before building the statement so that
            // nothing except a number can ever be spliced into the SQL text.
            int    pageIndex = Convert.ToInt32(i);
            string sqlitesql = "SELECT  标题,内容 FROM  content ORDER BY id asc LIMIT 1000 OFFSET (1000*" + pageIndex + ")";

            // Query the SQLite database, then append the two output columns (index 2 = keys, index 3 = bztype).
            DataTable dt = new SqliteHelper().GetQuery(sqlitesql);
            dt.Columns.AddRange(new DataColumn[] { new DataColumn("keys", typeof(string)), new DataColumn("bztype", typeof(string)) });

            // Tell the UI we are starting; the row count is passed as the sender so it can size the progress bar.
            threadStartEvent.Invoke(dt.Rows.Count, new EventArgs());
            for (int k = 0; k < dt.Rows.Count; k++)
            {
                Thread.Sleep(0);                          // yield the rest of the time slice
                threadEvent.Invoke(k, new EventArgs());   // report current progress (row index) to the UI

                string title   = Convert.ToString(dt.Rows[k][0]);
                string content = Convert.ToString(dt.Rows[k][1]);

                // Segment the title; the segmented form is stored in the "keys" column.
                title = fch.PanguFenCi(title);

                dt.Rows[k][1] = content;   // store the string form produced by Convert.ToString
                dt.Rows[k][2] = title;
                dt.Rows[k][3] = 1;
            }

            // NOTE(review): the "finished" notification is raised before the bulk insert below runs;
            // kept in the original order — confirm whether the UI expects it after the insert.
            threadEndEvent.Invoke(new object(), new EventArgs());

            string sql = "insert into test3Table (Title,Content,keys,bztype)" +
                         " SELECT  nc.Title,nc.Content,nc.keys,nc.bztype" +
                         " FROM @NewBulkTestTvp AS nc";

            wznr.TableValuedToDB(dt, sql, "dbo.test3Udt");
        }
Beispiel #2
0
        /// <summary>
        /// PanGu test query: segments the text of <c>textBox1</c>, looks the segments up in
        /// <c>App_Data\sDict.txt</c>, and runs two searches on the "contents" field — one with the
        /// dictionary-prefixed keyword (output to <c>richTextBox1</c>) and one with the raw input
        /// (output to <c>richTextBox2</c>).
        /// </summary>
        private void button1_Click_1(object sender, EventArgs e)
        {
            FenCiHelper     fch       = new FenCiHelper();
            // NOTE(review): this instance is never used below (PanguQueryTest is called statically);
            // kept because the constructor's side effects are not visible from here.
            WycLuceneSearch wycsearch = new WycLuceneSearch();
            string          keyword   = this.textBox1.Text;
            string          keyto     = this.textBox1.Text;

            // Segment the keyword; the result is split on '/' below.
            keyword = fch.PanguFenCi(keyword);

            // The dictionary file lives under the current working directory.
            string path    = Directory.GetCurrentDirectory();
            string txtpath = path + @"\App_Data\sDict.txt";

            string[] arr = keyword.Split('/');

            // Read the dictionary line by line; the reader is disposed even if a line fails to process.
            using (StreamReader file = new StreamReader(txtpath))
            {
                string line;
                while ((line = file.ReadLine()) != null)
                {
                    string[] arrtxt = line.Split(',');
                    for (int j = 0; j < arr.Length; j++)
                    {
                        if (arrtxt[0].Equals(arr[j]))
                        {
                            // Prefix the raw input with the matched dictionary word.
                            // The break only leaves the segment loop, so a later matching line overwrites this.
                            keyword = arrtxt[0] + keyto;
                            break;
                        }
                    }
                }
            }

            string field = "contents";   // field to search

            IList <Analyzer> listAnalyzer = WycLuceneAnalyzer.BuildAnalyzers();
            foreach (Analyzer analyzer in listAnalyzer)
            {
                WycLuceneSearch.PanguQueryTest(analyzer, field, keyword, richTextBox1);   // search with the expanded keyword
                WycLuceneSearch.PanguQueryTest(analyzer, field, keyto, richTextBox2);     // search with the raw input
            }
        }
Beispiel #3
0
        /// <summary>
        /// Reads one 5000-row page of (标题, 内容) from the SQLite <c>content</c> table, segments every
        /// title with <c>FenCiHelper.PanguFenCi</c>, and inserts the page into <c>test3Table</c> via
        /// <c>wznr_Servise.TableValuedToDB</c>.
        /// </summary>
        /// <param name="getTotalRecordsDelegate">Called with the running row count after each row and once more at the end.</param>
        /// <param name="sw">Stopwatch started and stopped around the database insert only.</param>
        /// <param name="pagesize">Despite its name, used as the zero-based page index: the offset is 5000 * pagesize.</param>
        public void MyThreadInsert(GetTotalDelegate getTotalRecordsDelegate, System.Diagnostics.Stopwatch sw, int pagesize)
        {
            int          totalRecords = 0;
            FenCiHelper  fch          = new FenCiHelper();
            wznr_Servise wznr         = new wznr_Servise();
            string       sqlitesql    = "SELECT  标题,内容 FROM  content ORDER BY id asc LIMIT 5000 OFFSET (5000*'" + pagesize + "')";

            // Query SQLite directly into the working table, then append the two output columns
            // (index 2 = keys, index 3 = bztype).
            DataTable dt = new SqliteHelper().GetQuery(sqlitesql);
            dt.Columns.AddRange(new DataColumn[] { new DataColumn("keys", typeof(string)), new DataColumn("bztype", typeof(string)) });

            for (int k = 0; k < dt.Rows.Count; k++)
            {
                totalRecords++;
                getTotalRecordsDelegate(totalRecords);

                string title   = Convert.ToString(dt.Rows[k][0]);
                string content = Convert.ToString(dt.Rows[k][1]);

                // Segment the title; the segmented form is stored in the "keys" column.
                title = fch.PanguFenCi(title);

                dt.Rows[k][1] = content;
                dt.Rows[k][2] = title;
                dt.Rows[k][3] = 1;
            }

            string sql = "insert into test3Table (Title,Content,keys,bztype)" +
                         " SELECT  nc.Title,nc.Content,nc.keys,nc.bztype" +
                         " FROM @NewBulkTestTvp AS nc";

            // Only the insert itself is timed.
            sw.Start();
            wznr.TableValuedToDB(dt, sql, "dbo.test3Udt");
            sw.Stop();
            getTotalRecordsDelegate(totalRecords);
        }
Beispiel #4
0
        /// <summary>
        /// Imports title/content rows into <c>test3Table</c> (original note: meant for data sets on the
        /// order of 100,000 rows).
        /// When <c>GetMaxID("ID", "Content") / 1000</c> is 0, all rows are read through
        /// <c>wznr.GetOldbDataTable</c>, classified against <c>App_Data\sDict.txt</c>, and the regex
        /// matches found in each content are additionally written to <c>test4Table</c>.
        /// Otherwise the SQLite <c>content</c> table is processed in pages of 1000 rows.
        /// </summary>
        /// <param name="getTotalRecordsDelegate">Called with the running row count after each row and after each insert.</param>
        /// <param name="sw">Stopwatch started and stopped around the database inserts only.</param>
        public void insertsqliteshuju(GetTotalDelegate getTotalRecordsDelegate, System.Diagnostics.Stopwatch sw)
        {
            int          totalRecords = 0;
            FenCiHelper  fch          = new FenCiHelper();
            wznr_Servise wznr         = new wznr_Servise();
            int          num          = new SqliteHelper().GetMaxID("ID", "Content");

            int pagesize = num / 1000;

            if (pagesize == 0)
            {
                DataTable dt = wznr.GetOldbDataTable("select 标题,内容 from Content");
                // Append the two output columns (index 2 = keys, index 3 = bztype).
                dt.Columns.AddRange(new DataColumn[] { new DataColumn("keys", typeof(string)), new DataColumn("bztype", typeof(string)) });
                DataTable newdt = wznr.GetTableSchema();

                // The dictionary file lives under the current working directory.
                string txtpath = Directory.GetCurrentDirectory() + @"\App_Data\sDict.txt";

                for (int i = 0; i < dt.Rows.Count; i++)
                {
                    totalRecords++;
                    getTotalRecordsDelegate(totalRecords);

                    string title   = Convert.ToString(dt.Rows[i][0]);
                    string content = Convert.ToString(dt.Rows[i][1]);

                    // Segment the title; the result is split on '/' below.
                    title = fch.PanguFenCi(title);
                    string[] arr = title.Split('/');

                    // The dictionary is re-read for every row, as in the original; the reader is now
                    // disposed each time instead of leaking one file handle per row.
                    using (StreamReader file = new StreamReader(txtpath))
                    {
                        string line;
                        while ((line = file.ReadLine()) != null)
                        {
                            string[] arrtxt = line.Split(',');
                            if (arrtxt.Length < 2)
                            {
                                // No category column on this line (e.g. a blank line): nothing to append.
                                continue;
                            }

                            for (int j = 0; j < arr.Length; j++)
                            {
                                if (arrtxt[0].Equals(arr[j]))
                                {
                                    dt.Rows[i][3] += arrtxt[1] + ",";   // append the category
                                    break;
                                }
                            }
                        }
                    }

                    // Segment the content and store it together with the segmented title.
                    dt.Rows[i][1] = fch.PanGuContentFenCi(content);
                    dt.Rows[i][2] = title;

                    // One test4Table row per regex match in the unsegmented content.
                    foreach (Match m in Regex.Matches(content, @"<(\w+)>[^P]*[^<]*</(\w+)>"))
                    {
                        DataRow newdr   = newdt.NewRow();
                        newdr["Title"]  = dt.Rows[i][0];
                        newdr["p"]      = m.Value;
                        newdr["p1"]     = content;
                        newdr["keys"]   = title;
                        newdr["bztype"] = dt.Rows[i][3];
                        newdt.Rows.Add(newdr);
                    }
                }

                string sql = "insert into test3Table (Title,Content,keys,bztype)" +
                             " SELECT  nc.Title,nc.Content,nc.keys,nc.bztype" +
                             " FROM @NewBulkTestTvp AS nc";
                string sqlp = "insert into test4Table (Title,p,p1,keys,bztype)" +
                              " SELECT  nc.Title,nc.p,nc.p1,nc.keys,nc.bztype" +
                              " FROM @NewBulkTestTvp AS nc";

                // Only the inserts are timed.
                sw.Start();
                wznr.TableValuedToDB(dt, sql, "dbo.test3Udt");
                wznr.TableValuedToDB(newdt, sqlp, "dbo.test4Udt");
                sw.Stop();
                getTotalRecordsDelegate(totalRecords);
            }
            else
            {
                // One extra page covers the remainder of the integer division.
                pagesize = pagesize + 1;

                string sql = "insert into test3Table (Title,Content,keys,bztype)" +
                             " SELECT  nc.Title,nc.Content,nc.keys,nc.bztype" +
                             " FROM @NewBulkTestTvp AS nc";

                for (int i = 0; i < pagesize; i++)
                {
                    string sqlitesql = "SELECT  标题,内容 FROM  content ORDER BY id asc LIMIT 1000 OFFSET (1000*'" + i + "')";

                    DataTable dt = new SqliteHelper().GetQuery(sqlitesql);
                    // Append the two output columns (index 2 = keys, index 3 = bztype).
                    dt.Columns.AddRange(new DataColumn[] { new DataColumn("keys", typeof(string)), new DataColumn("bztype", typeof(string)) });

                    for (int k = 0; k < dt.Rows.Count; k++)
                    {
                        totalRecords++;
                        getTotalRecordsDelegate(totalRecords);

                        string title   = Convert.ToString(dt.Rows[k][0]);
                        string content = Convert.ToString(dt.Rows[k][1]);

                        // Segment the title; the segmented form is stored in the "keys" column.
                        title = fch.PanguFenCi(title);

                        dt.Rows[k][1] = content;
                        dt.Rows[k][2] = title;
                        dt.Rows[k][3] = 1;
                    }

                    // Only the insert is timed; the stopwatch accumulates across pages.
                    sw.Start();
                    wznr.TableValuedToDB(dt, sql, "dbo.test3Udt");
                    sw.Stop();
                    getTotalRecordsDelegate(totalRecords);
                }
            }
        }
Beispiel #5
0
        /// <summary>
        /// Batch import: reads all (标题, 内容) rows through <c>wznr.GetOldbDataTable</c>, segments title
        /// and content, classifies each title against <c>App_Data\sDict.txt</c>, and writes the rows to
        /// <c>test3Table</c>. Every <c>&lt;p&gt;</c> element of the content that is longer than 50
        /// characters is additionally written to <c>test4Table</c>.
        /// </summary>
        /// <param name="getTotalRecordsDelegate">Called with the running row count after each row and once more at the end.</param>
        /// <param name="sw">Stopwatch started and stopped around the database inserts only.</param>
        public void fBatchImport(GetTotalDelegate getTotalRecordsDelegate, System.Diagnostics.Stopwatch sw)
        {
            int          totalRecords = 0;
            FenCiHelper  fch          = new FenCiHelper();
            wznr_Servise wznr         = new wznr_Servise();
            DataTable    dt           = wznr.GetOldbDataTable("select 标题,内容 from Content");

            // Append the two output columns (index 2 = keys, index 3 = bztype).
            dt.Columns.AddRange(new DataColumn[] { new DataColumn("keys", typeof(string)), new DataColumn("bztype", typeof(string)) });
            DataTable newdt = wznr.GetTableSchema();

            // The dictionary file lives under the current working directory.
            string txtpath = Directory.GetCurrentDirectory() + @"\App_Data\sDict.txt";

            for (int i = 0; i < dt.Rows.Count; i++)
            {
                string title   = Convert.ToString(dt.Rows[i][0]);
                string content = Convert.ToString(dt.Rows[i][1]);

                // Segment the title; the result is split on '/' below.
                title = fch.PanguFenCi(title);
                string[] arr = title.Split('/');

                // The dictionary is re-read for every row, as in the original; the reader is now
                // disposed each time instead of leaking one file handle per row.
                using (StreamReader file = new StreamReader(txtpath))
                {
                    string line;
                    while ((line = file.ReadLine()) != null)
                    {
                        string[] arrtxt = line.Split(',');
                        if (arrtxt.Length < 2)
                        {
                            // No category column on this line (e.g. a blank line): nothing to append.
                            continue;
                        }

                        for (int j = 0; j < arr.Length; j++)
                        {
                            if (arrtxt[0].Equals(arr[j]))
                            {
                                dt.Rows[i][3] += arrtxt[1] + ",";   // append the category
                                break;
                            }
                        }
                    }
                }

                // Segment the content and store it together with the segmented title.
                dt.Rows[i][1] = fch.PanGuContentFenCi(content);
                dt.Rows[i][2] = title;

                // One test4Table row per <p>...</p> element (case-insensitive) longer than 50 characters.
                foreach (Match m in Regex.Matches(content, @"<p[^>]*>[^<]*</p>", RegexOptions.IgnoreCase))
                {
                    if (m.Value.Length > 50)
                    {
                        DataRow newdr   = newdt.NewRow();
                        newdr["Title"]  = dt.Rows[i][0];
                        newdr["p"]      = m.Value;
                        newdr["p1"]     = content;
                        newdr["keys"]   = title;
                        newdr["bztype"] = dt.Rows[i][3];
                        newdt.Rows.Add(newdr);
                    }
                }

                totalRecords++;
                getTotalRecordsDelegate(totalRecords);
            }

            string sql = "insert into test3Table (Title,Content,keys,bztype)" +
                         " SELECT  nc.Title,nc.Content,nc.keys,nc.bztype" +
                         " FROM @NewBulkTestTvp AS nc";
            string sqlp = "insert into test4Table (Title,p,p1,keys,bztype)" +
                          " SELECT  nc.Title,nc.p,nc.p1,nc.keys,nc.bztype" +
                          " FROM @NewBulkTestTvp AS nc";

            // Only the inserts are timed.
            sw.Start();
            wznr.TableValuedToDB(dt, sql, "dbo.test3Udt");
            wznr.TableValuedToDB(newdt, sqlp, "dbo.test4Udt");
            sw.Stop();
            getTotalRecordsDelegate(totalRecords);
        }
Beispiel #6
0
        /// <summary>
        /// Index-creating import (added 2012-05-29; marked as deprecated by the original author).
        /// Reads all (标题, 内容) rows through <c>wznr.GetOldbDataTable</c>, segments and classifies them,
        /// opens an <c>IndexWriter</c> on <c>Config.INDEX_STORE_PATH</c> for every row, and finally
        /// writes the rows to <c>test3Table</c> and the regex matches to <c>test4Table</c>.
        /// </summary>
        /// <param name="getTotalRecordsDelegate">Called with the running row count after each row and once more at the end.</param>
        /// <param name="sw">Stopwatch started and stopped around the database inserts only.</param>
        /// <param name="rich">Text box that receives one status line per processed row.</param>
        public void CreateIndexImport(GetTotalDelegate getTotalRecordsDelegate, System.Diagnostics.Stopwatch sw, System.Windows.Forms.RichTextBox rich)
        {
            int          totalRecords = 0;
            FenCiHelper  fch          = new FenCiHelper();
            wznr_Servise wznr         = new wznr_Servise();
            DataTable    dt           = wznr.GetOldbDataTable("select 标题,内容 from Content");

            // Append the two output columns (index 2 = keys, index 3 = bztype).
            dt.Columns.AddRange(new DataColumn[] { new DataColumn("keys", typeof(string)), new DataColumn("bztype", typeof(string)) });
            DataTable newdt = wznr.GetTableSchema();

            // The dictionary file lives under the current working directory.
            string txtpath = Directory.GetCurrentDirectory() + @"\App_Data\sDict.txt";

            for (int i = 0; i < dt.Rows.Count; i++)
            {
                totalRecords++;
                getTotalRecordsDelegate(totalRecords);

                string title   = Convert.ToString(dt.Rows[i][0]);
                string content = Convert.ToString(dt.Rows[i][1]);

                // Segment the title; the result is split on '/' below.
                title = fch.PanguFenCi(title);
                string[] arr = title.Split('/');

                // The dictionary is re-read for every row, as in the original; the reader is now
                // disposed each time instead of leaking one file handle per row.
                using (StreamReader file = new StreamReader(txtpath))
                {
                    string line;
                    while ((line = file.ReadLine()) != null)
                    {
                        string[] arrtxt = line.Split(',');
                        if (arrtxt.Length < 2)
                        {
                            // No category column on this line (e.g. a blank line): nothing to append.
                            continue;
                        }

                        for (int j = 0; j < arr.Length; j++)
                        {
                            if (arrtxt[0].Equals(arr[j]))
                            {
                                dt.Rows[i][3] += arrtxt[1] + ",";   // append the category
                                break;
                            }
                        }
                    }
                }

                // Segment the content and store it together with the segmented title.
                dt.Rows[i][1] = fch.PanGuContentFenCi(content);
                dt.Rows[i][2] = title;

                Analyzer           analyzer  = new PanGuAnalyzer();
                DirectoryInfo      dirInfo   = Directory.CreateDirectory(Config.INDEX_STORE_PATH);
                LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo);
                // NOTE(review): the writer is opened with create = true for every row and no document is
                // added (the CreateIndex call was commented out in the original) — confirm this is intended.
                IndexWriter        writer    = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
                try
                {
                    // One test4Table row per regex match in the unsegmented content.
                    foreach (Match m in Regex.Matches(content, @"<(\w+)>[^P]*[^<]*</(\w+)>"))
                    {
                        DataRow newdr   = newdt.NewRow();
                        newdr["Title"]  = dt.Rows[i][0];
                        newdr["p"]      = m.Value;
                        newdr["p1"]     = content;
                        newdr["keys"]   = title;
                        newdr["bztype"] = dt.Rows[i][3];
                        newdt.Rows.Add(newdr);
                    }
                    writer.Optimize();
                }
                finally
                {
                    // Always close the writer so a failure above cannot leave it open for the next row.
                    writer.Close();
                }

                rich.Text     += dt.Rows[i][2].ToString() + "-----索引创建成功\n";
                rich.ForeColor = System.Drawing.Color.Green;
            }

            string sql = "insert into test3Table (Title,Content,keys,bztype)" +
                         " SELECT  nc.Title,nc.Content,nc.keys,nc.bztype" +
                         " FROM @NewBulkTestTvp AS nc";
            string sqlp = "insert into test4Table (Title,p,p1,keys,bztype)" +
                          " SELECT  nc.Title,nc.p,nc.p1,nc.keys,nc.bztype" +
                          " FROM @NewBulkTestTvp AS nc";

            // Only the inserts are timed.
            sw.Start();
            wznr.TableValuedToDB(dt, sql, "dbo.test3Udt");
            wznr.TableValuedToDB(newdt, sqlp, "dbo.test4Udt");
            sw.Stop();
            getTotalRecordsDelegate(totalRecords);
        }
Beispiel #7
0
        /// <summary>
        /// Generates content by title search (created 2012-06-05). Every line of
        /// <c>App_Data\sytitle.txt</c> (read as GB2312) is treated as a title: it is segmented, expanded
        /// with a matching word from <c>App_Data\sDict.txt</c>, searched on the "contents" field with
        /// every analyzer from <c>WycLuceneAnalyzer.BuildAnalyzers()</c>, and the text of
        /// <paramref name="rich"/> is then stored with the title in <c>test5Table</c>.
        /// </summary>
        /// <param name="getTotalRecordsDelegate">Called with the running title count for each title and once more at the end.</param>
        /// <param name="sw">Stopwatch started and stopped around the database insert only.</param>
        /// <param name="rich">Text box passed to the search and used as the source of the stored content.</param>
        public void ShengChengNeiRong(GetTitletotal getTotalRecordsDelegate, System.Diagnostics.Stopwatch sw, System.Windows.Forms.RichTextBox rich)
        {
            int          totalRecords = 0;
            wznr_Servise wznr         = new wznr_Servise();
            DataTable    newdt        = wznr.GetNeiRongTable();
            FenCiHelper  fch          = new FenCiHelper();

            // Both input files live under the current working directory.
            string path      = Directory.GetCurrentDirectory();
            string titlepath = path + @"\App_Data\sytitle.txt";
            string txtpath   = path + @"\App_Data\sDict.txt";

            // Read all titles up front and release the file immediately.
            string strFile;
            using (StreamReader file = new StreamReader(titlepath, System.Text.Encoding.GetEncoding("GB2312")))
            {
                strFile = file.ReadToEnd();
            }
            string[] arraFile = strFile.Split('\n');

            for (int i = 0; i < arraFile.Length; i++)
            {
                totalRecords++;
                getTotalRecordsDelegate(totalRecords);

                // Splitting on '\n' leaves a trailing '\r' on CRLF files; strip it so it does not end up
                // in the stored title or in the query text.
                string key     = arraFile[i].TrimEnd('\r');
                string keyword = fch.PanguFenCi(key);
                string[] arr   = keyword.Split('/');

                // The dictionary is re-read for every title, as in the original; the reader is now
                // disposed each time instead of leaking one file handle per title.
                using (StreamReader file1 = new StreamReader(txtpath))
                {
                    string line;
                    while ((line = file1.ReadLine()) != null)
                    {
                        string[] arrtxt = line.Split(',');
                        for (int j = 0; j < arr.Length; j++)
                        {
                            if (arrtxt[0].Equals(arr[j]))
                            {
                                // Prefix the title with the matched dictionary word and echo it to the text box.
                                // The break only leaves the segment loop, so a later matching line overwrites this.
                                keyword        = arrtxt[0] + key;
                                rich.Text     += keyword;
                                rich.ForeColor = System.Drawing.Color.Green;
                                break;
                            }
                        }
                    }
                }

                string           field        = "contents";   // field to search
                IList <Analyzer> listAnalyzer = WycLuceneAnalyzer.BuildAnalyzers();
                foreach (Analyzer analyzer in listAnalyzer)
                {
                    WycLuceneSearch.PanguQueryTest(analyzer, field, keyword, rich);
                }

                DataRow newdr  = newdt.NewRow();
                newdr["Title"] = key;
                newdr["p"]     = rich.Text;
                newdt.Rows.Add(newdr);
            }

            string sqlp = "insert into test5Table (Title,p)" +
                          " SELECT  nc.Title,nc.p" +
                          " FROM @NewBulkTestTvp AS nc";

            // Only the insert is timed.
            sw.Start();
            wznr.TableValuedToDB(newdt, sqlp, "dbo.test5Udt");
            sw.Stop();
            getTotalRecordsDelegate(totalRecords);
        }