コード例 #1
0
        /// <summary>
        /// Handles the click of the lemma button: validates the form, then outputs the
        /// new-word list for the selected book/unit/text. If a lookup for that text is
        /// already stored it is shown directly; otherwise the submitted text is archived,
        /// screened against the word-level table and the resulting list is shown.
        /// </summary>
        /// <param name="sender">The control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        protected void lemmanew_Click(object sender, EventArgs e)
        {
            // Body text, trimmed once; every later step works on this trimmed copy.
            string txtStr = txtcontent.Value.Trim();

            // The book, unit and text selections must all have been made.
            if (!isSelected())
            {
                PageAlert("你尚未选择本次筛查的文章册次、单元或篇章!", this);
                return;
            }

            // Check the trimmed text so that whitespace-only input is rejected as well.
            if (txtStr.Length == 0)
            {
                PageAlert("你还未输入或导入需要处理的文本,请确认后再试!", this);
                txtcontent.Focus();
                lemmanew.Enabled = true;
                return;
            }

            // Replace every URL in the text with ";" so it is not treated as words.
            string regEx = @"((file|gopher|news|nntp|telnet|http|ftp|https|ftps|sftp)://)(([a-zA-Z0-9\._-]+\.[a-zA-Z]{2,6})|([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}))(:[0-9]{1,4})*(/[a-zA-Z0-9\&%_\./-~-]*)?";
            txtStr = Regex.Replace(txtStr, regEx, ";");

            /* 1. Filter the text and make sure it contains English words. */
            string ignoreWordsFile  = GetDbPath() + "words/ignoreWords.txt";
            string ordinalWordsFile = GetDbPath() + "words/OrdinalWords.txt";
            string symbolFile       = GetDbPath() + "words/symbol.txt";
            // The words that need processing, extracted from the text.
            var txtlist = TextInput.ArticleToList(txtStr, ignoreWordsFile, ordinalWordsFile, symbolFile);

            if (txtlist.Count == 0)
            {
                PageAlert("文本中不包含英文单词!", this);
                txtcontent.Focus();
                lemmanew.Enabled = true;
                return;
            }

            int maxIndex    = ddlBook.SelectedIndex + 4;
            int selectIndex = ddlBook.SelectedIndex + ddlUnit.SelectedIndex + ddlText.SelectedIndex;

            // NOTE(review): maxIndex is SelectedIndex + 4, so "maxIndex == 0" looks
            // unreachable for any normal SelectedIndex (>= -1). The condition is kept
            // exactly as written; confirm what it was meant to guard against.
            if (maxIndex == 0 || selectIndex < 3)
            {
                PageAlert("你还未选定课文对应的册、单元或篇目!", this);
                lemmanew.Enabled = true;
                return;
            }

            string wordLevel = ddlBook.SelectedValue + ddlUnit.SelectedValue + ddlText.SelectedValue;

            /* 2. Check whether this text has already been processed. */
            string titleStr = ddlBook.SelectedItem.Text + ddlUnit.SelectedItem.Text + ddlText.SelectedItem.Text;
            List<string[]> storedWords = WordBLL.GetWordLookup(wordLevel);
            if (storedWords.Count > 0)
            {
                PageAlert("“" + titleStr + "”课文已经处理过,系统将为您直接输出生词表!", this);
                inputDiv.Visible = false;
                ShowNewWordList(storedWords);
                lemmanew.Enabled = true;
                return;
            }

            /* 3. Archive the text that is about to be processed. */
            SPUser currentUser = SPContext.Current.Web.CurrentUser;
            string nameStr = currentUser.Name;
            // The user name becomes part of a file name, so replace any character
            // that is not allowed in file names (path separators in particular).
            foreach (char invalidChar in System.IO.Path.GetInvalidFileNameChars())
            {
                nameStr = nameStr.Replace(invalidChar, '_');
            }

            titleStr = TextInput.FilterSpecial(titleStr, "");
            string filePath  = GetDbPath() + @"export/";                              // directory the txt file is saved to
            string nowStr    = string.Format("{0:yyyyMMddHHmmssffff}", DateTime.Now); // timestamp down to ten-thousandths of a second
            string fileTitle = titleStr + "(" + nameStr + ")" + nowStr + ".txt";      // title(user name)timestamp.txt
            TextInput.FileWrite(fileTitle, txtStr, filePath);

            inputDiv.Visible = false;

            /* 4. Screen the words against the word/level table. */
            string fileName = GetDbPath() + "words/AllWords.txt"; // base forms, inflected forms and their levels
            Dictionary<int, object> allwordsList = WordBLL.SearchWordsWithTxt(txtlist, fileName, 0);
            DataTable wordsTable = (DataTable)allwordsList[1];    // base word, level and frequency of the words to output

            // NOTE(review): when the table has no rows, the input panel is already
            // hidden and nothing is bound or shown here — confirm this is intended.
            if (wordsTable.Rows.Count > 0)
            {
                // NOTE(review): the threshold is passed as a quoted literal; confirm the
                // DataTable expression compares it numerically against [level].
                DataRow[] drs = wordsTable.Select("[level]>'" + maxIndex + "'");
                List<string[]> candidates = new List<string[]>(drs.Length);
                foreach (DataRow dr in drs)
                {
                    int    level = (int)dr[1];
                    string signs = WordBLL.leveltoSigns(level);
                    candidates.Add(new string[2] { dr[0].ToString(), signs });
                }

                /* 5. Compare with the dynamic word list and output the new words. */
                ShowNewWordList(WordBLL.GetWordLookup(candidates, wordLevel));
            }
            lemmanew.Enabled = true;
        }

        /// <summary>
        /// Binds the given (word, level sign) pairs to the result grid, updates the
        /// total label and makes the output panel visible.
        /// </summary>
        /// <param name="entries">One two-element array per word: the word, then its level sign.</param>
        private void ShowNewWordList(List<string[]> entries)
        {
            List<words> rows = new List<words>(entries.Count);
            for (int i = 0; i < entries.Count; i++)
            {
                // The first constructor argument is the 1-based row number.
                rows.Add(new words(i + 1, entries[i][0].ToString(), entries[i][1].ToString()));
            }
            wordgv.DataSource = rows;
            wordgv.DataBind();
            totalW.Text       = "你所筛查的本篇课文共有 " + rows.Count.ToString() + " 生词";
            outputDiv.Visible = true;
            Titlelb.Text      = "生词表输出";
        }
コード例 #2
0
ファイル: txtin.aspx.cs プロジェクト: vsfsc/VAExtension
        /// <summary>
        /// Handles the click of the lemma button: validates title, user name and text,
        /// archives the text on the server, runs the word lookup against the selected
        /// reference word lists and renders the word table, legend, chart and
        /// colour-marked text.
        /// </summary>
        /// <param name="sender">The control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        protected void lemmanew_Click(object sender, EventArgs e)
        {
            lemmanew.Enabled = false;
            #region 0 Variables and form validation
            DateTime t1; // start of the step being timed
            DateTime t2; // end of the step being timed

            // Body text, trimmed once; every later step works on this trimmed copy.
            string txtStr = this.txtcontent.Value.Trim();

            // The title is missing when it is empty or still holds the placeholder text.
            if (string.IsNullOrEmpty(homecity_name.Value) || homecity_name.Value == "Type the title or click to choose it")
            {
                PageAlert("你还未选择或输入文档标题!", this);
                homecity_name.Focus();
                lemmanew.Enabled = true;
                return;
            }
            string titleStr = homecity_name.Value;

            // Anonymous use is not supported: a user name is required.
            if (string.IsNullOrEmpty(username.Value))
            {
                PageAlert("请先输入你的姓名,本系统不支持匿名操作!", this);
                username.Focus();
                lemmanew.Enabled = true;
                return;
            }
            string nameStr = username.Value;

            // Check the trimmed text so that whitespace-only input is rejected as well.
            if (txtStr.Length == 0)
            {
                PageAlert("你还未输入或导入需要处理的文本,请确认后再试!", this);
                txtcontent.Focus();
                lemmanew.Enabled = true;
                return;
            }

            // Replace every URL in the text with ";" so it is not treated as words.
            string regEx = @"((file|gopher|news|nntp|telnet|http|ftp|https|ftps|sftp)://)(([a-zA-Z0-9\._-]+\.[a-zA-Z]{2,6})|([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}))(:[0-9]{1,4})*(/[a-zA-Z0-9\&%_\./-~-]*)?";
            txtStr = Regex.Replace(txtStr, regEx, ";");
            #endregion

            #region 1 Filter the text and make sure it contains English words
            t1 = DateTime.Now;
            string ignoreWordsFile  = GetDbPath() + "words/ignoreWords.txt";
            string ordinalWordsFile = GetDbPath() + "words/OrdinalWords.txt";
            string symbolFile       = GetDbPath() + "words/symbol.txt";

            // The words that need processing, extracted from the text.
            var txtlist = TextInput.ArticleToList(txtStr, ignoreWordsFile, ordinalWordsFile, symbolFile);

            t2 = DateTime.Now;
            // Per-step timing log; it is only accumulated here and not displayed.
            string timeRecord = "文本过滤耗时:" + TimeSpend(t1, t2);

            if (txtlist.Count == 0)
            {
                PageAlert("文本中不包含需要处理的英文单词!", this);
                txtcontent.Focus();
                lemmanew.Enabled = true;
                return;
            }
            #endregion

            #region 2 Reference word-list selection
            int itemsCount = this.cblist.Items.Count;
            int maxIndex   = 0;
            for (int i = 0; i < itemsCount; i++)
            {
                if (cblist.Items[i].Selected)
                {
                    // Ends up as the 1-based position of the last selected item.
                    // Historical note (2016-10-17): high-school vocabulary was added and
                    // the level numbers were changed to 5, 6, 7, 8.
                    maxIndex = i + 1;
                }
            }
            if (maxIndex == 0)
            {
                PageAlert("你还未选定要参照的词汇表,请选择后继续!", this);
                lemmanew.Enabled = true;
                return;
            }
            #endregion

            #region 3 Archive the text that is about to be processed
            SPUser currentUser = SPContext.Current.Web.CurrentUser;
            string spName = currentUser.Name;
            if (nameStr != spName)
            {
                // Keep both the typed name and the SharePoint user name.
                nameStr = nameStr + "_" + spName;
            }
            // The name is user-supplied and becomes part of a file name, so replace any
            // character that is not allowed in file names (path separators in particular).
            foreach (char invalidChar in System.IO.Path.GetInvalidFileNameChars())
            {
                nameStr = nameStr.Replace(invalidChar, '_');
            }

            titleStr = TextInput.FilterSpecial(titleStr, "");
            string filePath  = GetDbPath() + @"export/";                              // directory the txt file is saved to
            string nowStr    = string.Format("{0:yyyyMMddHHmmssffff}", DateTime.Now); // timestamp down to ten-thousandths of a second
            string fileTitle = titleStr + "(" + nameStr + ")" + nowStr + ".txt";      // title(user name)timestamp.txt
            t1 = DateTime.Now;
            TextInput.FileWrite(fileTitle, txtStr, filePath);
            t2         = DateTime.Now;
            timeRecord = timeRecord + "文件保存时间耗时:" + TimeSpend(t1, t2);

            inputDiv.Visible = false;
            #endregion

            #region 4 Word restoration (lemma lookup)
            t1 = DateTime.Now;
            string fileName = GetDbPath() + "words/AllWords.txt"; // base forms, inflected forms and their levels
            int    isEurope = 0;                                  // the "Europe" option is currently always off
            // Per the original author's note the lookup yields three data sets: word-to-level
            // for the text, out-of-syllabus words with frequency, and processed base forms with level.
            Dictionary<int, object> allwordsList = WordBLL.SearchWordsWithTxt(txtlist, fileName, isEurope);
            t2         = DateTime.Now;
            timeRecord = timeRecord + "Lemma耗时:" + TimeSpend(t1, t2);
            #endregion

            #region 5 Result output
            if (allwordsList.Count > 0)
            {
                /* 5.1 Word table */
                t1 = DateTime.Now;
                DataTable showWordsdt = (DataTable)allwordsList[1];
                if (showWordsdt.Rows.Count == 0)
                {
                    // 5.1.1 Nothing to list: bind one blank row and turn it into a
                    // single cell spanning all columns that carries the message.
                    showWordsdt.Rows.Add(showWordsdt.NewRow());
                    wordgv.DataSource = showWordsdt;
                    wordgv.DataBind();
                    int nColumnCount = wordgv.Rows[0].Cells.Count;
                    wordgv.Rows[0].Cells.Clear();
                    wordgv.Rows[0].Cells.Add(new TableCell());
                    wordgv.Rows[0].Cells[0].ColumnSpan = nColumnCount;
                    wordgv.Rows[0].Cells[0].Text       = "本次处理的文档不包含超纲或无法处理的词汇!";
                    wordgv.RowStyle.Height             = 30;
                    wordgv.RowStyle.HorizontalAlign    = HorizontalAlign.Center;
                }
                else
                {
                    // 5.1.2 List the words whose level is <= 0 or above the selected lists.
                    DataView dv = showWordsdt.Copy().DefaultView;
                    dv.RowFilter = "[level]<=0 or [level]>" + (maxIndex + 4);
                    // Seed the sort state in ViewState before the first bind.
                    ViewState["SortOrder"] = "Frequency";
                    ViewState["OrderDire"] = "Desc";
                    string sortStr = (string)ViewState["SortOrder"] + " " + (string)ViewState["OrderDire"];
                    dv.Sort           = sortStr;
                    wordgv.DataSource = dv;
                    // NOTE(review): this counts every row of the table, not only the rows
                    // that pass RowFilter (dv.Count) — confirm which total is intended.
                    totalW.Text       = "Total:" + dv.Table.Rows.Count.ToString() + "Words";
                    wordgv.DataBind();
                    t2         = DateTime.Now;
                    timeRecord = timeRecord + "词表输出:" + TimeSpend(t1, t2);
                }

                /* 5.2 Legend, chart and colour-marked text */
                var           showWordsList = (List<List<string>>)allwordsList[0];
                DataTable     dt            = OutputResult.InitWordsAnalysisTable(showWordsList, maxIndex, symbolFile);
                StringBuilder sb            = new StringBuilder();
                for (int k = 0; k < dt.Rows.Count; k++)
                {
                    DataRow dr = dt.Rows[k];
                    // Legend entry text is "column0(column1) "; column 2 is parsed as an int
                    // and passed to Colored as its second argument.
                    sb.Append(OutputResult.Colored(dr[0] + "(" + dr[1] + ") ", int.Parse(dr[2].ToString())));
                }

                tuliDiv.InnerHtml = sb.ToString(); // legend
                BindChart(dt, SeriesChartType.Pie, Chart1);
                outDiv.InnerHtml = OutputResult.ResultDiv(showWordsList, maxIndex).ToString();
                outDiv.Visible   = true;
                outlb.Text       = titleStr;
                t1         = DateTime.Now;
                timeRecord = timeRecord + "彩色标记文本输出:" + TimeSpend(t2, t1);

                outputDiv.Visible = true;
                Titlelb.Text      = "Output";
            }
            lemmanew.Enabled = true;
            #endregion
        }
コード例 #3
0
        /// <summary>
        /// Processes one article without any page interaction: strips URLs, tokenises
        /// the text and, unless a word lookup already exists for <paramref name="wordLevel"/>,
        /// archives the text and screens its words against the word-level table.
        /// </summary>
        /// <param name="kv">The article: key is the title, value is the body text.</param>
        /// <param name="wordLevel">Identifier passed to <c>WordBLL.GetWordLookup</c> for this article.</param>
        /// <param name="dbpath">Base path that the "words/" and "export/" locations are appended to.</param>
        /// <remarks>
        /// NOTE(review): the <c>words</c> list built here is never returned or stored, so
        /// the visible outcome is limited to the calls made along the way — confirm
        /// whether a return value was intended.
        /// </remarks>
        public static void Lemma(KeyValuePair <string, string> kv, string wordLevel, string dbpath)
        {
            const string urlPattern = @"((file|gopher|news|nntp|telnet|http|ftp|https|ftps|sftp)://)(([a-zA-Z0-9\._-]+\.[a-zA-Z]{2,6})|([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}))(:[0-9]{1,4})*(/[a-zA-Z0-9\&%_\./-~-]*)?";

            string articleTitle = kv.Key;
            // Replace every URL in the body with ";" so it is not treated as words.
            string articleText = Regex.Replace(kv.Value, urlPattern, ";");

            /* 1. Turn the text into the list of words to process. */
            string ignoreWordsFile  = dbpath + "words/ignoreWords.txt";
            string ordinalWordsFile = dbpath + "words/OrdinalWords.txt";
            string symbolFile       = dbpath + "words/symbol.txt";
            var    wordTokens       = TextInput.ArticleToList(articleText, ignoreWordsFile, ordinalWordsFile, symbolFile);

            int levelThreshold = 5;

            List <words>    numberedWords = new List <words>();
            List <string[]> storedWords   = WordBLL.GetWordLookup(wordLevel);

            // A lookup already exists for this article: number its entries and stop.
            if (storedWords.Count > 0)
            {
                int position = 0;
                foreach (string[] entry in storedWords)
                {
                    position++;
                    numberedWords.Add(new words(position, entry[0].ToString(), entry[1].ToString()));
                }
                return;
            }

            /* 3. Archive the text that is about to be processed. */
            // NOTE(review): relies on SPContext.Current being available — confirm for
            // every caller of this static method.
            SPUser spUser   = SPContext.Current.Web.CurrentUser;
            string userName = spUser.Name;

            articleTitle = TextInput.FilterSpecial(articleTitle, "");
            string exportDir = dbpath + @"export/";
            // Timestamp down to ten-thousandths of a second.
            string stamp = string.Format("{0:yyyyMMddHHmmssffff}", DateTime.Now);
            // File name layout: title(user name)timestamp.txt
            // NOTE(review): the user name is not sanitised before it goes into the file name.
            string exportName = articleTitle + "(" + userName + ")" + stamp + ".txt";
            TextInput.FileWrite(exportName, articleText, exportDir);

            /* 4. Screen the words against the word/level table. */
            string allWordsFile = dbpath + "words/AllWords.txt";
            Dictionary <int, object> lookupResult = WordBLL.SearchWordsWithTxt(wordTokens, allWordsFile, 0);
            DataTable levelTable = (DataTable)lookupResult[1]; // base word, level, frequency

            if (levelTable.Rows.Count <= 0)
            {
                return;
            }

            // Rows whose level is above the threshold, mapped to (word, level sign) pairs.
            List <string[]> candidates = new List <string[]>();
            DataRow[] aboveThreshold = levelTable.Select("[level]>'" + levelThreshold + "'");
            foreach (DataRow row in aboveThreshold)
            {
                int    level = (int)row[1];
                string signs = WordBLL.leveltoSigns(level);
                candidates.Add(new string[] { row[0].ToString(), signs });
            }

            /* 5. Compare with the dynamic word list and number the resulting words. */
            List <string[]> screened = WordBLL.GetWordLookup(candidates, wordLevel);
            for (int n = 1; n <= screened.Count; n++)
            {
                string[] entry = screened[n - 1];
                numberedWords.Add(new words(n, entry[0].ToString(), entry[1].ToString()));
            }
        }