/// <summary>
/// Click handler for the lemma button. Validates the selection and the input text,
/// strips URLs from the text, and then either shows the word list already stored for
/// the selected text or screens the text against the word/level table, saves the
/// submitted text on the server and shows the resulting new-word list.
/// </summary>
/// <param name="sender">The control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void lemmanew_Click(object sender, EventArgs e)
{
    // Null-safe, trimmed copy of the submitted text. The trimmed value is what gets
    // validated and processed, so whitespace-only input counts as "no text".
    string txtStr = (txtcontent.Value ?? string.Empty).Trim();

    // Book, unit and text must all be chosen before anything else happens.
    if (!isSelected())
    {
        PageAlert("你尚未选择本次筛查的文章册次、单元或篇章!", this);
        lemmanew.Enabled = true; // every exit path re-enables the button
        return;
    }

    // No text was typed or imported.
    if (txtStr.Length == 0)
    {
        PageAlert("你还未输入或导入需要处理的文本,请确认后再试!", this);
        txtcontent.Focus();
        lemmanew.Enabled = true;
        return;
    }

    // Replace every URL in the text with ";" so URL fragments are not treated as words.
    const string urlPattern = @"((file|gopher|news|nntp|telnet|http|ftp|https|ftps|sftp)://)(([a-zA-Z0-9\._-]+\.[a-zA-Z]{2,6})|([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}))(:[0-9]{1,4})*(/[a-zA-Z0-9\&%_\./-~-]*)?";
    txtStr = Regex.Replace(txtStr, urlPattern, ";");

    /* 1. Turn the text into the list of words to process; stop if there are none. */
    string dbPath = GetDbPath();
    string ignoreWordsFile = dbPath + "words/ignoreWords.txt";
    string ordinalWordsFile = dbPath + "words/OrdinalWords.txt";
    string symbolFile = dbPath + "words/symbol.txt";
    var txtlist = TextInput.ArticleToList(txtStr, ignoreWordsFile, ordinalWordsFile, symbolFile);
    if (txtlist.Count == 0)
    {
        PageAlert("文本中不包含英文单词!", this);
        txtcontent.Focus();
        lemmanew.Enabled = true;
        return;
    }

    // Level threshold used when filtering the screening result below.
    // It is at least 3 because SelectedIndex is never below -1, so the former
    // "maxIndex == 0" test could never be true and has been dropped.
    int maxIndex = ddlBook.SelectedIndex + 4;
    int selectIndex = ddlBook.SelectedIndex + ddlUnit.SelectedIndex + ddlText.SelectedIndex;
    if (selectIndex < 3)
    {
        PageAlert("你还未选定课文对应的册、单元或篇目!", this);
        lemmanew.Enabled = true;
        return;
    }
    string wordLevel = ddlBook.SelectedValue + ddlUnit.SelectedValue + ddlText.SelectedValue;

    /* 2. If a word list already exists for this text, show it directly. */
    string titleStr = ddlBook.SelectedItem.Text + ddlUnit.SelectedItem.Text + ddlText.SelectedItem.Text;
    List<string[]> storedWords = WordBLL.GetWordLookup(wordLevel);
    if (storedWords.Count > 0)
    {
        PageAlert("“" + titleStr + "”课文已经处理过,系统将为您直接输出生词表!", this);
        ShowNewWordsOutput(storedWords);
        lemmanew.Enabled = true;
        return;
    }

    /* 3. Save the text that is about to be processed. */
    // CurrentUser is null for anonymous visitors; fall back to an empty name.
    SPUser currentUser = SPContext.Current.Web.CurrentUser;
    string nameStr = currentUser != null ? currentUser.Name : string.Empty;
    titleStr = TextInput.FilterSpecial(titleStr, "");
    string filePath = dbPath + @"export/"; // directory the .txt file is written to
    // Timestamp: year, month, day, hour, minute, second, ten-thousandths of a second.
    string nowStr = string.Format("{0:yyyyMMddHHmmssffff}", DateTime.Now);
    // File name: title + (user name) + timestamp + ".txt"
    string fileTitle = titleStr + "(" + nameStr + ")" + nowStr + ".txt";
    TextInput.FileWrite(fileTitle, txtStr, filePath);
    inputDiv.Visible = false;

    /* 4. Screen the words against the word/level table. */
    string fileName = dbPath + "words/AllWords.txt";
    Dictionary<int, object> allwordsList = WordBLL.SearchWordsWithTxt(txtlist, fileName, 0);
    DataTable wordsTable = (DataTable)allwordsList[1];

    List<string[]> newWords = new List<string[]>();
    if (wordsTable.Rows.Count > 0)
    {
        // Keep only the rows whose level is above the threshold.
        DataRow[] drs = wordsTable.Select("[level]>'" + maxIndex + "'");
        foreach (DataRow dr in drs)
        {
            int level = (int)dr[1];
            string signs = WordBLL.leveltoSigns(level);
            newWords.Add(new string[2] { dr[0].ToString(), signs });
        }
        // Compare against the dynamic word list for this text.
        newWords = WordBLL.GetWordLookup(newWords, wordLevel);
    }

    /* 5. Show the result. This now also runs when the screening returned no rows,
          so the page never ends up with both the input and output panels hidden. */
    ShowNewWordsOutput(newWords);
    lemmanew.Enabled = true;
}

/// <summary>
/// Binds the given word rows to the grid, updates the total label and switches the
/// page from the input panel to the output panel.
/// </summary>
/// <param name="wordRows">Rows whose element 0 and element 1 are passed, together with a 1-based sequence number, to the <c>words</c> constructor.</param>
private void ShowNewWordsOutput(List<string[]> wordRows)
{
    List<words> gridRows = new List<words>(wordRows.Count);
    for (int i = 0; i < wordRows.Count; i++)
    {
        gridRows.Add(new words(i + 1, wordRows[i][0].ToString(), wordRows[i][1].ToString()));
    }

    wordgv.DataSource = gridRows;
    wordgv.DataBind();
    totalW.Text = "你所筛查的本篇课文共有 " + gridRows.Count.ToString() + " 生词";
    inputDiv.Visible = false;
    outputDiv.Visible = true;
    Titlelb.Text = "生词表输出";
}
/// <summary>
/// Processes one article without touching any page controls: strips URLs from the
/// text, and, unless a word list already exists for <paramref name="wordLevel"/>,
/// saves the text under the export directory, screens its words against the
/// word/level table and passes the filtered words to
/// <c>WordBLL.GetWordLookup(list, wordLevel)</c>.
/// </summary>
/// <param name="kv">Key is the article title, value is the article text.</param>
/// <param name="wordLevel">Identifier of the text, passed to both <c>WordBLL.GetWordLookup</c> overloads.</param>
/// <param name="dbpath">Base path; "words/..." and "export/" are appended to it directly, so it has to end with a path separator.</param>
public static void Lemma(KeyValuePair<string, string> kv, string wordLevel, string dbpath)
{
    // Replace every URL in the text with ";" so URL fragments are not treated as words.
    const string urlPattern = @"((file|gopher|news|nntp|telnet|http|ftp|https|ftps|sftp)://)(([a-zA-Z0-9\._-]+\.[a-zA-Z]{2,6})|([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}))(:[0-9]{1,4})*(/[a-zA-Z0-9\&%_\./-~-]*)?";
    string txtStr = Regex.Replace(kv.Value, urlPattern, ";");

    /* 1. Turn the text into the list of words to process. */
    string ignoreWordsFile = dbpath + "words/ignoreWords.txt";
    string ordinalWordsFile = dbpath + "words/OrdinalWords.txt";
    string symbolFile = dbpath + "words/symbol.txt";
    var txtlist = TextInput.ArticleToList(txtStr, ignoreWordsFile, ordinalWordsFile, symbolFile);

    // Fixed level threshold for the filter below.
    // NOTE(review): hard-coded to 5 here — confirm this is the intended level.
    const int maxIndex = 5;

    /* 2. Nothing to do when a word list already exists for this text. */
    if (WordBLL.GetWordLookup(wordLevel).Count > 0)
    {
        return;
    }

    /* 3. Save the text that is about to be processed. */
    // SPContext.Current and CurrentUser can both be null (no request context,
    // anonymous user); fall back to an empty name instead of failing.
    string nameStr = string.Empty;
    SPContext context = SPContext.Current;
    if (context != null && context.Web.CurrentUser != null)
    {
        nameStr = context.Web.CurrentUser.Name;
    }
    string titleStr = TextInput.FilterSpecial(kv.Key, "");
    string filePath = dbpath + @"export/"; // directory the .txt file is written to
    // Timestamp: year, month, day, hour, minute, second, ten-thousandths of a second.
    string nowStr = string.Format("{0:yyyyMMddHHmmssffff}", DateTime.Now);
    // File name: title + (user name) + timestamp + ".txt"
    string fileTitle = titleStr + "(" + nameStr + ")" + nowStr + ".txt";
    TextInput.FileWrite(fileTitle, txtStr, filePath);

    /* 4. Screen the words against the word/level table. */
    string fileName = dbpath + "words/AllWords.txt";
    Dictionary<int, object> allwordsList = WordBLL.SearchWordsWithTxt(txtlist, fileName, 0);
    DataTable wordsTable = (DataTable)allwordsList[1];
    if (wordsTable.Rows.Count == 0)
    {
        return;
    }

    // Keep only the rows whose level is above the threshold.
    List<string[]> newWords = new List<string[]>();
    DataRow[] drs = wordsTable.Select("[level]>'" + maxIndex + "'");
    foreach (DataRow dr in drs)
    {
        int level = (int)dr[1];
        string signs = WordBLL.leveltoSigns(level);
        newWords.Add(new string[2] { dr[0].ToString(), signs });
    }

    // Compare against the dynamic word list for this text. The returned list is not
    // needed here (this method returns nothing); the call is kept for whatever it
    // records — presumably this is what later makes GetWordLookup(wordLevel)
    // non-empty; confirm against WordBLL.
    WordBLL.GetWordLookup(newWords, wordLevel);
}