/// <summary>
/// Click handler for the "lemma" button: validates the form, tokenizes the
/// submitted text, saves a copy of it on the server, runs the lemma/level
/// lookup against the word list and renders the result (word grid, legend,
/// pie chart and colour-marked text).
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
protected void lemmanew_Click(object sender, EventArgs e)
{
    lemmanew.Enabled = false;
    try
    {
        #region 0 Form validation

        // The title must be entered; the placeholder text of the box counts as "not entered".
        if (string.IsNullOrEmpty(homecity_name.Value) || homecity_name.Value == "Type the title or click to choose it")
        {
            PageAlert("你还未选择或输入文档标题!", this);
            homecity_name.Focus();
            return;
        }
        string titleStr = homecity_name.Value;

        // A user name is mandatory.
        if (string.IsNullOrEmpty(username.Value))
        {
            PageAlert("请先输入你的姓名,本系统不支持匿名操作!", this);
            username.Focus();
            return;
        }
        string nameStr = username.Value;

        // The text to process must be present.
        if (string.IsNullOrEmpty(txtcontent.Value))
        {
            PageAlert("你还未输入或导入需要处理的文本,请确认后再试!", this);
            txtcontent.Focus();
            return;
        }

        // Replace every URL in the text with ";" so that URLs are not processed as words.
        const string urlPattern = @"((file|gopher|news|nntp|telnet|http|ftp|https|ftps|sftp)://)(([a-zA-Z0-9\._-]+\.[a-zA-Z]{2,6})|([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}))(:[0-9]{1,4})*(/[a-zA-Z0-9\&%_\./-~-]*)?";
        string txtStr = Regex.Replace(txtcontent.Value, urlPattern, ";");

        #endregion

        #region 1 Filter the text and check that it contains words to process

        DateTime t1 = DateTime.Now;
        string ignoreWordsFile = GetDbPath() + "words/ignoreWords.txt";
        string ordinalWordsFile = GetDbPath() + "words/OrdinalWords.txt";
        string symbolFile = GetDbPath() + "words/symbol.txt";

        // Convert the text into the list of words that need processing.
        var txtlist = TextInput.ArticleToList(txtStr, ignoreWordsFile, ordinalWordsFile, symbolFile);
        DateTime t2 = DateTime.Now;

        // timeRecord only accumulates timing diagnostics; it is not displayed anywhere in this method.
        string timeRecord = "文本过滤耗时:" + TimeSpend(t1, t2);

        if (txtlist.Count == 0)
        {
            PageAlert("文本中不包含需要处理的英文单词!", this);
            txtcontent.Focus();
            return;
        }

        #endregion

        #region 2 Reference word list selection

        // maxIndex is the 1-based position of the LAST selected check box (0 = nothing selected).
        // Original author's note (2016-10-17): high-school vocabulary was added and the
        // level numbers were changed to 5, 6, 7, 8 - hence the "+ 4" offset used below.
        int maxIndex = 0;
        for (int i = this.cblist.Items.Count - 1; i >= 0; i--)
        {
            if (cblist.Items[i].Selected)
            {
                maxIndex = i + 1;
                break;
            }
        }

        if (maxIndex == 0)
        {
            PageAlert("你还未选定要参照的词汇表,请选择后继续!", this);
            return;
        }

        #endregion

        #region 3 Save the text that is about to be processed

        // CurrentUser is null for anonymous requests; only append the SharePoint
        // display name when there is a user and it differs from the typed name.
        SPUser currentUser = SPContext.Current.Web.CurrentUser;
        if (currentUser != null)
        {
            string spName = currentUser.Name;
            if (nameStr != spName)
            {
                nameStr = nameStr + "_" + spName;
            }
        }

        titleStr = TextInput.FilterSpecial(titleStr, "");
        string filePath = GetDbPath() + @"export/"; // folder the text file is saved to
        string nowStr = string.Format("{0:yyyyMMddHHmmssffff}", DateTime.Now); // timestamp: date, time and 4 fractional-second digits

        // File name: title + "(" + user name(s) + ")" + timestamp + ".txt".
        // The user name is typed in by the visitor, so strip anything that is not
        // legal in a file name (path separators included) before using it.
        string fileTitle = titleStr + "(" + SanitizeFileNamePart(nameStr) + ")" + nowStr + ".txt";

        t1 = DateTime.Now;
        TextInput.FileWrite(fileTitle, txtStr, filePath);
        t2 = DateTime.Now;
        timeRecord = timeRecord + "文件保存时间耗时:" + TimeSpend(t1, t2);

        // NOTE(review): the input form is hidden here even if the lookup below returns
        // nothing, in which case no output is made visible by this method - confirm intended.
        inputDiv.Visible = false;

        #endregion

        #region 4 Lemma lookup

        t1 = DateTime.Now;
        string fileName = GetDbPath() + "words/AllWords.txt"; // word list with base forms, inflected forms and levels
        int isEurope = 0;

        // Per the original author's note the result holds three data sets:
        // word/level pairs of the text, off-list words with frequency, and processed base forms with level.
        // This method reads keys 0 and 1 only.
        Dictionary<int, object> allwordsList = WordBLL.SearchWordsWithTxt(txtlist, fileName, isEurope);
        t2 = DateTime.Now;
        timeRecord = timeRecord + "Lemma耗时:" + TimeSpend(t1, t2);

        #endregion

        #region 5 Result output

        if (allwordsList.Count > 0)
        {
            #region 5.1 Word grid

            t1 = DateTime.Now;
            DataTable showWordsdt = (DataTable)allwordsList[1];

            if (showWordsdt.Rows.Count == 0)
            {
                // Nothing to list: bind a single blank row and turn it into one
                // full-width cell that carries the "nothing found" message.
                // NOTE(review): t2 is not refreshed on this path, so the last timing
                // entry below is measured from the end of the lemma lookup.
                showWordsdt.Rows.Add(showWordsdt.NewRow());
                wordgv.DataSource = showWordsdt;
                wordgv.DataBind();

                GridViewRow messageRow = wordgv.Rows[0];
                int columnCount = messageRow.Cells.Count;
                messageRow.Cells.Clear();
                messageRow.Cells.Add(new TableCell());
                messageRow.Cells[0].ColumnSpan = columnCount;
                messageRow.Cells[0].Text = "本次处理的文档不包含超纲或无法处理的词汇!";
                wordgv.RowStyle.Height = 30;
                wordgv.RowStyle.HorizontalAlign = HorizontalAlign.Center;
            }
            else
            {
                // Show only words whose level is <= 0 or above the highest selected level.
                DataView dv = showWordsdt.Copy().DefaultView;
                dv.RowFilter = "[level]<=0 or [level]>" + (maxIndex + 4);

                // Seed the sort state in ViewState; the original author notes that
                // the grid's binding/sorting code relies on these values.
                ViewState["SortOrder"] = "Frequency";
                ViewState["OrderDire"] = "Desc";
                dv.Sort = (string)ViewState["SortOrder"] + " " + (string)ViewState["OrderDire"];

                wordgv.DataSource = dv;
                // NOTE(review): dv.Table.Rows.Count is the unfiltered row count;
                // dv.Count would be the number of rows actually listed - confirm which is wanted.
                totalW.Text = "Total:" + dv.Table.Rows.Count.ToString() + "Words";
                wordgv.DataBind();

                t2 = DateTime.Now;
                timeRecord = timeRecord + "词表输出:" + TimeSpend(t1, t2);
            }

            #endregion

            #region 5.2 Legend, chart and colour-marked text

            var showWordsList = (List<List<string>>)allwordsList[0];
            DataTable dt = OutputResult.InitWordsAnalysisTable(showWordsList, maxIndex, symbolFile);

            // Legend: one coloured "<column 0>(<column 1>) " entry per row; column 2 is parsed as the int passed to Colored.
            StringBuilder legend = new StringBuilder();
            foreach (DataRow dr in dt.Rows)
            {
                legend.Append(OutputResult.Colored(dr[0] + "(" + dr[1] + ") ", int.Parse(dr[2].ToString())));
            }
            tuliDiv.InnerHtml = legend.ToString();

            BindChart(dt, SeriesChartType.Pie, Chart1);

            outDiv.InnerHtml = OutputResult.ResultDiv(showWordsList, maxIndex).ToString();
            outDiv.Visible = true;
            outlb.Text = titleStr;

            t1 = DateTime.Now;
            timeRecord = timeRecord + "彩色标记文本输出:" + TimeSpend(t2, t1);

            outputDiv.Visible = true;
            Titlelb.Text = "Output";

            #endregion
        }

        #endregion
    }
    finally
    {
        // Re-enable the button on every exit path, including validation
        // failures and exceptions thrown by the helpers above.
        lemmanew.Enabled = true;
    }
}

/// <summary>
/// Returns <paramref name="value"/> with every character removed that
/// <see cref="System.IO.Path.GetInvalidFileNameChars"/> reports as invalid.
/// </summary>
/// <param name="value">Text that is going to become part of a file name; may be null.</param>
/// <returns>The cleaned text; an empty string when <paramref name="value"/> is null or empty.</returns>
private static string SanitizeFileNamePart(string value)
{
    if (string.IsNullOrEmpty(value))
    {
        return string.Empty;
    }

    char[] invalidChars = System.IO.Path.GetInvalidFileNameChars();
    StringBuilder cleaned = new StringBuilder(value.Length);
    foreach (char c in value)
    {
        if (Array.IndexOf(invalidChars, c) < 0)
        {
            cleaned.Append(c);
        }
    }
    return cleaned.ToString();
}