/// <summary>
/// Pronoun replacement over a sequence of text entries.
/// Each entry is segmented on its own and handed to the
/// ReplacePronouns(ref Tuple, Tuple) overload together with a "previous" tuple.
/// </summary>
/// <param name="content">Entries to process, in order.</param>
/// <param name="buildWithLast">
/// When true, the tuple of the entry just processed becomes the "previous" tuple
/// for the next entry; when false, every entry is processed against an empty tuple.
/// </param>
/// <returns>One result string per entry, in input order.</returns>
public string[] ReplacePronouns(string[] content, bool buildWithLast = true)
{
    Framework.Text.Segment.IctclasSegment segmenter = Text.Segment.IctclasSegment.GetInstance();
    string[] replaced = new string[content.Length];

    // The first entry has no predecessor, so it is paired with an empty tuple.
    Tuple<string[], string[]> previous = new Tuple<string[], string[]>(new string[0], new string[0]);

    int position = 0;
    foreach (string entry in content)
    {
        Tuple<string[], string[]> current = segmenter.SplitToArray(entry);
        replaced[position++] = ReplacePronouns(ref current, previous);

        // "current" is passed by ref, so it is carried over only after the call has returned.
        if (buildWithLast)
        {
            previous = current;
        }
    }

    return replaced;
}
/// <summary>
/// Debug helper: reads "代词替换测试.txt" as UTF-8, splits it into clauses on line breaks
/// and punctuation, and runs pronoun replacement clause by clause, each clause being
/// paired with the tuple of the clause before it.
/// The input file is then overwritten with the clauses joined by "\r\n", and the
/// replacement results are written to "代词替换测试_result.txt" as UTF-8.
/// </summary>
public void Debug_TestReplace()
{
    const string inputPath = "代词替换测试.txt";
    string[] clauseSeparators = { "\r\n", "\n", ",", ".", "。", "," };

    string rawText = File.ReadAllText(inputPath, Encoding.UTF8);
    Framework.Text.Segment.IctclasSegment segmenter = Text.Segment.IctclasSegment.GetInstance();
    string[] clauses = rawText.Split(clauseSeparators, StringSplitOptions.RemoveEmptyEntries);

    StringBuilder results = new StringBuilder();

    // The first clause has no predecessor, so it is paired with an empty tuple.
    Tuple<string[], string[]> previous = new Tuple<string[], string[]>(new string[0], new string[0]);
    foreach (string clause in clauses)
    {
        Tuple<string[], string[]> current = segmenter.SplitToArray(clause);
        results.AppendLine(ReplacePronouns(ref current, previous));
        previous = current;
    }

    // Rewrites the input file one clause per line (no explicit encoding argument here).
    File.WriteAllText(inputPath, string.Join("\r\n", clauses));
    File.WriteAllText("代词替换测试_result.txt", results.ToString(), Encoding.UTF8);
}
/// <summary>
/// Get the sentences that contain pronouns.
/// Debug helper: splits the file into sentences on line breaks and sentence-ending
/// punctuation, segments each sentence, and keeps those in which at least one token
/// carries the tag rr, rz, rzt, rzs or rzv. The kept sentences are written, one per
/// line, to "代词替换测试_Pronouns.txt" as UTF-8.
/// </summary>
/// <param name="fileName">Path of the text file to scan; it is read as UTF-8.</param>
public void Debug_GetAllPronouns(string fileName)
{
    Framework.Text.Segment.IctclasSegment segmenter = Text.Segment.IctclasSegment.GetInstance();

    string[] sentenceBreaks = { "\r\n", "\r", "\n", ".", "。", "!", "!" };
    string[] sentences = File.ReadAllText(fileName, Encoding.UTF8)
        .Split(sentenceBreaks, StringSplitOptions.RemoveEmptyEntries);

    StringBuilder output = new StringBuilder();
    foreach (string sentence in sentences)
    {
        string trimmed = sentence.Trim();

        // Fragments of at most one character after trimming are skipped.
        if (trimmed.Length <= 1)
        {
            continue;
        }

        Tuple<string[], string[]> segmented = segmenter.SplitToArray(trimmed);

        bool hasPronoun = false;
        foreach (string tag in segmented.Item2)
        {
            switch (tag)
            {
                case "rr":
                case "rz":
                case "rzt":
                case "rzs":
                case "rzv":
                    hasPronoun = true;
                    break;
            }

            // One matching tag is enough; stop scanning this sentence.
            if (hasPronoun)
            {
                break;
            }
        }

        // The sentence is emitted as it was split, i.e. untrimmed.
        if (hasPronoun)
        {
            output.AppendLine(sentence);
        }
    }

    File.WriteAllText("代词替换测试_Pronouns.txt", output.ToString(), Encoding.UTF8);
}
/// <summary>
/// Get the question types of a sentence.
/// The sentence is segmented, the second array of the segmenter result (presumably the
/// per-token tags; confirm against the segmenter) is joined with single spaces, and
/// every entry of the question rules whose key contains that joined string contributes
/// its values to the result.
/// </summary>
/// <param name="sentence">Sentence to classify.</param>
/// <returns>
/// The distinct values collected from all matching rules; an empty list when the
/// segmenter yields no tags or no rule matches.
/// </returns>
public List<string> GetQuestionTypes(string sentence)
{
    List<string> qTypes = new List<string>();
    Framework.Text.Segment.IctclasSegment ictclasSegment = IctclasSegment.GetInstance();
    string[] sentenceSemantic = ictclasSegment.SplitToArray(sentence).Item2;

    if (sentenceSemantic != null && sentenceSemantic.Length > 0)
    {
        string text = string.Join(" ", sentenceSemantic);
        foreach (DictionaryEntry dictionaryEntry in _questionRules)
        {
            // NOTE(review): this tests whether the rule key contains the sentence's tag
            // sequence, not the reverse; confirm that this direction is intended.
            if (dictionaryEntry.Key.ToString().Contains(text))
            {
                qTypes.AddRange(((Iveely.Framework.DataStructure.SortedList<string>)dictionaryEntry.Value).ToArray());
            }
        }
    }

    // Several rules may yield the same type; duplicates are removed before returning.
    return qTypes.Distinct().ToList();
}