/// <summary>
/// Runs the trie over the concatenated tag sequence and, for every hit that is not a
/// bad case, inserts a vertex tagged <c>TAG_PLACE</c> into <paramref name="wordnet_op"/>.
/// </summary>
/// <param name="nss">Tag sequence; the <c>ToString()</c> of each element is concatenated into the pattern string.</param>
/// <param name="vertices">Vertices whose <c>realWord</c> values supply the matched text and the character offsets.</param>
/// <param name="wordnet_op">Word net that receives each recognized vertex through <c>Insert</c>.</param>
/// <param name="wordnet_all">Word net handed through to <c>Insert</c> as its last argument.</param>
public static void PatternMatch(List<NS> nss, List<Vertex> vertices, WordNet wordnet_op, WordNet wordnet_all)
{
    // Build the pattern string, one tag after another.
    var tagBuffer = new StringBuilder(nss.Count);
    foreach (var tag in nss)
    {
        tagBuffer.Append(tag.ToString());
    }
    var pattern = tagBuffer.ToString();

    var words = vertices.ToArray();

    trie.Match(pattern, (first, limit, hit) =>
    {
        // Join the words covered by the match [first, limit).
        var joined = new StringBuilder();
        int pos = first;
        while (pos < limit)
        {
            joined.Append(words[pos].realWord);
            pos++;
        }
        var candidate = joined.ToString();

        if (!IsBadCase(candidate))
        {
            // Character offset of the match = total length of all words before it.
            int charOffset = 0;
            int k = 0;
            while (k < first)
            {
                charOffset += words[k].realWord.Length;
                k++;
            }

            var placeVertex = new Vertex(TAG_PLACE, candidate, ATTRIBUTE, CoreDictionary.NS_WORD_ID);
            wordnet_op.Insert(charOffset, placeVertex, wordnet_all);
        }
    });
}
/// <summary>
/// Runs the trie over the concatenated tag sequence and, for every hit that is not a
/// bad case, inserts a vertex tagged <c>TAG_GROUP</c> into <paramref name="wordNetOptimum"/>.
/// </summary>
/// <param name="nts">Tag sequence; the <c>ToString()</c> of each element is concatenated into the pattern string.</param>
/// <param name="vertices">Vertices whose <c>realWord</c> values supply the matched text and the character offsets.</param>
/// <param name="wordNetOptimum">Word net that receives each recognized vertex through <c>Insert</c>.</param>
/// <param name="wordNetAll">Word net handed through to <c>Insert</c> as its last argument.</param>
public static void PatternMatch(List<NT> nts, List<Vertex> vertices, WordNet wordNetOptimum, WordNet wordNetAll)
{
    // Concatenate every tag into the string the trie is matched against.
    var tagBuffer = new StringBuilder(nts.Count);
    foreach (var tag in nts)
    {
        tagBuffer.Append(tag.ToString());
    }
    var pattern = tagBuffer.ToString();

    var words = vertices.ToArray();

    _trie.Match(pattern, (first, limit, hit) =>
    {
        // Text of the match: the words in [first, limit) glued together.
        var joined = new StringBuilder();
        int pos = first;
        while (pos < limit)
        {
            joined.Append(words[pos].realWord);
            pos++;
        }
        var candidate = joined.ToString();

        // Known bad cases are filtered out and produce no vertex.
        if (!IsBadCase(candidate))
        {
            // Character offset of the match = total length of all words before it.
            int charOffset = 0;
            int k = 0;
            while (k < first)
            {
                charOffset += words[k].realWord.Length;
                k++;
            }

            var groupVertex = new Vertex(TAG_GROUP, candidate, ATTRIBUTE, CoreDictionary.NT_WORD_ID);
            wordNetOptimum.Insert(charOffset, groupVertex, wordNetAll);
        }
    });
}
/// <summary>
/// Scans the vertices for runs of consecutive transliterated-name pieces and inserts each
/// run of two or more pieces into <paramref name="wordNetOptimum"/> as one <c>TAG_PEOPLE</c> vertex.
/// </summary>
/// <remarks>
/// A vertex counts as a piece when its guessed nature is <c>Nature.nrf</c> or its
/// <c>realWord</c> is a key of <c>TranslatedPersonDictionary</c>. A run is emitted only when a
/// non-matching vertex follows it; a run still open when the loop finishes is not emitted.
/// </remarks>
/// <param name="vertices">Vertex sequence; index 0 (the leading auxiliary node) is skipped.</param>
/// <param name="wordNetOptimum">Word net that receives the merged name vertices through <c>Insert</c>.</param>
/// <param name="wordNetAll">Word net handed through to <c>Insert</c> as its last argument.</param>
public static void Recognition(List<Vertex> vertices, WordNet wordNetOptimum, WordNet wordNetAll)
{
    var nameBuffer = new StringBuilder();
    int pieces = 0;     // number of words appended to the current run
    int row = 1;        // row of the current vertex; starts at 1 because the leading auxiliary node is skipped
    int runStartRow = 1; // row at which the current run began

    for (int idx = 1; idx < vertices.Count; idx++)
    {
        var current = vertices[idx];
        bool isNamePiece = current.GuessNature() == Nature.nrf
                           || TranslatedPersonDictionary.ContainsKey(current.realWord);

        if (isNamePiece)
        {
            if (pieces == 0)
            {
                // First piece of a new run: remember where it starts.
                runStartRow = row;
            }
            nameBuffer.Append(current.realWord);
            ++pieces;
        }
        else if (pieces > 0)
        {
            // The run has ended; a single piece on its own is not treated as a name.
            if (pieces > 1)
            {
                var person = new Vertex(TAG_PEOPLE, nameBuffer.ToString(), new WordAttr(Nature.nrf), CoreDictionary.NR_WORD_ID);
                wordNetOptimum.Insert(runStartRow, person, wordNetAll);
            }
            nameBuffer.Clear();
            pieces = 0;
        }

        // Advance to the row of the next vertex.
        row += current.realWord.Length;
    }
}
/// <summary>
/// Pattern matching: builds a pattern string from the NR tag sequence (splitting vertices
/// tagged <c>NR.U</c> or <c>NR.V</c> into two), runs the trie over it, and inserts every
/// accepted hit into <paramref name="wordNetOptimum"/> as a <c>TAG_PEOPLE</c> vertex.
/// </summary>
/// <param name="nrs">The determined tag sequence.</param>
/// <param name="vertexs">The original sequence without role tags. It is copied before the first split, so the list passed in is not modified by this method.</param>
/// <param name="wordNetOptimum">Word net that receives the recognized vertices through <c>Insert</c>.</param>
/// <param name="wordNetAll">Word net handed through to <c>Insert</c> as its last argument.</param>
public static void PatternMatch(List <NR> nrs, List <Vertex> vertexs, WordNet wordNetOptimum, WordNet wordNetAll)
{
    var sb = new StringBuilder(nrs.Count); // holds the pattern string built from the NR enum names
    var preNR = NR.A; // tag emitted for the previous position
    bool backUp = false; // true once vertexs has been replaced by a private copy
    int index = 0; // position in vertexs; runs ahead of i after every split
    for (int i = 0; i < nrs.Count; i++, index++)
    {
        var cur = vertexs[index];
        switch (nrs[i])
        {
            case NR.U: // the context before the name and the surname form one word, e.g. 这里【有关】天培的壮烈
                if (!backUp) // copy the list once before the first modification
                {
                    vertexs = new List <Vertex>(vertexs);
                    backUp = true;
                }
                sb.Append(NR.K.ToString());
                sb.Append(NR.B.ToString());
                preNR = NR.B;
                var nowK = cur.realWord.Substring(0, cur.realWord.Length - 1); // context before the name: the "有" of "有关" in the example above
                var nowB = cur.realWord.Substring(cur.realWord.Length - 1); // the last character is the surname: the "关" of "有关" in the example above
                // The name prefix and the surname share one node, so split the current node in two.
                vertexs[index] = new Vertex(nowK);
                vertexs.Insert(++index, new Vertex(nowB));
                continue; // skips the preNR assignment below; preNR was already set to NR.B
            case NR.V: // the end of the name and the following context form one word, e.g. "龚学平等" in "龚学平等领导", "邓颖超生" in "邓颖超生前"
                if (!backUp)
                {
                    vertexs = new List <Vertex>(vertexs);
                    backUp = true;
                }
                if (preNR == NR.B)
                {
                    sb.Append(NR.E.ToString()); // BE
                }
                else
                {
                    sb.Append(NR.D.ToString()); // CD
                }
                sb.Append(NR.L.ToString());
                var nowED = cur.realWord.Substring(cur.realWord.Length - 1); // takes the last character
                var nowL = cur.realWord.Substring(0, cur.realWord.Length - 1); // everything except the last character
                // NOTE(review): the pattern gets E/D followed by L, but the vertices are stored as
                // nowL (prefix) followed by nowED (last character). For a two-character word this
                // yields [first char, second char]; for longer words the split point is before the
                // last character rather than after the first — confirm this is intended.
                vertexs[index] = new Vertex(nowL); // 1.
                vertexs.Insert(++index, new Vertex(nowED)); // 2. these two lines run in the opposite order to the original Java code
                continue; // skips the preNR assignment below, so preNR keeps its earlier value
            default:
                sb.Append(nrs[i].ToString());
                break;
        }
        preNR = nrs[i];
    }
    var patternStr = sb.ToString(); // the pattern string formed by joining all nodes
    var wordArr = vertexs.ToArray();
    // offsetArr[i] is the total realWord length of all vertices before index i.
    var offsetArr = new int[wordArr.Length];
    offsetArr[0] = 0;
    for (int i = 1; i < wordArr.Length; i++)
    {
        offsetArr[i] = offsetArr[i - 1] + wordArr[i - 1].realWord.Length;
    }
    _trie.Match(patternStr, (begin, end, value) =>
    {
        // Join the words covered by the match [begin, end) into the candidate name.
        var sbName = new StringBuilder();
        for (int i = begin; i < end; i++)
        {
            sbName.Append(wordArr[i].realWord);
        }
        var name = sbName.ToString();
        switch (value)
        {
            case NRPattern.BCD:
                if (name[0] == name[2])
                {
                    return; // the surname and the last character of the given name are assumed never to be equal
                }
                break;
        }
        if (IsBadCase(name))
        {
            return;
        }
        wordNetOptimum.Insert(offsetArr[begin], new Vertex(TAG_PEOPLE, name, ATTRIBUTE, CoreDictionary.NR_WORD_ID), wordNetAll);
    });
}