示例#1
0
        /// <summary>
        /// Runs the place-name role sequence through the pattern trie and inserts every
        /// accepted match into the optimum word net as a place vertex.
        /// </summary>
        /// <param name="nss">Role tags; their string forms are concatenated into the text handed to the trie.</param>
        /// <param name="vertices">Vertices indexed by the match positions; their <c>realWord</c> values form the matched name.</param>
        /// <param name="wordnet_op">Word net that receives the recognised vertices.</param>
        /// <param name="wordnet_all">Word net forwarded unchanged to <c>Insert</c>.</param>
        public static void PatternMatch(List <NS> nss, List <Vertex> vertices, WordNet wordnet_op, WordNet wordnet_all)
        {
            var pattern = new StringBuilder(nss.Count);
            foreach (var tag in nss)
            {
                pattern.Append(tag.ToString());
            }

            var vertexArr = vertices.ToArray();

            // Prefix sums of word lengths: offsets[k] is the total length of the first k words.
            // Computed once here instead of being re-summed from zero inside every match callback.
            var offsets = new int[vertexArr.Length + 1];
            for (int i = 0; i < vertexArr.Length; i++)
            {
                offsets[i + 1] = offsets[i] + vertexArr[i].realWord.Length;
            }

            trie.Match(pattern.ToString(), (begin, end, keyword) =>
            {
                // Join the words covered by the match [begin, end) into the candidate name.
                var sbName = new StringBuilder();
                for (int i = begin; i < end; i++)
                {
                    sbName.Append(vertexArr[i].realWord);
                }

                var name = sbName.ToString();
                if (IsBadCase(name))
                {
                    return;     // rejected candidate, nothing is inserted
                }

                wordnet_op.Insert(offsets[begin], new Vertex(TAG_PLACE, name, ATTRIBUTE, CoreDictionary.NS_WORD_ID), wordnet_all);
            });
        }
        /// <summary>
        /// Runs the organization-name role sequence through the pattern trie and inserts every
        /// accepted match into the optimum word net as a group vertex.
        /// </summary>
        /// <param name="nts">Role tags; their string forms are concatenated into the text handed to the trie.</param>
        /// <param name="vertices">Vertices indexed by the match positions; their <c>realWord</c> values form the matched name.</param>
        /// <param name="wordNetOptimum">Word net that receives the recognised vertices.</param>
        /// <param name="wordNetAll">Word net forwarded unchanged to <c>Insert</c>.</param>
        public static void PatternMatch(List <NT> nts, List <Vertex> vertices, WordNet wordNetOptimum, WordNet wordNetAll)
        {
            var pattern = new StringBuilder(nts.Count);
            foreach (var tag in nts)
            {
                pattern.Append(tag.ToString());
            }

            var vertexArr = vertices.ToArray();

            // Prefix sums of word lengths: offsets[k] is the total length of the first k words.
            // Computed once here instead of being re-summed from zero inside every match callback.
            var offsets = new int[vertexArr.Length + 1];
            for (int i = 0; i < vertexArr.Length; i++)
            {
                offsets[i + 1] = offsets[i] + vertexArr[i].realWord.Length;
            }

            _trie.Match(pattern.ToString(), (begin, end, keyword) =>
            {
                // Join the words covered by the match [begin, end) into the candidate name.
                var sbName = new StringBuilder();
                for (int i = begin; i < end; i++)
                {
                    sbName.Append(vertexArr[i].realWord);
                }

                var name = sbName.ToString();
                if (IsBadCase(name))
                {
                    return;     // adjust for known bad cases: the candidate is dropped
                }

                wordNetOptimum.Insert(offsets[begin], new Vertex(TAG_GROUP, name, ATTRIBUTE, CoreDictionary.NT_WORD_ID), wordNetAll);
            });
        }
示例#3
0
        /// <summary>
        /// Scans the vertex list for runs of consecutive transliterated-name pieces and inserts
        /// each run of two or more pieces into the optimum word net as a single person vertex.
        /// </summary>
        /// <param name="vertices">Vertices to scan; index 0 is skipped (the leading auxiliary node).</param>
        /// <param name="wordNetOptimum">Word net that receives the merged person vertices.</param>
        /// <param name="wordNetAll">Word net forwarded unchanged to <c>Insert</c>.</param>
        public static void Recognition(List <Vertex> vertices, WordNet wordNetOptimum, WordNet wordNetAll)
        {
            var nameBuilder = new StringBuilder();
            int pieceCount  = 0;    // number of pieces appended to the current run
            int offset      = 1;    // line number of the current vertex; starts at 1 because the first node is skipped
            int runStart    = 1;    // line number at which the current run began

            for (int idx = 1; idx < vertices.Count; idx++)
            {
                var node = vertices[idx];

                // A piece belongs to a name when its guessed nature is nrf or the
                // transliterated-person dictionary contains its text.
                bool isNamePiece = node.GuessNature() == Nature.nrf || TranslatedPersonDictionary.ContainsKey(node.realWord);

                if (isNamePiece)
                {
                    if (pieceCount <= 0)
                    {
                        runStart = offset;      // first piece of a new run: remember where it starts
                    }
                    nameBuilder.Append(node.realWord);
                    ++pieceCount;
                }
                else if (pieceCount > 0)
                {
                    // The run has ended; a single piece on its own is not inserted.
                    if (pieceCount > 1)
                    {
                        wordNetOptimum.Insert(runStart, new Vertex(TAG_PEOPLE, nameBuilder.ToString(), new WordAttr(Nature.nrf), CoreDictionary.NR_WORD_ID), wordNetAll);
                    }
                    nameBuilder.Clear();
                    pieceCount = 0;
                }

                offset += node.realWord.Length;     // advance to the line number of the next vertex
            }
            // NOTE(review): a run that reaches the last vertex is never inserted; presumably the
            // list always ends with a non-name terminator node - confirm against the caller.
        }
        /// <summary>
        /// Pattern matching for person names: converts the role tags into a pattern string
        /// (splitting vertices whose word straddles a name boundary), runs the pattern trie over
        /// it, and inserts every accepted match into the optimum word net as a person vertex.
        /// </summary>
        /// <param name="nrs">The decided role-tag sequence.</param>
        /// <param name="vertexs">The original, untagged vertex sequence. The caller's list is not modified: it is copied before the first split.</param>
        /// <param name="wordNetOptimum">Word net that receives the recognised person vertices.</param>
        /// <param name="wordNetAll">Word net forwarded unchanged to <c>Insert</c>.</param>
        public static void PatternMatch(List <NR> nrs, List <Vertex> vertexs, WordNet wordNetOptimum, WordNet wordNetAll)
        {
            var  sb     = new StringBuilder(nrs.Count); // pattern string built from the NR tag names
            var  preNR  = NR.A;
            bool backUp = false;                        // true once vertexs has been replaced by a private copy
            int  index  = 0;                            // position in vertexs; runs ahead of i after each split

            for (int i = 0; i < nrs.Count; i++, index++)
            {
                var cur = vertexs[index];
                switch (nrs[i])
                {
                case NR.U:          // preceding context and surname fused into one word
                    if (!backUp)    // copy on first write so the caller's list stays intact
                    {
                        vertexs = new List <Vertex>(vertexs);
                        backUp  = true;
                    }
                    sb.Append(NR.K.ToString());
                    sb.Append(NR.B.ToString());
                    preNR = NR.B;
                    var nowK = cur.realWord.Substring(0, cur.realWord.Length - 1);          // leading context: everything but the last character
                    var nowB = cur.realWord.Substring(cur.realWord.Length - 1);             // surname: the last character
                    // Split the fused node so that K and B each map to their own vertex.
                    vertexs[index] = new Vertex(nowK);
                    vertexs.Insert(++index, new Vertex(nowB));
                    continue;

                case NR.V:          // end of the name and following context fused into one word
                    if (!backUp)
                    {
                        vertexs = new List <Vertex>(vertexs);
                        backUp  = true;
                    }
                    if (preNR == NR.B)
                    {
                        sb.Append(NR.E.ToString());         // BE
                    }
                    else
                    {
                        sb.Append(NR.D.ToString());         // CD
                    }
                    sb.Append(NR.L.ToString());

                    // The pattern receives E/D (name end) followed by L (context), so the vertices
                    // must be split the same way: the first character closes the name and the
                    // remainder is the following context. Taking the last character instead is
                    // only equivalent for two-character words.
                    var nowED = cur.realWord.Substring(0, 1);
                    var nowL  = cur.realWord.Substring(1);
                    vertexs[index] = new Vertex(nowED);
                    vertexs.Insert(++index, new Vertex(nowL));
                    continue;

                default:
                    sb.Append(nrs[i].ToString());
                    break;
                }
                preNR = nrs[i];
            }

            var patternStr = sb.ToString();     // pattern string for the whole (possibly split) sequence
            var wordArr    = vertexs.ToArray();

            // offsetArr[k] is the total length of the words before index k. Element 0 is already
            // zero in a new array, so it is not assigned explicitly; that also keeps an empty
            // vertex list from throwing here.
            var offsetArr = new int[wordArr.Length];
            for (int i = 1; i < wordArr.Length; i++)
            {
                offsetArr[i] = offsetArr[i - 1] + wordArr[i - 1].realWord.Length;
            }

            _trie.Match(patternStr, (begin, end, value) =>
            {
                // Join the words covered by the match [begin, end) into the candidate name.
                var sbName = new StringBuilder();
                for (int i = begin; i < end; i++)
                {
                    sbName.Append(wordArr[i].realWord);
                }

                var name = sbName.ToString();
                switch (value)
                {
                case NRPattern.BCD:
                    if (name[0] == name[2])
                    {
                        return;                             // the surname and the last given-name character are assumed never to be equal
                    }
                    break;
                }
                if (IsBadCase(name))
                {
                    return;
                }

                wordNetOptimum.Insert(offsetArr[begin], new Vertex(TAG_PEOPLE, name, ATTRIBUTE, CoreDictionary.NR_WORD_ID), wordNetAll);
            });
        }