Example #1
0
        /// <summary>
        /// Dependency-parsing demo (per the original note, the neural-network parser model
        /// needs the JVM options -Xms1g -Xmx1g -Xmn512m).
        /// According to the original note, the parse is implemented internally by
        /// NeuralNetworkDependencyParser; callers may invoke
        /// NeuralNetworkDependencyParser.compute(sentence) directly, or use the MaxEnt-based
        /// parser via MaxEntDependencyParser.compute(sentence).
        /// Background reading:
        /// "A high-performance dependency parser based on neural networks"
        /// https://www.hankcs.com/nlp/parsing/neural-network-based-dependency-parser.html
        /// "Implementing a maximum-entropy dependency parser"
        /// https://www.hankcs.com/nlp/parsing/to-achieve-the-maximum-entropy-of-the-dependency-parser.html
        /// "A Java implementation of a Chinese dependency parser based on CRF sequence labelling"
        /// https://www.hankcs.com/nlp/parsing/crf-sequence-annotation-chinese-dependency-parser-implementation-based-on-java.html
        /// </summary>
        public void DependencyParser()
        {
            CoNLLSentence parsed = HanLP.parseDependency("徐先生还具体帮助他确定了把画雄鹰、松鼠和麻雀作为主攻目标。");
            Console.WriteLine(parsed);

            // The sentence can be enumerated directly: one "word --(relation)--> head" line per word.
            foreach (CoNLLWord token in parsed)
            {
                Console.WriteLine($"{token.LEMMA} --({token.DEPREL})--> {token.HEAD.LEMMA}\n");
            }

            // The words are also available as an array, so any order works; here it is walked back to front.
            CoNLLWord[] tokens = parsed.getWordArray();
            int position = tokens.Length;
            while (position-- > 0)
            {
                CoNLLWord token = tokens[position];
                Console.WriteLine($"{token.LEMMA} --({token.DEPREL})-->{token.HEAD.LEMMA}\n");
            }

            // Climb from the word at index 12 along HEAD links until a node has no HEAD.
            // The starting word itself is not printed, only its ancestors.
            for (CoNLLWord ancestor = tokens[12].HEAD; ancestor != null; ancestor = ancestor.HEAD)
            {
                if (ancestor == CoNLLWord.ROOT)
                {
                    // The ROOT node is printed as its lemma only, with no outgoing relation.
                    Console.WriteLine(ancestor.LEMMA);
                    continue;
                }

                Console.WriteLine($"{ancestor.LEMMA} --({ancestor.DEPREL})--> ");
            }
        }
Example #2
0
        /// <summary>
        /// Builds a dependency sentence from a list of terms: the terms are laid out in a
        /// feature table, tagged by <c>_crfModel</c>, and each tagged row is then linked to
        /// its head word and given a dependency relation label.
        /// </summary>
        /// <param name="terms">The terms to parse; one output word is produced per table row.</param>
        /// <returns>A <c>CoNLLSentence</c> wrapping the linked and labelled words.</returns>
        public override CoNLLSentence Parse(List <Term> terms)
        {
            var table = new Table();

            // One row per term: [0] = word, [1] = first character of the compiled POS,
            // [2] = compiled POS. Column [3] is left unset here and is read back after tagging
            // (presumably filled in by the CRF model -- confirm against _crfModel.Tag).
            table.v = new string[terms.Count][];
            for (int i = 0; i < terms.Count; i++)
            {
                var term = terms[i];
                var line = new string[4];
                table.v[i] = line;
                line[0]    = term.word;
                line[2]    = DependencyUtil.compilePOS(term.nature);
                line[1]    = line[2].Substring(0, 1);
            }
            _crfModel.Tag(table);

            var words = new CoNLLWord[table.Size];

            // All words are created before any HEAD is assigned, because a head may be a
            // word that appears later in the sentence. Word ids are 1-based.
            for (int i = 0; i < words.Length; i++)
            {
                words[i] = new CoNLLWord(i + 1, table.v[i][0], table.v[i][2], table.v[i][1]);
            }

            for (int i = 0; i < table.Size; i++)
            {
                var line = table.v[i];
                var dtag = new DTag(line[3]);

                // The tag is a fixed machine label, so compare ordinally rather than with
                // the current culture's linguistic rules.
                if (dtag.pos.EndsWith("ROOT", System.StringComparison.Ordinal))
                {
                    words[i].HEAD = CoNLLWord.ROOT;
                }
                else
                {
                    var index = ConvertOffset2Index(dtag, table, i);
                    if (index == -1)
                    {
                        // No head could be resolved: use the NULL placeholder so that
                        // HEAD is never a null reference in the labelling loop below.
                        words[i].HEAD = CoNLLWord.NULL;
                    }
                    else
                    {
                        words[i].HEAD = words[index];
                    }
                }
            }

            // Label each arc from the (word, POS) pair of the dependent and of its head.
            for (int i = 0; i < words.Length; i++)
            {
                words[i].DEPREL = BigramDependencyModel.Get(words[i].NAME, words[i].POSTAG, words[i].HEAD.NAME, words[i].HEAD.POSTAG);
            }

            return new CoNLLSentence(words);
        }