コード例 #1
0
        /// <summary>
        /// Stratifies a sentence by its subject-predicate relationships: every
        /// <c>nsubj</c> / <c>nsubjpass</c> relation in the sentence's dependency graph
        /// yields one (sentence, subject, predicate, objective) sampling.
        /// Relation names follow https://universaldependencies.org/u/dep/index.html
        /// </summary>
        /// <param name="sentence">Sentence whose dependency graph is read.</param>
        /// <returns>
        /// One tuple (sentence, subject, predicate, objective) per subject relation.
        /// <c>predicate</c> is null when the governor has an <c>expl</c> dependent;
        /// <c>objective</c> is null when neither an object nor a clausal complement is found.
        /// The list is empty when the sentence is null or carries no dependency graph.
        /// </returns>
        public List <(edu.stanford.nlp.util.CoreMap, edu.stanford.nlp.ling.IndexedWord, edu.stanford.nlp.ling.IndexedWord, edu.stanford.nlp.ling.IndexedWord)> StepSamplingDependency(edu.stanford.nlp.util.CoreMap sentence)
        {
            var samplings = new List <(edu.stanford.nlp.util.CoreMap, edu.stanford.nlp.ling.IndexedWord, edu.stanford.nlp.ling.IndexedWord, edu.stanford.nlp.ling.IndexedWord)>();

            // The "as" cast yields null when the annotation is absent or of another type;
            // return the empty result instead of dereferencing null.
            var dependencies = sentence?.get(enhancedPlusPlusDependenciesAnnotationClass) as edu.stanford.nlp.semgraph.SemanticGraph;
            if (dependencies == null)
            {
                return samplings;
            }

            java.util.Iterator itr = dependencies.typedDependencies().iterator();
            while (itr.hasNext())
            {
                var td = itr.next() as edu.stanford.nlp.trees.TypedDependency;
                if (td == null || td.reln() == null)
                {
                    continue;
                }

                string relationType = td.reln().getShortName();

                // Only nominal subjects open a new sampling.
                if (relationType != "nsubj" && relationType != "nsubjpass")
                {
                    continue;
                }

                edu.stanford.nlp.ling.IndexedWord subject   = td.dep();
                edu.stanford.nlp.ling.IndexedWord predicate = td.gov();

                // A governor with an expletive dependent does not fill the predicate role,
                // so the predicate is left out of the sampling.
                if (FindIndexedWordByDependencyType(dependencies, predicate, "expl") != null)
                {
                    predicate = null;
                }

                // Prefer a direct object; otherwise fall back to a clausal complement
                // (ccomp / open clausal complement xcomp) acting as the object.
                // Both lookups return null when the predicate was dropped above.
                edu.stanford.nlp.ling.IndexedWord objective =
                    FindIndexedWordByDependencyType(dependencies, predicate, "obj", "dobj")
                    ?? FindIndexedWordByDependencyType(dependencies, predicate, "ccomp", "xcomp");

                samplings.Add((sentence, subject, predicate, objective));
            }
            return samplings;
        }
コード例 #2
0
        /// <summary>
        /// Processes the modifiers of <paramref name="word"/>: builds a
        /// (relation, key, value) triple from them and merges it into
        /// <paramref name="scenario"/> when the scenario has a matching factor.
        /// </summary>
        /// <remarks>
        /// The key starts as the word's own text and is extended by every "compound" modifier.
        /// "amod" and "nummod" set both relation and value, "advmod" sets only the relation.
        /// When several of these are present, the last one processed wins.
        /// </remarks>
        /// <param name="word">Word whose modifiers are examined; the call is a no-op when null.</param>
        /// <param name="sentence">Sentence passed on to the modifier lookup.</param>
        /// <param name="scenario">Scenario asked for a matching factor and updated with the triple.</param>
        private void LabelingModifiers(edu.stanford.nlp.ling.IndexedWord word, edu.stanford.nlp.util.CoreMap sentence, Scenario scenario)
        {
            if (word == null)
            {
                return;
            }

            List <(edu.stanford.nlp.ling.IndexedWord, string)> modifiers = FindTargetIndexWordModifier(sentence, word);

            string key      = word.value();
            string value    = "";
            string relation = "";

            // 1. Derive relation / value from each modifier's dependency type.
            foreach (var(modifier, typeName) in modifiers)
            {
                switch (typeName)
                {
                case "amod":     // adjectival modifier
                    value    = modifier.value();
                    relation = "amod";
                    break;

                case "advmod":   // adverbial modifier
                    // TODO: search for further modifiers centred on this adverb; if any
                    // exist, the adverb itself should act as the relation.
                    relation = "advmod";
                    break;

                case "nummod":   // numeric modifier; a unit word is looked up as well
                {
                    // TODO: the unit is looked up but not yet folded into the value.
                    edu.stanford.nlp.ling.IndexedWord unit = FindNumericUnit(sentence, modifier);

                    // Fall back to the raw token text when no normalized tag is attached,
                    // so value never becomes null.
                    value    = modifier.get(normalizedNamedEntityTagAnnotationClass) as string ?? modifier.value();
                    relation = "count";
                }
                break;

                case "compound": // noun compound: fold the modifier into the key
                    key = key.InsertCompound(modifier.value());
                    break;
                }
            }

            // 2. Clustering: find which factor (table) the relation triple matches.
            IScenarioFactor pFactor = scenario.ClusterFactor(relation, key, value);

            if (pFactor != null)
            {
                scenario.MergeValue(relation, key, value, pFactor);
            }
        }
コード例 #3
0
        /// <summary>
        /// Turns a predicate into a relation triple about its subject and merges it into
        /// <paramref name="scenario"/> when the scenario has a matching factor.
        /// For the verb tags VBD, VBG, VBN, VBP and VBZ the triple is
        /// (subject, "state", predicate) when there is no object, otherwise
        /// (subject, predicate, object). Any other tag, including a missing predicate,
        /// is delegated to <c>LabelingModifiers</c>.
        /// </summary>
        /// <param name="subject">Subject word; its text becomes the key of the triple.</param>
        /// <param name="predicate">Predicate word; may be null.</param>
        /// <param name="objective">Object word; may be null.</param>
        /// <param name="samplingSentence">Sentence the words belong to.</param>
        /// <param name="scenario">Scenario asked for a matching factor and updated with the triple.</param>
        private void LabelingPredicate(edu.stanford.nlp.ling.IndexedWord subject, edu.stanford.nlp.ling.IndexedWord predicate, edu.stanford.nlp.ling.IndexedWord objective, edu.stanford.nlp.util.CoreMap samplingSentence, Scenario scenario)
        {
            string pos = predicate?.get(partOfSpeechAnnotationClass) as string;
            string key, relation, value;

            switch (pos)
            {
            // Verb forms describe the state or trend of the subject.
            case "VBD":
            case "VBG":
            case "VBN":
            case "VBP":
            case "VBZ":
                key = subject.value();
                if (objective == null)
                {
                    relation = "state";
                    value    = predicate.value();
                }
                else
                {
                    relation = predicate.value();
                    value    = objective.value();
                }
                break;

            default:
                // LabelingModifiers clusters and merges its own triple. No triple is
                // built here, so stop instead of clustering an all-empty one.
                LabelingModifiers(predicate, samplingSentence, scenario);
                return;
            }

            IScenarioFactor fa = scenario.ClusterFactor(relation, key, value);

            if (fa != null)
            {
                scenario.MergeValue(relation, key, value, fa);
            }
        }
コード例 #4
0
        /// <summary>
        /// Searches for the unit word that belongs to a numeric modifier.
        /// Placeholder: the relations referencing <paramref name="word"/> are fetched but
        /// not inspected yet, so the method currently always returns null.
        /// </summary>
        /// <param name="sentence">Sentence whose dependency graph is read.</param>
        /// <param name="word">Numeric word whose unit is wanted.</param>
        /// <returns>Always null for now.</returns>
        private edu.stanford.nlp.ling.IndexedWord FindNumericUnit(edu.stanford.nlp.util.CoreMap sentence, edu.stanford.nlp.ling.IndexedWord word)
        {
            // Same null tolerance as FindIndexedWordByDependencyType: missing input means "nothing found".
            if (sentence == null || word == null)
            {
                return null;
            }

            var dependencies = sentence.get(enhancedPlusPlusDependenciesAnnotationClass) as edu.stanford.nlp.semgraph.SemanticGraph;
            if (dependencies == null)
            {
                return null;
            }

            List <edu.stanford.nlp.trees.TypedDependency> tds = FindRefs(dependencies, word.backingLabel());
            if (tds == null)
            {
                return null;
            }

            foreach (edu.stanford.nlp.trees.TypedDependency td in tds)
            {
                // TODO: pick the relation that links the number to its unit and return that word.
            }
            return null;
        }
コード例 #5
0
        /// <summary>
        /// Finds the first dependent of <paramref name="word"/> whose relation short name
        /// is one of <paramref name="depTypeString"/>.
        /// </summary>
        /// <param name="dependencies">Dependency graph to search.</param>
        /// <param name="word">Governor word whose dependents are examined.</param>
        /// <param name="depTypeString">Accepted relation short names, e.g. "obj", "dobj".</param>
        /// <returns>
        /// The dependent word of the first matching relation, or null when nothing matches
        /// or when any argument is null.
        /// </returns>
        private edu.stanford.nlp.ling.IndexedWord FindIndexedWordByDependencyType(edu.stanford.nlp.semgraph.SemanticGraph dependencies, edu.stanford.nlp.ling.IndexedWord word, params string[] depTypeString)
        {
            if (dependencies == null || word == null || depTypeString == null)
            {
                return null;
            }

            List <edu.stanford.nlp.trees.TypedDependency> deps = FindRefs(dependencies, word.backingLabel());

            foreach (edu.stanford.nlp.trees.TypedDependency dep in deps)
            {
                if (!depTypeString.Contains(dep.reln().getShortName()))
                {
                    continue;
                }

                // Only relations governed by the word count. Compare by value:
                // "==" on IndexedWord would only test reference identity.
                if (object.Equals(dep.gov(), word))
                {
                    return dep.dep();
                }
            }
            return null;
        }
コード例 #6
0
        /// <summary>
        /// Finds the words that modify <paramref name="word"/> in the sentence's
        /// dependency graph, together with the relation that attaches each of them.
        /// Only these relations are reported: nummod, amod, nmod:of, nmod:to, nmod:into,
        /// nmod:for, compound and advmod.
        /// </summary>
        /// <param name="sentence">Sentence whose dependency graph is read.</param>
        /// <param name="word">Word whose modifiers are wanted.</param>
        /// <returns>
        /// (modifier word, relation type) pairs; empty when an argument is null, the
        /// sentence carries no dependency graph, or no modifier is attached.
        /// </returns>
        public List <(edu.stanford.nlp.ling.IndexedWord, string)> FindTargetIndexWordModifier(edu.stanford.nlp.util.CoreMap sentence, edu.stanford.nlp.ling.IndexedWord word)
        {
            var modifiers = new List <(edu.stanford.nlp.ling.IndexedWord, string)>();

            if (sentence == null || word == null)
            {
                return modifiers;
            }

            var dependencies = sentence.get(enhancedPlusPlusDependenciesAnnotationClass) as edu.stanford.nlp.semgraph.SemanticGraph;
            if (dependencies == null)
            {
                return modifiers;
            }

            List <edu.stanford.nlp.trees.TypedDependency> deps = FindRefs(dependencies, word.backingLabel());

            foreach (edu.stanford.nlp.trees.TypedDependency dep in deps)
            {
                // Keep only relations governed by the word, as FindIndexedWordByDependencyType
                // does; otherwise a relation in which the word is the dependent would
                // report the word as its own modifier.
                // NOTE(review): assumes FindRefs may return relations in both directions - confirm.
                if (!object.Equals(dep.gov(), word))
                {
                    continue;
                }

                // Full relation name (including the specific part such as ":of"), not the short name.
                string relationType = dep.reln().ToString();
                switch (relationType)
                {
                case "nummod":      // numeric modifier; a unit may follow, e.g. "200 tons", "10 pieces"
                case "amod":        // adjectival modifier, e.g. "red flower" -> "red"
                case "nmod:of":     // "of" noun phrase, e.g. "the king of night"
                case "nmod:to":     // "to" relation, e.g. "go to school", "came to party"
                case "nmod:into":   // nominal modifier, e.g. "investigation into the cause of the collision"
                case "nmod:for":    // "for" nominal modifier, e.g. "vehicle inspections for damage assessment"
                case "compound":    // multiword expression (MWE)
                case "advmod":      // adverbial modifier, e.g. "safely anchored"
                    modifiers.Add((dep.dep(), relationType));
                    break;
                }
            }
            return modifiers;
        }