/// <summary>
/// Stratifies a sentence according to its subject-predicate relationships.
/// Relation names follow https://universaldependencies.org/u/dep/index.html.
/// One tuple is emitted for every <c>nsubj</c> / <c>nsubjpass</c> dependency found in the
/// sentence's enhanced++ dependency graph.
/// </summary>
/// <param name="sentence">Sentence whose enhanced++ dependency annotation is read.</param>
/// <returns>
/// A list of (sentence, subject, predicate, objective) tuples. The predicate is null when the
/// governor of the subject also governs an <c>expl</c> dependency; the objective is null when no
/// <c>obj</c>/<c>dobj</c>/<c>ccomp</c>/<c>xcomp</c> dependent is found. The list is empty when
/// <paramref name="sentence"/> is null or carries no dependency graph.
/// </returns>
public List<(edu.stanford.nlp.util.CoreMap, edu.stanford.nlp.ling.IndexedWord, edu.stanford.nlp.ling.IndexedWord, edu.stanford.nlp.ling.IndexedWord)> StepSamplingDependency(edu.stanford.nlp.util.CoreMap sentence)
{
    var samplings = new List<(edu.stanford.nlp.util.CoreMap, edu.stanford.nlp.ling.IndexedWord, edu.stanford.nlp.ling.IndexedWord, edu.stanford.nlp.ling.IndexedWord)>();
    if (sentence == null)
    {
        return samplings;
    }

    // The "as" cast yields null when the annotation is absent or of another type;
    // without this guard the next dereference would throw.
    var dependencies = sentence.get(enhancedPlusPlusDependenciesAnnotationClass) as edu.stanford.nlp.semgraph.SemanticGraph;
    if (dependencies == null)
    {
        return samplings;
    }

    java.util.Collection typedDependencies = dependencies.typedDependencies();
    java.util.Iterator itr = typedDependencies.iterator();
    while (itr.hasNext())
    {
        var td = itr.next() as edu.stanford.nlp.trees.TypedDependency;
        if (td == null)
        {
            // Not a TypedDependency: nothing to inspect.
            continue;
        }

        string relationType = td.reln().getShortName();
        // Nominal subjects only.
        if (relationType != "nsubj" && relationType != "nsubjpass")
        {
            continue;
        }

        edu.stanford.nlp.ling.IndexedWord subject = td.dep();
        edu.stanford.nlp.ling.IndexedWord predicate = td.gov();

        // A word in predicate position that does not really act as a predicate
        // (it governs an expletive) may be omitted: drop the predicate in that case.
        edu.stanford.nlp.ling.IndexedWord expl = FindIndexedWordByDependencyType(dependencies, predicate, "expl");
        if (expl != null)
        {
            predicate = null;
        }

        // Direct object first ...
        edu.stanford.nlp.ling.IndexedWord objective = FindIndexedWordByDependencyType(dependencies, predicate, "obj", "dobj");
        // ... otherwise a clausal complement of the verb/adjective serves as the object.
        objective = objective ?? FindIndexedWordByDependencyType(dependencies, predicate, "ccomp", "xcomp");

        // Add to the stratified collection.
        samplings.Add((sentence, subject, predicate, objective));
    }

    return samplings;
}
/// <summary>
/// Processes the modifiers of a word and merges the resulting relation triple into the scenario.
/// Modifiers are visited in order; later ones overwrite the relation/value set by earlier ones,
/// while <c>compound</c> modifiers are folded into the key.
/// </summary>
/// <param name="word">Word whose modifiers are examined; nothing happens when it is null.</param>
/// <param name="sentence">Sentence that contains the word.</param>
/// <param name="scenario">Scenario that clusters and stores the resulting triple.</param>
private void LabelingModifiers(edu.stanford.nlp.ling.IndexedWord word, edu.stanford.nlp.util.CoreMap sentence, Scenario scenario)
{
    if (word == null)
    {
        return;
    }

    List<(edu.stanford.nlp.ling.IndexedWord, string)> modifierPairs = FindTargetIndexWordModifier(sentence, word);
    string tripleKey = word.value();
    string tripleValue = "";
    string tripleRelation = "";

    // 1. Basic decision driven by the modifier's relation type.
    foreach (var (modifier, typeName) in modifierPairs)
    {
        if (typeName == "amod")
        {
            // Adjectival modifier.
            tripleValue = modifier.value();
            tripleRelation = "amod";
        }
        else if (typeName == "advmod")
        {
            // Adverbial modifier. Original intent (not implemented): search for further
            // modifiers centred on this adverb; if any exist, the adverb is the relation.
            tripleRelation = "advmod";
        }
        else if (typeName == "nummod")
        {
            // Numeric modifier: additionally look for the unit it carries (result unused so far).
            edu.stanford.nlp.ling.IndexedWord unit = FindNumericUnit(sentence, modifier);
            tripleValue = (string)modifier.get(normalizedNamedEntityTagAnnotationClass);
            tripleRelation = "count";
        }
        else if (typeName == "compound")
        {
            // Noun phrase: join the modifier into the key.
            tripleKey = tripleKey.InsertCompound(modifier.value());
        }
    }

    // 2. Clustering: find which factor (table) the relation triple matches, then merge it.
    IScenarioFactor matchedFactor = scenario.ClusterFactor(tripleRelation, tripleKey, tripleValue);
    if (matchedFactor == null)
    {
        return;
    }
    scenario.MergeValue(tripleRelation, tripleKey, tripleValue, matchedFactor);
}
/// <summary>
/// Labels the predicate of a subject-predicate sample.
/// The predicate mainly serves to describe the subject. For a verbal predicate a relation triple
/// is built: <c>state(subject, predicate)</c> when there is no objective, otherwise
/// <c>predicate(subject, objective)</c>. Any other predicate (including a null one) is handed to
/// <c>LabelingModifiers</c> and no triple is built here.
/// </summary>
/// <param name="subject">Subject word; supplies the key of the triple.</param>
/// <param name="predicate">Predicate word; may be null.</param>
/// <param name="objective">Objective word; may be null.</param>
/// <param name="samplingSentence">Sentence the words were sampled from.</param>
/// <param name="scenario">Scenario that clusters and stores the resulting triple.</param>
private void LabelingPredicate(edu.stanford.nlp.ling.IndexedWord subject, edu.stanford.nlp.ling.IndexedWord predicate, edu.stanford.nlp.ling.IndexedWord objective, edu.stanford.nlp.util.CoreMap samplingSentence, Scenario scenario)
{
    string pos = predicate?.get(partOfSpeechAnnotationClass) as string;

    switch (pos)
    {
        // Verb forms: they express the state and trend of the subject.
        // NOTE(review): the base form "VB" is not listed - confirm that is intentional.
        case "VBD":
        case "VBG":
        case "VBN":
        case "VBP":
        case "VBZ":
            break;

        default:
            // Non-verbal or missing predicate: only its modifiers are labelled.
            // Return here so that no all-empty ("", "", "") triple is clustered or merged.
            LabelingModifiers(predicate, samplingSentence, scenario);
            return;
    }

    if (subject == null)
    {
        // Without a subject there is no key for the triple.
        return;
    }

    // predicate is non-null here: a null predicate yields a null pos and takes the default branch.
    string key = subject.value();
    string relation;
    string value;
    if (objective == null)
    {
        relation = "state";
        value = predicate.value();
    }
    else
    {
        relation = predicate.value();
        value = objective.value();
    }

    IScenarioFactor fa = scenario.ClusterFactor(relation, key, value);
    if (fa != null)
    {
        scenario.MergeValue(relation, key, value, fa);
    }
}
/// <summary>
/// Search placeholder for the unit belonging to a numeric word.
/// The dependencies referencing <paramref name="word"/> are fetched, but none of them is
/// inspected yet, so the method currently always returns null.
/// </summary>
/// <param name="sentence">Sentence whose enhanced++ dependency annotation is read.</param>
/// <param name="word">Word whose references are looked up.</param>
/// <returns>Always null in the current implementation.</returns>
private edu.stanford.nlp.ling.IndexedWord FindNumericUnit(edu.stanford.nlp.util.CoreMap sentence, edu.stanford.nlp.ling.IndexedWord word)
{
    var graph = sentence.get(enhancedPlusPlusDependenciesAnnotationClass) as edu.stanford.nlp.semgraph.SemanticGraph;
    List<edu.stanford.nlp.trees.TypedDependency> references = FindRefs(graph, word.backingLabel());

    foreach (edu.stanford.nlp.trees.TypedDependency reference in references)
    {
        // TODO: not implemented - no reference is examined yet.
    }

    return null;
}
/// <summary>
/// Finds the first dependent of <paramref name="word"/> whose relation short name is one of
/// the requested dependency types.
/// </summary>
/// <param name="dependencies">Dependency graph to search; null yields null.</param>
/// <param name="word">Governor word; null yields null.</param>
/// <param name="depTypeString">Accepted relation short names, e.g. "obj", "dobj".</param>
/// <returns>
/// The dependent of the first matching dependency governed by <paramref name="word"/>,
/// or null when there is none.
/// </returns>
private edu.stanford.nlp.ling.IndexedWord FindIndexedWordByDependencyType(edu.stanford.nlp.semgraph.SemanticGraph dependencies, edu.stanford.nlp.ling.IndexedWord word, params string[] depTypeString)
{
    if (word == null || dependencies == null)
    {
        return null;
    }

    List<edu.stanford.nlp.trees.TypedDependency> candidates = FindRefs(dependencies, word.backingLabel());
    foreach (edu.stanford.nlp.trees.TypedDependency candidate in candidates)
    {
        string shortName = candidate.reln().getShortName();
        bool typeWanted = depTypeString.Contains(shortName);

        // Only dependencies in which the word is the governor count.
        if (typeWanted && candidate.gov() == word)
        {
            return candidate.dep();
        }
    }

    return null;
}
/// <summary>
/// Collects the modifier relations of a target word from the sentence's enhanced++
/// dependency graph.
/// </summary>
/// <param name="sentence">Sentence whose dependency annotation is read.</param>
/// <param name="word">Target word whose referencing dependencies are examined.</param>
/// <returns>
/// (modifier word, relation type) pairs for the supported relation types. The list is empty when
/// <paramref name="sentence"/> or <paramref name="word"/> is null, or when the sentence carries
/// no dependency graph.
/// </returns>
public List<(edu.stanford.nlp.ling.IndexedWord, string)> FindTargetIndexWordModifier(edu.stanford.nlp.util.CoreMap sentence, edu.stanford.nlp.ling.IndexedWord word)
{
    var modifiers = new List<(edu.stanford.nlp.ling.IndexedWord, string)>();

    // Same guards as FindIndexedWordByDependencyType: nothing to search without a word or a graph.
    if (sentence == null || word == null)
    {
        return modifiers;
    }

    var dependencies = sentence.get(enhancedPlusPlusDependenciesAnnotationClass) as edu.stanford.nlp.semgraph.SemanticGraph;
    if (dependencies == null)
    {
        return modifiers;
    }

    // NOTE(review): unlike FindIndexedWordByDependencyType, there is no check that `word` is the
    // governor of each dependency - confirm FindRefs only yields dependencies governed by `word`,
    // otherwise the word itself can be reported as its own modifier.
    List<edu.stanford.nlp.trees.TypedDependency> deps = FindRefs(dependencies, word.backingLabel());
    foreach (edu.stanford.nlp.trees.TypedDependency dep in deps)
    {
        // Full relation name (including the specific part such as ":of"), not the short name.
        string relationType = dep.reln().ToString();
        switch (relationType)
        {
            case "nummod":    // numeric modifier; a unit may also be attached, e.g. "200 tons", "10 pieces"
            case "amod":      // adjectival modifier, e.g. "red flower" -> "red"
            case "nmod:of":   // "of" nominal phrase, e.g. "the king of night"
            case "nmod:to":   // "to" relation, e.g. "go to school", "came to party"
            case "nmod:into": // nominal modifier phrase, e.g. "investigation into the cause of the collision"
            case "nmod:for":  // "for" nominal modifier phrase, e.g. "vehicle inspections for damage assessment"
            case "compound":  // multiword expression
            case "advmod":    // adverbial modifier, e.g. "safely anchored"
                modifiers.Add((dep.dep(), relationType));
                break;
        }
    }

    return modifiers;
}