/// <summary>
/// Stratifies a sentence according to its subject-predicate relations, using Universal
/// Dependencies labels (https://universaldependencies.org/u/dep/index.html).
/// One tuple is produced for every nominal-subject dependency found in the sentence's
/// enhanced++ dependency graph.
/// </summary>
/// <param name="sentence">Sentence annotation that carries the enhanced++ dependency graph.</param>
/// <returns>
/// A list of (sentence, subject, predicate, objective) tuples. The predicate is null when the
/// subject's governor has an "expl" dependent; the objective is null when no object or clausal
/// complement hangs off the predicate. The list is empty when the sentence is null or has no
/// dependency graph attached.
/// </returns>
public List<(edu.stanford.nlp.util.CoreMap, edu.stanford.nlp.ling.IndexedWord, edu.stanford.nlp.ling.IndexedWord, edu.stanford.nlp.ling.IndexedWord)> StepSamplingDependency(edu.stanford.nlp.util.CoreMap sentence)
{
    var samplings = new List<(edu.stanford.nlp.util.CoreMap, edu.stanford.nlp.ling.IndexedWord, edu.stanford.nlp.ling.IndexedWord, edu.stanford.nlp.ling.IndexedWord)>();
    if (sentence == null)
    {
        return samplings;
    }

    var dependencies = sentence.get(enhancedPlusPlusDependenciesAnnotationClass) as edu.stanford.nlp.semgraph.SemanticGraph;
    if (dependencies == null)
    {
        // No graph on this sentence (annotation missing or of another type): nothing to stratify.
        return samplings;
    }

    java.util.Iterator itr = dependencies.typedDependencies().iterator();
    while (itr.hasNext())
    {
        var td = itr.next() as edu.stanford.nlp.trees.TypedDependency;
        string relationType = td.reln().getShortName();

        // Nominal subjects only. "nsubj:pass" is the UD v2 spelling of the v1 "nsubjpass";
        // both are accepted, in the same way both "obj" and "dobj" are accepted below.
        bool isNominalSubject = relationType == "nsubj"
                                || relationType == "nsubjpass"
                                || relationType == "nsubj:pass";
        if (!isNominalSubject)
        {
            continue;
        }

        edu.stanford.nlp.ling.IndexedWord subject = td.dep();
        edu.stanford.nlp.ling.IndexedWord predicate = td.gov();

        // A word sitting in predicate position that governs an expletive does not really
        // fill the predicate role, so the predicate is dropped for this tuple.
        edu.stanford.nlp.ling.IndexedWord expl = FindIndexedWordByDependencyType(dependencies, predicate, "expl");
        if (expl != null)
        {
            predicate = null;
        }

        // Direct object first ...
        edu.stanford.nlp.ling.IndexedWord objective = FindIndexedWordByDependencyType(dependencies, predicate, "obj", "dobj");
        if (objective == null)
        {
            // ... otherwise a (open) clausal complement of the verb or adjective acts as the object.
            objective = FindIndexedWordByDependencyType(dependencies, predicate, "ccomp", "xcomp");
        }

        // Add the tuple to the stratified collection.
        samplings.Add((sentence, subject, predicate, objective));
    }

    return samplings;
}
/// <summary>
/// Collects the modifiers of a target word from the sentence's enhanced++ dependency graph.
/// Only dependencies governed by the target word are considered, so the word is never
/// reported as a modifier of itself when it happens to be the dependent of a matching relation.
/// </summary>
/// <param name="sentence">Sentence annotation that carries the enhanced++ dependency graph.</param>
/// <param name="word">The word whose modifiers are wanted.</param>
/// <returns>
/// (modifier word, relation name) pairs for the supported relation types; an empty list when
/// the sentence, the word or the dependency graph is missing.
/// </returns>
public List<(edu.stanford.nlp.ling.IndexedWord, string)> FindTargetIndexWordModifier(edu.stanford.nlp.util.CoreMap sentence, edu.stanford.nlp.ling.IndexedWord word)
{
    var modifiers = new List<(edu.stanford.nlp.ling.IndexedWord, string)>();
    if (sentence == null || word == null)
    {
        return modifiers;
    }

    var dependencies = sentence.get(enhancedPlusPlusDependenciesAnnotationClass) as edu.stanford.nlp.semgraph.SemanticGraph;
    if (dependencies == null)
    {
        return modifiers;
    }

    List<edu.stanford.nlp.trees.TypedDependency> deps = FindRefs(dependencies, word.backingLabel());
    foreach (edu.stanford.nlp.trees.TypedDependency dep in deps)
    {
        // FindRefs returns dependencies in which the word is governor OR dependent.
        // A modifier is the dependent of a relation governed by the word, so skip the rest.
        if (!word.Equals(dep.gov()))
        {
            continue;
        }

        string relationType = dep.reln().ToString();
        switch (relationType)
        {
            case "nummod":    // numeric modifier, e.g. "200 tons", "10 pieces" (a unit modifier may also exist)
            case "amod":      // adjectival modifier, e.g. "red flower" -> "red"
            case "nmod:of":   // "of" nominal phrase, e.g. "the king of night"
            case "nmod:to":   // "to" relation, e.g. "go to school", "came to party"
            case "nmod:into": // nominal modifier phrase, e.g. "Investigation into the cause of the collision"
            case "nmod:for":  // "for" nominal modifier phrase, e.g. "vehicle inspections for damage assessment"
            case "compound":  // multiword expression (MWE)
            case "advmod":    // adverbial modifier, e.g. "safely anchored"
                modifiers.Add((dep.dep(), relationType));
                break;
        }
    }

    return modifiers;
}
/// <summary>
/// Search stub. Judging by its name it is presumably meant to locate the unit word that
/// belongs to a numeric modifier, but no detection logic exists yet: the dependencies
/// mentioning the word are looked up and then ignored, and the result is always null.
/// </summary>
/// <param name="sentence">Sentence annotation that carries the enhanced++ dependency graph.</param>
/// <param name="word">The word to inspect.</param>
/// <returns>Always null.</returns>
private edu.stanford.nlp.ling.IndexedWord FindNumericUnit(edu.stanford.nlp.util.CoreMap sentence, edu.stanford.nlp.ling.IndexedWord word)
{
    var graph = sentence.get(enhancedPlusPlusDependenciesAnnotationClass) as edu.stanford.nlp.semgraph.SemanticGraph;

    // The lookup is kept so the method still touches the same inputs as before;
    // its result is not used because unit detection is not implemented.
    FindRefs(graph, word.backingLabel());

    return null;
}
/// <summary>
/// Returns every dependency in the sentence whose short relation name equals the requested type.
/// Sentences are not required to be strictly grammatical, so the parser often yields wrong
/// dependencies; the results are nevertheless stable for the same input.
/// </summary>
/// <param name="sentence">Sentence annotation that carries the enhanced++ dependency graph.</param>
/// <param name="depTypeString">Short relation name to look for, e.g. "nsubj".</param>
/// <returns>
/// The matching dependencies in graph iteration order; an empty list when the sentence is null
/// or has no dependency graph attached.
/// </returns>
private List<edu.stanford.nlp.trees.TypedDependency> FindDeptypeFromSentence(edu.stanford.nlp.util.CoreMap sentence, string depTypeString)
{
    var tds = new List<edu.stanford.nlp.trees.TypedDependency>();
    if (sentence == null)
    {
        return tds;
    }

    var dependencies = sentence.get(enhancedPlusPlusDependenciesAnnotationClass) as edu.stanford.nlp.semgraph.SemanticGraph;
    if (dependencies == null)
    {
        // Annotation missing or of another type: nothing to search.
        return tds;
    }

    java.util.Iterator itr = dependencies.typedDependencies().iterator();
    while (itr.hasNext())
    {
        var td = itr.next() as edu.stanford.nlp.trees.TypedDependency;
        if (td.reln().getShortName() == depTypeString)
        {
            tds.Add(td);
        }
    }

    return tds;
}
/// <summary>
/// Finds the dependencies in which the target token takes part, either as governor or as dependent.
/// The token is matched by comparing its string form with the string form of each endpoint's
/// backing label for exact equality. A substring search over the rendered dependency would also
/// match unrelated tokens whose rendering merely contains the target's (e.g. "dog-3" inside "dog-31").
/// </summary>
/// <param name="dependencies">Dependency graph to search.</param>
/// <param name="token">Target token.</param>
/// <returns>
/// The dependencies that mention the token, in graph iteration order; an empty list when the
/// graph or the token is null.
/// </returns>
private List<edu.stanford.nlp.trees.TypedDependency> FindRefs(edu.stanford.nlp.semgraph.SemanticGraph dependencies, edu.stanford.nlp.ling.CoreLabel token)
{
    var tds = new List<edu.stanford.nlp.trees.TypedDependency>();
    if (dependencies == null || token == null)
    {
        return tds;
    }

    string tokenValue = token.ToString();
    java.util.Iterator itr = dependencies.typedDependencies().iterator();
    while (itr.hasNext())
    {
        var td = itr.next() as edu.stanford.nlp.trees.TypedDependency;

        // Render both endpoints the same way the target was rendered, then compare whole strings.
        string governorValue = td.gov().backingLabel().ToString();
        string dependentValue = td.dep().backingLabel().ToString();
        if (governorValue == tokenValue || dependentValue == tokenValue)
        {
            tds.Add(td);
        }
    }

    return tds;
}
/// <summary>
/// Finds the dependencies of a sentence that refer to the given token.
/// This is the sentence-level entry point for <see cref="FindRefs"/>: it pulls the enhanced++
/// dependency graph out of the sentence and delegates the search, so both methods always
/// apply the same matching rule.
/// </summary>
/// <param name="sentence">Sentence annotation that carries the enhanced++ dependency graph.</param>
/// <param name="token">Target token.</param>
/// <returns>
/// The dependencies that mention the token; an empty list when the sentence, the token or the
/// dependency graph is missing.
/// </returns>
private List<edu.stanford.nlp.trees.TypedDependency> FindDeirctRefs(edu.stanford.nlp.util.CoreMap sentence, edu.stanford.nlp.ling.CoreLabel token)
{
    if (sentence == null || token == null)
    {
        return new List<edu.stanford.nlp.trees.TypedDependency>();
    }

    var dependencies = sentence.get(enhancedPlusPlusDependenciesAnnotationClass) as edu.stanford.nlp.semgraph.SemanticGraph;
    if (dependencies == null)
    {
        // Annotation missing or of another type: nothing to search.
        return new List<edu.stanford.nlp.trees.TypedDependency>();
    }

    // The original body repeated the FindRefs loop verbatim; reuse it instead.
    return FindRefs(dependencies, token);
}
/// <summary>
/// Returns the dependent of the first dependency that is governed by the given word and whose
/// short relation name is one of the requested types.
/// </summary>
/// <param name="dependencies">Dependency graph to search; a null graph yields null.</param>
/// <param name="word">Governor word; a null word yields null.</param>
/// <param name="depTypeString">Accepted short relation names, e.g. "obj", "dobj".</param>
/// <returns>The dependent of the first matching dependency, or null when nothing matches.</returns>
private edu.stanford.nlp.ling.IndexedWord FindIndexedWordByDependencyType(edu.stanford.nlp.semgraph.SemanticGraph dependencies, edu.stanford.nlp.ling.IndexedWord word, params string[] depTypeString)
{
    if (dependencies != null && word != null)
    {
        foreach (var candidate in FindRefs(dependencies, word.backingLabel()))
        {
            string shortName = candidate.reln().getShortName();

            // The relation must be one of the requested types AND the word must be its governor
            // (same object instance), since FindRefs also returns dependencies where the word is
            // the dependent.
            if (depTypeString.Contains(shortName) && candidate.gov() == word)
            {
                return candidate.dep();
            }
        }
    }

    return null;
}