/// <summary>Get a representative token span for the relation expressed by this triple.</summary>
/// <remarks>
/// Unlike the subject and object spans, the relation tokens may be discontinuous.
/// In that case the longest run of (nearly) adjacent relation tokens is returned;
/// a gap of exactly one token index between two relation tokens does not break a run.
/// If the relation is empty, the object span is returned instead.
/// </remarks>
/// <returns>
/// A pair (begin, end) where begin is <c>Index() - 1</c> of the first token of the chosen run
/// and end is begin plus the length accumulated for that run.
/// </returns>
public virtual Pair <int, int> RelationTokenSpan()
{
    // No relation tokens at all: fall back to the object span.
    if (relation.IsEmpty())
    {
        return ObjectTokenSpan();
    }

    // A single relation token spans exactly one position.
    if (relation.Count == 1)
    {
        int onlyIndex = relation[0].Index();
        return Pair.MakePair(onlyIndex - 1, onlyIndex);
    }

    // Best run seen so far, and the run currently being extended.
    int bestLength = 0;
    int bestStart = 0;
    int runLength = 1;
    int runStart = 0;

    for (int i = 1; i < relation.Count; ++i)
    {
        int gap = relation[i].Index() - relation[i - 1].Index();
        switch (gap)
        {
            case 1:
                // Directly adjacent token: extend the current run.
                runLength += 1;
                break;

            case 2:
                // A skip of one token is _usually_ punctuation: count the skipped
                // position and the token itself, and keep the run going.
                runLength += 2;
                break;

            default:
                // The run is broken: record it if it is the best so far, then start a new one.
                if (runLength > bestLength)
                {
                    bestLength = runLength;
                    bestStart = runStart;
                }
                runStart = i;
                runLength = 1;
                break;
        }
    }

    // The final run is never closed inside the loop, so compare it here.
    if (runLength > bestLength)
    {
        bestLength = runLength;
        bestStart = runStart;
    }

    int spanBegin = relation[bestStart].Index() - 1;
    return Pair.MakePair(spanBegin, spanBegin + bestLength);
}
/// <summary>Returns a 0-based index of the head of the tree.</summary>
/// <remarks>
/// Reads the head word label stored on the tree's label under
/// <c>TreeCoreAnnotations.HeadWordLabelAnnotation</c> and subtracts one from its index.
/// Assumes the leaves had been indexed from 1.
/// </remarks>
/// <param name="tree">The tree whose head word index is wanted.</param>
/// <returns>The head label's <c>Index()</c> minus one.</returns>
internal static int HeadIndex(Tree tree)
{
    CoreLabel treeLabel = ErasureUtils.UncheckedCast(tree.Label());
    CoreLabel head = treeLabel.Get(typeof(TreeCoreAnnotations.HeadWordLabelAnnotation));

    // Convert the (assumed) 1-based leaf index to a 0-based one.
    int oneBasedIndex = head.Index();
    return oneBasedIndex - 1;
}
/// <summary>
/// Builds the features for the single position <paramref name="loc"/>, using a
/// five-position window (two before, the current one, two after) of <paramref name="cInfo"/>.
/// </summary>
/// <remarks>
/// The character loop iterates over every character of the current position's
/// <c>CharAnnotation</c> string; it previously relied on an undeclared <c>limit</c>
/// bound while indexing into that same string.
/// </remarks>
/// <param name="cInfo">The padded sequence; positions <c>loc - 2</c> through <c>loc + 2</c> are read.</param>
/// <param name="loc">The position for which features are generated.</param>
/// <returns>
/// A list of feature strings (duplicates are kept): window character identities,
/// a length flag, per-character Unicode block/type features, punctuation/digit flags,
/// token-level position features, and the constant <c>"cliqueC"</c> indicator.
/// </returns>
protected internal virtual ICollection <string> FeaturesC(PaddedList <In> cInfo, int loc)
{
    ICollection<string> features = new List<string>();

    CoreLabel c = cInfo[loc];
    CoreLabel n = cInfo[loc + 1];
    CoreLabel n2 = cInfo[loc + 2];
    CoreLabel p = cInfo[loc - 1];
    CoreLabel p2 = cInfo[loc - 2];

    string charc = c.Get(typeof(CoreAnnotations.CharAnnotation));
    string charn = n.Get(typeof(CoreAnnotations.CharAnnotation));
    string charn2 = n2.Get(typeof(CoreAnnotations.CharAnnotation));
    string charp = p.Get(typeof(CoreAnnotations.CharAnnotation));
    string charp2 = p2.Get(typeof(CoreAnnotations.CharAnnotation));

    // Default feature set...a 5 character window
    // plus a few other language-independent features
    features.Add(charc + "-c");
    features.Add(charn + "-n1");
    features.Add(charn2 + "-n2");
    features.Add(charp + "-p");
    features.Add(charp2 + "-p2");

    // Length feature
    if (charc.Length > 1)
    {
        features.Add("length");
    }

    // Character-level class features: visit every character of the current unit.
    bool seenPunc = false;
    bool seenDigit = false;
    foreach (char charcC in charc)
    {
        seenPunc = seenPunc || Characters.IsPunctuation(charcC);
        seenDigit = seenDigit || char.IsDigit(charcC);

        string cuBlock = Characters.UnicodeBlockStringOf(charcC);
        features.Add(cuBlock + "-uBlock");

        // NOTE(review): char.GetType(...) is kept exactly as written; confirm it resolves
        // to the intended per-character type lookup in this code base.
        string cuType = char.GetType(charcC).ToString();
        features.Add(cuType + "-uType");
    }
    if (seenPunc)
    {
        features.Add("haspunc");
    }
    if (seenDigit)
    {
        features.Add("hasdigit");
    }

    // Token-level features, each capped by the corresponding Max* bound.
    string word = c.Word();
    int index = c.Index();
    features.Add(Math.Min(MaxBefore, index) + "-before");
    features.Add(Math.Min(MaxAfter, word.Length - charc.Length - index) + "-after");
    features.Add(Math.Min(MaxLength, word.Length) + "-length");

    // Indicator transition feature
    features.Add("cliqueC");
    return features;
}
/// <summary>Returns a 0-based index of the right leaf of the tree.</summary>
/// <remarks>
/// Descends through the last child at every level until a leaf is reached, then
/// returns that leaf label's index minus one. Assumes the leaves had been indexed from 1.
/// </remarks>
/// <param name="tree">The tree (or subtree) to inspect.</param>
/// <returns>The rightmost leaf label's <c>Index()</c> minus one.</returns>
internal static int RightIndex(Tree tree)
{
    Tree node = tree;

    // Walk down the right spine until we hit a leaf.
    while (!node.IsLeaf())
    {
        var children = node.Children();
        node = children[children.Length - 1];
    }

    CoreLabel leafLabel = ErasureUtils.UncheckedCast(node.Label());
    return leafLabel.Index() - 1;
}
/// <summary>Returns a 0-based index of the left leaf of the tree.</summary>
/// <remarks>
/// Descends through the first child at every level until a leaf is reached, then
/// returns that leaf label's index minus one. Assumes the leaves had been indexed from 1.
/// </remarks>
/// <param name="tree">The tree (or subtree) to inspect.</param>
/// <returns>The leftmost leaf label's <c>Index()</c> minus one.</returns>
internal static int LeftIndex(Tree tree)
{
    Tree node = tree;

    // Walk down the left spine until we hit a leaf.
    while (!node.IsLeaf())
    {
        node = node.Children()[0];
    }

    CoreLabel leafLabel = ErasureUtils.UncheckedCast(node.Label());
    return leafLabel.Index() - 1;
}
/// <summary>
/// Returns the indices of all imperative verbs in the tree <paramref name="t"/>.
/// </summary>
/// <remarks>
/// Every match of <c>ImperativePattern</c> contributes the <c>Index()</c> of the first
/// label in the matched subtree's yield. Indices are collected in a set, so each value
/// appears at most once.
/// </remarks>
/// <param name="t">The tree to search.</param>
/// <returns>The set of collected indices; empty when the pattern never matches.</returns>
private static ICollection <int> GetImperatives(Tree t)
{
    ICollection<int> imperativeIndices = new HashSet<int>();

    for (TregexMatcher matcher = ImperativePattern.Matcher(t); matcher.Find();)
    {
        IList<ILabel> matchedLabels = matcher.GetMatch().Yield();

        // The first label of the matched yield is taken as the verb.
        imperativeIndices.Add(((CoreLabel)matchedLabels[0]).Index());
    }

    return imperativeIndices;
}
/// <summary>
/// Extends the base clique features with a "wrapper" feature built from the first two
/// and last two characters of the current word.
/// </summary>
/// <remarks>
/// This helps detect ma+_+sh in dialect, as well as avoiding segmenting possessive
/// pronouns if the word starts with al-. The wrapper features are only considered for
/// words longer than three characters.
/// </remarks>
/// <param name="cInfo">The padded sequence being featurized.</param>
/// <param name="loc">The position for which features are generated.</param>
/// <returns>The collection returned by the base implementation, with any wrapper features added.</returns>
protected internal override ICollection <string> FeaturesCpC(PaddedList <IN> cInfo, int loc)
{
    ICollection<string> features = base.FeaturesCpC(cInfo, loc);
    CoreLabel current = cInfo[loc];
    string word = current.Word();

    // Short words get no wrapper feature.
    if (word.Length <= 3)
    {
        return features;
    }

    string prefix = Sharpen.Runtime.Substring(word, 0, 2);
    string suffix = Sharpen.Runtime.Substring(word, word.Length - 2);
    string wrapper = prefix + "_" + suffix;
    int position = current.Index();

    if (position == 2)
    {
        features.Add(wrapper + "-begin-wrap");
    }
    if (position == word.Length - 1)
    {
        features.Add(wrapper + "-end-wrap");
    }

    return features;
}
/// <summary>
/// Gets the index of the current node, as reported by its underlying label.
/// </summary>
/// <returns>The value of <c>_label.Index()</c>.</returns>
public int Index() => _label.Index();
/// <summary>Gets the index of the current node by delegating to its label.</summary>
/// <returns>The value of <c>label.Index()</c>.</returns>
public virtual int Index()
{
    return label.Index();
}