Ejemplo n.º 1
0
 /// <summary>Computes a representative token span for the relation of this triple.</summary>
 /// <remarks>
 /// The relation tokens are not necessarily adjacent, so one span cannot always cover them
 /// exactly. The tokens are scanned in order and grouped into chunks, and the span of the
 /// longest chunk is returned. Two neighbouring tokens whose indices differ by exactly two
 /// stay in the same chunk (the skipped token is usually punctuation) and the skipped
 /// position is counted in the chunk length.
 /// If the relation has no tokens, the object span is returned instead.
 /// </remarks>
 /// <returns>
 /// A pair whose first element is <c>Index() - 1</c> of the first token of the chosen chunk
 /// and whose second element is that value plus the chunk length.
 /// </returns>
 public virtual Pair <int, int> RelationTokenSpan()
 {
     // No relation tokens at all: fall back to the object span.
     if (relation.IsEmpty())
     {
         return ObjectTokenSpan();
     }

     // Exactly one token: the span covers just that token.
     if (relation.Count == 1)
     {
         int onlyIndex = relation[0].Index();
         return Pair.MakePair(onlyIndex - 1, onlyIndex);
     }

     // Best chunk seen so far, and the chunk currently being extended.
     int bestLength = 0;
     int bestStart  = 0;
     int runLength  = 1;
     int runStart   = 0;

     for (int pos = 1; pos < relation.Count; ++pos)
     {
         int gap = relation[pos].Index() - relation[pos - 1].Index();
         switch (gap)
         {
             case 1:
                 // Directly adjacent token: extend the current chunk.
                 runLength += 1;
                 break;

             case 2:
                 // A skip of one token is _usually_ punctuation; keep the chunk going
                 // and count the skipped position as part of it.
                 runLength += 2;
                 break;

             default:
                 // The chunk is broken here: record it if it is the best so far,
                 // then start a new chunk at the current token.
                 if (runLength > bestLength)
                 {
                     bestLength = runLength;
                     bestStart  = runStart;
                 }
                 runStart  = pos;
                 runLength = 1;
                 break;
         }
     }

     // The chunk still open when the loop ends may be the longest one.
     if (runLength > bestLength)
     {
         bestLength = runLength;
         bestStart  = runStart;
     }

     int spanBegin = relation[bestStart].Index() - 1;
     return Pair.MakePair(spanBegin, spanBegin + bestLength);
 }
Ejemplo n.º 2
0
        /// <summary>Returns the 0-based index of the head word of <paramref name="tree"/>.</summary>
        /// <remarks>
        /// Looks up the label stored under <c>TreeCoreAnnotations.HeadWordLabelAnnotation</c> on the
        /// tree's own label and subtracts one from its index. Assumes the leaves were indexed from 1.
        /// </remarks>
        /// <param name="tree">Tree whose label carries the head-word annotation.</param>
        /// <returns>The head word label's <c>Index()</c> minus one.</returns>
        internal static int HeadIndex(Tree tree)
        {
            CoreLabel treeLabel = ErasureUtils.UncheckedCast(tree.Label());
            CoreLabel headWord  = treeLabel.Get(typeof(TreeCoreAnnotations.HeadWordLabelAnnotation));

            // Shift from the 1-based leaf numbering to a 0-based index.
            int oneBasedIndex = headWord.Index();
            return oneBasedIndex - 1;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Builds the feature strings for position <paramref name="loc"/> of <paramref name="cInfo"/>.
        /// </summary>
        /// <remarks>
        /// The features produced are, in order:
        /// the <c>CharAnnotation</c> of the current position and of the two positions on either side
        /// (suffixed "-c", "-n1", "-n2", "-p", "-p2");
        /// "length" when the current <c>CharAnnotation</c> is longer than one char;
        /// a Unicode block ("-uBlock") and type ("-uType") feature for every char of the current
        /// <c>CharAnnotation</c>, plus "haspunc" / "hasdigit" when any of those chars qualifies;
        /// the token-level "-before", "-after" and "-length" features, each capped by
        /// <c>MaxBefore</c>, <c>MaxAfter</c> and <c>MaxLength</c> respectively;
        /// and the constant "cliqueC".
        /// </remarks>
        /// <param name="cInfo">
        /// Sequence being featurized. Positions <c>loc - 2</c> through <c>loc + 2</c> are read;
        /// presumably the padded list supplies pad elements outside the real range (confirm).
        /// </param>
        /// <param name="loc">Position of the current element in <paramref name="cInfo"/>.</param>
        /// <returns>A new list holding the feature strings.</returns>
        protected internal virtual ICollection <string> FeaturesC(PaddedList <In> cInfo, int loc)
        {
            ICollection <string> features = new List <string>();
            CoreLabel            c        = cInfo[loc];
            CoreLabel            n        = cInfo[loc + 1];
            CoreLabel            n2       = cInfo[loc + 2];
            CoreLabel            p        = cInfo[loc - 1];
            CoreLabel            p2       = cInfo[loc - 2];
            string charc  = c.Get(typeof(CoreAnnotations.CharAnnotation));
            string charn  = n.Get(typeof(CoreAnnotations.CharAnnotation));
            string charn2 = n2.Get(typeof(CoreAnnotations.CharAnnotation));
            string charp  = p.Get(typeof(CoreAnnotations.CharAnnotation));
            string charp2 = p2.Get(typeof(CoreAnnotations.CharAnnotation));

            // Default feature set...a 5 character window
            // plus a few other language-independent features
            features.Add(charc + "-c");
            features.Add(charn + "-n1");
            features.Add(charn2 + "-n2");
            features.Add(charp + "-p");
            features.Add(charp2 + "-p2");
            // Length feature
            if (charc.Length > 1)
            {
                features.Add("length");
            }
            // Character-level class features
            bool seenPunc  = false;
            bool seenDigit = false;

            // Walk every char of the current CharAnnotation; the loop is bounded by the
            // length of the string that is indexed inside the body.
            for (int i = 0; i < charc.Length; ++i)
            {
                char charcC = charc[i];
                seenPunc  = seenPunc || Characters.IsPunctuation(charcC);
                seenDigit = seenDigit || char.IsDigit(charcC);
                string cuBlock = Characters.UnicodeBlockStringOf(charcC);
                features.Add(cuBlock + "-uBlock");
                // NOTE(review): System.Char has no static GetType(char) overload; confirm what this
                // call resolves to in the project (char.GetUnicodeCategory may be the intended
                // equivalent, but its string form would change the feature values).
                string cuType = char.GetType(charcC).ToString();
                features.Add(cuType + "-uType");
            }
            if (seenPunc)
            {
                features.Add("haspunc");
            }
            if (seenDigit)
            {
                features.Add("hasdigit");
            }
            // Token-level features
            string word  = c.Word();
            int    index = c.Index();

            // Each count is capped so that large values collapse into a single feature.
            features.Add(Math.Min(MaxBefore, index) + "-before");
            features.Add(Math.Min(MaxAfter, word.Length - charc.Length - index) + "-after");
            features.Add(Math.Min(MaxLength, word.Length) + "-length");
            // Indicator transition feature
            features.Add("cliqueC");
            return(features);
        }
Ejemplo n.º 4
0
 /// <summary>Returns the 0-based index of the rightmost leaf under <paramref name="tree"/>.</summary>
 /// <remarks>
 /// Follows the last child at each level until a leaf is reached, then subtracts one from
 /// that leaf's label index. Assumes the leaves were indexed from 1.
 /// </remarks>
 /// <param name="tree">Tree (or leaf) to inspect.</param>
 /// <returns>The rightmost leaf's <c>Index()</c> minus one.</returns>
 internal static int RightIndex(Tree tree)
 {
     Tree node = tree;

     // Descend along the right edge of the tree.
     while (!node.IsLeaf())
     {
         var kids = node.Children();
         node = kids[kids.Length - 1];
     }

     CoreLabel leafLabel = ErasureUtils.UncheckedCast(node.Label());
     return leafLabel.Index() - 1;
 }
Ejemplo n.º 5
0
 /// <summary>Returns the 0-based index of the leftmost leaf under <paramref name="tree"/>.</summary>
 /// <remarks>
 /// Follows the first child at each level until a leaf is reached, then subtracts one from
 /// that leaf's label index. Assumes the leaves were indexed from 1.
 /// </remarks>
 /// <param name="tree">Tree (or leaf) to inspect.</param>
 /// <returns>The leftmost leaf's <c>Index()</c> minus one.</returns>
 internal static int LeftIndex(Tree tree)
 {
     Tree node = tree;

     // Descend along the left edge of the tree.
     while (!node.IsLeaf())
     {
         node = node.Children()[0];
     }

     CoreLabel leafLabel = ErasureUtils.UncheckedCast(node.Label());
     return leafLabel.Index() - 1;
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Collects the indices of all imperative verbs found in the tree <paramref name="t"/>.
        /// </summary>
        /// <remarks>
        /// Every match of <c>ImperativePattern</c> contributes the <c>Index()</c> of the first label
        /// in the match's yield. The value is stored as reported, with no offset applied.
        /// </remarks>
        /// <param name="t">Tree to search.</param>
        /// <returns>A set of the collected indices; empty when nothing matches.</returns>
        private static ICollection <int> GetImperatives(Tree t)
        {
            TregexMatcher     finder = ImperativePattern.Matcher(t);
            ICollection <int> found  = new HashSet <int>();

            while (finder.Find())
            {
                // The first label of the matched subtree's yield is taken as the verb.
                CoreLabel verb = (CoreLabel)finder.GetMatch().Yield()[0];
                found.Add(verb.Index());
            }
            return found;
        }
        /// <summary>
        /// Extends the base-class features for position <paramref name="loc"/> with "wrapper" features.
        /// </summary>
        /// <remarks>
        /// A wrapper is the first two and last two chars of the current word joined by "_". It is only
        /// considered for words longer than three chars. It helps detect ma+_+sh in dialect and avoids
        /// segmenting possessive pronouns when the word starts with al-.
        /// </remarks>
        /// <param name="cInfo">Sequence being featurized.</param>
        /// <param name="loc">Position of the current element in <paramref name="cInfo"/>.</param>
        /// <returns>The collection returned by the base implementation, with any wrapper features added.</returns>
        protected internal override ICollection <string> FeaturesCpC(PaddedList <IN> cInfo, int loc)
        {
            ICollection <string> features = base.FeaturesCpC(cInfo, loc);
            CoreLabel            current  = cInfo[loc];
            string               word     = current.Word();

            // Short words get no wrapper features.
            if (word.Length <= 3)
            {
                return features;
            }

            string prefix  = Sharpen.Runtime.Substring(word, 0, 2);
            string suffix  = Sharpen.Runtime.Substring(word, word.Length - 2);
            string wrapper = prefix + "_" + suffix;
            int    index   = current.Index();

            if (index == 2)
            {
                features.Add(wrapper + "-begin-wrap");
            }
            if (index == word.Length - 1)
            {
                features.Add(wrapper + "-end-wrap");
            }
            return features;
        }
Ejemplo n.º 8
0
 /// <summary>
 /// Gets the index of the current node, as reported by its underlying label.
 /// </summary>
 /// <returns>The value of <c>_label.Index()</c>.</returns>
 public int Index()
 {
     int nodeIndex = _label.Index();
     return nodeIndex;
 }
 /// <summary>Gets the index of the current node, as reported by its underlying label.</summary>
 /// <returns>The value of <c>label.Index()</c>.</returns>
 public virtual int Index()
 {
     int nodeIndex = label.Index();
     return nodeIndex;
 }