Exemplo n.º 1
0
        /// <summary>
        /// Returns the sentence as a string, based on the original text and spacing
        /// prior to tokenization.
        /// </summary>
        /// <remarks>
        /// This method assumes that the original text and spacing have been encoded in
        /// CoreLabel objects for each token of the sentence (done with "invertible=true"
        /// for PTBTokenizer). For every token it appends the token's
        /// OriginalTextAnnotation followed by its AfterAnnotation. When the
        /// AfterAnnotation is missing, a single space is appended instead; a missing
        /// (null) OriginalTextAnnotation is passed to <c>StringBuilder.Append</c>, which
        /// adds nothing for a null value.
        /// The method has loose typing for easier inter-operation with old code that
        /// still works with a <c>List&lt;HasWord&gt;</c>, but every element is cast to
        /// CoreLabel, so passing tokens of any other type fails with an exception.
        /// Known limitation: when a sequence of sentences is printed with
        /// <paramref name="printBeforeBeforeStart"/> set for each of them, the spacing
        /// between sentences is doubled up.
        /// </remarks>
        /// <param name="list">The sentence (List of tokens) to print out</param>
        /// <param name="printBeforeBeforeStart">
        /// Whether to print the BeforeAnnotation before the first token
        /// of the sentence. (In general, the BeforeAnnotation is the same
        /// as the AfterAnnotation of the preceding token. So, usually this
        /// is correct to do only for the first sentence of a text.)
        /// </param>
        /// <returns>
        /// The original sentence string, which may contain newlines or other artifacts
        /// of spacing, or <c>null</c> when <paramref name="list"/> is <c>null</c>.
        /// </returns>
        /// <exception cref="System.InvalidCastException">
        /// An element of <paramref name="list"/> is not a CoreLabel.
        /// </exception>
        public static string ListToOriginalTextString<T>(IList<T> list, bool printBeforeBeforeStart)
            where T : IHasWord
        {
            if (list == null)
            {
                return null;
            }

            StringBuilder text = new StringBuilder();

            // Track the "leading Before still to be printed" state in a local so the
            // caller-visible parameter is never reassigned.
            bool beforePending = printBeforeBeforeStart;

            foreach (IHasWord word in list)
            {
                CoreLabel token = (CoreLabel)word;

                if (beforePending)
                {
                    // Only print Before for the first token, since otherwise it is the
                    // same as the After of the previous token.
                    // BUG (known): if you print a sequence of sentences, you double up
                    // the between-sentence spacing.
                    var before = token.Get(typeof(CoreAnnotations.BeforeAnnotation));
                    if (before != null)
                    {
                        text.Append(before);
                    }
                    beforePending = false;
                }

                text.Append(token.Get(typeof(CoreAnnotations.OriginalTextAnnotation)));

                // Look the annotation up once and reuse it for both the null test and
                // the append.
                var after = token.Get(typeof(CoreAnnotations.AfterAnnotation));
                if (after != null)
                {
                    text.Append(after);
                }
                else
                {
                    // No recorded trailing spacing: fall back to a single space.
                    text.Append(' ');
                }
            }

            return text.ToString();
        }
Exemplo n.º 2
0
 /// <summary>
 /// Looks up the value stored under <paramref name="key"/> by forwarding the
 /// call to <c>label.Get(key)</c> and returning its result unchanged.
 /// </summary>
 /// <remarks>
 /// NOTE(review): the declared type parameter is <c>Value</c> while the return
 /// type is spelled <c>VALUE</c>; the type parameter is not used anywhere in the
 /// visible signature or body, and <c>VALUE</c> must be declared outside this
 /// snippet. Confirm that this mismatch is intended.
 /// </remarks>
 /// <param name="key">The key type passed through to <c>label.Get</c>.</param>
 /// <returns>Whatever <c>label.Get(key)</c> returns.</returns>
 public virtual VALUE Get <Value>(Type key)
 {
     return(label.Get(key));
 }