/// <summary>Aggregates a list of elements into the token text produced for the given key.</summary>
/// <param name="key">Annotation key forwarded to <c>ChunkAnnotationUtils.GetTokenText</c>.</param>
/// <param name="in">Elements to aggregate; may be null.</param>
/// <returns>
/// Null when <paramref name="in"/> is null; otherwise whatever
/// <c>ChunkAnnotationUtils.GetTokenText</c> returns for the list and key.
/// </returns>
public override object Aggregate <_T0>(Type key, IList <_T0> @in)
            {
                // A missing list short-circuits to null without calling the helper.
                return(@in != null ? ChunkAnnotationUtils.GetTokenText(@in, key) : null);
            }
        /// <summary>Applies the given attributes to the elements covered by a group.</summary>
        /// <param name="group">Group whose start and end offsets are looked up.</param>
        /// <param name="attributes">Attributes handed to <c>ChunkAnnotationUtils.AnnotateChunks</c>.</param>
        public virtual void AnnotateGroup(int group, IDictionary <string, string> attributes)
        {
            int begin = Start(group);

            // A negative start is skipped; End is not queried in that case.
            if (begin < 0)
            {
                return;
            }
            int finish = End(group);
            ChunkAnnotationUtils.AnnotateChunks(elements, begin, finish, attributes);
        }
Пример #3
0
        /// <summary>
        /// Merges the elements of <paramref name="in"/> delimited by <paramref name="start"/> and
        /// <paramref name="end"/> into a single core map.
        /// </summary>
        /// <param name="in">Elements to merge.</param>
        /// <param name="start">Start offset passed to <c>GetMergedChunk</c> and <c>SubList</c>.</param>
        /// <param name="end">End offset passed to <c>GetMergedChunk</c> and <c>SubList</c>.</param>
        /// <returns>The merged core map produced by <c>ChunkAnnotationUtils.GetMergedChunk</c>.</returns>
        public virtual ICoreMap Merge <_T0>(IList <_T0> @in, int start, int end)
            where _T0 : ICoreMap
        {
            ICoreMap merged = ChunkAnnotationUtils.GetMergedChunk(@in, start, end, aggregators, tokenFactory);

            if (mergedKey != null)
            {
                // Store a copy of the merged sub-range under mergedKey. The element type is the
                // method's own type parameter (the previous `_T2094911265` was an undeclared
                // leftover identifier from the Java-to-C# translation).
                merged.Set(mergedKey, new List <_T0>(@in.SubList(start, end)));
            }
            return(merged);
        }
Пример #4
0
        /// <summary>Find and annotate chunks.</summary>
        /// <remarks>
        /// Find and annotate chunks.  Returns list of CoreMap (Annotation) objects
        /// each representing a chunk with the following annotations set:
        /// CharacterOffsetBeginAnnotation - set to CharacterOffsetBeginAnnotation of first token in chunk
        /// CharacterOffsetEndAnnotation - set to CharacterOffsetEndAnnotation of last token in chunk
        /// TokensAnnotation - List of tokens in this chunk
        /// TokenBeginAnnotation - Index of first token in chunk (index in original list of tokens)
        /// TokenEndAnnotation - Index of last token in chunk (index in original list of tokens)
        /// TextAnnotation - String representing tokens in this chunks (token text separated by space)
        /// In addition, each chunk gets <paramref name="labelKey"/> set to the type of the tag
        /// that was active when the chunk ended.
        /// </remarks>
        /// <param name="tokens">- List of tokens to look for chunks</param>
        /// <param name="totalTokensOffset">- Index of tokens to offset by</param>
        /// <param name="textKey">- Key to use to find the token text</param>
        /// <param name="labelKey">- Key to use to find the token label (to determine if inside chunk or not)</param>
        /// <param name="tokenChunkKey">- If not null, each token is annotated with the chunk using this key</param>
        /// <param name="tokenLabelKey">- If not null, each token is annotated with the text associated with the chunk using this key</param>
        /// <param name="checkTokensCompatible">- If not null, additional check to see if this token and the previous are compatible</param>
        /// <returns>List of annotations (each as a CoreMap) representing the chunks of tokens</returns>
        /// <exception cref="InvalidOperationException">
        /// A new chunk is detected while the previous one has not been closed.
        /// </exception>
        public virtual IList <ICoreMap> GetAnnotatedChunks(IList <CoreLabel> tokens, int totalTokensOffset, Type textKey, Type labelKey, Type tokenChunkKey, Type tokenLabelKey, IPredicate <Pair <CoreLabel, CoreLabel> > checkTokensCompatible)
        {
            IList <ICoreMap> chunks = new List <ICoreMap>();

            LabeledChunkIdentifier.LabelTagType prevTagType = null;
            // Index of the first token of the chunk currently being built, or -1 when no chunk is open.
            int tokenBegin = -1;

            for (int i = 0; i < tokens.Count; i++)
            {
                CoreLabel token = tokens[i];
                string    label = (string)token.Get(labelKey);
                LabeledChunkIdentifier.LabelTagType curTagType = GetTagType(label);
                bool isCompatible = true;
                if (checkTokensCompatible != null)
                {
                    // The predicate receives (current token, previous token); previous is null for the first token.
                    CoreLabel prev = null;
                    if (i > 0)
                    {
                        prev = tokens[i - 1];
                    }
                    Pair <CoreLabel, CoreLabel> p = Pair.MakePair(token, prev);
                    isCompatible = checkTokensCompatible.Test(p);
                }
                // Close the open chunk (if any) when the tags say it ended or the tokens are incompatible.
                if (IsEndOfChunk(prevTagType, curTagType) || !isCompatible)
                {
                    int tokenEnd = i;
                    if (tokenBegin >= 0 && tokenEnd > tokenBegin)
                    {
                        ICoreMap chunk = ChunkAnnotationUtils.GetAnnotatedChunk(tokens, tokenBegin, tokenEnd, totalTokensOffset, tokenChunkKey, textKey, tokenLabelKey);
                        chunk.Set(labelKey, prevTagType.type);
                        chunks.Add(chunk);
                        tokenBegin = -1;
                    }
                }
                // Open a new chunk at this token when the tags say one starts, or when an
                // incompatibility forced a split while the current token is still inside a chunk.
                if (IsStartOfChunk(prevTagType, curTagType) || (!isCompatible && IsChunk(curTagType)))
                {
                    if (tokenBegin >= 0)
                    {
                        throw new InvalidOperationException("New chunk started, prev chunk not ended yet!");
                    }
                    tokenBegin = i;
                }
                prevTagType = curTagType;
            }
            // A chunk still open at the end of the token list runs to the last token.
            if (tokenBegin >= 0)
            {
                ICoreMap chunk = ChunkAnnotationUtils.GetAnnotatedChunk(tokens, tokenBegin, tokens.Count, totalTokensOffset, tokenChunkKey, textKey, tokenLabelKey);
                chunk.Set(labelKey, prevTagType.type);
                chunks.Add(chunk);
            }
            return(chunks);
        }
        /// <summary>
        /// Re-tags acronym-looking tokens as ORGANIZATION when they are an acronym of an
        /// ORGANIZATION mention already present in the document.
        /// </summary>
        /// <param name="ann">Document whose sentences, mentions and tokens are read; matching tokens get their NER tag changed.</param>
        private void AddAcronyms(Annotation ann)
        {
            // Gather the mentions of every sentence in the document
            IList <ICoreMap> documentMentions = new List <ICoreMap>();

            foreach (ICoreMap sent in ann.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                Sharpen.Collections.AddAll(documentMentions, sent.Get(typeof(CoreAnnotations.MentionsAnnotation)));
            }
            // Keep the token lists of the mentions tagged ORGANIZATION
            IList <IList <CoreLabel> > orgTokenLists = new List <IList <CoreLabel> >();

            foreach (ICoreMap candidate in documentMentions)
            {
                if (!"ORGANIZATION".Equals(candidate.Get(nerCoreAnnotationClass)))
                {
                    continue;
                }
                orgTokenLists.Add(candidate.Get(typeof(CoreAnnotations.TokensAnnotation)));
            }
            // Documents with more than 100 organizations are left alone
            if (orgTokenLists.Count > 100)
            {
                return;
            }
            foreach (ICoreMap sent in ann.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                // NOTE(review): this list is filled below but never read or stored in this method — confirm intent.
                IList <ICoreMap>  sentenceMentions = new List <ICoreMap>();
                IList <CoreLabel> sentTokens       = sent.Get(typeof(CoreAnnotations.TokensAnnotation));
                int totalTokensOffset = sent.Get(typeof(CoreAnnotations.TokenBeginAnnotation));
                for (int pos = 0; pos < sentTokens.Count; ++pos)
                {
                    CoreLabel tok = sentTokens[pos];
                    // Candidate acronym: currently tagged "O", all upper case, at least three characters
                    bool acronymShaped = "O".Equals(tok.Ner()) && tok.Word().ToUpper().Equals(tok.Word()) && tok.Word().Length >= 3;
                    if (!acronymShaped)
                    {
                        continue;
                    }
                    foreach (IList <CoreLabel> orgTokens in orgTokenLists)
                    {
                        if (!AcronymMatcher.IsAcronym(tok.Word(), orgTokens))
                        {
                            continue;
                        }
                        // The token abbreviates this organization: re-tag it and build a one-token chunk
                        tok.SetNER("ORGANIZATION");
                        ICoreMap acronymChunk = ChunkAnnotationUtils.GetAnnotatedChunk(sentTokens, pos, pos + 1, totalTokensOffset, null, null, null);
                        acronymChunk.Set(typeof(CoreAnnotations.NamedEntityTagAnnotation), "ORGANIZATION");
                        sentenceMentions.Add(acronymChunk);
                    }
                }
            }
        }
        /// <summary>Annotate all the pronominal mentions in the document.</summary>
        /// <remarks>
        /// Every token accepted by <c>KbpIsPronominalMention</c> becomes a one-token chunk tagged as a
        /// person. The chunk is appended to its sentence's mentions list and to the returned list.
        /// "she" / "he" (any casing) additionally get a FEMALE / MALE gender annotation on the chunk
        /// and on its tokens.
        /// </remarks>
        /// <param name="ann">The document.</param>
        /// <returns>The list of pronominal mentions in the document.</returns>
        private static IList <ICoreMap> AnnotatePronominalMentions(Annotation ann)
        {
            IList <ICoreMap> pronouns  = new List <ICoreMap>();
            IList <ICoreMap> sentences = ann.Get(typeof(CoreAnnotations.SentencesAnnotation));

            for (int sentenceIndex = 0; sentenceIndex < sentences.Count; sentenceIndex++)
            {
                ICoreMap sentence = sentences[sentenceIndex];
                // The token-begin annotation may be missing. A plain int can never equal null,
                // so read it as nullable and fall back to 0.
                int? sentenceTokenBegin = sentence.Get(typeof(CoreAnnotations.TokenBeginAnnotation));
                int  annoTokenBegin     = sentenceTokenBegin ?? 0;
                IList <CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
                for (int tokenIndex = 0; tokenIndex < tokens.Count; tokenIndex++)
                {
                    CoreLabel token = tokens[tokenIndex];
                    if (!KbpIsPronominalMention(token))
                    {
                        continue;
                    }
                    ICoreMap pronoun = ChunkAnnotationUtils.GetAnnotatedChunk(tokens, tokenIndex, tokenIndex + 1, annoTokenBegin, null, typeof(CoreAnnotations.TextAnnotation), null);
                    pronoun.Set(typeof(CoreAnnotations.SentenceIndexAnnotation), sentenceIndex);
                    pronoun.Set(typeof(CoreAnnotations.NamedEntityTagAnnotation), KBPRelationExtractor.NERTag.Person.name);
                    pronoun.Set(typeof(CoreAnnotations.EntityTypeAnnotation), KBPRelationExtractor.NERTag.Person.name);
                    // Derive the gender once from the chunk text; lower-casing is culture-invariant
                    // so the result does not depend on the machine's locale.
                    string pronounText   = pronoun.Get(typeof(CoreAnnotations.TextAnnotation));
                    string loweredText   = pronounText.ToLowerInvariant();
                    string pronounGender = null;
                    if (loweredText.Equals("she"))
                    {
                        pronounGender = "FEMALE";
                    }
                    else if (loweredText.Equals("he"))
                    {
                        pronounGender = "MALE";
                    }
                    if (pronounGender != null)
                    {
                        // Gender goes on the mention first, then on each of its tokens.
                        pronoun.Set(typeof(CoreAnnotations.GenderAnnotation), pronounGender);
                        foreach (CoreLabel pronounToken in pronoun.Get(typeof(CoreAnnotations.TokensAnnotation)))
                        {
                            pronounToken.Set(typeof(CoreAnnotations.GenderAnnotation), pronounGender);
                        }
                    }
                    sentence.Get(typeof(CoreAnnotations.MentionsAnnotation)).Add(pronoun);
                    pronouns.Add(pronoun);
                }
            }
            return(pronouns);
        }
 /// <summary>Annotates the elements of each requested group of a match with this object's attributes.</summary>
 /// <param name="matchResult">Match whose groups are annotated.</param>
 /// <param name="groups">Group ids to process, in order.</param>
 /// <returns>The same <paramref name="matchResult"/> instance that was passed in.</returns>
 public override ISequenceMatchResult <T> Apply(ISequenceMatchResult <T> matchResult, params int[] groups)
 {
     for (int g = 0; g < groups.Length; g++)
     {
         int groupId = groups[g];
         int begin   = matchResult.Start(groupId);
         // Groups reporting a negative start are skipped.
         if (begin < 0)
         {
             continue;
         }
         int finish = matchResult.End(groupId);
         ChunkAnnotationUtils.AnnotateChunks(matchResult.Elements(), begin, finish, attributes);
     }
     return(matchResult);
 }
Пример #8
0
        /// <summary>
        /// Tokenizes and sentence-splits a four-sentence text, merges sentences 1 through 3 with
        /// <c>ChunkAnnotationUtils.MergeChunks</c>, and checks that two sentences remain.
        /// </summary>
        public virtual void TestMergeChunks()
        {
            // Create 4 sentences
            string     text       = "I have created sentence1.  And then sentence2.  Now sentence3. Finally sentence4.";
            IAnnotator tokenizer  = new TokenizerAnnotator("en");
            IAnnotator ssplit     = new WordsToSentencesAnnotator();
            Annotation annotation = new Annotation(text);

            tokenizer.Annotate(annotation);
            ssplit.Annotate(annotation);
            // Get sentences
            IList <ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));

            // NUnit takes (expected, actual, message); the message-first order is JUnit's.
            NUnit.Framework.Assert.AreEqual(4, sentences.Count, "4 sentence expected");
            // Merge last 3 into one
            ChunkAnnotationUtils.MergeChunks(sentences, text, 1, 4);
            NUnit.Framework.Assert.AreEqual(2, sentences.Count, "2 sentence expected");
        }
        /// <summary>
        /// Builds this object's <c>annotation</c> from a source annotation and then runs
        /// <c>extractFunc.Annotate</c> on its tokens.
        /// </summary>
        /// <remarks>
        /// When <c>chunkOffsets</c> is already known, the tokens in that range are merged and the
        /// character/token offsets are derived from the merged result. Otherwise the chunk offsets
        /// are computed from <c>charOffsets</c> and the annotation is built from character offsets.
        /// The fields <c>annotation</c>, <c>chunkOffsets</c>, <c>charOffsets</c>,
        /// <c>tokenOffsets</c> and <c>text</c> are updated as a side effect.
        /// </remarks>
        /// <param name="sourceAnnotation">Annotation that the tokens and offsets are taken from.</param>
        /// <param name="aggregator">Aggregator used to merge the selected tokens.</param>
        /// <returns>Always true.</returns>
        private bool ExtractAnnotation(ICoreMap sourceAnnotation, CoreMapAggregator aggregator)
        {
            Type tokensAnnotationKey = extractFunc.tokensAnnotationField;

            if (chunkOffsets != null)
            {
                annotation = aggregator.Merge((IList <ICoreMap>)sourceAnnotation.Get(tokensAnnotationKey), chunkOffsets.GetBegin(), chunkOffsets.GetEnd());
                if (sourceAnnotation.ContainsKey(typeof(CoreAnnotations.TextAnnotation)))
                {
                    ChunkAnnotationUtils.AnnotateChunkText(annotation, sourceAnnotation);
                }
                if (tokenOffsets != null)
                {
                    // Fill in token begin/end only where the merged annotation does not have them.
                    if (annotation.Get(typeof(CoreAnnotations.TokenBeginAnnotation)) == null)
                    {
                        annotation.Set(typeof(CoreAnnotations.TokenBeginAnnotation), tokenOffsets.GetBegin());
                    }
                    if (annotation.Get(typeof(CoreAnnotations.TokenEndAnnotation)) == null)
                    {
                        annotation.Set(typeof(CoreAnnotations.TokenEndAnnotation), tokenOffsets.GetEnd());
                    }
                }
                // Re-derive both offset intervals from what the annotation now carries.
                charOffsets  = Interval.ToInterval(annotation.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation)), annotation.Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation)));
                tokenOffsets = Interval.ToInterval(annotation.Get(typeof(CoreAnnotations.TokenBeginAnnotation)), annotation.Get(typeof(CoreAnnotations.TokenEndAnnotation)), Interval.IntervalOpenEnd);
            }
            else
            {
                // The source's character-begin offset may be missing. A plain int can never equal
                // null, so read it as nullable and fall back to 0.
                int? sourceCharBegin = sourceAnnotation.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation));
                int  baseCharOffset  = sourceCharBegin ?? 0;
                chunkOffsets = ChunkAnnotationUtils.GetChunkOffsetsUsingCharOffsets((IList <ICoreMap>)sourceAnnotation.Get(tokensAnnotationKey), charOffsets.GetBegin() + baseCharOffset, charOffsets.GetEnd() + baseCharOffset);
                ICoreMap annotation2 = aggregator.Merge((IList <ICoreMap>)sourceAnnotation.Get(tokensAnnotationKey), chunkOffsets.GetBegin(), chunkOffsets.GetEnd());
                annotation   = ChunkAnnotationUtils.GetAnnotatedChunkUsingCharOffsets(sourceAnnotation, charOffsets.GetBegin(), charOffsets.GetEnd());
                tokenOffsets = Interval.ToInterval(annotation.Get(typeof(CoreAnnotations.TokenBeginAnnotation)), annotation.Get(typeof(CoreAnnotations.TokenEndAnnotation)), Interval.IntervalOpenEnd);
                // The tokens come from the aggregator's merge, not from the char-offset chunk.
                annotation.Set(tokensAnnotationKey, annotation2.Get(tokensAnnotationKey));
            }
            text = annotation.Get(typeof(CoreAnnotations.TextAnnotation));
            extractFunc.Annotate(this, (IList <ICoreMap>)annotation.Get(tokensAnnotationKey));
            return(true);
        }
Пример #10
0
        /// <summary>
        /// Collects every interval beginning at <paramref name="start"/> whose merged chunk is
        /// accepted by <c>nodePattern</c>.
        /// </summary>
        /// <param name="nodes">Nodes to match against.</param>
        /// <param name="start">Index at which every candidate interval begins.</param>
        /// <returns>The matching intervals, in order of increasing end.</returns>
        protected internal override ICollection <Interval <int> > Match <_T0>(IList <_T0> nodes, int start)
        {
            IList <Interval <int> > hits = new List <Interval <int> >();
            int total = nodes.Count;
            // Candidate ends run up to the node count, or to start + maxNodes when that
            // limit is non-negative and smaller.
            int lastEnd = (maxNodes >= 0 && maxNodes + start < total) ? maxNodes + start : total;
            int candidateEnd = start + minNodes;

            while (candidateEnd <= lastEnd)
            {
                ICoreMap mergedChunk = ChunkAnnotationUtils.GetMergedChunk(nodes, start, candidateEnd, aggregators, null);
                if (nodePattern.Match(mergedChunk))
                {
                    hits.Add(Interval.ToInterval(start, candidateEnd));
                }
                candidateEnd++;
            }
            return(hits);
        }
        /// <summary>
        /// Converts time expressions into core maps that carry a <c>TimexAnnotation</c>.
        /// </summary>
        /// <remarks>
        /// Expressions without a temporal are skipped. So are expressions whose timex attributes
        /// or timex cannot be built; those failures are logged only when <c>options.verbose</c> is set.
        /// </remarks>
        /// <param name="annotation">Annotation the expressions were found in; its text, when present, is used to re-annotate each chunk's text.</param>
        /// <param name="timeExpressions">Expressions to convert; may be null.</param>
        /// <param name="timeIndex">Index passed to <c>GetTimexAttributes</c>.</param>
        /// <returns>Null when <paramref name="timeExpressions"/> is null; otherwise the converted core maps.</returns>
        private IList <ICoreMap> ToCoreMaps(ICoreMap annotation, IList <TimeExpression> timeExpressions, SUTime.TimeIndex timeIndex)
        {
            if (timeExpressions == null)
            {
                return(null);
            }
            IList <ICoreMap> converted = new List <ICoreMap>(timeExpressions.Count);

            foreach (TimeExpression expr in timeExpressions)
            {
                ICoreMap        exprMap  = expr.GetAnnotation();
                SUTime.Temporal temporal = expr.GetTemporal();
                if (temporal == null)
                {
                    continue;
                }
                string sourceText = annotation.Get(typeof(CoreAnnotations.TextAnnotation));
                string exprText   = exprMap.Get(typeof(CoreAnnotations.TextAnnotation));
                if (sourceText != null)
                {
                    // Prefer text taken from the original document over text rebuilt from tokens
                    ChunkAnnotationUtils.AnnotateChunkText(exprMap, annotation);
                    exprText = exprMap.Get(typeof(CoreAnnotations.TextAnnotation));
                }
                IDictionary <string, string> attrs;
                try
                {
                    attrs = temporal.GetTimexAttributes(timeIndex);
                    if (options.includeRange)
                    {
                        SUTime.Temporal range = temporal.GetRange();
                        if (range != null)
                        {
                            attrs["range"] = range.ToString();
                        }
                    }
                }
                catch (Exception ex)
                {
                    if (options.verbose)
                    {
                        logger.Warn("Failed to get attributes from " + exprText + ", timeIndex " + timeIndex);
                        logger.Warn(ex);
                    }
                    continue;
                }
                Timex timex;
                try
                {
                    timex = Timex.FromMap(exprText, attrs);
                }
                catch (Exception ex)
                {
                    if (options.verbose)
                    {
                        logger.Warn("Failed to process timex " + exprText + " with attributes " + attrs);
                        logger.Warn(ex);
                    }
                    continue;
                }
                // Either FromMap produced a timex or the catch above already moved to the next expression
                System.Diagnostics.Debug.Assert(timex != null);
                exprMap.Set(typeof(TimeAnnotations.TimexAnnotation), timex);
                converted.Add(exprMap);
            }
            return(converted);
        }
 /// <summary>Builds a "PeriodicTemporalSet" value from a temporal, an optional quantifier and an optional scale.</summary>
 /// <param name="env">Environment; not read by this method.</param>
 /// <param name="in">
 /// Arguments: [0] a <c>SUTime.Temporal</c> or a <c>TimeExpression</c>; [1] (optional) a string
 /// or a list whose token text is lower-cased and used as the quantifier; [2] (optional) a
 /// number that multiplies the period.
 /// </param>
 /// <returns>A primitive value wrapping the new <c>SUTime.PeriodicTemporalSet</c>.</returns>
 /// <exception cref="ArgumentException">No arguments were given, or argument 0 or 1 has an unsupported type.</exception>
 public override IValue Apply(Env env, IList <IValue> @in)
 {
     if (@in.Count < 1)
     {
         throw new ArgumentException("Invalid number of arguments to " + this.name);
     }
     // Argument 0: the temporal itself, or a time expression that carries one
     SUTime.Temporal temporal = null;
     object          first    = @in[0].Get();
     if (first is SUTime.Temporal)
     {
         temporal = (SUTime.Temporal)@in[0].Get();
     }
     else if (first is TimeExpression)
     {
         temporal = ((TimeExpression)first).GetTemporal();
     }
     else
     {
         throw new ArgumentException("Type mismatch on arg0: Cannot apply " + this + " to " + @in);
     }
     // Argument 1 (optional): quantifier, as a string or as a list of core maps
     string quant = null;
     if (@in.Count >= 2 && @in[1] != null)
     {
         object second = @in[1].Get();
         if (second is string)
         {
             quant = (string)second;
         }
         else if (second is IList)
         {
             IList <ICoreMap> quantTokens = (IList <ICoreMap>)second;
             quant = ChunkAnnotationUtils.GetTokenText(quantTokens, typeof(CoreAnnotations.TextAnnotation));
             if (quant != null)
             {
                 quant = quant.ToLower();
             }
         }
         else
         {
             throw new ArgumentException("Type mismatch on arg1: Cannot apply " + this + " to " + @in);
         }
     }
     // Argument 2 (optional): scale factor for the period, 1 when absent
     int scale = 1;
     if (@in.Count >= 3 && @in[2] != null)
     {
         Number third = (Number)@in[2].Get();
         if (third != null)
         {
             scale = third;
         }
     }
     SUTime.Duration period = temporal.GetPeriod();
     if (period != null && scale != 1)
     {
         period = period.MultiplyBy(scale);
     }
     return(new Expressions.PrimitiveValue("PeriodicTemporalSet", new SUTime.PeriodicTemporalSet(temporal, period, quant, null)));
 }
Пример #13
0
        /// <summary>
        /// Splits the document's tokens into sentences and stores the resulting sentence
        /// annotations on the document.
        /// </summary>
        /// <remarks>
        /// If setCountLineNumbers is set to true, we count line numbers by
        /// telling the underlying splitter to return empty lists of tokens
        /// and then treating those empty lists as empty lines.  We don't
        /// actually include empty sentences in the annotation, though.
        /// Besides adding the sentences, this method rewrites the document's token list
        /// without newline tokens and sets index annotations on the remaining tokens.
        /// </remarks>
        /// <param name="annotation">Document to split; it must already contain a TokensAnnotation.</param>
        /// <exception cref="ArgumentException">The document has no TokensAnnotation.</exception>
        /// <exception cref="InvalidOperationException">The splitter returned an empty sentence while line counting is off.</exception>
        public virtual void Annotate(Annotation annotation)
        {
            if (Verbose)
            {
                log.Info("Sentence splitting ... " + annotation);
            }
            if (!annotation.ContainsKey(typeof(CoreAnnotations.TokensAnnotation)))
            {
                throw new ArgumentException("WordsToSentencesAnnotator: unable to find words/tokens in: " + annotation);
            }
            // get text and tokens from the document
            string            text   = annotation.Get(typeof(CoreAnnotations.TextAnnotation));
            IList <CoreLabel> tokens = annotation.Get(typeof(CoreAnnotations.TokensAnnotation));

            if (Verbose)
            {
                log.Info("Tokens are: " + tokens);
            }
            string docID = annotation.Get(typeof(CoreAnnotations.DocIDAnnotation));
            // assemble the sentence annotations
            int lineNumber = 0;
            // section annotations to mark sentences with
            ICoreMap         sectionAnnotations = null;
            IList <ICoreMap> sentences          = new List <ICoreMap>();
            // keep track of current section to assign sentences to sections
            int currSectionIndex      = 0;
            IList <ICoreMap> sections = annotation.Get(typeof(CoreAnnotations.SectionsAnnotation));

            foreach (IList <CoreLabel> sentenceTokens in wts.Process(tokens))
            {
                // every list returned by the splitter counts as a line, including empty ones
                if (countLineNumbers)
                {
                    ++lineNumber;
                }
                if (sentenceTokens.IsEmpty())
                {
                    // empty lists are only expected when they stand in for empty lines
                    if (!countLineNumbers)
                    {
                        throw new InvalidOperationException("unexpected empty sentence: " + sentenceTokens);
                    }
                    else
                    {
                        continue;
                    }
                }
                // get the sentence text from the first and last character offsets
                int    begin        = sentenceTokens[0].Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation));
                int    last         = sentenceTokens.Count - 1;
                int    end          = sentenceTokens[last].Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation));
                string sentenceText = Sharpen.Runtime.Substring(text, begin, end);
                // create a sentence annotation with text and token offsets
                Annotation sentence = new Annotation(sentenceText);
                sentence.Set(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation), begin);
                sentence.Set(typeof(CoreAnnotations.CharacterOffsetEndAnnotation), end);
                sentence.Set(typeof(CoreAnnotations.TokensAnnotation), sentenceTokens);
                // the sentence index is its position in the list of sentences kept so far
                sentence.Set(typeof(CoreAnnotations.SentenceIndexAnnotation), sentences.Count);
                if (countLineNumbers)
                {
                    sentence.Set(typeof(CoreAnnotations.LineNumberAnnotation), lineNumber);
                }
                // Annotate sentence with section information.
                // Assume section start and end appear as first and last tokens of sentence
                CoreLabel sentenceStartToken = sentenceTokens[0];
                CoreLabel sentenceEndToken   = sentenceTokens[sentenceTokens.Count - 1];
                ICoreMap  sectionStart       = sentenceStartToken.Get(typeof(CoreAnnotations.SectionStartAnnotation));
                if (sectionStart != null)
                {
                    // Section is started
                    sectionAnnotations = sectionStart;
                }
                if (sectionAnnotations != null)
                {
                    // transfer annotations over to sentence
                    ChunkAnnotationUtils.CopyUnsetAnnotations(sectionAnnotations, sentence);
                }
                string sectionEnd = sentenceEndToken.Get(typeof(CoreAnnotations.SectionEndAnnotation));
                if (sectionEnd != null)
                {
                    // section is finished: later sentences no longer inherit its annotations
                    sectionAnnotations = null;
                }
                // determine section index for this sentence if keeping track of sections
                if (sections != null)
                {
                    // try to find a section that ends after this sentence ends, check if it encloses sentence
                    // if it doesn't, that means this sentence is in two sections
                    while (currSectionIndex < sections.Count)
                    {
                        int currSectionCharBegin = sections[currSectionIndex].Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation));
                        int currSectionCharEnd   = sections[currSectionIndex].Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation));
                        if (currSectionCharEnd < end)
                        {
                            // this section ends before the sentence does; currSectionIndex is never
                            // reset, so earlier sections are not revisited for later sentences
                            currSectionIndex++;
                        }
                        else
                        {
                            // if the sentence falls in this current section, link it to this section
                            if (currSectionCharBegin <= begin)
                            {
                                // ... but first check if it's in one of this sections quotes!
                                // if so mark it as quoted
                                foreach (ICoreMap sectionQuote in sections[currSectionIndex].Get(typeof(CoreAnnotations.QuotesAnnotation)))
                                {
                                    if (sectionQuote.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation)) <= begin && end <= sectionQuote.Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation)))
                                    {
                                        sentence.Set(typeof(CoreAnnotations.QuotedAnnotation), true);
                                        // set the author to the quote author
                                        sentence.Set(typeof(CoreAnnotations.AuthorAnnotation), sectionQuote.Get(typeof(CoreAnnotations.AuthorAnnotation)));
                                    }
                                }
                                // add the sentence to the section's sentence list
                                sections[currSectionIndex].Get(typeof(CoreAnnotations.SentencesAnnotation)).Add(sentence);
                                // set sentence's section date
                                string sectionDate = sections[currSectionIndex].Get(typeof(CoreAnnotations.SectionDateAnnotation));
                                sentence.Set(typeof(CoreAnnotations.SectionDateAnnotation), sectionDate);
                                // set sentence's section index
                                sentence.Set(typeof(CoreAnnotations.SectionIndexAnnotation), currSectionIndex);
                            }
                            // stop at the first section that ends at or after the sentence end,
                            // whether or not the sentence was linked to it
                            break;
                        }
                    }
                }
                if (docID != null)
                {
                    sentence.Set(typeof(CoreAnnotations.DocIDAnnotation), docID);
                }
                // token indexes within a sentence start at 1
                int index = 1;
                foreach (CoreLabel token in sentenceTokens)
                {
                    token.SetIndex(index++);
                    token.SetSentIndex(sentences.Count);
                    if (docID != null)
                    {
                        token.SetDocID(docID);
                    }
                }
                // add the sentence to the list
                sentences.Add(sentence);
            }
            // after sentence splitting, remove newline tokens, set token and
            // sentence indexes, and update before and after text appropriately
            // at end of this annotator, it should be as though newline tokens
            // were never used
            // reset token indexes
            IList <CoreLabel> finalTokens = new List <CoreLabel>();
            int       tokenIndex          = 0;
            CoreLabel prevToken           = null;

            foreach (CoreLabel currToken in annotation.Get(typeof(CoreAnnotations.TokensAnnotation)))
            {
                if (!currToken.IsNewline())
                {
                    finalTokens.Add(currToken);
                    // document-level token span of this token: [tokenIndex, tokenIndex + 1)
                    currToken.Set(typeof(CoreAnnotations.TokenBeginAnnotation), tokenIndex);
                    currToken.Set(typeof(CoreAnnotations.TokenEndAnnotation), tokenIndex + 1);
                    tokenIndex++;
                    // fix before text for this token
                    if (prevToken != null && prevToken.IsNewline())
                    {
                        string currTokenBeforeText = currToken.Get(typeof(CoreAnnotations.BeforeAnnotation));
                        string prevTokenText       = prevToken.Get(typeof(CoreAnnotations.OriginalTextAnnotation));
                        currToken.Set(typeof(CoreAnnotations.BeforeAnnotation), prevTokenText + currTokenBeforeText);
                    }
                }
                else
                {
                    string newlineText = currToken.Get(typeof(CoreAnnotations.OriginalTextAnnotation));
                    // fix after text for last token
                    if (prevToken != null)
                    {
                        string prevTokenAfterText = prevToken.Get(typeof(CoreAnnotations.AfterAnnotation));
                        prevToken.Set(typeof(CoreAnnotations.AfterAnnotation), prevTokenAfterText + newlineText);
                    }
                }
                // newline tokens also become prevToken, which is what the "before text" fix above checks for
                prevToken = currToken;
            }
            annotation.Set(typeof(CoreAnnotations.TokensAnnotation), finalTokens);
            // set sentence token begin and token end values
            foreach (ICoreMap sentence_1 in sentences)
            {
                IList <CoreLabel> sentenceTokens_1 = sentence_1.Get(typeof(CoreAnnotations.TokensAnnotation));
                int sentenceTokenBegin             = sentenceTokens_1[0].Get(typeof(CoreAnnotations.TokenBeginAnnotation));
                int sentenceTokenEnd = sentenceTokens_1[sentenceTokens_1.Count - 1].Get(typeof(CoreAnnotations.TokenEndAnnotation));
                sentence_1.Set(typeof(CoreAnnotations.TokenBeginAnnotation), sentenceTokenBegin);
                sentence_1.Set(typeof(CoreAnnotations.TokenEndAnnotation), sentenceTokenEnd);
            }
            // add the sentences annotations to the document
            annotation.Set(typeof(CoreAnnotations.SentencesAnnotation), sentences);
        }