C# (CSharp) CoreLabel.IsNewline примеры использования

Язык программирования: C# (CSharp)

Класс/Тип: CoreLabel

Метод/Функция: IsNewline

Примеров на hotexamples.com: 2

C# (CSharp) CoreLabel.IsNewline - 2 примера найдено. Это лучшие примеры C# (CSharp) кода для CoreLabel.IsNewline, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Set(30)

Word(30)

Get(30)

SetWord(27)

SetValue(27)

Lemma(15)

SetTag(13)

Tag(12)

ContainsKey(11)

GetString(10)

Index(9)

SetIndex(9)

Value(9)

Factory(9)

Remove(8)

SetNER(7)

BeginPosition(6)

SetLemma(6)

SetOriginalText(5)

ToString(4)

get(4)

SetBeginPosition(4)

OriginalText(4)

SetEndPosition(4)

KeySet(3)

ToShorterString(3)

value(2)

Ner(2)

IsNewline(2)

EndPosition(2)

toString(1)

set(1)

lemma(1)

index(1)

SetCategory(1)

endPosition(1)

beginPosition(1)

GetHashCode(1)

Size(1)

LabelFactory(1)

Category(1)

word(1)

Пример #1

Показать файл

        // Attribute conversational mentions: assign a quote the same mention as the quote two before it.
        // If quote X has not been labelled, has no additional text, quote X-2 has been labelled, and quotes X-2, X-1, and X are in consecutive paragraphs,
        // and X-1's quote does not refer to a name:
        // give quote X the same mention as X-2.
        /// <summary>
        /// Visits every quote from the third one onward and, when the checks below pass,
        /// fills in its mention from the quote two positions earlier (or from the earliest
        /// preceding quote that shares that quote's paragraph).
        /// </summary>
        /// <param name="doc">Document annotation from which the quotations, tokens and sentences are read.</param>
        public override void DoQuoteToMention(Annotation doc)
        {
            IList<ICoreMap> quotes = doc.Get(typeof(CoreAnnotations.QuotationsAnnotation));
            IList<CoreLabel> tokens = doc.Get(typeof(CoreAnnotations.TokensAnnotation));
            IList<ICoreMap> sentences = doc.Get(typeof(CoreAnnotations.SentencesAnnotation));

            // Quotes 0 and 1 have no quote two positions back, so the scan starts at 2.
            for (int quoteIdx = 2; quoteIdx < quotes.Count; quoteIdx++)
            {
                ICoreMap target = quotes[quoteIdx];
                ICoreMap middle = quotes[quoteIdx - 1];
                ICoreMap source = quotes[quoteIdx - 2];
                int sourcePara = GetQuoteParagraph(source);

                // Walk backwards while earlier quotes share the source's paragraph, so that
                // the source ends up being the first quote of that paragraph run.
                int earlier = quoteIdx - 3;
                while (earlier >= 0 && GetQuoteParagraph(quotes[earlier]) == sourcePara)
                {
                    source = quotes[earlier];
                    earlier--;
                }

                int firstTokenIdx = target.Get(typeof(CoreAnnotations.TokenBeginAnnotation));
                int lastTokenIdx = target.Get(typeof(CoreAnnotations.TokenEndAnnotation));
                ICoreMap openingSentence = sentences[target.Get(typeof(CoreAnnotations.SentenceBeginAnnotation))];

                // Stays true only if neither the token before the quote nor the first
                // non-newline token after it sits in the quote's paragraph.
                bool standsAlone = true;

                if (firstTokenIdx > 0)
                {
                    CoreLabel precedingToken = tokens[firstTokenIdx - 1];
                    ICoreMap precedingSentence = sentences[precedingToken.Get(typeof(CoreAnnotations.SentenceIndexAnnotation))];
                    standsAlone = !precedingSentence.Get(typeof(CoreAnnotations.ParagraphIndexAnnotation)).Equals(openingSentence.Get(typeof(CoreAnnotations.ParagraphIndexAnnotation)));
                }

                if (lastTokenIdx < tokens.Count - 1)
                {
                    // A newline token is not part of any sentence (when newlines have been
                    // tokenized), so skip forward to the next non-newline token, stopping
                    // at the last token of the document at the latest.
                    int followingIdx = lastTokenIdx + 1;
                    CoreLabel followingToken = tokens[followingIdx];
                    while (followingToken.IsNewline() && followingIdx < tokens.Count - 1)
                    {
                        followingIdx++;
                        followingToken = tokens[followingIdx];
                    }

                    if (!followingToken.IsNewline())
                    {
                        ICoreMap followingSentence = sentences[followingToken.Get(typeof(CoreAnnotations.SentenceIndexAnnotation))];
                        if (followingSentence.Get(typeof(CoreAnnotations.ParagraphIndexAnnotation)).Equals(openingSentence.Get(typeof(CoreAnnotations.ParagraphIndexAnnotation))))
                        {
                            standsAlone = false;
                        }
                    }
                }

                // The source must already carry a mention, the target must stand alone and
                // still be unlabelled, and the source's mention type must not be a pronoun.
                bool eligible = source.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation)) != null
                                && standsAlone
                                && target.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation)) == null
                                && !source.Get(typeof(QuoteAttributionAnnotator.MentionTypeAnnotation)).Equals(Sieve.Pronoun);

                if (eligible)
                {
                    // The three quotes must occupy three consecutive paragraphs.
                    bool consecutiveParagraphs = GetQuoteParagraph(target) == GetQuoteParagraph(middle) + 1
                                                 && GetQuoteParagraph(middle) == GetQuoteParagraph(source) + 1;
                    if (consecutiveParagraphs)
                    {
                        FillInMention(target, GetMentionData(source), sieveName);
                    }
                }
            }
        }

Пример #2

Показать файл

        /// <summary>
        /// If setCountLineNumbers is set to true, we count line numbers by
        /// telling the underlying splitter to return empty lists of tokens
        /// and then treating those empty lists as empty lines.
        /// </summary>
        /// <remarks>
        /// If setCountLineNumbers is set to true, we count line numbers by
        /// telling the underlying splitter to return empty lists of tokens
        /// and then treating those empty lists as empty lines.  We don't
        /// actually include empty sentences in the annotation, though.
        /// </remarks>
        /// <param name="annotation">
        /// Document annotation to split. It must already contain a tokens annotation; on return
        /// its tokens annotation is replaced by the newline-free token list and a sentences
        /// annotation is set.
        /// </param>
        /// <exception cref="ArgumentException">The annotation has no tokens annotation.</exception>
        /// <exception cref="InvalidOperationException">
        /// The splitter produced an empty sentence while line counting is disabled.
        /// </exception>
        public virtual void Annotate(Annotation annotation)
        {
            if (Verbose)
            {
                log.Info("Sentence splitting ... " + annotation);
            }
            if (!annotation.ContainsKey(typeof(CoreAnnotations.TokensAnnotation)))
            {
                throw new ArgumentException("WordsToSentencesAnnotator: unable to find words/tokens in: " + annotation);
            }
            // get text and tokens from the document
            string            text   = annotation.Get(typeof(CoreAnnotations.TextAnnotation));
            IList <CoreLabel> tokens = annotation.Get(typeof(CoreAnnotations.TokensAnnotation));

            if (Verbose)
            {
                log.Info("Tokens are: " + tokens);
            }
            string docID = annotation.Get(typeof(CoreAnnotations.DocIDAnnotation));
            // assemble the sentence annotations
            int lineNumber = 0;
            // section annotations to mark sentences with
            ICoreMap         sectionAnnotations = null;
            IList <ICoreMap> sentences          = new List <ICoreMap>();
            // keep track of current section to assign sentences to sections
            int currSectionIndex      = 0;
            IList <ICoreMap> sections = annotation.Get(typeof(CoreAnnotations.SectionsAnnotation));

            // each token list yielded by wts.Process is handled as one sentence
            // (wts is declared outside this method)
            foreach (IList <CoreLabel> sentenceTokens in wts.Process(tokens))
            {
                // when counting lines, every yielded list (including an empty one) advances the line number
                if (countLineNumbers)
                {
                    ++lineNumber;
                }
                if (sentenceTokens.IsEmpty())
                {
                    // empty lists are only expected as line markers when line counting is on
                    if (!countLineNumbers)
                    {
                        throw new InvalidOperationException("unexpected empty sentence: " + sentenceTokens);
                    }
                    else
                    {
                        continue;
                    }
                }
                // get the sentence text from the first and last character offsets
                int    begin        = sentenceTokens[0].Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation));
                int    last         = sentenceTokens.Count - 1;
                int    end          = sentenceTokens[last].Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation));
                string sentenceText = Sharpen.Runtime.Substring(text, begin, end);
                // create a sentence annotation with text and token offsets
                Annotation sentence = new Annotation(sentenceText);
                sentence.Set(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation), begin);
                sentence.Set(typeof(CoreAnnotations.CharacterOffsetEndAnnotation), end);
                sentence.Set(typeof(CoreAnnotations.TokensAnnotation), sentenceTokens);
                // sentences.Count is the position this sentence takes once it is appended at the end of the loop body
                sentence.Set(typeof(CoreAnnotations.SentenceIndexAnnotation), sentences.Count);
                if (countLineNumbers)
                {
                    sentence.Set(typeof(CoreAnnotations.LineNumberAnnotation), lineNumber);
                }
                // Annotate sentence with section information.
                // Assume section start and end appear as first and last tokens of sentence
                CoreLabel sentenceStartToken = sentenceTokens[0];
                CoreLabel sentenceEndToken   = sentenceTokens[sentenceTokens.Count - 1];
                ICoreMap  sectionStart       = sentenceStartToken.Get(typeof(CoreAnnotations.SectionStartAnnotation));
                if (sectionStart != null)
                {
                    // Section is started
                    sectionAnnotations = sectionStart;
                }
                // sectionAnnotations persists across iterations until a section-end token clears it
                if (sectionAnnotations != null)
                {
                    // transfer annotations over to sentence
                    ChunkAnnotationUtils.CopyUnsetAnnotations(sectionAnnotations, sentence);
                }
                string sectionEnd = sentenceEndToken.Get(typeof(CoreAnnotations.SectionEndAnnotation));
                if (sectionEnd != null)
                {
                    sectionAnnotations = null;
                }
                // determine section index for this sentence if keeping track of sections
                if (sections != null)
                {
                    // try to find a section that ends after this sentence ends, check if it encloses sentence
                    // if it doesn't, that means this sentence is in two sections
                    // currSectionIndex only ever moves forward and is kept between sentences
                    while (currSectionIndex < sections.Count)
                    {
                        int currSectionCharBegin = sections[currSectionIndex].Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation));
                        int currSectionCharEnd   = sections[currSectionIndex].Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation));
                        if (currSectionCharEnd < end)
                        {
                            currSectionIndex++;
                        }
                        else
                        {
                            // if the sentence falls in this current section, link it to this section
                            if (currSectionCharBegin <= begin)
                            {
                                // ... but first check if it's in one of this section's quotes!
                                // if so mark it as quoted
                                foreach (ICoreMap sectionQuote in sections[currSectionIndex].Get(typeof(CoreAnnotations.QuotesAnnotation)))
                                {
                                    if (sectionQuote.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation)) <= begin && end <= sectionQuote.Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation)))
                                    {
                                        sentence.Set(typeof(CoreAnnotations.QuotedAnnotation), true);
                                        // set the author to the quote author
                                        sentence.Set(typeof(CoreAnnotations.AuthorAnnotation), sectionQuote.Get(typeof(CoreAnnotations.AuthorAnnotation)));
                                    }
                                }
                                // add the sentence to the section's sentence list
                                sections[currSectionIndex].Get(typeof(CoreAnnotations.SentencesAnnotation)).Add(sentence);
                                // set sentence's section date
                                string sectionDate = sections[currSectionIndex].Get(typeof(CoreAnnotations.SectionDateAnnotation));
                                sentence.Set(typeof(CoreAnnotations.SectionDateAnnotation), sectionDate);
                                // set sentence's section index
                                sentence.Set(typeof(CoreAnnotations.SectionIndexAnnotation), currSectionIndex);
                            }
                            // stop scanning: the first section ending at or after this sentence's end has been examined
                            break;
                        }
                    }
                }
                if (docID != null)
                {
                    sentence.Set(typeof(CoreAnnotations.DocIDAnnotation), docID);
                }
                // token indexes within a sentence start at 1
                int index = 1;
                foreach (CoreLabel token in sentenceTokens)
                {
                    token.SetIndex(index++);
                    token.SetSentIndex(sentences.Count);
                    if (docID != null)
                    {
                        token.SetDocID(docID);
                    }
                }
                // add the sentence to the list
                sentences.Add(sentence);
            }
            // after sentence splitting, remove newline tokens, set token and
            // sentence indexes, and update before and after text appropriately
            // at end of this annotator, it should be as though newline tokens
            // were never used
            // reset token indexes
            IList <CoreLabel> finalTokens = new List <CoreLabel>();
            int       tokenIndex          = 0;
            CoreLabel prevToken           = null;

            foreach (CoreLabel currToken in annotation.Get(typeof(CoreAnnotations.TokensAnnotation)))
            {
                if (!currToken.IsNewline())
                {
                    finalTokens.Add(currToken);
                    // each kept token spans exactly one position: [tokenIndex, tokenIndex + 1)
                    currToken.Set(typeof(CoreAnnotations.TokenBeginAnnotation), tokenIndex);
                    currToken.Set(typeof(CoreAnnotations.TokenEndAnnotation), tokenIndex + 1);
                    tokenIndex++;
                    // fix before text for this token
                    if (prevToken != null && prevToken.IsNewline())
                    {
                        // prepend the dropped newline's original text to this token's before text
                        string currTokenBeforeText = currToken.Get(typeof(CoreAnnotations.BeforeAnnotation));
                        string prevTokenText       = prevToken.Get(typeof(CoreAnnotations.OriginalTextAnnotation));
                        currToken.Set(typeof(CoreAnnotations.BeforeAnnotation), prevTokenText + currTokenBeforeText);
                    }
                }
                else
                {
                    string newlineText = currToken.Get(typeof(CoreAnnotations.OriginalTextAnnotation));
                    // fix after text for last token
                    if (prevToken != null)
                    {
                        // append the dropped newline's original text to the previous token's after text
                        string prevTokenAfterText = prevToken.Get(typeof(CoreAnnotations.AfterAnnotation));
                        prevToken.Set(typeof(CoreAnnotations.AfterAnnotation), prevTokenAfterText + newlineText);
                    }
                }
                // prevToken follows every token, newline tokens included
                prevToken = currToken;
            }
            annotation.Set(typeof(CoreAnnotations.TokensAnnotation), finalTokens);
            // set sentence token begin and token end values
            // (read back from the per-token values assigned in the loop above)
            foreach (ICoreMap sentence_1 in sentences)
            {
                IList <CoreLabel> sentenceTokens_1 = sentence_1.Get(typeof(CoreAnnotations.TokensAnnotation));
                int sentenceTokenBegin             = sentenceTokens_1[0].Get(typeof(CoreAnnotations.TokenBeginAnnotation));
                int sentenceTokenEnd = sentenceTokens_1[sentenceTokens_1.Count - 1].Get(typeof(CoreAnnotations.TokenEndAnnotation));
                sentence_1.Set(typeof(CoreAnnotations.TokenBeginAnnotation), sentenceTokenBegin);
                sentence_1.Set(typeof(CoreAnnotations.TokenEndAnnotation), sentenceTokenEnd);
            }
            // add the sentences annotations to the document
            annotation.Set(typeof(CoreAnnotations.SentencesAnnotation), sentences);
        }