/// <summary>
/// Collects mentions that were pre-marked on the tokens of a sentence (via
/// <c>CoreAnnotations.MentionTokenAnnotation</c>) into the given mention list,
/// recording each mention's token span in <paramref name="mentionSpanSet"/>.
/// </summary>
/// <param name="s">The sentence to scan for pre-marked mentions.</param>
/// <param name="mentions">Output list that found mentions are appended to.</param>
/// <param name="mentionSpanSet">Output set of (begin, end) token spans for found mentions.</param>
/// <param name="namedEntitySpanSet">Unused here; kept for signature parity with sibling extractors.</param>
protected internal static void ExtractPremarkedEntityMentions(ICoreMap s, IList<Mention> mentions, ICollection<IntPair> mentionSpanSet, ICollection<IntPair> namedEntitySpanSet)
{
    IList<CoreLabel> tokens = s.Get(typeof(CoreAnnotations.TokensAnnotation));
    SemanticGraph basicDependency = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    SemanticGraph enhancedDependency = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
    if (enhancedDependency == null)
    {
        // No enhanced parse available; fall back to the basic dependencies.
        enhancedDependency = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    }
    // 0-based start index of the mention currently being scanned; -1 means no mention is open.
    int mentionStart = -1;
    foreach (CoreLabel token in tokens)
    {
        MultiTokenTag tag = token.Get(typeof(CoreAnnotations.MentionTokenAnnotation));
        if (tag == null)
        {
            // Token is not part of any pre-marked mention.
            continue;
        }
        if (tag.IsStart())
        {
            // IndexAnnotation is 1-based; convert to a 0-based token offset.
            mentionStart = token.Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
        }
        if (tag.IsEnd())
        {
            // End is exclusive: the 1-based index of the last token equals the 0-based end bound.
            int mentionEnd = token.Get(typeof(CoreAnnotations.IndexAnnotation));
            if (mentionStart >= 0)
            {
                int dummyMentionId = -1;
                Mention found = new Mention(dummyMentionId, mentionStart, mentionEnd, tokens, basicDependency, enhancedDependency, new List<CoreLabel>(tokens.SubList(mentionStart, mentionEnd)));
                mentions.Add(found);
                mentionSpanSet.Add(new IntPair(mentionStart, mentionEnd));
                mentionStart = -1;
            }
            else
            {
                // Saw an end marker without a matching start; log and skip.
                Redwood.Log("Start of marked mention not found in sentence: " + tag + " at tokenIndex=" + (token.Get(typeof(CoreAnnotations.IndexAnnotation)) - 1) + " for " + s.Get(typeof(CoreAnnotations.TextAnnotation)));
            }
        }
    }
}
/// <summary>
/// Two MultiTokenTag instances are equal iff they are of exactly the same
/// runtime type and have the same <c>index</c> and an equal <c>tag</c>.
/// </summary>
/// <param name="o">Object to compare against; may be null.</param>
/// <returns>True when the tags are equal as defined above.</returns>
public override bool Equals(object o)
{
    if (ReferenceEquals(this, o))
    {
        return true;
    }
    // Exact runtime-type match (not "is"), so subclasses never compare equal.
    if (o == null || GetType() != o.GetType())
    {
        return false;
    }
    MultiTokenTag other = (MultiTokenTag)o;
    return index == other.index && tag.Equals(other.tag);
}
/// <summary>
/// Returns a List of Lists where each element is built from a run
/// of Words in the input Document.
/// </summary>
/// <remarks>
/// Returns a List of Lists where each element is built from a run
/// of Words in the input Document. Specifically, reads through each word in
/// the input document and breaks off a sentence after finding a valid
/// sentence boundary token or end of file.
/// Note that for this to work, the words in the
/// input document must have been tokenized with a tokenizer that makes
/// sentence boundary tokens their own tokens (e.g.,
/// <see cref="PTBTokenizer{T}"/>
/// ).
/// </remarks>
/// <param name="words">A list of already tokenized words (must implement HasWord or be a String).</param>
/// <returns>A list of sentences.</returns>
/// <seealso cref="WordToSentenceProcessor{IN}.WordToSentenceProcessor(string, string, Java.Util.ISet{E}, Java.Util.ISet{E}, string, NewlineIsSentenceBreak, Edu.Stanford.Nlp.Ling.Tokensregex.SequencePattern{T}, Java.Util.ISet{E}, bool, bool)"/>
private IList<IList<In>> WordsToSentences<_T0>(IList<_T0> words)
    where _T0 : IN
{
    // Identity map marking tokens that END a multi-token sentence boundary.
    // Stays null unless sentenceBoundaryMultiTokenPattern is configured.
    IdentityHashMap<object, bool> isSentenceBoundary = null;
    // is null unless used by sentenceBoundaryMultiTokenPattern
    if (sentenceBoundaryMultiTokenPattern != null)
    {
        // Do initial pass using TokensRegex to identify multi token patterns that need to be matched
        // and add the last token of a match to our table of sentence boundary tokens.
        isSentenceBoundary = new IdentityHashMap<object, bool>();
        SequenceMatcher<In> matcher = sentenceBoundaryMultiTokenPattern.GetMatcher(words);
        while (matcher.Find())
        {
            IList<In> nodes = matcher.GroupNodes();
            if (nodes != null && !nodes.IsEmpty())
            {
                // Only the LAST token of the multi-token match acts as the boundary.
                isSentenceBoundary[nodes[nodes.Count - 1]] = true;
            }
        }
    }
    // Split tokens into sentences!!!
    IList<IList<In>> sentences = Generics.NewArrayList();
    IList<In> currentSentence = new List<In>();
    IList<In> lastSentence = null;
    // State carried across tokens:
    //   insideRegion        - between region begin/end patterns (only relevant if begin pattern set)
    //   inWaitForForcedEnd  - a ForcedSentenceUntilEndAnnotation suppresses breaks until a forced end
    //   lastTokenWasNewline - for the TwoConsecutive newline policy
    //   lastSentenceEndForced - whether the previous sentence ended by force (blocks follower-attachment)
    bool insideRegion = false;
    bool inWaitForForcedEnd = false;
    bool lastTokenWasNewline = false;
    bool lastSentenceEndForced = false;
    foreach (IN o in words)
    {
        string word = GetString(o);
        bool forcedEnd = IsForcedEndToken(o);
        // if (DEBUG) { if (forcedEnd) { log.info("Word is " + word + "; marks forced end of sentence [cont.]"); } }
        bool inMultiTokenExpr = false;
        bool discardToken = false;
        if (o is ICoreMap)
        {
            // Hacky stuff to ensure sentence breaks do not happen in certain cases
            ICoreMap cm = (ICoreMap)o;
            if (!forcedEnd)
            {
                // NOTE(review): ported from Java — comparing a bool to null here is
                // vacuous in C# (CS0472); presumably cm.Get originally returned Boolean.
                bool forcedUntilEndValue = cm.Get(typeof(CoreAnnotations.ForcedSentenceUntilEndAnnotation));
                if (forcedUntilEndValue != null && forcedUntilEndValue)
                {
                    // if (DEBUG) { log.info("Word is " + word + "; starting wait for forced end of sentence [cont.]"); }
                    inWaitForForcedEnd = true;
                }
                else
                {
                    MultiTokenTag mt = cm.Get(typeof(CoreAnnotations.MentionTokenAnnotation));
                    if (mt != null && !mt.IsEnd())
                    {
                        // In the middle of a multi token mention, make sure sentence is not ended here
                        // if (DEBUG) { log.info("Word is " + word + "; inside multi-token mention [cont.]"); }
                        inMultiTokenExpr = true;
                    }
                }
            }
        }
        if (tokenPatternsToDiscard != null)
        {
            discardToken = MatchesTokenPatternsToDiscard(word);
        }
        // Before the region begins, tokens are skipped entirely (except for the
        // region-begin token itself, which flips insideRegion on).
        if (sentenceRegionBeginPattern != null && !insideRegion)
        {
            if (sentenceRegionBeginPattern.Matcher(word).Matches())
            {
                insideRegion = true;
            }
            lastTokenWasNewline = false;
            continue;
        }
        // Boundary followers (e.g. closing quotes/brackets) attach to the PREVIOUS
        // sentence, but only if that sentence wasn't force-ended and nothing has
        // started the current sentence yet.
        if (!lastSentenceEndForced && lastSentence != null && currentSentence.IsEmpty() && !lastTokenWasNewline && sentenceBoundaryFollowersPattern.Matcher(word).Matches())
        {
            if (!discardToken)
            {
                lastSentence.Add(o);
            }
            lastTokenWasNewline = false;
            continue;
        }
        bool newSentForced = false;
        bool newSent = false;
        string debugText = (discardToken) ? "discarded" : "added to current";  // used only by the commented-out debug logging
        if (inWaitForForcedEnd && !forcedEnd)
        {
            // Everything up to the forced end belongs to the current sentence.
            if (sentenceBoundaryToDiscard.Contains(word))
            {
                // there can be newlines even in something to keep together
                discardToken = true;
            }
            if (!discardToken)
            {
                currentSentence.Add(o);
            }
        }
        else
        {
            if (inMultiTokenExpr && !forcedEnd)
            {
                // Mid-mention: never break the sentence here.
                if (!discardToken)
                {
                    currentSentence.Add(o);
                }
            }
            else
            {
                if (sentenceBoundaryToDiscard.Contains(word))
                {
                    // Discardable boundary (typically a newline): whether it forces a
                    // break depends on the configured newline policy.
                    if (forcedEnd)
                    {
                        // sentence boundary can easily be forced end
                        inWaitForForcedEnd = false;
                        newSentForced = true;
                    }
                    else
                    {
                        if (newlineIsSentenceBreak == WordToSentenceProcessor.NewlineIsSentenceBreak.Always)
                        {
                            newSentForced = true;
                        }
                        else
                        {
                            if (newlineIsSentenceBreak == WordToSentenceProcessor.NewlineIsSentenceBreak.TwoConsecutive && lastTokenWasNewline)
                            {
                                newSentForced = true;
                            }
                        }
                    }
                    lastTokenWasNewline = true;
                }
                else
                {
                    lastTokenWasNewline = false;
                    bool isb;
                    if (xmlBreakElementsToDiscard != null && MatchesXmlBreakElementToDiscard(word))
                    {
                        // XML break element: force a break and drop the token.
                        newSentForced = true;
                    }
                    else
                    {
                        if (sentenceRegionEndPattern != null && sentenceRegionEndPattern.Matcher(word).Matches())
                        {
                            // Leaving the region also ends the sentence.
                            insideRegion = false;
                            newSentForced = true;
                        }
                        else
                        {
                            // Marked sentence boundaries
                            // NOTE(review): "(isb = ...) != null" is another vacuous
                            // null check on a bool, ported from Java's Boolean lookup.
                            if ((isSentenceBoundary != null) && ((isb = isSentenceBoundary[o]) != null) && isb)
                            {
                                // Token ends a multi-token boundary match: keep it, break after.
                                if (!discardToken)
                                {
                                    currentSentence.Add(o);
                                }
                                newSent = true;
                            }
                            else
                            {
                                if (sentenceBoundaryTokenPattern.Matcher(word).Matches())
                                {
                                    // Ordinary boundary token (e.g. "."): keep it, break after.
                                    if (!discardToken)
                                    {
                                        currentSentence.Add(o);
                                    }
                                    newSent = true;
                                }
                                else
                                {
                                    if (forcedEnd)
                                    {
                                        if (!discardToken)
                                        {
                                            currentSentence.Add(o);
                                        }
                                        inWaitForForcedEnd = false;
                                        newSentForced = true;
                                    }
                                    else
                                    {
                                        // Plain token: just accumulate it.
                                        if (!discardToken)
                                        {
                                            currentSentence.Add(o);
                                        }
                                        // chris added this next test in 2017; a bit weird, but KBP setup doesn't have newline in sentenceBoundary patterns, just in toDiscard
                                        if (AbstractTokenizer.NewlineToken.Equals(word))
                                        {
                                            lastTokenWasNewline = true;
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
        if ((newSentForced || newSent) && (!currentSentence.IsEmpty() || allowEmptySentences))
        {
            sentences.Add(currentSentence);
            // adds this sentence now that it's complete
            // A forced end propagates backwards through empty sentences so that
            // follower tokens don't attach across a forced break.
            lastSentenceEndForced = ((lastSentence == null || lastSentence.IsEmpty()) && lastSentenceEndForced) || newSentForced;
            lastSentence = currentSentence;
            currentSentence = new List<In>();
        }
        else
        {
            // clears the current sentence
            if (newSentForced)
            {
                lastSentenceEndForced = true;
            }
        }
    }
    // add any words at the end, even if there isn't a sentence
    // terminator at the end of file
    if (!currentSentence.IsEmpty())
    {
        sentences.Add(currentSentence);
    }
    // adds last sentence
    return(sentences);
}