C# (CSharp) CoreLabel.BeginPosition примеры использования

Язык программирования: C# (CSharp)

Класс/Тип: CoreLabel

Метод/Функция: BeginPosition

Примеров на hotexamples.com: 6

C# (CSharp) CoreLabel.BeginPosition - 6 примеров найдено. Это лучшие примеры C# (CSharp) кода для CoreLabel.BeginPosition, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Set(30)

Word(30)

Get(30)

SetWord(27)

SetValue(27)

Lemma(15)

SetTag(13)

Tag(12)

ContainsKey(11)

GetString(10)

Index(9)

SetIndex(9)

Value(9)

Factory(9)

Remove(8)

SetNER(7)

BeginPosition(6)

SetLemma(6)

SetOriginalText(5)

ToString(4)

get(4)

SetBeginPosition(4)

OriginalText(4)

SetEndPosition(4)

KeySet(3)

ToShorterString(3)

value(2)

Ner(2)

IsNewline(2)

EndPosition(2)

toString(1)

set(1)

lemma(1)

index(1)

SetCategory(1)

endPosition(1)

beginPosition(1)

GetHashCode(1)

Size(1)

LabelFactory(1)

Category(1)

word(1)

Пример #1

Показать файл

        /// <summary>
        /// Handles contractions like del and al, marked by the lexer
        /// del =&gt; de + l =&gt; de + el
        /// al =&gt; a + l =&gt; a + el
        /// con[mts]igo =&gt; con + [mts]i
        /// </summary>
        /// <remarks>
        /// Removes the parent annotation from <paramref name="cl"/>, appends the label built
        /// for the second piece to <c>compoundBuffer</c>, and returns the label built for the
        /// first piece. Matching is case-insensitive and culture-invariant.
        /// </remarks>
        /// <param name="cl">Label whose word is one of: del, al, conmigo, consigo, contigo.</param>
        /// <returns>The label produced by <c>CopyCoreLabel</c> for the first piece.</returns>
        /// <exception cref="ArgumentException">The word is not one of the supported contractions.</exception>
        private CoreLabel ProcessContraction(CoreLabel cl)
        {
            cl.Remove(typeof(CoreAnnotations.ParentAnnotation));
            string word = cl.Word();
            string first;
            string second;
            int    secondOffset;
            int    secondLength;

            // Invariant lowering: a culture-sensitive ToLower() maps 'I' to dotless 'ı' under
            // Turkish/Azeri cultures, so "CONMIGO" would fall through to the default branch.
            string lowered = word.ToLowerInvariant();

            switch (lowered)
            {
            case "del":
            case "al":
            {
                // Everything but the final 'l' is the first piece; the article is emitted
                // in the same case as that final letter.
                first = Sharpen.Runtime.Substring(word, 0, lowered.Length - 1);
                char lastChar = word[lowered.Length - 1];
                second = char.IsLower(lastChar) ? "el" : "EL";
                // NOTE(review): for "del" the first piece is two characters long, yet the
                // second piece is positioned at offset 1 with length 2 -- confirm intended.
                secondOffset = 1;
                secondLength = lowered.Length - 1;
                break;
            }

            case "conmigo":
            case "consigo":
            {
                first        = Sharpen.Runtime.Substring(word, 0, 3);
                // Keep the original consonant and attach an accented i (mí / sí).
                second       = word[3] + "í";
                secondOffset = 3;
                secondLength = 4;
                break;
            }

            case "contigo":
            {
                first        = Sharpen.Runtime.Substring(word, 0, 3);
                second       = Sharpen.Runtime.Substring(word, 3, 5);
                secondOffset = 3;
                secondLength = 4;
                break;
            }

            default:
            {
                throw new ArgumentException("Invalid contraction provided to processContraction");
            }
            }

            // Character offsets of the second piece, relative to the start of the original label.
            int secondStart = cl.BeginPosition() + secondOffset;
            int secondEnd   = secondStart + secondLength;

            compoundBuffer.Add(CopyCoreLabel(cl, second, secondStart, secondEnd));
            return(CopyCoreLabel(cl, first, cl.BeginPosition(), secondStart));
        }

Пример #2

Показать файл

        /// <summary>Splits a contraction marked by the lexer.</summary>
        /// <remarks>
        /// Supported contractions (matched case-insensitively):
        /// au =&gt; a + u =&gt; à + le
        /// aux =&gt; a + ux =&gt; à + les
        /// du =&gt; d + u =&gt; de + le
        /// Any other word, including "des", is rejected with an exception.
        /// Removes the parent annotation from <paramref name="cl"/>, appends the label built
        /// for the second piece to <c>compoundBuffer</c>, and returns the label built for the
        /// first piece.
        /// </remarks>
        /// <param name="cl">Label whose word is one of: au, aux, du.</param>
        /// <returns>The label produced by <c>CopyCoreLabel</c> for the first piece.</returns>
        /// <exception cref="ArgumentException">The word is not one of the supported contractions.</exception>
        private CoreLabel ProcessContraction(CoreLabel cl)
        {
            cl.Remove(typeof(CoreAnnotations.ParentAnnotation));
            string word = cl.Word();
            string first;
            string second;
            int    secondOffset;
            int    secondLength;

            // Culture-invariant lowering so matching never depends on the thread's culture.
            string lowered = word.ToLowerInvariant();

            switch (lowered)
            {
            case "au":
            {
                first        = "à";
                second       = "le";
                secondOffset = 1;
                secondLength = 1;
                break;
            }

            case "aux":
            {
                first        = "à";
                second       = "les";
                secondOffset = 1;
                secondLength = 2;
                break;
            }

            case "du":
            {
                first        = "de";
                second       = "le";
                secondOffset = 1;
                secondLength = 1;
                break;
            }

            default:
            {
                throw new ArgumentException("Invalid contraction provided to processContraction");
            }
            }

            // The second piece covers the characters after the first one of the original word.
            int secondStart = cl.BeginPosition() + secondOffset;
            int secondEnd   = secondStart + secondLength;

            compoundBuffer.Add(CopyCoreLabel(cl, second, secondStart, secondEnd));
            return(CopyCoreLabel(cl, first, cl.BeginPosition(), secondStart));
        }

Пример #3

Показать файл

        // Maps a token range onto the character span it covers and reports whether
        // charIndex lies inside that span (both ends inclusive).
        public virtual bool RangeContainsCharIndex(Pair <int, int> tokenRange, int charIndex)
        {
            IList <CoreLabel> docTokens = doc.Get(typeof(CoreAnnotations.TokensAnnotation));
            CoreLabel         firstTok  = docTokens[tokenRange.First()];
            CoreLabel         lastTok   = docTokens[tokenRange.Second()];
            int spanBegin               = firstTok.BeginPosition();
            int spanEnd                 = lastTok.EndPosition();

            // Reject anything left of the span, then test the right edge.
            if (charIndex < spanBegin)
            {
                return false;
            }
            return charIndex <= spanEnd;
        }

Пример #4

Показать файл

        /// <summary>Breaks a lexer-marked compound into one label per piece.</summary>
        /// <remarks>
        /// Matches of <c>pDash</c> in the word are replaced with " - " and the result is split
        /// with <c>pSpace</c>. Every piece gets its own copy of <paramref name="cl"/> whose
        /// character offsets are derived from the running total of piece lengths; all copies
        /// are appended to <c>compoundBuffer</c>, and the element removed from the head of the
        /// buffer is returned.
        /// </remarks>
        private CoreLabel ProcessCompound(CoreLabel cl)
        {
            cl.Remove(typeof(CoreAnnotations.ParentAnnotation));
            string   dashesSpaced = pDash.Matcher(cl.Word()).ReplaceAll(" - ");
            string[] pieces       = pSpace.Split(dashesSpaced);
            int      consumed     = 0;

            foreach (string piece in pieces)
            {
                CoreLabel pieceLabel = new CoreLabel(cl);
                pieceLabel.SetWord(piece);
                pieceLabel.SetValue(piece);
                pieceLabel.SetBeginPosition(cl.BeginPosition() + consumed);
                // Advance past this piece; the new total is also its end offset.
                consumed += piece.Length;
                pieceLabel.SetEndPosition(cl.BeginPosition() + consumed);
                pieceLabel.Set(typeof(CoreAnnotations.OriginalTextAnnotation), piece);
                compoundBuffer.Add(pieceLabel);
            }
            return compoundBuffer.Remove(0);
        }

Пример #5

Показать файл

        /// <summary>
        /// Splits a lexer-marked verb that carries attached pronoun suffixes:
        /// Escribamosela =&gt; Escribamo + se + la =&gt; escribamos + se + la
        /// Sentaos =&gt; senta + os =&gt; sentad + os
        /// Damelo =&gt; da + me + lo
        /// </summary>
        /// <remarks>
        /// The pronoun labels are appended to <c>compoundBuffer</c>; the stem label is returned.
        /// When the verb stripper yields nothing, <paramref name="cl"/> itself is returned
        /// (with its parent annotation already removed).
        /// </remarks>
        private CoreLabel ProcessVerb(CoreLabel cl)
        {
            cl.Remove(typeof(CoreAnnotations.ParentAnnotation));
            SpanishVerbStripper.StrippedVerb parsed = verbStripper.SeparatePronouns(cl.Word());
            if (parsed == null)
            {
                return cl;
            }

            // The stem occupies the leading characters of the original token; each pronoun
            // starts where the previous piece ended.
            int stemEnd = cl.BeginPosition() + parsed.GetOriginalStem().Length;
            int cursor  = stemEnd;

            foreach (string clitic in parsed.GetPronouns())
            {
                compoundBuffer.Add(CopyCoreLabel(cl, clitic, cursor));
                cursor += clitic.Length;
            }

            CoreLabel stemLabel = CopyCoreLabel(cl, parsed.GetStem(), cl.BeginPosition(), stemEnd);
            stemLabel.SetOriginalText(parsed.GetOriginalStem());
            return stemLabel;
        }

Пример #6

Показать файл

Файл: IOBUtils.cs Проект: zerouid/Stanford.CoreNLP.NET

        /// <summary>Convert token to a sequence of datums and add to iobList.</summary>
        /// <remarks>
        /// One datum is created per character of the (possibly rewritten) token. The first
        /// character is labeled with <c>BeginSymbol</c>, later characters with
        /// <c>ContinuationSymbol</c>, and <c>RewriteSymbol</c> is used where a rewrite rule
        /// applies or the rewritten annotation differs from the token. If the rewrite rules
        /// leave the token empty, this is logged and no datum is added.
        /// </remarks>
        /// <param name="iobList">List the created datums are appended to.</param>
        /// <param name="cl">Label supplying the rewritten annotation and character offsets; it is passed to every created datum.</param>
        /// <param name="token">Text to split into per-character datums; does nothing when empty.</param>
        /// <param name="tokType">Token type passed to <c>StripSegmentationMarkers</c> for the rewritten annotation.</param>
        /// <param name="tokenLabel">Label whose word and tag drive the rewrite rules.</param>
        /// <param name="lastToken">Compared against the lam prefix by rewrite rule #2 (presumably the preceding token).</param>
        /// <param name="applyRewriteRules">When true, the Arabic-specific rewrite rules are applied to the token.</param>
        /// <param name="stripRewrites">When true, most rewrite rules leave the begin/continuation labels untouched instead of assigning <c>RewriteSymbol</c>.</param>
        /// <param name="tf">a TokenizerFactory returning ArabicTokenizers (for determining original segment boundaries)</param>
        /// <param name="origText">the original string before tokenization (for determining original segment boundaries)</param>
        private static void TokenToDatums(IList <CoreLabel> iobList, CoreLabel cl, string token, IOBUtils.TokenType tokType, CoreLabel tokenLabel, string lastToken, bool applyRewriteRules, bool stripRewrites, ITokenizerFactory <CoreLabel> tf, string origText
                                          )
        {
            if (token.IsEmpty())
            {
                return;
            }
            string lastLabel        = ContinuationSymbol;
            string firstLabel       = BeginSymbol;
            string rewritten        = cl.Get(typeof(ArabicDocumentReaderAndWriter.RewrittenArabicAnnotation));
            bool   crossRefRewrites = true;

            if (rewritten == null)
            {
                // No rewritten annotation: nothing to cross-reference against.
                rewritten        = token;
                crossRefRewrites = false;
            }
            else
            {
                rewritten = StripSegmentationMarkers(rewritten, tokType);
            }
            if (applyRewriteRules)
            {
                // Apply Arabic-specific re-write rules.
                // Prefix/suffix checks below are ordinal: they test exact characters, whereas the
                // culture-sensitive default overloads may treat some characters as ignorable.
                string rawToken = tokenLabel.Word();
                string tag      = tokenLabel.Tag();
                MorphoFeatureSpecification featureSpec = new ArabicMorphoFeatureSpecification();
                featureSpec.Activate(MorphoFeatureSpecification.MorphoFeatureType.Ngen);
                featureSpec.Activate(MorphoFeatureSpecification.MorphoFeatureType.Nnum);
                featureSpec.Activate(MorphoFeatureSpecification.MorphoFeatureType.Def);
                featureSpec.Activate(MorphoFeatureSpecification.MorphoFeatureType.Tense);
                MorphoFeatures features = featureSpec.StrToFeatures(tag);
                // Rule #1 : ت --> ة
                if (features.GetValue(MorphoFeatureSpecification.MorphoFeatureType.Ngen).Equals("F")
                    && features.GetValue(MorphoFeatureSpecification.MorphoFeatureType.Nnum).Equals("SG")
                    && rawToken.EndsWith("ت-", System.StringComparison.Ordinal)
                    && !stripRewrites)
                {
                    lastLabel = RewriteSymbol;
                }
                else
                {
                    if (rawToken.EndsWith("ة-", System.StringComparison.Ordinal))
                    {
                        System.Diagnostics.Debug.Assert(token.EndsWith("ة", System.StringComparison.Ordinal));
                        token     = Sharpen.Runtime.Substring(token, 0, token.Length - 1) + "ت";
                        lastLabel = RewriteSymbol;
                    }
                }
                // Rule #2 : لل --> ل ال
                if (lastToken.Equals("ل") && features.GetValue(MorphoFeatureSpecification.MorphoFeatureType.Def).Equals("D"))
                {
                    if (rawToken.StartsWith("-ال", System.StringComparison.Ordinal))
                    {
                        if (!token.StartsWith("ا", System.StringComparison.Ordinal))
                        {
                            log.Info("Bad REWAL: " + rawToken + " / " + token);
                        }
                        // Drop the leading character from both the token and its rewritten form.
                        token     = Sharpen.Runtime.Substring(token, 1);
                        rewritten = Sharpen.Runtime.Substring(rewritten, 1);
                        if (!stripRewrites)
                        {
                            firstLabel = RewriteSymbol;
                        }
                    }
                    else
                    {
                        if (rawToken.StartsWith("-ل", System.StringComparison.Ordinal))
                        {
                            if (!token.StartsWith("ل", System.StringComparison.Ordinal))
                            {
                                log.Info("Bad REWAL: " + rawToken + " / " + token);
                            }
                            if (!stripRewrites)
                            {
                                firstLabel = RewriteSymbol;
                            }
                        }
                        else
                        {
                            log.Info("Ignoring REWAL: " + rawToken + " / " + token);
                        }
                    }
                }
                // Rule #3 : ي --> ى
                // Rule #4 : ا --> ى
                if (rawToken.EndsWith("ى-", System.StringComparison.Ordinal))
                {
                    if (features.GetValue(MorphoFeatureSpecification.MorphoFeatureType.Tense) != null)
                    {
                        // verb: ى becomes ا
                        token = Sharpen.Runtime.Substring(token, 0, token.Length - 1) + "ا";
                    }
                    else
                    {
                        // assume preposition:
                        token = Sharpen.Runtime.Substring(token, 0, token.Length - 1) + "ي";
                    }
                    if (!stripRewrites)
                    {
                        lastLabel = RewriteSymbol;
                    }
                }
                else
                {
                    if (rawToken.Equals("علي-") || rawToken.Equals("-علي-"))
                    {
                        if (!stripRewrites)
                        {
                            lastLabel = RewriteSymbol;
                        }
                    }
                }
            }
            string origWord;

            if (origText == null)
            {
                origWord = tokenLabel.Word();
            }
            else
            {
                origWord = Sharpen.Runtime.Substring(origText, cl.BeginPosition(), cl.EndPosition());
            }
            int origIndex = 0;

            // Skip leading characters of the original word that IsDeletedCharacter reports as deleted.
            while (origIndex < origWord.Length && IsDeletedCharacter(origWord[origIndex], tf))
            {
                ++origIndex;
            }
            // Create datums and add to iobList
            if (token.IsEmpty())
            {
                // Rule #2 can strip the only character; there is nothing to emit, and
                // indexing token[0] below would throw.
                log.Info("Rewriting resulted in empty token: " + tokenLabel.Word());
                return;
            }
            string firstChar = token[0].ToString();

            // Start at 0 to make sure we include the whole token according to the tokenizer
            iobList.Add(CreateDatum(cl, firstChar, firstLabel, 0, origIndex + 1));
            int numChars = token.Length;

            if (crossRefRewrites && rewritten.Length != numChars)
            {
                System.Console.Error.Printf("Rewritten annotation doesn't have correct length: %s>>>%s%n", token, rewritten);
                crossRefRewrites = false;
            }
            ++origIndex;
            for (int j = 1; j < numChars; ++j, ++origIndex)
            {
                while (origIndex < origWord.Length && IsDeletedCharacter(origWord[origIndex], tf))
                {
                    ++origIndex;
                }
                // Clamp to the last character when the token outruns the original word.
                if (origIndex >= origWord.Length)
                {
                    origIndex = origWord.Length - 1;
                }
                string charLabel = (j == numChars - 1) ? lastLabel : ContinuationSymbol;
                string thisChar  = token[j].ToString();
                if (crossRefRewrites && !rewritten[j].ToString().Equals(thisChar))
                {
                    charLabel = RewriteSymbol;
                }
                // Assume all mid-word alef maqsura are supposed to be yah
                if (charLabel == ContinuationSymbol && thisChar.Equals("ى") && j != numChars - 1)
                {
                    charLabel = RewriteSymbol;
                }
                iobList.Add(CreateDatum(cl, thisChar, charLabel, origIndex, origIndex + 1));
            }
            // End at endPosition to make sure we include the whole token according to the tokenizer
            if (!iobList.IsEmpty())
            {
                iobList[iobList.Count - 1].SetEndPosition(cl.EndPosition());
            }
        }