C# (CSharp) POSTagger.Tag примеры использования

Язык программирования: C# (CSharp)

Класс/Тип: POSTagger

Метод/Функция: Tag

Примеров на hotexamples.com: 2

C# (CSharp) POSTagger.Tag - 2 примера найдено. Это лучшие примеры C# (CSharp) кода для POSTagger.Tag, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

tag(4)

BatchTag(3)

ResolveUnknowns(2)

Tag(2)

TagString(2)

ExtractArtifact(1)

TagList(1)

Пример #1

Показать файл

        /// <summary>
        /// Runs the text through the processing pipeline (normalize, tokenize, stem,
        /// mark stop words, detect sentences, tag parts of speech, find entities)
        /// and packages the outcome as a document.
        /// </summary>
        /// <param name="text">The text.</param>
        /// <returns>The resulting document object.</returns>
        public Document Process(string text)
        {
            // Text-level normalization happens before tokenization.
            var normalizedText = NormalizerManager.Normalize(text);
            var tokenSet = Tokenizer.Tokenize(normalizedText, TokenizerLanguage);

            // Token-level passes, each one feeding the next.
            tokenSet = NormalizerManager.Normalize(tokenSet);
            tokenSet = Stemmer.Stem(tokenSet, StemmerLanguage);
            tokenSet = StopWordsManager.MarkStopWords(tokenSet, StopWordsLanguage);

            // Part-of-speech tagging is applied sentence by sentence.
            var detectedSentences = SentenceDetector.Detect(tokenSet, SentenceDetectorLanguage);
            for (int index = 0; index < detectedSentences.Length; index++)
            {
                var current = detectedSentences[index];
                current.Tokens = POSTagger.Tag(current.Tokens, POSTaggerLanguage);
            }

            // Entity finding runs over the full token set; sentences are then detected
            // again from its result, and that second detection is what the document receives.
            tokenSet = EntityFinder.Find(tokenSet, EntityFinderType);
            detectedSentences = SentenceDetector.Detect(tokenSet, SentenceDetectorLanguage);

            var document = new Document(detectedSentences, tokenSet, text, FeatureExtractor, TextSummarizer, Tokenizer, TokenizerLanguage);
            return document;
        }

Пример #2

Показать файл

        /// <summary>
        /// Take in a paragraph and replace all non-ignored words with a 'smarter' synonym.
        /// </summary>
        /// <remarks>
        /// The paragraph is split into sentences. For each sentence, upper-case characters
        /// that are not recognized as the start of a name are lower-cased, the sentence is
        /// re-tokenized, tagged and chunked, and every token whose tag is not in
        /// <c>ConversionConditions.ExcludedPOS</c> and whose chunk phrase is in
        /// <c>ConversionConditions.IncludedPhrases</c> is replaced by the result of
        /// <c>GetMostComplexSynyonymScoredWN</c>. All other tokens are copied unchanged.
        /// The assembled text is finally passed through <c>StringToSentenceCase</c> and
        /// <c>AddPeriod</c>.
        /// </remarks>
        /// <param name="data">Paragraph to convert.</param>
        /// <returns>The 'improved' paragraph.</returns>
        public static string ConvertParagraph(string data)
        {
            StringBuilder output = new StringBuilder();

            string[] sentences = MEDetector.Detect(data);

            // Iterate by index so "is this the last sentence?" is exact even when the same
            // sentence text occurs more than once (Array.IndexOf would find the first copy).
            for (int sentenceIndex = 0; sentenceIndex < sentences.Length; sentenceIndex++)
            {
                string sentence       = sentences[sentenceIndex];
                bool   isLastSentence = sentenceIndex == sentences.Length - 1;
                bool   endsWithPeriod = sentence.EndsWith(".", StringComparison.Ordinal);

                string[] tokens      = METokenizer.Tokenize(sentence);
                Span[]   names       = MENameFinder.Find(tokens);
                char[]   sentenceArr = sentence.ToCharArray();

                // NOTE(review): this compares a character index with Span.Start. If the name
                // finder reports token indices rather than character offsets, the comparison
                // does not identify name starts correctly - confirm against the Span contract.
                for (int cCharIndex = 0; cCharIndex < sentenceArr.Length; cCharIndex++)
                {
                    if (!Char.IsUpper(sentenceArr[cCharIndex]))
                    {
                        continue;
                    }

                    bool isName = false;
                    for (int cSpanIndex = 0; cSpanIndex < names.Length; cSpanIndex++)
                    {
                        if (cCharIndex == names[cSpanIndex].Start)
                        {
                            isName = true;
                            break;
                        }
                    }

                    if (!isName)
                    {
                        // Invariant casing keeps the result independent of the current culture.
                        sentenceArr[cCharIndex] = Char.ToLowerInvariant(sentenceArr[cCharIndex]);
                        // TODO: Have to keep track of where the capitals were in the original sentence to add them again later.
                    }
                }

                tokens = METokenizer.Tokenize(new string(sentenceArr));
                string[] tags   = METagger.Tag(tokens);
                string[] chunks = MEChunker.Chunk(tokens, tags);

                // Index of the last token that may be followed by a space: the final token,
                // or the one before the closing "." when the sentence ends with a period.
                int lastSpacedIndex = tokens.Length - (endsWithPeriod ? 2 : 1);

                for (int i = 0; i < tokens.Length; i++)
                {
                    if (!ConversionConditions.ExcludedPOS.Contains(tags[i]))
                    {
                        // Current token POS is not excluded from conversion.
                        // The phrase name is the part of the chunk tag after the first '-'.
                        string[] chunkParts = chunks[i].Split('-');
                        if (chunkParts.Length > 1 && ConversionConditions.IncludedPhrases.Contains(chunkParts[1]))
                        {
                            // The containing phrase of the current token is not excluded.
                            // Reset per token so an unmapped tag never inherits the previous
                            // token's part of speech.
                            Wnlib.PartsOfSpeech pos = Wnlib.PartsOfSpeech.Unknown;
                            switch (tags[i])
                            {
                            case "NN":
                            case "NNS":
                                pos = Wnlib.PartsOfSpeech.Noun;
                                break;

                            case "JJ":
                            case "JJR":
                            case "JJS":
                                pos = Wnlib.PartsOfSpeech.Adj;
                                break;

                            case "RB":
                            case "RBR":
                            case "RBS":
                                pos = Wnlib.PartsOfSpeech.Adv;
                                break;

                            case "VB":
                            case "VBD":
                            case "VBG":
                            case "VBN":
                            case "VBP":
                            case "VBZ":
                                pos = Wnlib.PartsOfSpeech.Verb;
                                break;
                            }

                            string mostComplexSynonym = GetMostComplexSynyonymScoredWN(tokens[i], pos);
                            output.Append(mostComplexSynonym);
                        }
                        else
                        {
                            // The containing phrase of the current token is excluded.
                            output.Append(tokens[i]);
                        }
                    }
                    else
                    {
                        // Current token POS is excluded from conversion.
                        output.Append(tokens[i]);
                    }

                    // Look ahead with a bounds check instead of catching IndexOutOfRangeException.
                    int  nextIndex           = i + 1;
                    bool hasNextToken        = nextIndex < tokens.Length;
                    bool isBeforePunctuation = hasNextToken
                                               && Regex.IsMatch(tokens[nextIndex], IS_BEFORE_PUNCTUATION_MATCH_PATTERN);

                    // Add a separating space unless this token ends the sentence or is
                    // directly followed by punctuation.
                    if (i < lastSpacedIndex && !isBeforePunctuation)
                    {
                        output.Append(" ");
                    }

                    // Separate this sentence from the next one.
                    if (tokens[i] == "." && !isLastSentence)
                    {
                        output.Append(" ");
                    }

                    bool nextIsContraction = hasNextToken
                                             && nextIndex < chunks.Length
                                             && ((chunks[nextIndex] == "O" && tokens[nextIndex].Contains("'"))
                                                 || tokens[nextIndex] == "'s");

                    // This is a contraction. Remove the space between the two parts - but only
                    // if a space was actually written, so a token character is never dropped.
                    if (nextIsContraction && output.Length > 0 && output[output.Length - 1] == ' ')
                    {
                        output.Length--;
                    }
                }
            }

            return AddPeriod(StringToSentenceCase(output.ToString()));
        }