示例#1
0
        /// <summary>
        /// Runs the analysis workflow over <paramref name="text"/> and flattens the result
        /// into one POS entry per morpheme, visiting sentences, then eojeols, then morphemes.
        /// </summary>
        /// <param name="text">Text handed to <c>workflow.analyze</c> as-is (no null/empty check is made here).</param>
        /// <param name="count">
        /// Counter updated while walking the result: SentenceCount once per sentence,
        /// WordsPhraseCount once per eojeol, MorphemeCount once per morpheme.
        /// </param>
        /// <returns>A list pairing each morpheme text with the tag found at the same index.</returns>
        public static IEnumerable <POS> Extract(string text, ref NLPCount count)
        {
            workflow.analyze(text);
            var analyzed = workflow.getResultOfDocument(new jHanNanum.comm.Sentence(0, 0, false));

            var morphemes = new List <POS>();
            foreach (jHanNanum.comm.Sentence current in analyzed)
            {
                count.SentenceCount++;

                foreach (var phrase in current.Eojeols)
                {
                    count.WordsPhraseCount++;

                    // Tags and Morphemes are read in parallel, indexed up to phrase.length.
                    int index = 0;
                    while (index < phrase.length)
                    {
                        count.MorphemeCount++;

                        var entry = new POS()
                        {
                            PosTag = phrase.Tags[index],
                            Text   = phrase.Morphemes[index]
                        };
                        morphemes.Add(entry);

                        index++;
                    }
                }
            }

            return morphemes;
        }
        /// <summary>
        /// Parses <paramref name="text"/> with the tagger and walks the resulting node chain,
        /// collecting one POS entry per accepted node.
        /// </summary>
        /// <param name="text">Input text; null or empty yields an empty list without calling the tagger.</param>
        /// <param name="count">Accepted for signature compatibility; this method does not modify it.</param>
        /// <returns>
        /// Entries for nodes whose CharType is greater than zero and whose surface is at most
        /// 100 characters long. PosTag is the first comma-separated field of the node's Feature.
        /// </returns>
        public static IEnumerable <POS> Extract(string text, ref NLPCount count)
        {
            var collected = new List <POS>();

            if (!string.IsNullOrEmpty(text))
            {
                // Follow the Next links until the chain ends.
                for (MeCabNode current = tagger.ParseToNode(text); current != null; current = current.Next)
                {
                    // Short-circuit keeps the surface check from running for rejected CharType values.
                    if (current.CharType > 0 && current.Surface.Length <= 100)
                    {
                        var entry = new POS()
                        {
                            Text   = current.Surface,
                            PosTag = current.Feature.Split(',')[0]
                        };
                        collected.Add(entry);
                    }
                }
            }

            return collected;
        }
示例#3
0
        /// <summary>
        /// Normalizes, tokenizes and stems <paramref name="text"/> with TwitterKoreanProcessor,
        /// then converts the tokens to POS entries.
        /// </summary>
        /// <param name="text">Text passed straight to <c>TwitterKoreanProcessor.normalize</c>.</param>
        /// <param name="count">Accepted for signature compatibility; this method does not modify it.</param>
        /// <returns>
        /// A lazily filtered sequence that omits entries tagged "Punctuation" or "Space".
        /// </returns>
        public static IEnumerable <POS> Extract(string text, ref NLPCount count)
        {
            string cleaned = TwitterKoreanProcessor.normalize(text).toString();
            var    stems   = TwitterKoreanProcessor.stem(TwitterKoreanProcessor.tokenize(cleaned));

            var entries = stems.ToPosListFromTokens();

            // The filter is deferred: it runs when the caller enumerates the result.
            return entries.Where(entry => !(entry.PosTag == "Punctuation" || entry.PosTag == "Space"));
        }
        /// <summary>
        /// Annotates <paramref name="text"/> with the pipeline and collects the text and
        /// part-of-speech tag of each token, sentence by sentence.
        /// </summary>
        /// <param name="text">Input text; null or empty yields an empty list and leaves <paramref name="count"/> untouched.</param>
        /// <param name="count">
        /// Counter updated from the annotation: SentenceCount grows by the number of sentences and
        /// WordsPhraseCount by the number of tokens (including tokens later skipped for length).
        /// </param>
        /// <returns>A new list of entries for tokens whose text is at most 100 characters long.</returns>
        public static IEnumerable <POS> Extract(string text, ref NLPCount count)
        {
            var collected = new List <POS>();
            if (string.IsNullOrEmpty(text))
            {
                return collected;
            }

            var annotated = new Annotation(text);
            pipeline.annotate(annotated);

            // Annotation values are looked up by the class object of their key type.
            var sentencesKey = new SentencesAnnotation().getClass();
            var tokensKey    = new TokensAnnotation().getClass();
            var textKey      = new TextAnnotation().getClass();
            var posKey       = new PartOfSpeechAnnotation().getClass();

            var sentenceItems = ((java.util.ArrayList)annotated.get(sentencesKey)).toArray();
            count.SentenceCount += sentenceItems.Length;

            foreach (var sentenceItem in sentenceItems)
            {
                var sentenceMap = (edu.stanford.nlp.util.CoreMap)sentenceItem;
                var tokenItems  = ((java.util.ArrayList)sentenceMap.get(tokensKey)).toArray();
                count.WordsPhraseCount += tokenItems.Length;

                foreach (var tokenItem in tokenItems)
                {
                    var    label   = (edu.stanford.nlp.ling.CoreLabel)tokenItem;
                    string tag     = (string)label.get(posKey);
                    string surface = (string)label.get(textKey);

                    // Over-long tokens are counted above but not emitted.
                    if (surface.Length > 100)
                    {
                        continue;
                    }

                    collected.Add(new POS()
                    {
                        Text   = surface,
                        PosTag = tag
                    });
                }
            }

            return collected.ToList();
        }
        /// <summary>
        /// Convenience overload for callers that do not need the counters: forwards to
        /// <c>Extract(text, ref count)</c> with a fresh NLPCount that is discarded afterwards.
        /// </summary>
        /// <param name="text">Text forwarded unchanged to the counting overload.</param>
        /// <returns>Whatever the counting overload returns for <paramref name="text"/>.</returns>
        public static IEnumerable <POS> Extract(string text)
        {
            NLPCount discarded = new NLPCount();
            return Extract(text, ref discarded);
        }