/// <summary>
/// Analyzes <paramref name="text"/> with the shared <c>workflow</c> and returns one
/// <see cref="POS"/> entry per morpheme, in the order the workflow reports them.
/// </summary>
/// <param name="text">
/// Text to analyze. Null or empty input returns an empty result without invoking the workflow.
/// </param>
/// <param name="count">
/// Running totals updated in place: <c>SentenceCount</c> is incremented per sentence,
/// <c>WordsPhraseCount</c> per eojeol and <c>MorphemeCount</c> per morpheme.
/// </param>
/// <returns>The tag/morpheme pairs collected from every eojeol of every sentence.</returns>
public static IEnumerable<POS> Extract(string text, ref NLPCount count)
{
    var result = new List<POS>();

    // Nothing to analyze: avoid handing null/empty input to the workflow.
    if (string.IsNullOrEmpty(text))
    {
        return result;
    }

    workflow.analyze(text);

    // The Sentence instance passed here only selects the result type to fetch.
    var sentences = workflow.getResultOfDocument(new jHanNanum.comm.Sentence(0, 0, false));

    foreach (jHanNanum.comm.Sentence sentence in sentences)
    {
        count.SentenceCount++;

        foreach (var eojeol in sentence.Eojeols)
        {
            count.WordsPhraseCount++;

            // Tags and Morphemes are parallel arrays; eojeol.length bounds both.
            for (int i = 0; i < eojeol.length; i++)
            {
                count.MorphemeCount++;
                result.Add(new POS
                {
                    PosTag = eojeol.Tags[i],
                    Text = eojeol.Morphemes[i]
                });
            }
        }
    }

    return result;
}
/// <summary>
/// Parses <paramref name="text"/> with the shared <c>tagger</c> and collects a
/// <see cref="POS"/> entry for each retained node of the resulting node chain.
/// </summary>
/// <param name="text">Text to parse. Null or empty input yields an empty list.</param>
/// <param name="count">Accepted for signature parity; this method does not modify it.</param>
/// <returns>
/// Entries whose <c>Text</c> is the node surface and whose <c>PosTag</c> is the first
/// comma-separated field of the node feature string.
/// </returns>
public static IEnumerable<POS> Extract(string text, ref NLPCount count)
{
    var collected = new List<POS>();

    if (string.IsNullOrEmpty(text))
    {
        return collected;
    }

    // Walk the linked node chain from the head until it runs out.
    for (MeCabNode current = tagger.ParseToNode(text); current != null; current = current.Next)
    {
        // Only nodes with a positive CharType and a surface of at most 100 characters are kept.
        // NOTE(review): presumably CharType 0 marks boundary/default nodes - confirm against the tagger's dictionary.
        bool keep = current.CharType > 0 && current.Surface.Length <= 100;
        if (!keep)
        {
            continue;
        }

        collected.Add(new POS
        {
            Text = current.Surface,
            PosTag = current.Feature.Split(',')[0]
        });
    }

    return collected;
}
/// <summary>
/// Normalizes, tokenizes and stems <paramref name="text"/> with <c>TwitterKoreanProcessor</c>,
/// then returns the resulting tokens as <see cref="POS"/> entries, excluding those tagged
/// <c>"Punctuation"</c> or <c>"Space"</c>.
/// </summary>
/// <param name="text">
/// Text to process. Null or empty input returns an empty sequence without calling the processor.
/// </param>
/// <param name="count">Accepted for signature parity; this method does not modify it.</param>
/// <returns>
/// A lazily evaluated sequence: the punctuation/space filter runs when the caller enumerates it.
/// </returns>
public static IEnumerable<POS> Extract(string text, ref NLPCount count)
{
    // Nothing to process: avoid handing null/empty input to the normalizer.
    if (string.IsNullOrEmpty(text))
    {
        return Enumerable.Empty<POS>();
    }

    string normalized = TwitterKoreanProcessor.normalize(text).toString();
    var tokenized = TwitterKoreanProcessor.tokenize(normalized);
    var stemmed = TwitterKoreanProcessor.stem(tokenized);

    return stemmed
        .ToPosListFromTokens()
        .Where(s => s.PosTag != "Punctuation" && s.PosTag != "Space");
}
/// <summary>
/// Annotates <paramref name="text"/> with the shared <c>pipeline</c> and returns a
/// <see cref="POS"/> entry (token text plus part-of-speech tag) for each token.
/// </summary>
/// <param name="text">
/// Text to annotate. Null or empty input yields an empty list without running the pipeline.
/// </param>
/// <param name="count">
/// Running totals updated in place: <c>SentenceCount</c> grows by the number of sentences and
/// <c>WordsPhraseCount</c> by the number of tokens in each sentence (including tokens that
/// are later skipped by the length filter).
/// </param>
/// <returns>A list of entries for every token whose text is at most 100 characters long.</returns>
public static IEnumerable<POS> Extract(string text, ref NLPCount count)
{
    var collected = new List<POS>();

    if (string.IsNullOrEmpty(text))
    {
        return collected;
    }

    var annotation = new Annotation(text);
    pipeline.annotate(annotation);

    // Annotation values are looked up by the class object of their annotation key type.
    var sentencesKey = new SentencesAnnotation().getClass();
    var tokensKey = new TokensAnnotation().getClass();
    var textKey = new TextAnnotation().getClass();
    var posKey = new PartOfSpeechAnnotation().getClass();

    var sentenceItems = ((java.util.ArrayList)annotation.get(sentencesKey)).toArray();
    count.SentenceCount += sentenceItems.Length;

    foreach (var sentenceItem in sentenceItems)
    {
        var sentenceMap = (edu.stanford.nlp.util.CoreMap)sentenceItem;
        var tokenItems = ((java.util.ArrayList)sentenceMap.get(tokensKey)).toArray();
        count.WordsPhraseCount += tokenItems.Length;

        foreach (var tokenItem in tokenItems)
        {
            var label = (edu.stanford.nlp.ling.CoreLabel)tokenItem;
            string posTag = (string)label.get(posKey);
            string word = (string)label.get(textKey);

            // Overly long tokens are counted above but not emitted.
            if (word.Length > 100)
            {
                continue;
            }

            collected.Add(new POS { Text = word, PosTag = posTag });
        }
    }

    // Hand back a copy of the accumulated entries.
    return new List<POS>(collected);
}
/// <summary>
/// Convenience overload for callers that do not need counts: forwards to
/// <c>Extract(string, ref NLPCount)</c> with a fresh <see cref="NLPCount"/> that is discarded.
/// </summary>
/// <param name="text">Text to analyze; passed through unchanged.</param>
/// <returns>Whatever the counting overload returns for <paramref name="text"/>.</returns>
public static IEnumerable<POS> Extract(string text)
{
    NLPCount discardedCount = new NLPCount();
    IEnumerable<POS> extracted = Extract(text, ref discardedCount);
    return extracted;
}