/// <summary>
/// Lemmatizes the input text and returns the lemma ("stem") of its first token.
/// </summary>
/// <param name="text">Text to lemmatize</param>
/// <returns>The lemma of the first token, or null if annotation fails</returns>
public string GetStemmedText(string text)
{
    try
    {
        // Annotate the raw text
        var annotation = new Annotation(text);
        _pipeLine.annotate(annotation);

        // First sentence
        ArrayList sentences = annotation.get(_sentencesAnnotation.getClass()) as ArrayList;
        CoreMap sentence = sentences.get(0) as CoreMap;

        // First token of that sentence
        ArrayList tokens = sentence.get(_tokensAnnotation.getClass()) as ArrayList;
        CoreLabel token = tokens.get(0) as CoreLabel;

        // Its lemma
        string lemma = token.get(_lemmaAnnotation.getClass()).ToString();
        return lemma;
    }
    catch (Exception)
    {
        return null;
    }
}
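// A minimal sketch (assumed, not from the original) of the surrounding class that
// GetStemmedText relies on: a StanfordCoreNLP pipeline configured with the annotators
// lemmatization needs, plus instances of the annotation key classes whose getClass()
// results are used as lookup keys. The class name "Stemmer" is hypothetical.
using java.util;
using edu.stanford.nlp.ling;
using edu.stanford.nlp.pipeline;

public class Stemmer
{
    private readonly StanfordCoreNLP _pipeLine;
    private readonly CoreAnnotations.SentencesAnnotation _sentencesAnnotation = new CoreAnnotations.SentencesAnnotation();
    private readonly CoreAnnotations.TokensAnnotation _tokensAnnotation = new CoreAnnotations.TokensAnnotation();
    private readonly CoreAnnotations.LemmaAnnotation _lemmaAnnotation = new CoreAnnotations.LemmaAnnotation();

    public Stemmer()
    {
        // "tokenize, ssplit, pos, lemma" is the minimal annotator chain that produces lemmas.
        var props = new Properties();
        props.setProperty("annotators", "tokenize, ssplit, pos, lemma");
        _pipeLine = new StanfordCoreNLP(props);
    }

    // GetStemmedText (above) would live here.
}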
/// <summary>Return a new <see cref="ParseResult"/> constructed from <paramref name="annotation"/></summary>
internal ParseResult(Annotation annotation)
{
    java.util.AbstractList sentences = annotation.get(SentencesAnnotationClass) as java.util.AbstractList;
    CoreMap sentence = sentences.get(0) as CoreMap;
    LabeledScoredTreeNode constituencyParse = sentence.get(TreeAnnotationClass) as LabeledScoredTreeNode;

    // Skip the ROOT
    Tree childOfRoot = constituencyParse.firstChild();
    Constituents = childOfRoot;
    Constituents.indexLeaves();

    // Build the collection of tokens
    var parsedTokens = sentence.get(TokensAnnotationClass) as java.util.AbstractList;
    var mentions = sentence.get(MentionsAnnotationClass);
    for (int tokenIndex = 0; tokenIndex < parsedTokens.size(); tokenIndex++)
    {
        CoreLabel source = parsedTokens.get(tokenIndex) as CoreLabel;
        var tokenMentions = source.get(MentionTokenAnnotationClass);
        var tokenGender = source.get(GenderAnnotationClass);
        Tokens.Add(new ParseToken
        {
            Index = source.index(),
            Word = source.word(),
            Lemma = source.lemma(),
            PartOfSpeech = source.get(PartOfSpeechAnnotationClass) as string,
            NamedEntityClass = source.get(NamedEntityTagAnnotationClass) as string,
        });
    }

    // Create the list of dependencies between tokens
    SemanticGraph dependencyGraph = sentence.get(DependencyAnnotationClass) as SemanticGraph;
    //java.util.List dependencies = dependencyGraph.edgeListSorted();
    java.util.Iterator dependencyGraphEdges = dependencyGraph.edgeIterable().iterator();
    while (dependencyGraphEdges.hasNext())
    {
        SemanticGraphEdge edge = dependencyGraphEdges.next() as SemanticGraphEdge;
        string relationName = edge.getRelation().getShortName();
        string relationSpecifier = edge.getRelation().getSpecific();
        IndexedWord governor = edge.getGovernor();
        IndexedWord dependent = edge.getDependent();
        Dependencies.Add((relationName, relationSpecifier, governor.index(), dependent.index()));
    }
}
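// One plausible way (an assumption, not shown in the original) to define the
// java.lang.Class keys the constructor above reads: call getClass() on throwaway
// instances of the CoreNLP annotation types, mirroring the pattern GetStemmedText
// uses. Which SemanticGraphCoreAnnotations variant backs DependencyAnnotationClass
// is a guess; CoreNLP offers several.
using edu.stanford.nlp.ling;
using edu.stanford.nlp.semgraph;
using edu.stanford.nlp.trees;

internal static readonly java.lang.Class SentencesAnnotationClass = new CoreAnnotations.SentencesAnnotation().getClass();
internal static readonly java.lang.Class TokensAnnotationClass = new CoreAnnotations.TokensAnnotation().getClass();
internal static readonly java.lang.Class TreeAnnotationClass = new TreeCoreAnnotations.TreeAnnotation().getClass();
internal static readonly java.lang.Class PartOfSpeechAnnotationClass = new CoreAnnotations.PartOfSpeechAnnotation().getClass();
internal static readonly java.lang.Class NamedEntityTagAnnotationClass = new CoreAnnotations.NamedEntityTagAnnotation().getClass();
internal static readonly java.lang.Class DependencyAnnotationClass = new SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation().getClass();
// MentionsAnnotationClass, MentionTokenAnnotationClass, and GenderAnnotationClass
// would follow the same pattern with their respective annotation types.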
public void Process(ref RecipeItem rec)
{
    string text = rec.Value.ToLower();

    // Annotate the recipe text
    var annotation = new Annotation(text);
    pipeline.annotate(annotation);

    // These are all the sentences in this document.
    // A CoreMap is essentially a Map that uses class objects as keys and has values with custom types.
    var sentences = annotation.get(typeof(CoreAnnotations.SentencesAnnotation));
    if (sentences == null)
    {
        return;
    }

    var adj = "";
    var noun = "";
    // Each sentence is a CoreMap (casting the elements to Annotation would fail at runtime)
    foreach (CoreMap sentence in sentences as ArrayList)
    {
        //var token = sentence.get(typeof(CoreAnnotations.PartOfSpeechAnnotation));
        var token = sentence.get(typeof(CoreAnnotations.TokensAnnotation));
        CoreLabel prev = new CoreLabel();
        bool isNote = false;
        foreach (CoreLabel typ in token as ArrayList)
        {
            object word = typ.get(typeof(CoreAnnotations.TextAnnotation));
            var pos = typ.get(typeof(CoreAnnotations.PartOfSpeechAnnotation));
            Console.WriteLine("type: {0}, word: {1}", pos, word);

            string test = pos.ToString().ToLower();
            // Everything after the first comma is treated as a note
            if (isNote)
            {
                rec.Notes += " " + word;
            }
            if (test.Contains(","))
            {
                isNote = true;
            }
            // Collect adjectives (JJ* tags) and nouns (NN* tags)
            if (test.Contains("jj"))
            {
                adj += " " + word;
            }
            if (test.Contains("nn"))
            {
                noun += " " + word;
            }
            // Look back at the previous token (the lookups are currently unused)
            if (prev.value() != null)
            {
                word = prev.get(typeof(CoreAnnotations.TextAnnotation));
                pos = prev.get(typeof(CoreAnnotations.PartOfSpeechAnnotation));
            }
            prev = typ;
        }
    }
    Console.WriteLine("\n");
    rec.Adj = adj;
    rec.Noun = noun;
}
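// The RecipeItem shape Process assumes, inferred from the members it touches
// (Value, Notes, Adj, Noun); the real type lives elsewhere, so treat this as
// a sketch rather than the project's actual definition.
public class RecipeItem
{
    public string Value { get; set; } // raw ingredient text, e.g. "fresh basil, chopped"
    public string Notes { get; set; } // everything after the first comma ("chopped")
    public string Adj { get; set; }   // accumulated adjective tokens (JJ* POS tags)
    public string Noun { get; set; }  // accumulated noun tokens (NN* POS tags)
}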
private static int tokReader(TextReader r, TextWriter writer, Pattern parseInsidePattern, String options,
                             bool preserveLines, bool dump, bool lowerCase) /*throws IOException*/
{
    int numTokens = 0;
    bool beginLine = true;
    bool printing = (parseInsidePattern == null); // start off printing, unless you're looking for a start entity
    Matcher m = null;
    if (parseInsidePattern != null)
    {
        m = parseInsidePattern.matcher(""); // create once as performance hack
    }
    for (PTBTokenizer<CoreLabel> tokenizer = new PTBTokenizer<CoreLabel>(r, new CoreLabelTokenFactory(), options); tokenizer.hasNext();)
    {
        CoreLabel obj = tokenizer.next();
        // String origStr = obj.get(CoreAnnotations.TextAnnotation.class).replaceFirst("\n+$", ""); // DanC added this to fix a lexer bug, hopefully now corrected
        String origStr = (String)obj.get(typeof(CoreAnnotations.TextAnnotation));
        String str;
        if (lowerCase)
        {
            str = origStr.ToLowerInvariant();
            obj.set(typeof(CoreAnnotations.TextAnnotation), str);
        }
        else
        {
            str = origStr;
        }
        if (m != null && m.reset(origStr).matches())
        {
            printing = (m.group(1).Length == 0); // turn on printing if no end element slash, turn it off if there is one
        }
        else if (printing)
        {
            if (dump)
            {
                // after having checked for tags, change str to be exhaustive
                str = obj.toString();
            }
            if (preserveLines)
            {
                if (PTBLexer.NEWLINE_TOKEN.Equals(origStr))
                {
                    beginLine = true;
                    writer.WriteLine();
                }
                else
                {
                    if (!beginLine)
                    {
                        writer.Write(' ');
                    }
                    else
                    {
                        beginLine = false;
                    }
                    // writer.Write(str.replace("\n", ""));
                    writer.Write(str);
                }
            }
            else
            {
                writer.Write(str);
                writer.WriteLine();
            }
        }
        numTokens++;
    }
    return numTokens;
}
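// A hypothetical call site for tokReader: tokenize an in-memory string and write
// one token per line to the console. The options string follows PTBTokenizer's
// comma-separated name=value format; "invertible=true" is just an example setting.
using (TextReader reader = new StringReader("Dr. Smith paid $2.50!"))
{
    int count = tokReader(reader, Console.Out,
                          parseInsidePattern: null, // no <tag>...</tag> filtering
                          options: "invertible=true",
                          preserveLines: false, dump: false, lowerCase: false);
    Console.WriteLine("{0} tokens", count);
}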