/// <summary>
/// Converts a single Stanford token into an <c>InternalWord</c> and appends it to
/// the word list of <paramref name="sentence"/>.
/// </summary>
/// <param name="stanfordWord">Token annotation map providing the text and character offsets.</param>
/// <param name="sentence">Sentence whose <c>words</c> list receives the new word.</param>
/// <returns>
/// <c>true</c> when the word was added; <c>false</c> when the text or an offset annotation
/// is missing, an offset is not a valid unsigned integer, or the offsets do not describe
/// a span inside the raw text.
/// </returns>
private bool ExtractWord(
    CoreMap stanfordWord,
    ref InternalSentence sentence)
{
    // A missing annotation is reported as a failed extraction instead of
    // surfacing as a NullReferenceException from ToString().
    object beginAnnotation = stanfordWord.get(_charOffsetBeginAnnotationClass);
    if (beginAnnotation == null)
    {
        return false;
    }

    uint offsetBegin;
    if (!UInt32.TryParse(beginAnnotation.ToString(), out offsetBegin))
    {
        return false;
    }

    object endAnnotation = stanfordWord.get(_charOffsetEndAnnotationClass);
    if (endAnnotation == null)
    {
        return false;
    }

    uint offsetEnd;
    if (!UInt32.TryParse(endAnnotation.ToString(), out offsetEnd))
    {
        return false;
    }

    object textAnnotation = stanfordWord.get(_textAnnotationClass);
    if (textAnnotation == null)
    {
        return false;
    }

    // Substring throws on a negative length or a span running past the end of the
    // string. Checking against Length also guarantees both offsets fit in an int,
    // so the casts below cannot wrap.
    if (offsetEnd < offsetBegin || offsetEnd > _rawData.Length)
    {
        return false;
    }

    int start = (int)offsetBegin;
    int length = (int)offsetEnd - start;

    sentence.words.Add(new InternalWord()
    {
        annotatedText = textAnnotation.ToString(),
        // The original text is sliced out of the raw input by offset rather than
        // read from the token's original-text annotation.
        originalText = _rawData.Substring(start, length),
        characterOffsetBegin = offsetBegin,
        characterOffsetEnd = offsetEnd
    });

    return true;
}
/// <summary>
/// Runs the annotation pipeline over <paramref name="text"/> and returns the lemma
/// of the first token of the first sentence.
/// </summary>
/// <param name="text">Text to annotate.</param>
/// <returns>
/// The lemma of the first token, or <c>null</c> when any step of the lookup throws.
/// </returns>
public string GetStemmedText(string text)
{
    try
    {
        // Annotate the input.
        var document = new Annotation(text);
        _pipeLine.annotate(document);

        // Only the first sentence is inspected.
        var allSentences = document.get(_sentencesAnnotation.getClass()) as ArrayList;
        var firstSentence = allSentences.get(0) as CoreMap;

        // Only the first token of that sentence is inspected.
        var sentenceTokens = firstSentence.get(_tokensAnnotation.getClass()) as ArrayList;
        var firstToken = sentenceTokens.get(0) as CoreLabel;

        return firstToken.get(_lemmaAnnotation.getClass()).ToString();
    }
    catch (Exception)
    {
        // Best effort: every failure is reported to the caller as null.
        return null;
    }
}
/// <summary>
/// Collects the key phrases of every sentence in <paramref name="coredoc"/> and
/// registers them in <c>NounPhrases</c> under <paramref name="id"/>.
/// </summary>
/// <param name="coredoc">Document whose sentences are obtained through the analyzer.</param>
/// <param name="id">Key under which the collected phrases are stored.</param>
/// <remarks>Nothing is stored when the analyzer returns no sentence list.</remarks>
private static void ExtractKeyPhrases(CoreDocument coredoc, int id)
{
    ArrayList sentenceList = _analyzer.GetSents(coredoc);
    if (sentenceList == null)
    {
        return;
    }

    var phrases = new List<string>();
    for (int position = 0; position < sentenceList.size(); position++)
    {
        var currentSentence = (CoreMap)sentenceList.get(position);
        var parseTree = (Tree)currentSentence.get(typeof(TreeCoreAnnotations.TreeAnnotation));

        List<Tree> phraseTrees = NPExtractor.getKeyPhrases(parseTree).ToList();
        foreach (Tree phraseTree in phraseTrees)
        {
            // A phrase is the space-separated sequence of the subtree's leaves.
            var leafArray = phraseTree.getLeaves().toArray();
            phrases.Add(String.Join(" ", leafArray));
        }
    }

    NounPhrases.Add(id, phrases);
}
/// <summary>Return a new <see cref="ParseResult"/> constructed from <paramref name="annotation"/></summary>
/// <param name="annotation">
/// Annotated document. Only its first sentence is read: the constituency tree (minus the
/// root node), the tokens, and the dependency edges.
/// </param>
/// <exception cref="System.ArgumentNullException"><paramref name="annotation"/> is null.</exception>
/// <exception cref="System.ArgumentException">
/// <paramref name="annotation"/> has no sentences, or its first sentence lacks the
/// constituency tree, token list or dependency graph.
/// </exception>
internal ParseResult(Annotation annotation)
{
    if (annotation == null)
    {
        throw new System.ArgumentNullException(nameof(annotation));
    }

    var sentences = annotation.get(SentencesAnnotationClass) as java.util.AbstractList;
    if (sentences == null || sentences.isEmpty())
    {
        throw new System.ArgumentException("The annotation does not contain any sentences.", nameof(annotation));
    }

    var sentence = sentences.get(0) as CoreMap;
    if (sentence == null)
    {
        throw new System.ArgumentException("The first sentence of the annotation is missing or has an unexpected type.", nameof(annotation));
    }

    // Validate every required annotation up front so a missing one is reported by
    // name instead of as a NullReferenceException further down.
    var constituencyParse = sentence.get(TreeAnnotationClass) as LabeledScoredTreeNode;
    if (constituencyParse == null)
    {
        throw new System.ArgumentException("The first sentence has no constituency parse tree.", nameof(annotation));
    }

    var parsedTokens = sentence.get(TokensAnnotationClass) as java.util.AbstractList;
    if (parsedTokens == null)
    {
        throw new System.ArgumentException("The first sentence has no token list.", nameof(annotation));
    }

    var dependencyGraph = sentence.get(DependencyAnnotationClass) as SemanticGraph;
    if (dependencyGraph == null)
    {
        throw new System.ArgumentException("The first sentence has no dependency graph.", nameof(annotation));
    }

    // Skip the ROOT
    Constituents = constituencyParse.firstChild();
    Constituents.indexLeaves();

    // Build the collection of tokens
    for (int tokenIndex = 0; tokenIndex < parsedTokens.size(); tokenIndex++)
    {
        var source = (CoreLabel)parsedTokens.get(tokenIndex);
        Tokens.Add(new ParseToken
        {
            Index = source.index(),
            Word = source.word(),
            Lemma = source.lemma(),
            PartOfSpeech = source.get(PartOfSpeechAnnotationClass) as string,
            NamedEntityClass = source.get(NamedEntityTagAnnotationClass) as string,
        });
    }

    // Create the list of dependencies between tokens
    java.util.Iterator dependencyGraphEdges = dependencyGraph.edgeIterable().iterator();
    while (dependencyGraphEdges.hasNext())
    {
        var edge = (SemanticGraphEdge)dependencyGraphEdges.next();
        var relation = edge.getRelation();
        Dependencies.Add((
            relation.getShortName(),
            relation.getSpecific(),
            edge.getGovernor().index(),
            edge.getDependent().index()));
    }
}
/// <summary>
/// Copies the text and the begin/end character offsets of a Stanford sentence
/// into <paramref name="sentence"/>.
/// </summary>
/// <param name="stanfordSentence">Sentence annotation map to read from.</param>
/// <param name="sentence">
/// Target sentence. Its <c>text</c> is assigned first; the offset fields are written
/// directly by the parse calls.
/// </param>
/// <returns>
/// <c>true</c> when both offsets parse as unsigned integers; <c>false</c> as soon as one
/// of them does not (the end offset is not read when the begin offset fails).
/// </returns>
private bool ExtractSentence(
    CoreMap stanfordSentence,
    ref InternalSentence sentence)
{
    sentence.text = stanfordSentence.get(_textAnnotationClass).ToString();

    // Short-circuit evaluation keeps the begin offset strictly ahead of the end offset.
    return UInt32.TryParse(
               stanfordSentence.get(_charOffsetBeginAnnotationClass).ToString(),
               out sentence.fileOffsetBegin)
           && UInt32.TryParse(
               stanfordSentence.get(_charOffsetEndAnnotationClass).ToString(),
               out sentence.fileOffsetEnd);
}
/// <summary>
/// Walks the sentences of <paramref name="coredoc"/>, storing each sentence's parse tree
/// in <c>constituencyParse</c> and that tree's constituents in <c>treeArray</c>.
/// </summary>
/// <param name="coredoc">Document to read sentences from; a null document is ignored.</param>
/// <remarks>
/// Both fields are overwritten on every iteration, so after the call they hold the
/// values computed for the last sentence.
/// </remarks>
public void CreateParseTree(CoreDocument coredoc)
{
    if (coredoc == null)
    {
        return;
    }

    var sentenceList = (ArrayList)coredoc.annotation().get(typeof(CoreAnnotations.SentencesAnnotation));

    int position = 0;
    while (position < sentenceList.size())
    {
        var current = (CoreMap)sentenceList.get(position);

        this.constituencyParse = (Tree)current.get(typeof(TreeCoreAnnotations.TreeAnnotation));

        var constituentSet = (Set)this.constituencyParse.constituents(new LabeledScoredConstituentFactory());
        treeArray = constituentSet.toArray();

        position++;
    }
}
/// <summary>
/// Fills <paramref name="sentence"/> with the text, offsets and words of the sentence
/// stored at <paramref name="index"/>.
/// </summary>
/// <param name="index">Zero-based position of the sentence in the parsed sentence list.</param>
/// <param name="sentence">Sentence object populated in place.</param>
/// <returns>
/// <c>true</c> on success, including a sentence without tokens; <c>false</c> when the
/// sentence fields or any single word cannot be extracted.
/// </returns>
/// <exception cref="IndexOutOfRangeException">
/// <paramref name="index"/> is not smaller than <c>SentencesCount</c>.
/// </exception>
public bool GetSentence(uint index, ref InternalSentence sentence)
{
    // index is unsigned, so a "< 0" test can never fire. Comparing with ">= count"
    // instead of "> count - 1" also avoids the wrap-around of "count - 1" when the
    // count is an unsigned zero, which would let every index through.
    if (index >= this.SentencesCount)
    {
        throw new IndexOutOfRangeException();
    }

    // extract the basic sentence fields
    CoreMap stanfordSentence = (CoreMap)_sentences.get((int)index);
    if (!ExtractSentence(stanfordSentence, ref sentence))
    {
        return false;
    }

    // extract words
    java.util.ArrayList stanfordWords =
        (java.util.ArrayList)stanfordSentence.get(_tokensAnnotationClass);
    if (stanfordWords.size() <= 0)
    {
        // NOTE(review): sentence.words is left untouched here, so a reused sentence
        // object keeps whatever word list it already had - confirm this is intended.
        return true;
    }

    // a "minor" optimisation (capacity)
    sentence.words = new System.Collections.Generic.List<InternalWord>(
        stanfordWords.size());

    foreach (CoreMap stanfordWord in stanfordWords)
    {
        if (!ExtractWord(stanfordWord, ref sentence))
        {
            // NOTE: currently, we fail the entire sentence even if a single
            // word fails
            return false;
        }
    }

    return true;
}
/// <summary>
/// Reads the tree stored under <c>TreeCoreAnnotations.TreeAnnotation</c> on a sentence.
/// </summary>
/// <param name="sent">Sentence annotation map to read from.</param>
/// <returns>The value of the tree annotation, cast to <c>Tree</c>.</returns>
public Tree GetParseTree(CoreMap sent) =>
    (Tree)sent.get(typeof(TreeCoreAnnotations.TreeAnnotation));
/// <summary>
/// Reads the predicted sentiment class of a sentence from its sentiment-annotated tree.
/// </summary>
/// <param name="sentence">Sentence annotation map carrying the sentiment-annotated tree.</param>
/// <returns>The predicted class: a score between 0-4, higher being more positive.</returns>
private int GetSentiment(CoreMap sentence)
{
    // Pull the annotated tree, then ask the RNN annotations for its predicted class.
    var annotatedTree = (Tree)sentence.get(typeof(SentimentCoreAnnotations.SentimentAnnotatedTree));
    return RNNCoreAnnotations.getPredictedClass(annotatedTree);
}