/// <summary>
/// Lemmatize the given text with the CoreNLP pipeline and return the lemma of
/// every token found.
/// </summary>
/// <param name="word">Text to lemmatize.</param>
/// <returns>
/// The list of lemmas (one per token, across all sentences), or null when the
/// pipeline fails to annotate the text — callers distinguish null (failure)
/// from an empty list (no tokens).
/// </returns>
public List<string> Stem(string word)
{
    if (pipeline == null)
    {
        Initial(); // lazily build the CoreNLP pipeline on first use
    }

    var lemmas = new List<string>();

    // Create an empty Annotation just with the given text.
    var document = new Annotation(word);

    // Run all annotators on this text; the Java pipeline can throw on bad input.
    try
    {
        pipeline.annotate(document);
    }
    catch (Exception)
    {
        // Annotation failed — report failure to the caller via null.
        return null;
    }

    // CoreNLP annotation values are looked up by the Java class of the key type.
    var senObj = new edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation();
    var obj = document.get(senObj.getClass());
    var tokenObj = new edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation();
    var lemmaObj = new edu.stanford.nlp.ling.CoreAnnotations.LemmaAnnotation();

    // Iterate over all sentences and collect the lemma of every token.
    var sentences = (ArrayList)obj;
    foreach (CoreMap sentence in sentences)
    {
        lemmas.AddRange(
            from CoreLabel token in (ArrayList)sentence.get(tokenObj.getClass())
            select (string)token.get(lemmaObj.getClass()));
    }

    return lemmas;
}
/// <summary>
/// Split the given text into tokens using the CoreNLP pipeline.
/// </summary>
/// <param name="sequence">Text to tokenize; must not be null.</param>
/// <returns>The string form of every token in the text.</returns>
/// <exception cref="ArgumentNullException">When <paramref name="sequence"/> is null.</exception>
public List<string> Tokenize(string sequence)
{
    if (sequence == null)
    {
        // Specific exception type instead of the base Exception; still caught
        // by any existing catch (Exception) handlers.
        throw new ArgumentNullException(nameof(sequence), "Sequence should not be null for tokenizer.");
    }
    if (pipeline == null)
    {
        Initial(); // lazily build the CoreNLP pipeline on first use
    }

    var document = new Annotation(sequence);
    pipeline.annotate(document);

    // Token annotations are keyed by the Java class of the annotation type.
    var tokens = (ArrayList)document.get(tokenObj.getClass());
    return (from CoreMap token in tokens select token.ToString()).ToList();
}
/// <summary>
/// Annotate the given sentence and cache its token list and the dependency
/// graph of the first sentence found into the instance fields.
/// </summary>
/// <param name="sentence">Text to parse.</param>
public void Parse(string sentence)
{
    if (pipeline == null)
    {
        Initial(); // lazily build the CoreNLP pipeline on first use
    }

    var annotated = new Annotation(sentence);
    pipeline.annotate(annotated);

    this.tokens = (ArrayList)annotated.get(tokenObj.getClass());

    // Only the first sentence's dependency graph is kept.
    var sentenceList = (ArrayList)annotated.get(senObj.getClass());
    foreach (CoreMap parsed in sentenceList)
    {
        this.dependencies = (SemanticGraph)parsed.get(depObj.getClass());
        break;
    }
}
/// <summary>
/// Run named-entity recognition over the given text and collect
/// (entity text, entity type) pairs into <c>nerPairs</c>. Consecutive tokens
/// sharing the same NER tag are merged into one space-joined entity span;
/// only LOCATION, PERSON and ORGANIZATION entities are kept.
/// </summary>
/// <param name="context">Text to analyze; must not be null.</param>
/// <exception cref="ArgumentNullException">When <paramref name="context"/> is null.</exception>
public void FindNer(string context)
{
    if (context == null)
    {
        // Specific exception type instead of the base Exception; still caught
        // by any existing catch (Exception) handlers.
        throw new ArgumentNullException(nameof(context), "Input should not be null for finding NER!");
    }
    if (pipeline == null)
    {
        Initial(); // lazily build the CoreNLP pipeline on first use
    }

    // Entity types we keep; every other NER tag is discarded.
    var trackedTypes = new HashSet<string> { "LOCATION", "PERSON", "ORGANIZATION" };

    // Create an empty Annotation just with the given text and run all annotators.
    var annotation = new Annotation(context);
    pipeline.annotate(annotation);

    var sentences = (ArrayList)annotation.get(new edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation().getClass());

    nerPairs = new List<Pair<string, string>>();
    foreach (CoreMap sentence in sentences)
    {
        // State machine that merges consecutive same-tag tokens into one span:
        // buffer holds the words of the entity currently being built,
        // prevNerToken its tag, newToken whether the next word starts a span.
        var prevNerToken = "O";
        var currNerToken = "O";
        bool newToken = true;
        var buffer = new StringBuilder();

        foreach (CoreLabel token in (ArrayList)sentence.get(tokenObj.getClass()))
        {
            currNerToken = (string)token.get(nerObj.getClass());
            var word = (string)token.get(textObj.getClass());

            // Strip out "O"s completely; flush any entity collected so far.
            if (currNerToken.Equals("O"))
            {
                if (!prevNerToken.Equals("O") && (buffer.Length > 0))
                {
                    if (trackedTypes.Contains(prevNerToken))
                    {
                        nerPairs.Add(new Pair<string, string>(buffer.ToString(), prevNerToken));
                    }
                    buffer.Clear();
                    newToken = true;
                    prevNerToken = "O";
                }
                continue;
            }

            if (newToken)
            {
                // First word of a new entity span.
                prevNerToken = currNerToken;
                newToken = false;
                buffer.Append(word);
                continue;
            }

            if (currNerToken.Equals(prevNerToken))
            {
                // Same entity continues — extend the current span.
                buffer.Append(" " + word);
            }
            else
            {
                // Entity type changed — flush the finished span, start a new one.
                if (trackedTypes.Contains(prevNerToken))
                {
                    nerPairs.Add(new Pair<string, string>(buffer.ToString(), prevNerToken));
                }
                buffer.Clear();
                buffer.Append(word);
                newToken = true;
            }
            prevNerToken = currNerToken;
        }

        // Flush a trailing entity that ran to the end of the sentence.
        if (!prevNerToken.Equals("O") && buffer.Length > 0)
        {
            if (trackedTypes.Contains(prevNerToken))
            {
                nerPairs.Add(new Pair<string, string>(buffer.ToString(), prevNerToken));
            }
        }
    }
}