/// <summary>
/// Runs the given CoreNLP pipeline over <paramref name="source"/> and folds the
/// per-sentence results into a single <see cref="ProcessedEntity"/>.
/// </summary>
/// <param name="nlp">Pipeline used to annotate the document built from <paramref name="source"/>.</param>
/// <param name="source">Raw text to annotate.</param>
/// <returns>
/// The result of successively combining a fresh <see cref="ProcessedEntity"/> with each
/// sentence's entity via <c>ProcessedEntity.Union</c>, in sentence order. When the document
/// yields no sentences, the fresh (seed) entity is returned.
/// </returns>
public static ProcessedEntity Classify(this StanfordCoreNLP nlp, string source)
{
    var annotated = new CoreDocument(source);
    nlp.annotate(annotated);

    // Start from an empty accumulator and merge one sentence at a time.
    ProcessedEntity combined = new ProcessedEntity();

    // OfType skips any element of the sentence array that is not a CoreSentence.
    foreach (CoreSentence sentence in annotated.sentences().toArray().OfType<CoreSentence>())
    {
        var parsed = new ParsedSentence(sentence);
        combined = ProcessedEntity.Union(combined, parsed.ToProcessedEntity());
    }

    return combined;
}
/// <summary>
/// Collects the noun tokens of an annotated CoreNLP document and stores them in
/// <c>NounPhrases</c>, keyed by the integer value of the Lucene document's "id" field.
/// </summary>
/// <param name="coredoc">Annotated CoreNLP document whose sentences are scanned.</param>
/// <param name="document">Lucene document supplying the "id" field used as the key.</param>
private static void ExtractNouns(CoreDocument coredoc, Lucene.Net.Documents.Document document)
{
    List<string> nouns = new List<string>();

    var sentences = coredoc.sentences();
    for (int i = 0; i < sentences.size(); i++)
    {
        CoreSentence sent = (CoreSentence)sentences.get(i);

        // Without POS tags nothing in this sentence can be classified as a noun.
        var posTags = sent.posTags();
        if (posTags == null)
        {
            continue;
        }

        var tokens = sent.tokens();
        for (int j = 0; j < tokens.size(); j++)
        {
            var tag = posTags.get(j);
            if (tag == null)
            {
                continue;
            }

            // Condition: the word is treated as a noun when its POS tag contains "NN"
            // (e.g. NN, NNS, NNP, NNPS).
            if (tag.ToString().Contains("NN"))
            {
                nouns.Add(StripTokenIndexSuffix(tokens.get(j).ToString()));
            }
        }
    }

    NounPhrases.Add(document.GetField("id").GetInt32Value().Value, nouns);
}

/// <summary>
/// Removes a trailing "-&lt;digits&gt;" suffix from a token's string form.
/// </summary>
/// <remarks>
/// Assumes the token's ToString() renders as "word-index" (CoreLabel's default format —
/// confirm against the CoreNLP version in use). The index can have more than one digit,
/// so a fixed two-character trim would leave a dangling '-' for tokens at position 10+.
/// Strings without such a suffix are returned unchanged.
/// </remarks>
private static string StripTokenIndexSuffix(string label)
{
    int dash = label.LastIndexOf('-');
    if (dash < 0 || dash == label.Length - 1)
    {
        return label;
    }

    for (int k = dash + 1; k < label.Length; k++)
    {
        if (!char.IsDigit(label[k]))
        {
            return label;
        }
    }

    return label.Substring(0, dash);
}