/// <summary>
/// Builds one space-joined string per tree returned by <c>NPExtractor.getKeyPhrases</c>
/// for every sentence of <paramref name="coredoc"/>, and registers the resulting list
/// in <c>NounPhrases</c> under <paramref name="id"/>.
/// </summary>
/// <param name="coredoc">Document handed to <c>_analyzer.GetSents</c> to obtain its sentences.</param>
/// <param name="id">Key under which the collected phrases are added to <c>NounPhrases</c>.</param>
/// <remarks>Nothing is added when <c>_analyzer.GetSents</c> returns <c>null</c>.</remarks>
private static void ExtractKeyPhrases(CoreDocument coredoc, int id)
{
    ArrayList sentenceList = _analyzer.GetSents(coredoc);
    if (sentenceList == null)
    {
        return;
    }

    List<string> phrases = new List<string>();
    int total = sentenceList.size();

    for (int index = 0; index < total; index++)
    {
        CoreMap sentence = (CoreMap)sentenceList.get(index);

        // The sentence's tree annotation is the input to the phrase extractor.
        Tree parse = (Tree)sentence.get(typeof(TreeCoreAnnotations.TreeAnnotation));
        List<Tree> phraseTrees = NPExtractor.getKeyPhrases(parse).ToList();

        // Each phrase is the tree's leaves joined by single spaces.
        phrases.AddRange(phraseTrees.Select(t => String.Join(" ", t.getLeaves().toArray())));
    }

    NounPhrases.Add(id, phrases);
}
/// <summary>
/// Collects the text of every token in <paramref name="coredoc"/> whose POS tag starts
/// with "NN" and registers the list in <c>NounPhrases</c> under the integer value of the
/// "id" field of <paramref name="document"/>.
/// </summary>
/// <param name="coredoc">Document whose sentences, tokens and POS tags are read.</param>
/// <param name="document">Lucene document whose "id" field supplies the key.</param>
/// <remarks>
/// NOTE(review): <c>ExtractKeyPhrases</c> adds to the same <c>NounPhrases</c> collection;
/// if that collection rejects duplicate keys, calling both for one id will fail — confirm
/// against the callers.
/// </remarks>
private static void ExtractNouns(CoreDocument coredoc, Lucene.Net.Documents.Document document)
{
    List<string> nouns = new List<string>();

    var sentences = coredoc.sentences();
    int sentenceCount = sentences.size();

    for (int i = 0; i < sentenceCount; i++)
    {
        CoreSentence sent = (CoreSentence)sentences.get(i);

        // Fetch the per-sentence lists once instead of once per token.
        var tokens = sent.tokens();
        int tokenCount = tokens.size();
        var posTags = sent.posTags();
        if (posTags == null)
        {
            continue;
        }

        for (int j = 0; j < tokenCount; j++)
        {
            var tag = posTags.get(j);
            if (tag == null)
            {
                continue;
            }

            // Condition: the word is a noun (posTag starts with "NN").
            if (!tag.ToString().StartsWith("NN", StringComparison.Ordinal))
            {
                continue;
            }

            nouns.Add(StripTokenIndexSuffix(tokens.get(j).ToString()));
        }
    }

    NounPhrases.Add(document.GetField("id").GetInt32Value().Value, nouns);
}

/// <summary>
/// Removes a trailing "-&lt;digits&gt;" suffix from <paramref name="tokenText"/>, if present.
/// </summary>
/// <remarks>
/// The previous code dropped exactly two trailing characters, which only removes the
/// suffix when the number is a single digit and throws on text shorter than two characters.
/// Presumably the suffix is the token index appended by the token's ToString() — confirm
/// against the CoreLabel documentation.
/// </remarks>
/// <param name="tokenText">String form of a token.</param>
/// <returns>The text before the final "-&lt;digits&gt;" suffix, or the input unchanged when there is none.</returns>
private static string StripTokenIndexSuffix(string tokenText)
{
    int dash = tokenText.LastIndexOf('-');

    // Need at least one character before the dash and one digit after it.
    if (dash <= 0 || dash == tokenText.Length - 1)
    {
        return tokenText;
    }

    for (int k = dash + 1; k < tokenText.Length; k++)
    {
        if (tokenText[k] < '0' || tokenText[k] > '9')
        {
            return tokenText;
        }
    }

    return tokenText.Substring(0, dash);
}