// public void Process(ref RecipeItem rec) { string text = rec.Value.ToLower(); // Annotation var annotation = new Annotation(text); pipeline.annotate(annotation); // these are all the sentences in this document // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types var sentences = annotation.get(typeof(CoreAnnotations.SentencesAnnotation)); if (sentences == null) { return; } var adj = ""; var noun = ""; foreach (Annotation sentence in sentences as ArrayList) { //var token = sentence.get(typeof(CoreAnnotations.PartOfSpeechAnnotation)); var token = sentence.get(typeof(CoreAnnotations.TokensAnnotation)); CoreLabel prev = new CoreLabel(); CoreLabel next; bool isNote = false; foreach (CoreLabel typ in token as ArrayList) { object word = typ.get(typeof(CoreAnnotations.TextAnnotation)); var pos = typ.get(typeof(CoreAnnotations.PartOfSpeechAnnotation)); Console.WriteLine("type: {0}, word: {1}", pos, word); string test = pos.ToString().ToLower(); if (isNote) { rec.Notes += " " + word; } if (test.Contains(",")) { isNote = true; } if (test.Contains("jj")) { adj += " " + word; } if (test.Contains("nn")) { noun += " " + word; } if (prev.value() != null) { word = prev.get(typeof(CoreAnnotations.TextAnnotation)); pos = prev.get(typeof(CoreAnnotations.PartOfSpeechAnnotation)); } prev = typ; } } Console.WriteLine("\n"); rec.Adj = adj; rec.Noun = noun; }
/// <summary>
/// Runs a <c>PTBTokenizer</c> over <paramref name="text"/> and writes one line
/// per token to the console, showing the token's value together with its
/// begin and end positions.
/// </summary>
/// <param name="text">The raw text to tokenize.</param>
public static void TokenizeText(string text)
{
    // The third constructor argument is an empty string
    // (presumably the tokenizer options -- confirm against the PTBTokenizer docs).
    PTBTokenizer tokenizer = new PTBTokenizer(new StringReader(text), new CoreLabelTokenFactory(), "");

    while (tokenizer.hasNext())
    {
        var token = (CoreLabel)tokenizer.next();

        // Build the report line first, then emit it as a plain string.
        string line = String.Format(
            "{0}\t| BEGIN_OFFSET: {1}\t| END_OFFSET: {2}",
            token.value(),
            token.beginPosition(),
            token.endPosition());

        Console.WriteLine(line);
    }
}