/// <summary>
/// Computes text statistics for <paramref name="book"/> and stores them on the book:
/// the adjective, adverb, noun, verb and punctuation to word ratios, and the
/// average number of words per sentence.
/// </summary>
/// <remarks>
/// The shared counters are cleared via <c>ResetVariables()</c> before any counting is done.
/// The method returns without assigning any ratio when the book has no text, when the
/// text contains no words, or when the pipeline yields no sentence annotation.
/// </remarks>
/// <param name="book">The book whose <c>Text</c> is analyzed and whose ratio properties are assigned.</param>
/// <exception cref="ArgumentNullException"><paramref name="book"/> is null.</exception>
public static void AnalyzeText(Book book)
{
    if (book == null)
    {
        throw new ArgumentNullException("book");
    }

    ResetVariables();

    var text = book.Text;
    if (String.IsNullOrEmpty(text))
    {
        // Nothing to analyze; leave the book's ratios untouched.
        return;
    }

    // NOTE(review): words are delimited by the space character only, so words separated
    // solely by a line break or tab are counted as one - confirm this is intended.
    wordCount = text.Split(' ').Count(piece => !String.IsNullOrEmpty(piece));
    punctCount = text.Count(Char.IsPunctuation);

    if (wordCount == 0)
    {
        // Every ratio below divides by wordCount; bail out before running the pipeline.
        return;
    }

    var pipeline = PipelineBuilder.GetPipeLine();

    // Annotation
    var annotation = new Annotation(text);
    pipeline.annotate(annotation);

    // Annotation keys are class objects; build them once instead of once per sentence/token.
    var sentencesKey = new CoreAnnotations.SentencesAnnotation().getClass();
    var tokensKey = new CoreAnnotations.TokensAnnotation().getClass();
    var posKey = new CoreAnnotations.PartOfSpeechAnnotation().getClass();

    // These are all the sentences in this document.
    // A CoreMap is essentially a Map that uses class objects as keys and has values with custom types.
    var sentences = annotation.get(sentencesKey) as ArrayList;
    if (sentences == null)
    {
        return;
    }

    foreach (CoreMap sentence in sentences)
    {
        var tokens = sentence.get(tokensKey) as ArrayList;
        if (tokens == null)
        {
            // No token list on this sentence (or not the expected list type); skip it.
            continue;
        }

        foreach (CoreLabel token in tokens)
        {
            var pos = token.get(posKey) as string;

            // Presumably updates the adjective/adverb/noun/verb counters used below -
            // AnalyzePOSTag is defined outside this block.
            AnalyzePOSTag(pos);
        }
    }

    // NOTE(review): the counters are declared outside this block. If they are integer
    // typed these divisions truncate toward zero - confirm they are floating point.
    book.AdjectiveToWordRatio = adjCount / wordCount;
    book.AdverbToWordRatio = advCount / wordCount;
    book.NounToWordRatio = nounCount / wordCount;
    book.VerbToWordRatio = verbCount / wordCount;
    book.PunctoationToWordRatio = punctCount / wordCount;

    // An empty sentence list would otherwise make this a division by zero.
    var sentenceCount = sentences.size();
    if (sentenceCount > 0)
    {
        book.AverageSentenceWordCount = wordCount / sentenceCount;
    }
}