Exemple #1
0
        /// <summary>
        /// Analyzes the text of <paramref name="book"/> and stores the resulting
        /// statistics on it: adjective/adverb/noun/verb/punctuation-to-word ratios
        /// and the average number of words per sentence.
        /// </summary>
        /// <remarks>
        /// Words are counted by splitting the text on single spaces and ignoring empty
        /// entries; punctuation is counted with <see cref="Char.IsPunctuation(char)"/>.
        /// Part-of-speech tags come from the pipeline returned by
        /// <c>PipelineBuilder.GetPipeLine()</c> and are forwarded to <c>AnalyzePOSTag</c>
        /// one token at a time. The method resets and then updates the class-level
        /// counters via <c>ResetVariables()</c>.
        /// If the text contains no words, all statistics are set to zero. If the
        /// pipeline yields no sentence annotation, the book is left unchanged.
        /// </remarks>
        /// <param name="book">The book whose <c>Text</c> is analyzed and whose ratio properties are written.</param>
        /// <exception cref="ArgumentNullException"><paramref name="book"/> is <c>null</c>.</exception>
        public static void AnalyzeText(Book book)
        {
            if (book == null)
            {
                throw new ArgumentNullException("book");
            }

            ResetVariables();

            // A missing text is treated like an empty one instead of failing inside Split.
            var text = book.Text ?? String.Empty;

            wordCount = text.Split(' ').Count(word => !String.IsNullOrEmpty(word));
            punctCount = text.Count(c => Char.IsPunctuation(c));

            // Without any words every ratio below would divide by zero, so report
            // zeros and skip the (pointless) pipeline run.
            if (wordCount == 0)
            {
                book.AdjectiveToWordRatio     = 0;
                book.AdverbToWordRatio        = 0;
                book.NounToWordRatio          = 0;
                book.VerbToWordRatio          = 0;
                book.PunctoationToWordRatio   = 0;
                book.AverageSentenceWordCount = 0;
                return;
            }

            var pipeline = PipelineBuilder.GetPipeLine();

            var annotation = new Annotation(text);
            pipeline.annotate(annotation);

            // All sentences of the document; each sentence is a CoreMap, i.e. a map
            // keyed by annotation class objects with values of annotation-specific types.
            var sentences = annotation.get(new CoreAnnotations.SentencesAnnotation().getClass()) as ArrayList;

            if (sentences == null)
            {
                return;
            }

            // The lookup keys never change, so build them once rather than per token.
            var tokensKey = new CoreAnnotations.TokensAnnotation().getClass();
            var posKey    = new CoreAnnotations.PartOfSpeechAnnotation().getClass();

            // NOTE: tallying named entities per token (NamedEntityTagAnnotation) and
            // storing them in book.NamedEntities was sketched here earlier but is not active.
            foreach (CoreMap sentence in sentences)
            {
                var tokens = sentence.get(tokensKey) as ArrayList;
                if (tokens == null)
                {
                    // No token annotation for this sentence: nothing to tag.
                    continue;
                }

                foreach (CoreLabel token in tokens)
                {
                    var pos = token.get(posKey) as string;
                    AnalyzePOSTag(pos);
                }
            }

            // NOTE(review): if the counters are integral types these divisions
            // truncate toward zero - confirm the field declarations.
            book.AdjectiveToWordRatio   = adjCount / wordCount;
            book.AdverbToWordRatio      = advCount / wordCount;
            book.NounToWordRatio        = nounCount / wordCount;
            book.VerbToWordRatio        = verbCount / wordCount;
            book.PunctoationToWordRatio = punctCount / wordCount;

            // Guard against an empty sentence list to avoid dividing by zero.
            var sentenceCount = sentences.size();
            if (sentenceCount == 0)
            {
                book.AverageSentenceWordCount = 0;
            }
            else
            {
                book.AverageSentenceWordCount = wordCount / sentenceCount;
            }
        }