Beispiel #1
0
        /// <summary>
        /// Classifies the article by counting bag-of-words term hits per topic,
        /// subtracting the hits from a second "false positive" word list, and then
        /// setting <c>Topic</c> to the best-scoring topic name and <c>Tags</c> to the
        /// names of up to four topics whose adjusted count is still positive.
        /// </summary>
        /// <remarks>
        /// <c>Topic</c> is set to "Unclassified" when there are no topics, when the best
        /// adjusted count is below 2, or when the title is "week in review"
        /// (compared case-insensitively).
        /// </remarks>
        public void ClassifyArticle()
        {
            Tags = new List <string>();
            List <string> sentences = SplitSentences();

            // analyse article using Bag of Words technique
            TopicsList topicsList = new TopicsList("XF40Demo.Resources.NewsBoW.csv");
            AnalyseSentences(sentences, topicsList);

            // analyse article again to identify false positives
            TopicsList falseTopicsList = new TopicsList("XF40Demo.Resources.NewsFalseBoW.csv");
            AnalyseSentences(sentences, falseTopicsList);

            // subtract false positives from topics list
            foreach (Topic falseTopic in falseTopicsList.Topics)
            {
                Topic match = topicsList.Topics.Find(x => x.Name.Equals(falseTopic.Name));

                // Find returns null when the false-positive list names a topic that the
                // main list does not contain; there is nothing to subtract from then.
                if (match != null)
                {
                    match.Count -= falseTopic.Count;
                }
            }

            // select topic + tags
            // Sort once (after the subtraction above) and reuse the ranking for both
            // the headline topic and the tags.
            var ranked = topicsList.Topics.OrderByDescending(o => o.Count).ToList();
            Topic best = ranked.FirstOrDefault();

            bool isUnclassified = best == null
                || best.Count < 2
                || string.Equals(Title, "week in review", StringComparison.OrdinalIgnoreCase);

            Topic = isUnclassified ? "Unclassified" : best.Name;

            foreach (Topic topic in ranked.Take(4))
            {
                if (topic.Count > 0)
                {
                    Tags.Add(topic.Name);
                }
            }
        }
Beispiel #2
0
 /// <summary>
 /// Counts term occurrences per topic: for every topic in <paramref name="topicsList"/>,
 /// <c>Count</c> is incremented once for each (sentence, term) pair where the sentence
 /// contains the lower-cased term.
 /// </summary>
 /// <param name="sentences">Sentences to scan. Terms are lower-cased before matching, so
 /// this presumably expects already lower-cased sentences - TODO confirm against the caller.</param>
 /// <param name="topicsList">Topics whose <c>Count</c> values are updated in place.</param>
 public void AnalyseSentences(List <string> sentences, TopicsList topicsList)
 {
     // Nothing to scan: leave every count untouched.
     if (sentences.Count == 0)
     {
         return;
     }

     // One parallel pass over the topics instead of one per sentence. Each topic is
     // processed by a single iteration, so its Count is only touched by one thread
     // (assuming the list does not hold the same Topic instance twice).
     Parallel.ForEach(topicsList.Topics, topic =>
     {
         foreach (string term in topic.Terms)
         {
             // Lower-case once per term, culture-invariantly, so matching does not
             // depend on the current culture (e.g. Turkish dotless i).
             string needle = term.ToLowerInvariant();

             foreach (string sentence in sentences)
             {
                 if (sentence.Contains(needle))
                 {
                     topic.Count++;
                 }
             }
         }
     });
 }