/// <summary>
/// Classifies the article: sets <c>Topic</c> to the best-scoring topic name
/// (or "Unclassified") and rebuilds <c>Tags</c> from the highest-scoring topics.
/// </summary>
/// <remarks>
/// Scoring is done in two passes over the sentences returned by
/// <c>SplitSentences()</c>: one against the main Bag of Words list and one
/// against the false-positive list. The false-positive counts are subtracted
/// from the matching main topics before ranking.
/// </remarks>
public void ClassifyArticle()
{
    Tags = new List<string>();
    List<string> sentences = SplitSentences();

    // analyse article using Bag of Words technique
    TopicsList topicsList = new TopicsList("XF40Demo.Resources.NewsBoW.csv");
    AnalyseSentences(sentences, topicsList);

    // analyse article again to identify false positives
    TopicsList falseTopicsList = new TopicsList("XF40Demo.Resources.NewsFalseBoW.csv");
    AnalyseSentences(sentences, falseTopicsList);

    // subtract false positives from topics list
    foreach (Topic falseTopic in falseTopicsList.Topics)
    {
        Topic match = topicsList.Topics.Find(
            x => string.Equals(x.Name, falseTopic.Name, StringComparison.Ordinal));

        // A false-positive topic with no counterpart in the main list has
        // nothing to correct; skip it instead of dereferencing null.
        if (match != null)
        {
            match.Count -= falseTopic.Count;
        }
    }

    // Rank once and reuse the result for both the topic and the tags.
    List<Topic> rankedTopics = topicsList.Topics.OrderByDescending(o => o.Count).ToList();

    // select topic: an empty topic list, a weak best score (< 2) or a
    // "week in review" title all fall back to "Unclassified".
    Topic bestTopic = rankedTopics.FirstOrDefault();
    bool isWeekInReview = string.Equals(Title, "week in review", StringComparison.OrdinalIgnoreCase);
    Topic = (bestTopic == null || bestTopic.Count < 2 || isWeekInReview)
        ? "Unclassified"
        : bestTopic.Name;

    // select tags: up to four top-ranked topics, keeping only positive scores.
    foreach (Topic candidate in rankedTopics.Take(4))
    {
        if (candidate.Count > 0)
        {
            Tags.Add(candidate.Name);
        }
    }
}
/// <summary>
/// Adds to each topic's <c>Count</c> the number of (sentence, term) pairs in
/// which the sentence contains the lower-cased term.
/// </summary>
/// <param name="sentences">Sentences to scan. Matching is case-sensitive against
/// the lower-cased terms, so callers presumably pass lower-cased text — confirm.</param>
/// <param name="topicsList">Topic list whose topics have their <c>Count</c> incremented.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="sentences"/> or <paramref name="topicsList"/> is null.
/// </exception>
public void AnalyseSentences(List<string> sentences, TopicsList topicsList)
{
    if (sentences == null)
    {
        throw new ArgumentNullException(nameof(sentences));
    }

    if (topicsList == null)
    {
        throw new ArgumentNullException(nameof(topicsList));
    }

    if (sentences.Count == 0)
    {
        // Nothing to scan; leave every count untouched.
        return;
    }

    // One parallel pass over the topics. Each topic is handled by a single
    // delegate invocation, so its Count is only written from one thread.
    Parallel.ForEach(topicsList.Topics, topic =>
    {
        // Lower-case each term once per topic rather than once per sentence.
        // NOTE(review): ToLower() is culture-sensitive; kept as-is so matching
        // stays consistent with however the sentences were lower-cased.
        List<string> loweredTerms = new List<string>();
        foreach (string rawTerm in topic.Terms)
        {
            loweredTerms.Add(rawTerm.ToLower());
        }

        int hits = 0;
        foreach (string sentence in sentences)
        {
            foreach (string loweredTerm in loweredTerms)
            {
                if (sentence.Contains(loweredTerm))
                {
                    hits++;
                }
            }
        }

        topic.Count += hits;
    });
}