/// <summary>
        /// Classifies the header of a single news item against the main categories.
        /// </summary>
        /// <param name="articleId">Id of the news item to classify.</param>
        /// <returns>
        /// The name of the first category the classifier reports as a match. When no category
        /// matches outright, the name of the category for which the classifier reported the highest
        /// value through its ref argument (only values above zero count). An empty string when
        /// nothing qualifies or when no news item with the given id exists.
        /// </returns>
        public string ClassifyArticle(int articleId)
        {
            NewsItem article = this.Data.NewsItems.FirstOrDefault(s => s.Id == articleId);
            if (article == null)
            {
                // FirstOrDefault yields null for an unknown id; reading article.Header below
                // would otherwise fail with a NullReferenceException.
                return string.Empty;
            }

            BayesianClassifier classifier =
              new BayesianClassifier(this, new DefaultTokenizer(), new CustomizableStopWordProvider());

            List<Category> mainCategories = this.Data.Categories.All().DistinctBy(s => s.BaseCategoryId).ToList();

            string bestCategory = string.Empty;
            decimal? probResult = 0;
            decimal? maxProbResult = 0;

            foreach (Category candidate in mainCategories)
            {
                if (classifier.IsMatch(candidate.Name, article.Header, ref probResult, candidate.Id))
                {
                    // An outright match wins immediately; remaining categories are not evaluated.
                    return candidate.Name;
                }

                // Remember the strongest non-matching candidate as a fallback.
                // A null probResult never compares greater, so it is ignored here.
                if (probResult > maxProbResult)
                {
                    maxProbResult = probResult;
                    bestCategory = candidate.Name;
                }
            }

            return bestCategory;
        }
        /// <summary>
        /// Classifies the header of every <c>NewsItemViewModel</c> contained in the given result and
        /// writes the outcome into the item's <c>ClassificationCategory</c> and
        /// <c>ClassificationProbability</c> properties.
        /// </summary>
        /// <param name="news">
        /// Result whose <c>Data</c> sequence holds the view models to classify. A null result or
        /// null <c>Data</c> is ignored; elements that are not <c>NewsItemViewModel</c> are skipped.
        /// </param>
        public void ClassifyDataSourceResult(DataSourceResult news)
        {
            if (news == null || news.Data == null)
            {
                return;
            }

            BayesianClassifier classifier =
             new BayesianClassifier(this, new DefaultTokenizer(), new CustomizableStopWordProvider());
            List<Category> mainCategories = this.Data.Categories.All().DistinctBy(s => s.BaseCategoryId).ToList();

            // foreach takes care of disposing the enumerator, unlike a manual GetEnumerator() loop.
            foreach (object item in news.Data)
            {
                var article = item as NewsItemViewModel;
                if (article == null)
                {
                    // Null or foreign element: nothing to classify.
                    continue;
                }

                decimal? categoryProbability = 0;
                decimal? maxProbability = 0;
                string maxProbCategory = "";

                foreach (Category candidate in mainCategories)
                {
                    bool isMatch = classifier.IsMatch(candidate.Name, article.Header, ref categoryProbability, candidate.Id);
                    if (isMatch)
                    {
                        article.ClassificationCategory = candidate.Name;
                        // The ref value is nullable; fall back to 0 instead of throwing on null.
                        article.ClassificationProbability = categoryProbability.GetValueOrDefault();
                        break;
                    }

                    // Track the strongest non-matching candidate as a fallback.
                    if (categoryProbability > maxProbability)
                    {
                        maxProbability = categoryProbability;
                        maxProbCategory = candidate.Name;
                    }
                }

                // Use the fallback only when the item carries no category at this point
                // (either none matched above, or none was set before the call).
                if (string.IsNullOrEmpty(article.ClassificationCategory))
                {
                    article.ClassificationCategory = maxProbCategory;
                    article.ClassificationProbability = maxProbability.GetValueOrDefault();
                }
            }
        }
        /// <summary>
        /// Trains the classifier with the headers of all news items that have not yet been used
        /// for classification and are not reserved for testing. Each header is taught as a match
        /// for the base category of the item's first category and as a non-match for every other
        /// base category reachable from the main categories. Successfully processed items are
        /// flagged as used and saved one by one.
        /// </summary>
        /// <returns>The number of news items that were taught and flagged as used.</returns>
        public int TrainModel()
        {
            BayesianClassifier classifier =
              new BayesianClassifier(this, new DefaultTokenizer(), new CustomizableStopWordProvider("DefaultStopWords.txt"));

            int countTrainedNews = 0;
            List<Category> mainCategories = this.Data.Categories.All().DistinctBy(s => s.BaseCategoryId).ToList();

            // NOTE(review): the result of this query is never read. It is kept because loading the
            // categories may still influence how navigation properties resolve in the data context
            // - confirm before removing it.
            List<Category> allCategories = this.Data.Categories.All(new string[] { "NewsItems" }).ToList();

            // Get all news that have not been used for classification and are not test items.
            var newsItems = this.Data.NewsItems.All(new string[] { "Categories" }).Where(s => s.UsedForClassication != true && s.IsForTest != true).ToList();

            foreach (var article in newsItems)
            {
                var category = article.Categories.FirstOrDefault();
                if (category == null || category.BaseCategory == null)
                {
                    // Without a category and its base category there is nothing to teach.
                    continue;
                }

                // Only the header is used; per the original author's note, training on
                // article.CleanContent takes too much time.
                string text = article.Header;

                try
                {
                    // Teach the article's base category that this text is a match.
                    classifier.TeachMatch(category.BaseCategory.Name, text, category.BaseCategoryId);

                    var matchedBaseId = category.BaseCategory.Id;
                    foreach (Category other in mainCategories)
                    {
                        if (other.BaseCategory == null)
                        {
                            continue;
                        }

                        // Compare base categories, not the categories themselves: what is taught
                        // below is other.BaseCategory, so comparing plain ids would teach the
                        // article's own base category as a non-match whenever the article sits
                        // in a different sub-category of that same base category.
                        if (matchedBaseId != other.BaseCategory.Id)
                        {
                            // Teach every other base category that this text is not a match.
                            classifier.TeachNonMatch(other.BaseCategory.Name, text, other.BaseCategory.Id);
                        }
                    }

                    countTrainedNews++;
                    article.UsedForClassication = true;
                    this.Data.NewsItems.Update(article);
                    this.Data.SaveChanges();
                }
                catch (Exception ex)
                {
                    // Best effort: record the failure and continue with the next article.
                    BaseHelper.WriteInFile("errors.txt", ex.Message);
                }
            }

            return countTrainedNews;
        }