/// <summary>
/// Predicts a label for <paramref name="article"/> by voting among the K entries of
/// <c>ClassifiedArticles</c> that are closest to it according to <c>Metric</c>.
/// </summary>
/// <remarks>
/// Neighbours vote with their <c>PredictedLabel</c>. The label with the most votes wins;
/// when several labels share the highest vote count, the one whose voting neighbours have
/// the smallest summed distance is chosen. The article is appended to
/// <c>ClassifiedArticles</c> with its <c>PredictedLabel</c> set before the method returns.
/// </remarks>
/// <param name="article">Article to classify; its <c>Label</c> is copied onto the stored entry.</param>
/// <returns>The predicted label.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// No neighbours are available to vote (for example <c>ClassifiedArticles</c> is empty).
/// Nothing is added to <c>ClassifiedArticles</c> in that case.
/// </exception>
public string ClassifyArticle(PreprocessedArticle article)
{
    var candidate = new KnnArticle
    {
        Label = article.Label,
        FeaturesVector = FeaturesVectorService.GetFeaturesVector(article)
    };

    // Pair every already-classified article's predicted label with its distance to the candidate.
    var distances = new List<(string Label, double Distance)>();
    foreach (KnnArticle known in ClassifiedArticles)
    {
        string knownLabel = known.PredictedLabel;
        double distance = Metric.CalculateDistance(known.FeaturesVector, candidate.FeaturesVector);
        distances.Add((knownLabel, distance));
    }

    // Keep only the K closest entries.
    var nearest = distances
        .OrderBy(n => n.Distance)
        .Take(K)
        .ToList();

    // Votes per label, most frequent first.
    var votes = nearest
        .GroupBy(n => n.Label)
        .Select(g => (Label: g.Key, Count: g.Count()))
        .OrderByDescending(v => v.Count)
        .ToList();

    // The tally is sorted by descending count, so every label sharing the top count
    // forms a prefix of the list.
    var leaders = votes
        .TakeWhile(v => v.Count == votes[0].Count)
        .ToList();

    string winner;
    if (leaders.Count == 1)
    {
        winner = votes[0].Label;
    }
    else
    {
        // Tie on vote count: prefer the label whose neighbours are closest in total.
        // The sort is stable, so an exact tie on the sum falls to the label that
        // appears first among the nearest neighbours.
        var leaderLabels = leaders.Select(v => v.Label).ToList();
        var totals = nearest
            .Where(n => leaderLabels.Contains(n.Label))
            .GroupBy(n => n.Label)
            .Select(g => (Label: g.Key, TotalDistance: g.Sum(n => n.Distance)))
            .OrderBy(t => t.TotalDistance)
            .ToList();

        // Throws when there were no neighbours at all (empty tally).
        winner = totals[0].Label;
    }

    candidate.PredictedLabel = winner;
    ClassifiedArticles.Add(candidate);
    return winner;
}
/// <summary>
/// Builds a confusion matrix over <paramref name="tags"/> from the entries of
/// <c>ClassifiedArticles</c>, ignoring the first <c>coldStartNumber</c> entries.
/// </summary>
/// <remarks>
/// The outer key is an article's <c>Label</c> and the inner key is its
/// <c>PredictedLabel</c>; each cell counts the articles with that combination.
/// Every (tag, tag) cell is present in the result, starting from zero.
/// </remarks>
/// <param name="tags">Labels that form both the rows and the columns of the matrix.</param>
/// <returns>A nested dictionary: <c>matrix[Label][PredictedLabel]</c> gives the count.</returns>
/// <exception cref="ArgumentException"><paramref name="tags"/> contains a duplicate entry.</exception>
/// <exception cref="KeyNotFoundException">
/// A counted article has a <c>Label</c> or <c>PredictedLabel</c> that is not in <paramref name="tags"/>.
/// </exception>
public Dictionary<string, Dictionary<string, int>> CalculateConfusionMatrix(List<string> tags)
{
    var confusion = new Dictionary<string, Dictionary<string, int>>();

    // Lay out the full tags x tags grid with every cell at zero.
    foreach (string rowTag in tags)
    {
        var zeroRow = new Dictionary<string, int>();
        foreach (string columnTag in tags)
        {
            zeroRow.Add(columnTag, 0);
        }

        confusion.Add(rowTag, zeroRow);
    }

    // Tally every article after the first coldStartNumber entries.
    foreach (KnnArticle scored in ClassifiedArticles.Skip(coldStartNumber))
    {
        Dictionary<string, int> labelRow = confusion[scored.Label];
        labelRow[scored.PredictedLabel] += 1;
    }

    return confusion;
}