/// <summary>
/// Checks that <see cref="KnearestClassification"/> labels the sample point (96, 86)
/// with cluster 2.
/// </summary>
/// <remarks>
/// Four clusters (keys 1..4) of four points each are generated. Cluster <c>k</c> draws both
/// coordinates from [50k - 10, 50k), so cluster 2 occupies [90, 100) on both axes.
/// </remarks>
public void TestKnearestClassification()
{
    // A fixed seed keeps the generated points identical on every run, so a failure
    // can be reproduced. The coordinate ranges themselves are unchanged.
    const int Seed = 12345;
    const int PointsPerCluster = 4;

    var r = new Random(Seed);
    var clusters = new Dictionary<int, IEnumerable<GenericVector>>();
    var samplePoint = new GenericVector(96, 86);

    var clus = 1;
    for (int i = 50; i < 201; i += 50)
    {
        var sampleData = new List<GenericVector>();
        for (int j = 0; j < PointsPerCluster; j++)
        {
            // Random.Next(min, max) excludes max, so both coordinates fall in [i - 10, i).
            sampleData.Add(new GenericVector(r.Next(i - 10, i), r.Next(i - 10, i)));
        }

        clusters[clus++] = sampleData;
    }

    // The second constructor argument is presumably k (number of neighbours) — TODO confirm.
    var kNearest = new KnearestClassification(clusters, 3);

    Assert.Equal(2, kNearest.ClassifyPoint(samplePoint));
}
/// <summary>
/// Builds a scatter plot of the students over two selected attributes and returns the
/// template produced by <see cref="HighchartsAdapter"/>. Optional flags add clustering,
/// regression and correlation overlays, and can first assign grades to ungraded students.
/// </summary>
/// <param name="dataA">Attribute passed first to <c>ToGenericVector</c>; also used as the x-axis label.</param>
/// <param name="dataB">Attribute passed second to <c>ToGenericVector</c>; also used as the y-axis label.</param>
/// <param name="kmeans">Add k-means clusters to the chart.</param>
/// <param name="dbscan">Add DBSCAN clusters to the chart and restrict the dataset to the clustered points.</param>
/// <param name="linearregression">Add a linear regression to the chart.</param>
/// <param name="polynomialregression">Add a polynomial regression to the chart.</param>
/// <param name="pearsoncorrelation">Add a Pearson correlation to the chart.</param>
/// <param name="spearmancorrelation">Add a Spearman correlation to the chart.</param>
/// <param name="knearest">Grade ungraded students with k-nearest classification; takes precedence over <paramref name="naivebayes"/>.</param>
/// <param name="naivebayes">Grade ungraded students with naive Bayes classification.</param>
/// <returns>The string returned by <c>HighchartsAdapter.CreateTemplate()</c>.</returns>
public string CreateGraph(Stud dataA, Stud dataB, bool kmeans, bool dbscan, bool linearregression, bool polynomialregression, bool pearsoncorrelation, bool spearmancorrelation, bool knearest, bool naivebayes)
{
    // Work on a copy. Adding the classified students straight to Students.StudentsGraded
    // would permanently grow the shared collection, and every further call would append
    // the same ungraded students again.
    var gradedStudents = Students.StudentsGraded.ToList();

    if (knearest || naivebayes)
    {
        var clusters = new Dbscan(210, 3, Students.StudentsGraded.Select(x => x.ToGenericVector(Stud.Attempts, Stud.Class, Stud.FailRatio, Stud.Fails, Stud.Succeeds, Stud.SuccessRatio, Stud.Grade)));

        // Mean of component 6 per cluster; Stud.Grade is the seventh attribute requested above.
        var grades = new Dictionary<int, double>();
        foreach (var cluster in clusters.DataClusters)
        {
            grades[cluster.Key] = cluster.Value.Sum(x => x[6]) / cluster.Value.Count();
        }

        // Build only the classifier that will actually be used; knearest wins when both flags are set.
        Classification classification;
        if (knearest)
        {
            classification = new KnearestClassification(clusters.DataClusters, 5);
        }
        else
        {
            classification = new NaiveBayesClassification(clusters.DataClusters, 50000);
        }

        foreach (var student in Students.StudentsUngraded)
        {
            var clusterId = classification.ClassifyPoint(student.ToGenericVector(Stud.Attempts, Stud.Class, Stud.FailRatio, Stud.Fails, Stud.Succeeds, Stud.SuccessRatio, Stud.Grade));

            // NOTE(review): this still writes the estimated grade onto the shared student object.
            student.Grade = (int)grades[clusterId];
            gradedStudents.Add(student);
        }
    }

    foreach (var student in gradedStudents)
    {
        student.Filter();
    }

    var data = new Dataset(gradedStudents.Select(x => x.ToGenericVector(dataA, dataB)));
    var highChart = new HighchartsAdapter(Highchart.Scatterplot);

    // Dbscan removes outliers, so we have to change our dataset afterwards
    if (dbscan)
    {
        var newData = new List<GenericVector>();
        var dBscan = new Dbscan(50, 3, data);
        foreach (var cluster in dBscan.DataClusters)
        {
            newData.AddRange(cluster.Value);
        }

        data = new Dataset(newData);
        highChart.AddClusters(dBscan);
    }

    if (kmeans)
    {
        highChart.AddClusters(new Kmeans(4, 100, data));
    }

    if (linearregression)
    {
        highChart.AddRegression(new LinearRegression(data.Select(x => x.ToVector2())));
    }

    if (polynomialregression)
    {
        // The trailing 3 is presumably the polynomial degree — TODO confirm.
        highChart.AddRegression(new PolynomialRegression(data.Select(x => x.ToVector2()), 3));
    }

    if (pearsoncorrelation)
    {
        highChart.AddCorrelation(new PearsonCorrelation(data.Select(x => x.ToVector2())));
    }

    if (spearmancorrelation)
    {
        highChart.AddCorrelation(new SpearmanCorrelation(data.Select(x => x.ToVector2())));
    }

    highChart.SetDivId("plotkmeans");
    highChart.SetTitle($"{dataA} vs {dataB}");
    highChart.SetXlabel(dataA.ToString());
    highChart.SetYlabel(dataB.ToString());

    return highChart.CreateTemplate();
}