/// <summary>
/// Finds the key of the centroid that lies closest to <paramref name="v"/>.
/// </summary>
/// <param name="v">Vector whose nearest centroid is looked up.</param>
/// <returns>
/// The key of the entry in <c>Centroids</c> with the smallest
/// <c>GenericVector.Distance</c> to <paramref name="v"/>. The ordering is stable,
/// so on a tie the entry enumerated first wins. When <c>Centroids</c> is empty
/// the default key value is returned.
/// </returns>
private int GetNearestCluster(GenericVector v)
{
    // Query syntax compiles to the same OrderBy/Select pipeline.
    var keysByAscendingDistance =
        from centroid in Centroids
        orderby GenericVector.Distance(centroid.Value, v)
        select centroid.Key;

    return keysByAscendingDistance.FirstOrDefault();
}
/// <summary>
/// Adds <paramref name="vectorToSum"/> to this vector component by component,
/// modifying this instance in place.
/// </summary>
/// <param name="vectorToSum">
/// Vector to add. Its <c>Size</c> must equal the <c>Size</c> of this instance.
/// </param>
/// <returns>This same instance, after its points have been updated.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="vectorToSum"/> is <c>null</c>.
/// </exception>
/// <exception cref="ArgumentException">
/// The two vectors do not have the same <c>Size</c>.
/// </exception>
public GenericVector Sum(GenericVector vectorToSum)
{
    if (vectorToSum == null)
    {
        throw new ArgumentNullException("vectorToSum");
    }

    if (Size != vectorToSum.Size)
    {
        // ArgumentException derives from Exception, so existing
        // catch (Exception) handlers keep working.
        throw new ArgumentException(
            "GenericVector size of vectorToSum not equal to instance vector size",
            "vectorToSum");
    }

    for (var i = 0; i < Points.Count; i++)
    {
        Points[i] += vectorToSum.Points[i];
    }

    return this;
}
/// <summary>
/// Entry point: reads <c>data.csv</c>, builds one <c>GenericVector</c> per CSV
/// column, runs k-means with 4 clusters for 100 iterations, then prints the
/// clusters and the value returned by <c>SquaredErrors()</c>.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
public static void Main(string[] args)
{
    // The file is comma-delimited, so numbers must use '.' as the decimal
    // separator. Parse with the invariant culture so the result does not
    // depend on the regional settings of the machine running the program.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var rows = File
        .ReadAllLines("data.csv")
        // Blank lines would otherwise make float.Parse("") throw.
        .Where(line => !string.IsNullOrWhiteSpace(line))
        .Select(line => line
            .Split(',')
            .Select(cell => float.Parse(cell, invariant))
            .ToList())
        .ToList();

    // Column index -> vector holding that column's value from every row.
    var dataSet = new Dictionary<int, GenericVector>();
    foreach (var row in rows)
    {
        for (var column = 0; column < row.Count; column++)
        {
            GenericVector vector;
            if (!dataSet.TryGetValue(column, out vector))
            {
                vector = new GenericVector();
                dataSet[column] = vector;
            }

            vector.Add(row[column]);
        }
    }

    var clustering = new kmeans
    {
        Clusters = 4,
        DataSet = dataSet.Values.ToList(),
        Iterations = 100
    };

    clustering.Run();
    clustering.PrintClusters();
    Console.WriteLine(clustering.SquaredErrors());
}
/// <summary>
/// Computes the sum, over every vector in <c>DataSet</c>, of the squared
/// distance between the vector and the centroid stored under the vector's
/// <c>Cluster</c> key.
/// </summary>
/// <returns>The accumulated squared distances.</returns>
public double SquaredErrors()
{
    return DataSet.Sum(point =>
    {
        var distanceToCentroid = GenericVector.Distance(point, Centroids[point.Cluster]);
        return Math.Pow(distanceToCentroid, 2);
    });
}
/// <summary>
/// Computes the Euclidean distance between two vectors: the square root of
/// the sum of the squared component-wise differences.
/// </summary>
/// <param name="a">First vector; its points drive the iteration.</param>
/// <param name="b">Second vector; indexed at each position of <paramref name="a"/>.</param>
/// <returns>The distance between <paramref name="a"/> and <paramref name="b"/>.</returns>
public static double Distance(GenericVector a, GenericVector b)
{
    // Materialise every difference first, then square and add them up.
    var deltas = a.Points
        .Select((coordinate, index) => coordinate - b.Points[index])
        .ToArray();

    var sumOfSquares = deltas
        .Select(delta => Math.Pow(delta, 2))
        .Sum();

    return Math.Sqrt(sumOfSquares);
}
/// <summary>
/// Tells whether every component of this vector is strictly greater than the
/// component of <paramref name="v"/> at the same index.
/// </summary>
/// <remarks>
/// The previous implementation tested whether the number of greater components
/// exceeded <c>Points.Count</c>, which can never happen, so it always returned
/// <c>false</c>. NOTE(review): "all components greater" is the assumed intent —
/// confirm against callers.
/// </remarks>
/// <param name="v">Vector to compare against, index by index.</param>
/// <returns>
/// <c>true</c> when no component of this vector fails the strict
/// greater-than comparison (including when this vector has no points);
/// otherwise <c>false</c>.
/// </returns>
public bool IsBiggerAs(GenericVector v)
{
    for (var i = 0; i < Points.Count; i++)
    {
        // Negated '>' so that incomparable values (e.g. NaN) count as "not bigger".
        if (!(Points[i] > v.Points[i]))
        {
            return false;
        }
    }

    return true;
}