/// <summary>
/// Reads the partial-mean files of every partition and combines the parsed
/// partial means, per cluster label, into a list of new centroids.
/// </summary>
/// <param name="partitionsNum">
/// Number of files to read. File <c>i</c> (0-based) is located at
/// <c>executionDirectory / Constants.DataDirectory / (Constants.PartialMeanFilePrefix + i)</c>.
/// </param>
/// <param name="clustersNum">
/// Number of clusters. At most this many leading lines are parsed from each file, and
/// one centroid is produced for each label in <c>0 .. clustersNum - 1</c>.
/// </param>
/// <param name="executionDirectory">Root directory under which the data directory is resolved.</param>
/// <returns>
/// A list with <paramref name="clustersNum"/> entries; entry <c>i</c> is whatever
/// <c>PartialMean.AggreatedMean</c> returns for the partial means whose label equals <c>i</c>
/// (that list may be empty if no file contained such a label).
/// </returns>
/// <exception cref="FileNotFoundException">A partition file does not exist (opened with <see cref="FileMode.Open"/>).</exception>
public static List<DataVector> AggregateTrueMeansToFileSystem(int partitionsNum, int clustersNum, string executionDirectory)
{
    List<PartialMean> partialMeans = new List<PartialMean>();

    for (int i = 0; i < partitionsNum; i++)
    {
        // should be replaced with Group Communication
        string path = Path.Combine(
            executionDirectory,
            Constants.DataDirectory,
            Constants.PartialMeanFilePrefix + i.ToString(CultureInfo.InvariantCulture));

        // Both the stream and the reader are scoped by using-statements so the file handle
        // is released even if constructing the reader or parsing a line throws.
        using (FileStream file = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read))
        using (StreamReader reader = new StreamReader(file))
        {
            // Only the first clustersNum lines are relevant; stop reading as soon as
            // that many have been consumed or the file ends (ReadLine returns null).
            string line;
            for (int index = 0; index < clustersNum && (line = reader.ReadLine()) != null; index++)
            {
                partialMeans.Add(PartialMean.FromString(line));
            }
        }
    }

    List<DataVector> newCentroids = new List<DataVector>();
    for (int i = 0; i < clustersNum; i++)
    {
        // Copy the loop variable so the lambda captures a value that never changes.
        int label = i;
        List<PartialMean> means = partialMeans.Where(m => m.Mean.Label == label).ToList();
        newCentroids.Add(PartialMean.AggreatedMean(means));
    }

    return newCentroids;
}
/// <summary>
/// Merges several <see cref="ProcessedResults"/> into one: the losses are summed and the
/// partial means are aggregated per cluster label.
/// </summary>
/// <param name="elements">The results to combine.</param>
/// <returns>
/// A <see cref="ProcessedResults"/> holding one aggregated partial mean for each label in
/// <c>0 .. maxLabel</c> (where <c>maxLabel</c> is the largest label seen) together with the
/// summed loss. When no partial means are supplied the returned mean list is empty.
/// </returns>
public ProcessedResults Reduce(IEnumerable<ProcessedResults> elements)
{
    List<PartialMean> totalList = new List<PartialMean>();
    float aggregatedLoss = 0;

    foreach (var element in elements)
    {
        totalList.AddRange(element.Means);
        aggregatedLoss += element.Loss;
    }

    // we infer the value of K from the labeled data.
    // Enumerable.Max throws InvalidOperationException on an empty sequence, so an
    // empty input is treated as "zero clusters" instead of failing.
    int clustersNum = totalList.Count == 0
        ? 0
        : totalList.Max(p => p.Mean.Label) + 1;

    List<PartialMean> aggregatedMeans = new List<PartialMean>();
    for (int i = 0; i < clustersNum; i++)
    {
        // Copy the loop variable so the lambda captures a value that never changes.
        int label = i;
        List<PartialMean> means = totalList.Where(m => m.Mean.Label == label).ToList();

        // NOTE(review): the second constructor argument is the number of partial means
        // sharing this label, not a sum over their individual sizes - confirm this is intended.
        aggregatedMeans.Add(new PartialMean(PartialMean.AggreatedMean(means), means.Count));
    }

    ProcessedResults returnMeans = new ProcessedResults(aggregatedMeans, aggregatedLoss);
    Logger.Log(Level.Info, "The true means aggregated by the reduce function: " + returnMeans);
    return returnMeans;
}