/// <summary>
/// Reads the partial means that each partition wrote to the file system and
/// combines them into one centroid per cluster label.
/// </summary>
/// <param name="partitionsNum">
/// Number of partial-mean files to read; the files are addressed by index
/// 0 .. partitionsNum - 1 appended to <c>Constants.PartialMeanFilePrefix</c>.
/// </param>
/// <param name="clustersNum">
/// Number of clusters. At most this many lines are parsed from each file, and one
/// centroid is produced for every label in 0 .. clustersNum - 1.
/// </param>
/// <param name="executionDirectory">
/// Directory that contains <c>Constants.DataDirectory</c>, where the partial-mean files live.
/// </param>
/// <returns>The combined centroids, in ascending label order.</returns>
/// <exception cref="ArgumentException">
/// Thrown by <c>PartialMean.AggreatedMean</c> when no partial mean was read for one of the labels.
/// </exception>
public static List<DataVector> AggregateTrueMeansToFileSystem(int partitionsNum, int clustersNum, string executionDirectory)
{
    List<PartialMean> partialMeans = new List<PartialMean>();

    for (int i = 0; i < partitionsNum; i++)
    {
        // should be replaced with Group Communication
        string path = Path.Combine(
            executionDirectory,
            Constants.DataDirectory,
            Constants.PartialMeanFilePrefix + i.ToString(CultureInfo.InvariantCulture));

        // Both disposables are owned by using statements so the file handle is released
        // even when constructing the reader or parsing a line throws.
        using (FileStream file = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read))
        using (StreamReader reader = new StreamReader(file))
        {
            // Only the first clustersNum lines are relevant; stop reading once we have them.
            for (int index = 0; index < clustersNum; index++)
            {
                string line = reader.ReadLine();
                if (line == null)
                {
                    // End of file reached before clustersNum lines were available.
                    break;
                }

                partialMeans.Add(PartialMean.FromString(line));
            }
        }
    }

    List<DataVector> newCentroids = new List<DataVector>();
    for (int i = 0; i < clustersNum; i++)
    {
        int label = i;
        List<PartialMean> means = partialMeans.Where(m => m.Mean.Label == label).ToList();
        newCentroids.Add(PartialMean.AggreatedMean(means));
    }

    return newCentroids;
}
/// <summary>
/// Merges several <see cref="ProcessedResults"/> into one: the partial means are
/// combined per cluster label and the losses are summed.
/// </summary>
/// <param name="elements">The results to merge.</param>
/// <returns>
/// A result holding one combined partial mean per label in 0 .. K - 1, where K is the
/// largest label seen plus one, together with the summed loss.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown by <c>Max</c> when <paramref name="elements"/> contributes no partial means,
/// because the number of clusters cannot be inferred.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown by <c>PartialMean.AggreatedMean</c> when some label below K has no partial mean.
/// </exception>
public ProcessedResults Reduce(IEnumerable<ProcessedResults> elements)
{
    List<PartialMean> totalList = new List<PartialMean>();
    float aggregatedLoss = 0;

    foreach (var element in elements)
    {
        totalList.AddRange(element.Means);
        aggregatedLoss += element.Loss;
    }

    // we infer the value of K from the labeled data
    int clustersNum = totalList.Max(p => p.Mean.Label) + 1;

    List<PartialMean> aggregatedMeans = new List<PartialMean>();
    for (int i = 0; i < clustersNum; i++)
    {
        int label = i;
        List<PartialMean> means = totalList.Where(m => m.Mean.Label == label).ToList();
        DataVector combinedMean = PartialMean.AggreatedMean(means);

        // The size of a partial mean is the weight used when it is combined again, so the
        // merged entry must carry the total of the merged sizes rather than the number of
        // partial means that were merged.
        int combinedSize = means.Sum(m => m.Size);
        aggregatedMeans.Add(new PartialMean(combinedMean, combinedSize));
    }

    ProcessedResults returnMeans = new ProcessedResults(aggregatedMeans, aggregatedLoss);
    Logger.Log(Level.Info, "The true means aggregated by the reduce function: " + returnMeans);
    return returnMeans;
}
/// <summary>
/// Folds a non-empty list of partial means into a single mean by combining them
/// pairwise, left to right, starting from the first entry.
/// </summary>
/// <param name="means">The partial means to combine; must contain at least one entry.</param>
/// <returns>The <c>Mean</c> of the fully combined partial mean.</returns>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="means"/> is null or empty.
/// </exception>
public static DataVector AggreatedMean(List<PartialMean> means)
{
    bool hasEntries = means != null && means.Count != 0;
    if (!hasEntries)
    {
        throw new ArgumentException("means");
    }

    // Seed the fold with the first entry, then merge in every remaining one in order.
    PartialMean accumulated = means[0];
    foreach (PartialMean next in means.GetRange(1, means.Count - 1))
    {
        accumulated = accumulated.CombinePartialMean(next);
    }

    return accumulated.Mean;
}
/// <summary>
/// Merges this partial mean with another one that carries the same label.
/// The result's size is the sum of both sizes; its mean is built by scaling each
/// mean by its own size, adding the two, and normalizing with the combined size.
/// </summary>
/// <param name="other">The partial mean to merge with this instance.</param>
/// <returns>A newly created <see cref="PartialMean"/> holding the merged size and mean.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="other"/> is null.</exception>
/// <exception cref="ArgumentException">Thrown when the two means carry different labels.</exception>
private PartialMean CombinePartialMean(PartialMean other)
{
    PartialMean combined = new PartialMean();

    if (other == null)
    {
        throw new ArgumentNullException("other");
    }

    if (Mean.Label != other.Mean.Label)
    {
        throw new ArgumentException("cannot combine partial means with different labels");
    }

    combined.Size = Size + other.Size;

    // NOTE(review): when both sizes are zero the normalization factor is zero as well —
    // confirm how DataVector.Normalize behaves in that case.
    var weightedSelf = Mean.MultiplyScalar(Size);
    var weightedOther = other.Mean.MultiplyScalar(other.Size);
    combined.Mean = weightedSelf.Add(weightedOther).Normalize(combined.Size);

    return combined;
}
/// <summary>
/// Computes one partial mean per cluster label from the data vectors of this partition.
/// </summary>
/// <returns>
/// A list with <c>_clustersNum</c> entries. Entry <c>i</c> carries label <c>i</c>, the
/// mean of all vectors in the partition labeled <c>i</c>, and the number of such vectors
/// as its size. A label with no vectors gets a freshly constructed vector of the
/// partition's dimension and size 0.
/// </returns>
public List<PartialMean> ComputePartialMeans()
{
    List<PartialMean> partialMeans = new List<PartialMean>(_clustersNum);

    for (int i = 0; i < _clustersNum; i++)
    {
        int label = i;
        List<DataVector> slices = _dataPartition.DataVectors.Where(d => d.Label == label).ToList();

        DataVector average;
        if (slices.Count > 0)
        {
            // A cluster with exactly one member must report that member as its mean;
            // otherwise it would contribute a default vector with a non-zero weight.
            average = DataVector.Mean(slices);
        }
        else
        {
            // No members in this partition: placeholder vector, paired with size 0 below.
            average = new DataVector(_dataPartition.DataVectors[0].Dimension);
        }

        average.Label = i;
        partialMeans.Add(new PartialMean(average, slices.Count));
    }

    return partialMeans;
}
/// <summary>
/// Computes one partial mean per cluster label from the data vectors of this partition
/// and logs each one as it is produced.
/// </summary>
/// <returns>
/// A list with <c>_clustersNum</c> entries. Entry <c>i</c> carries label <c>i</c>, the
/// mean of all vectors in the partition labeled <c>i</c>, and the number of such vectors
/// as its size. A label with no vectors gets a freshly constructed vector of the
/// partition's dimension and size 0.
/// </returns>
private List<PartialMean> ComputePartialMeans()
{
    Logger.Log(Level.Verbose, "cluster number " + _clustersNum);

    List<PartialMean> partialMeans = new List<PartialMean>(_clustersNum);
    for (int i = 0; i < _clustersNum; i++)
    {
        int label = i;
        List<DataVector> slices = _dataPartition.DataVectors.Where(d => d.Label == label).ToList();

        DataVector average;
        if (slices.Count > 0)
        {
            // A cluster with exactly one member must report that member as its mean;
            // otherwise it would contribute a default vector with a non-zero weight.
            average = DataVector.Mean(slices);
        }
        else
        {
            // No members in this partition: placeholder vector, paired with size 0 below.
            average = new DataVector(_dataPartition.DataVectors[0].Dimension);
        }

        average.Label = i;

        PartialMean partialMean = new PartialMean(average, slices.Count);
        partialMeans.Add(partialMean);
        Logger.Log(Level.Info, "Adding to partial means list: " + partialMean);
    }

    return partialMeans;
}
/// <summary>
/// Builds the per-cluster partial means for the data vectors held by this partition,
/// logging every partial mean that is added.
/// </summary>
/// <returns>
/// <c>_clustersNum</c> partial means ordered by label. Each one holds the mean of the
/// partition's vectors with that label and their count as its size; a label without
/// any vectors yields a freshly constructed vector of the partition's dimension with size 0.
/// </returns>
private List<PartialMean> ComputePartialMeans()
{
    Logger.Log(Level.Verbose, "cluster number " + _clustersNum);

    List<PartialMean> partialMeans = new List<PartialMean>(_clustersNum);
    for (int i = 0; i < _clustersNum; i++)
    {
        int label = i;
        List<DataVector> slices = _dataPartition.DataVectors.Where(d => d.Label == label).ToList();

        // Start from the placeholder used for empty clusters and replace it whenever the
        // cluster has members. The check is "> 0" so that a single-member cluster reports
        // that member instead of the placeholder with a weight of 1.
        DataVector average = new DataVector(_dataPartition.DataVectors[0].Dimension);
        if (slices.Count > 0)
        {
            average = DataVector.Mean(slices);
        }

        average.Label = i;

        PartialMean partialMean = new PartialMean(average, slices.Count);
        partialMeans.Add(partialMean);
        Logger.Log(Level.Info, "Adding to partial means list: " + partialMean);
    }

    return partialMeans;
}
/// <summary>
/// Produces a new partial mean that represents this instance and <paramref name="other"/>
/// together. Both inputs must share the same label.
/// </summary>
/// <param name="other">The second partial mean of the pair.</param>
/// <returns>
/// A new <see cref="PartialMean"/> whose size is the sum of both sizes and whose mean is
/// the sum of the two size-scaled means, normalized with that combined size.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="other"/> is null.</exception>
/// <exception cref="ArgumentException">Thrown when the labels of the two means differ.</exception>
private PartialMean CombinePartialMean(PartialMean other)
{
    PartialMean result = new PartialMean();

    if (other == null)
    {
        throw new ArgumentNullException("other");
    }

    if (this.Mean.Label != other.Mean.Label)
    {
        throw new ArgumentException("cannot combine partial means with different labels");
    }

    // The size is stored first because the normalization below reads it back.
    result.Size = this.Size + other.Size;
    result.Mean = this.Mean
        .MultiplyScalar(this.Size)
        .Add(other.Mean.MultiplyScalar(other.Size))
        .Normalize(result.Size);

    return result;
}