/// <summary>
/// Accumulates the loss over all centroids: for each centroid, the value returned by
/// <c>DistanceTo</c> for the data vectors carrying that centroid's label is added to the total.
/// </summary>
/// <param name="centroids">Centroids whose <c>Points</c> are visited in order.</param>
/// <param name="labeledData">Data vectors whose <c>Label</c> is compared with each centroid's label.</param>
/// <returns>The sum of the per-centroid distances.</returns>
private float ComputeLossFunction(Centroids centroids, List<DataVector> labeledData)
{
    float totalLoss = 0;

    foreach (DataVector centroid in centroids.Points)
    {
        // Vectors assigned to this centroid, in their original order.
        List<DataVector> members =
            (from vector in labeledData
             where vector.Label == centroid.Label
             select vector).ToList();

        totalLoss += centroid.DistanceTo(members);
    }

    return totalLoss;
}
/// <summary>
/// Creates the task, storing the data partition, the number of clusters and the execution
/// directory. When no centroids are set yet, they are read from the centroids file located
/// in the execution directory.
/// </summary>
/// <param name="dataPartition">Data partition this task works on.</param>
/// <param name="clustersNumber">Number of clusters (bound to <c>KMeansConfiguratioinOptions.K</c>).</param>
/// <param name="executionDirectory">Execution directory (bound to <c>KMeansConfiguratioinOptions.ExecutionDirectory</c>).</param>
public LegacyKMeansTask(
    DataPartitionCache dataPartition,
    [Parameter(Value = typeof(KMeansConfiguratioinOptions.K))] int clustersNumber,
    [Parameter(Value = typeof(KMeansConfiguratioinOptions.ExecutionDirectory))] string executionDirectory)
{
    _kMeansExecutionDirectory = executionDirectory;
    _clustersNum = clustersNumber;
    _dataPartition = dataPartition;

    // Centroids are already available: nothing to load.
    if (_centroids != null)
    {
        return;
    }

    _centroids = new Centroids(
        DataPartitionCache.ReadDataFile(
            Path.Combine(_kMeansExecutionDirectory, Constants.CentroidsFile)));
}
/// <summary>
/// Master-side iteration loop. Loads the initial centroids from the centroids file, then
/// repeatedly broadcasts the current centroids and, from the second pass on, replaces them
/// with the means obtained from the reducer. The loop ends when the reduced loss equals the
/// previous loss; a loss larger than the previous one is treated as an error.
/// </summary>
/// <param name="memento">Not used by this method.</param>
/// <returns>Always <c>null</c>.</returns>
/// <exception cref="InvalidOperationException">
/// The reduced loss is larger than the loss of the previous iteration (a STOP control
/// message is sent before throwing).
/// </exception>
public byte[] Call(byte[] memento)
{
    // TODO: this belongs to dedicated data loader layer, will refactor once we have that
    _groupCommClient.Initialize();
    _centroids = new Centroids(
        DataPartitionCache.ReadDataFile(
            Path.Combine(_kMeansExecutionDirectory, Constants.CentroidsFile)));

    float previousLoss = float.MaxValue;

    for (;;)
    {
        if (!_isInitialIteration)
        {
            ProcessedResults reduced = _meansReducerReceiver.Reduce();
            _centroids = new Centroids(reduced.Means.Select(m => m.Mean).ToList());
            Logger.Log(Level.Info, "Broadcasting new centroids to all slave nodes: " + _centroids);

            float currentLoss = reduced.Loss;
            Logger.Log(
                Level.Info,
                string.Format(CultureInfo.InvariantCulture, "The new loss value {0} at iteration {1} ", currentLoss, _iteration));

            // The loss must never grow between iterations: stop the slaves and fail.
            if (currentLoss > previousLoss)
            {
                _controlBroadcastSender.Send(ControlMessage.STOP);
                throw new InvalidOperationException(
                    string.Format(CultureInfo.InvariantCulture, "The new loss {0} is larger than previous loss {1}, while loss function must be monotonically decreasing across iterations", currentLoss, previousLoss));
            }

            // An unchanged loss is the convergence criterion.
            if (currentLoss.Equals(previousLoss))
            {
                Logger.Log(
                    Level.Info,
                    string.Format(CultureInfo.InvariantCulture, "KMeans clustering has converged with a loss value of {0} at iteration {1} ", currentLoss, _iteration));
                break;
            }

            previousLoss = currentLoss;
        }
        else
        {
            // First pass: the centroids read from file are broadcast as they are.
            Logger.Log(Level.Info, "Broadcasting INITIAL centroids to all slave nodes: " + _centroids);
            _isInitialIteration = false;
        }

        // Control message first, then the centroids themselves.
        _controlBroadcastSender.Send(ControlMessage.RECEIVE);
        _dataBroadcastSender.Send(_centroids);
        _iteration++;
    }

    _controlBroadcastSender.Send(ControlMessage.STOP);
    return null;
}
/// <summary>
/// Labels every vector in <c>DataVectors</c> with the label of the centroid that yields the
/// smallest <c>DistanceTo</c> value. A vector's label is left untouched when no centroid
/// produces a distance below <see cref="float.MaxValue"/>.
/// </summary>
/// <param name="centroids">Centroids whose <c>Points</c> are the labeling candidates.</param>
public void LabelData(Centroids centroids)
{
    foreach (DataVector point in DataVectors)
    {
        float closest = float.MaxValue;

        foreach (DataVector candidate in centroids.Points)
        {
            float distance = point.DistanceTo(candidate);

            // Written as a negated "<" so that a NaN distance is skipped as well.
            if (!(distance < closest))
            {
                continue;
            }

            point.Label = candidate.Label;
            closest = distance;
        }
    }
}
/// <summary>
/// Slave-side iteration loop. Until a STOP control message is received, each pass receives
/// the broadcast centroids, labels the local data partition with them, and sends the
/// partial means together with the local loss.
/// </summary>
/// <param name="memento">Not used by this method.</param>
/// <returns>Always <c>null</c>.</returns>
public byte[] Call(byte[] memento)
{
    while (_controlBroadcastReceiver.Receive() != ControlMessage.STOP)
    {
        Centroids received = _dataBroadcastReceiver.Receive();
        _logger.Log(Level.Info, "Received centroids from master: " + received);

        _dataPartition.LabelData(received);

        // The loss is measured against the centroids just received, not against the means
        // computed below, so it does not reflect the clustering at the end of this
        // iteration; in exchange no extra round of group communication is needed.
        var means = ComputePartialMeans();
        float loss = ComputeLossFunction(received, _dataPartition.DataVectors);
        ProcessedResults outgoing = new ProcessedResults(means, loss);

        _logger.Log(Level.Info, "Sending partial means: " + outgoing);
        _partialMeansSender.Send(outgoing);
    }

    return null;
}
/// <summary>
/// File-system based variant of the task: reads the centroids from the centroids file in the
/// execution directory, labels the data partition with them, computes the partial means and
/// writes them, one per line, to a file named after the partition inside the data directory.
/// </summary>
/// <param name="memento">Not used by this method.</param>
/// <returns>Always <c>null</c>.</returns>
public byte[] CallWithWritingToFileSystem(byte[] memento)
{
    string centroidFile = Path.Combine(_kMeansExecutionDirectory, Constants.CentroidsFile);
    _centroids = new Centroids(DataPartitionCache.ReadDataFile(centroidFile));
    _dataPartition.LabelData(_centroids);
    _partialMeans = ComputePartialMeans();

    // should be replaced with Group Communication
    string partialMeansFile = Path.Combine(
        _kMeansExecutionDirectory,
        Constants.DataDirectory,
        Constants.PartialMeanFilePrefix + _dataPartition.Partition);

    // File.Create truncates an existing file. File.OpenWrite does not, so when the new
    // content is shorter than what a previous run wrote, stale bytes would remain at the
    // end of the file and corrupt the partial means read back later.
    using (StreamWriter writer = new StreamWriter(File.Create(partialMeansFile)))
    {
        for (int i = 0; i < _partialMeans.Count; i++)
        {
            writer.WriteLine(_partialMeans[i].ToString());
        }
    }

    return null;
}
/// <summary>
/// Computes the loss for the given centroids: each centroid contributes the value its
/// <c>DistanceTo</c> returns for the data vectors that share its label.
/// </summary>
/// <param name="centroids">Centroids whose <c>Points</c> are visited in order.</param>
/// <param name="labeledData">Data vectors, filtered per centroid by <c>Label</c>.</param>
/// <returns>The accumulated per-centroid distance.</returns>
private float ComputeLossFunction(Centroids centroids, List<DataVector> labeledData)
{
    float loss = 0;

    foreach (DataVector center in centroids.Points)
    {
        // Only the vectors currently labeled with this centroid take part.
        List<DataVector> assigned = labeledData
            .Where(candidate => candidate.Label == center.Label)
            .ToList();

        float contribution = center.DistanceTo(assigned);
        loss += contribution;
    }

    return loss;
}