Beispiel #1
0
        /// <summary>
        /// Accumulates, over every centroid, the value returned by
        /// <c>centroid.DistanceTo</c> for the data vectors that carry that centroid's label.
        /// </summary>
        /// <param name="centroids">Centroids whose <c>Points</c> are visited in order.</param>
        /// <param name="labeledData">Data vectors; each is matched to a centroid via its <c>Label</c>.</param>
        /// <returns>The running sum of the per-centroid distances.</returns>
        private float ComputeLossFunction(Centroids centroids, List <DataVector> labeledData)
        {
            float total = 0;

            foreach (DataVector point in centroids.Points)
            {
                // Only the vectors currently labeled with this centroid contribute to its term.
                List <DataVector> members = labeledData
                                            .Where(candidate => candidate.Label == point.Label)
                                            .ToList();

                total += point.DistanceTo(members);
            }

            return total;
        }
 /// <summary>
 /// Creates the task, storing the injected data partition, cluster count and
 /// execution directory, and loading the centroids from the centroids file
 /// under the execution directory when none are set yet.
 /// </summary>
 /// <param name="dataPartition">Data partition this task works on.</param>
 /// <param name="clustersNumber">Number of clusters (bound to <c>KMeansConfiguratioinOptions.K</c>).</param>
 /// <param name="executionDirectory">Directory that contains the centroids file.</param>
 public LegacyKMeansTask(
     DataPartitionCache dataPartition,
     [Parameter(Value = typeof(KMeansConfiguratioinOptions.K))] int clustersNumber,
     [Parameter(Value = typeof(KMeansConfiguratioinOptions.ExecutionDirectory))] string executionDirectory)
 {
     _kMeansExecutionDirectory = executionDirectory;
     _clustersNum = clustersNumber;
     _dataPartition = dataPartition;

     // NOTE(review): unless _centroids is static or has an initializer elsewhere,
     // this check looks always-true inside a constructor -- confirm against the field declaration.
     if (_centroids == null)
     {
         _centroids = new Centroids(
             DataPartitionCache.ReadDataFile(
                 Path.Combine(_kMeansExecutionDirectory, Constants.CentroidsFile)));
     }
 }
Beispiel #3
0
        /// <summary>
        /// Drives the iteration loop: initializes group communication, reads the initial
        /// centroids from the centroids file, then repeatedly broadcasts centroids and
        /// reduces the returned results until the reported loss stops changing.
        /// </summary>
        /// <param name="memento">Not used by this method.</param>
        /// <returns>Always <c>null</c>.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown (after a STOP control message has been sent) when a reduced loss is
        /// larger than the loss of the previous iteration.
        /// </exception>
        public byte[] Call(byte[] memento)
        {
            // TODO: this belongs to dedicated data loader layer, will refactor once we have that
            _groupCommClient.Initialize();
            _centroids = new Centroids(
                DataPartitionCache.ReadDataFile(
                    Path.Combine(_kMeansExecutionDirectory, Constants.CentroidsFile)));

            float previousLoss = float.MaxValue;

            for (;;)
            {
                if (_isInitialIteration)
                {
                    // First pass: nothing to reduce yet, the centroids read from file go out as-is.
                    Logger.Log(Level.Info, "Broadcasting INITIAL centroids to all slave nodes: " + _centroids);
                    _isInitialIteration = false;
                }
                else
                {
                    ProcessedResults reduced = _meansReducerReceiver.Reduce();
                    _centroids = new Centroids(reduced.Means.Select(m => m.Mean).ToList());
                    Logger.Log(Level.Info, "Broadcasting new centroids to all slave nodes: " + _centroids);

                    float currentLoss = reduced.Loss;
                    Logger.Log(Level.Info, string.Format(CultureInfo.InvariantCulture, "The new loss value {0} at iteration {1} ", currentLoss, _iteration));

                    if (currentLoss > previousLoss)
                    {
                        // Tell the slaves to stop before surfacing the failure.
                        _controlBroadcastSender.Send(ControlMessage.STOP);
                        throw new InvalidOperationException(
                                  string.Format(CultureInfo.InvariantCulture, "The new loss {0} is larger than previous loss {1}, while loss function must be monotonically decreasing across iterations", currentLoss, previousLoss));
                    }

                    if (currentLoss.Equals(previousLoss))
                    {
                        // Unchanged loss is treated as convergence; leave the loop without another broadcast.
                        Logger.Log(Level.Info, string.Format(CultureInfo.InvariantCulture, "KMeans clustering has converged with a loss value of {0} at iteration {1} ", currentLoss, _iteration));
                        break;
                    }

                    previousLoss = currentLoss;
                }

                // Control message first, then the centroid payload for this iteration.
                _controlBroadcastSender.Send(ControlMessage.RECEIVE);
                _dataBroadcastSender.Send(_centroids);
                _iteration++;
            }

            _controlBroadcastSender.Send(ControlMessage.STOP);
            return null;
        }
 /// <summary>
 /// Assigns to every vector in <c>DataVectors</c> the label of the centroid with the
 /// smallest <c>DistanceTo</c> value. On equal distances the earlier centroid is kept.
 /// </summary>
 /// <param name="centroids">Centroids whose <c>Points</c> are the labeling candidates.</param>
 public void LabelData(Centroids centroids)
 {
     foreach (DataVector item in DataVectors)
     {
         float closest = float.MaxValue;

         foreach (DataVector candidate in centroids.Points)
         {
             float distance = item.DistanceTo(candidate);

             // Written as a negated "<" so that a NaN distance is skipped as well.
             if (!(distance < closest))
             {
                 continue;
             }

             item.Label = candidate.Label;
             closest = distance;
         }
     }
 }
Beispiel #5
0
        /// <summary>
        /// Worker loop: until a STOP control message arrives, receives centroids,
        /// relabels the local data partition against them, and sends back the partial
        /// means together with the loss value.
        /// </summary>
        /// <param name="memento">Not used by this method.</param>
        /// <returns>Always <c>null</c>.</returns>
        public byte[] Call(byte[] memento)
        {
            while (_controlBroadcastReceiver.Receive() != ControlMessage.STOP)
            {
                Centroids received = _dataBroadcastReceiver.Receive();
                _logger.Log(Level.Info, "Received centroids from master: " + received);

                _dataPartition.LabelData(received);

                // The loss is evaluated against the centroids just received rather than the
                // means computed in this pass, so it does not reflect the clustering at the
                // end of this iteration; in exchange it avoids another round of group
                // communication per iteration.
                var means = ComputePartialMeans();
                float loss = ComputeLossFunction(received, _dataPartition.DataVectors);
                ProcessedResults partialMeans = new ProcessedResults(means, loss);

                _logger.Log(Level.Info, "Sending partial means: " + partialMeans);
                _partialMeansSender.Send(partialMeans);
            }

            return null;
        }
        /// <summary>
        /// File-system variant of the task body: reads the centroids file, labels the
        /// local data partition, computes the partial means and writes them, one per
        /// line, to this partition's partial-mean file under the data directory.
        /// </summary>
        /// <param name="memento">Not used by this method.</param>
        /// <returns>Always <c>null</c>.</returns>
        public byte[] CallWithWritingToFileSystem(byte[] memento)
        {
            string centroidFile = Path.Combine(_kMeansExecutionDirectory, Constants.CentroidsFile);

            _centroids = new Centroids(DataPartitionCache.ReadDataFile(centroidFile));

            _dataPartition.LabelData(_centroids);
            _partialMeans = ComputePartialMeans();

            string partialMeansFile = Path.Combine(
                _kMeansExecutionDirectory,
                Constants.DataDirectory,
                Constants.PartialMeanFilePrefix + _dataPartition.Partition);

            // should be replaced with Group Communication
            // File.Create truncates an existing file. File.OpenWrite does not, so a shorter
            // result written over a previous run's file would keep stale trailing content.
            using (StreamWriter writer = new StreamWriter(File.Create(partialMeansFile)))
            {
                for (int i = 0; i < _partialMeans.Count; i++)
                {
                    writer.WriteLine(_partialMeans[i].ToString());
                }

                // Disposal at the end of the using block flushes and closes the writer.
            }

            return null;
        }
Beispiel #7
0
 /// <summary>
 /// Sums, centroid by centroid, the result of <c>DistanceTo</c> applied to the
 /// subset of <paramref name="labeledData"/> whose label equals the centroid's label.
 /// </summary>
 /// <param name="centroids">Centroids to evaluate, taken from <c>Points</c> in index order.</param>
 /// <param name="labeledData">Labeled data vectors to partition by centroid label.</param>
 /// <returns>The accumulated loss value.</returns>
 private float ComputeLossFunction(Centroids centroids, List<DataVector> labeledData)
 {
     float loss = 0;
     int centroidCount = centroids.Points.Count;

     for (int index = 0; index < centroidCount; index++)
     {
         DataVector current = centroids.Points[index];

         // Vectors assigned to the current centroid.
         List<DataVector> assigned =
             (from vector in labeledData
              where vector.Label == current.Label
              select vector).ToList();

         loss += current.DistanceTo(assigned);
     }

     return loss;
 }