Ejemplo n.º 1
0
 /// <summary>
 /// Builds a <see cref="PartialMeanContract"/> from the given partial mean.
 /// </summary>
 /// <param name="partialMean">Source object whose <c>Mean</c> and <c>Size</c> are read.</param>
 /// <returns>
 /// A new contract whose <c>DataVectContract</c> is produced by
 /// <c>DataVectorContract.Create</c> from <c>partialMean.Mean</c> and whose
 /// <c>Size</c> is copied from <c>partialMean.Size</c>.
 /// </returns>
 public static PartialMeanContract Create(PartialMean partialMean)
 {
     PartialMeanContract contract = new PartialMeanContract
     {
         DataVectContract = DataVectorContract.Create(partialMean.Mean),
         Size = partialMean.Size
     };

     return contract;
 }
Ejemplo n.º 2
0
 /// <summary>
 /// Converts a <see cref="PartialMean"/> into its <see cref="PartialMeanContract"/> form.
 /// </summary>
 /// <param name="partialMean">The partial mean to convert; its <c>Mean</c> and <c>Size</c> are read.</param>
 /// <returns>A newly created contract carrying the converted mean and the size.</returns>
 public static PartialMeanContract Create(PartialMean partialMean)
 {
     // Construct first, then populate in the same order an object initializer would.
     PartialMeanContract result = new PartialMeanContract();
     result.DataVectContract = DataVectorContract.Create(partialMean.Mean);
     result.Size = partialMean.Size;
     return result;
 }
Ejemplo n.º 3
0
        //// TODO[JIRA REEF-1184]: add timeout 180 sec
        /// <summary>
        /// Runs the legacy K-means tasks in-process, exchanging intermediate results through
        /// a per-run execution directory, and iterates until the loss stops changing.
        /// </summary>
        /// <remarks>
        /// The execution directory is removed on a best-effort basis when the method exits,
        /// whether it completes normally or throws.
        /// </remarks>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the loss computed in an iteration is larger than the loss of the
        /// previous iteration.
        /// </exception>
        public void TestKMeansOnDirectRunViaFileSystem()
        {
            int iteration = 0;

            // A short random suffix gives each run its own workspace under the current directory.
            string executionDirectory = Path.Combine(
                Directory.GetCurrentDirectory(),
                string.Join("-", Constants.KMeansExecutionBaseDirectory, Guid.NewGuid().ToString("N").Substring(0, 4)));

            try
            {
                string dataFilePath = GenerateDataFileAndGetPath();

                // NOTE(review): the returned centroids are overwritten before being read here;
                // the call is kept because it presumably prepares the partition data under
                // executionDirectory - confirm against DataVector.
                List<DataVector> centroids = DataVector.ShuffleDataAndGetInitialCentriods(dataFilePath, Partitions, K, executionDirectory);

                // initialize all tasks
                List<LegacyKMeansTask> tasks = new List<LegacyKMeansTask>();
                List<DataVector> labeledData = new List<DataVector>();

                for (int i = 0; i < Partitions; i++)
                {
                    DataPartitionCache partition = new DataPartitionCache(i, executionDirectory);
                    tasks.Add(new LegacyKMeansTask(partition, K, executionDirectory));
                    labeledData.AddRange(partition.DataVectors);
                }

                float loss = float.MaxValue;

                while (true)
                {
                    for (int i = 0; i < Partitions; i++)
                    {
                        tasks[i].CallWithWritingToFileSystem(null);
                    }

                    List<DataVector> newCentroids = PartialMean.AggregateTrueMeansToFileSystem(Partitions, K, executionDirectory);
                    DataVector.WriteToCentroidFile(newCentroids, executionDirectory);
                    centroids = newCentroids;

                    float newLoss = LegacyKMeansTask.ComputeLossFunction(centroids, labeledData);
                    if (newLoss > loss)
                    {
                        throw new InvalidOperationException(
                            string.Format(CultureInfo.InvariantCulture, "The new loss {0} is larger than previous loss {1}, while loss function must be monotonically decreasing across iterations", newLoss, loss));
                    }

                    // An unchanged loss between two consecutive iterations is treated as convergence.
                    if (newLoss.Equals(loss))
                    {
                        Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "KMeans clustering has converged with a loss value of {0} at iteration {1} ", newLoss, iteration));
                        break;
                    }

                    loss = newLoss;
                    iteration++;
                }
            }
            finally
            {
                // cleanup workspace, also when the run failed, so no directory is left behind
                try
                {
                    Directory.Delete(executionDirectory, true);
                }
                catch (Exception)
                {
                    // do not fail (or mask the original failure) if clean up is unsuccessful
                }
            }
        }