예제 #1
0
 /// <summary>
 /// Copy constructor: initializes a new task description from an existing one.
 /// </summary>
 /// <param name="task">The task to copy from. It is also handed to the base constructor.</param>
 public KMeansTaskData(KMeansTaskData task) : base(task)
 {
     // Each property is assigned directly from the source task (no cloning is performed here).
     TaskID = task.TaskID;
     PartitionNumber = task.PartitionNumber;
     M = task.M;
     Centroids = task.Centroids;
     TaskStartTime = task.TaskStartTime;
     Iteration = task.Iteration;
     BuddyGroup = task.BuddyGroup;
 }
예제 #2
0
 /// <summary>
 /// Creates a task description whose task-level properties mirror those of <paramref name="task"/>.
 /// </summary>
 /// <param name="task">Source task; also passed on to the base-class constructor.</param>
 public KMeansTaskData(KMeansTaskData task) : base(task)
 {
     // Plain property-by-property copy; values are assigned as-is from the source.
     TaskID = task.TaskID;
     PartitionNumber = task.PartitionNumber;
     M = task.M;
     Centroids = task.Centroids;
     TaskStartTime = task.TaskStartTime;
     Iteration = task.Iteration;
     BuddyGroup = task.BuddyGroup;
 }
예제 #3
0
        /// <summary>
        /// Creates one <c>KMeansTaskData</c> per worker, records it in <c>tasks</c>, and enqueues it on that
        /// worker's request queue. Each message is an instruction to a worker to process a partition of the k-means data.
        /// </summary>
        /// <param name="workers">
        /// The known workers. The sequence is enumerated exactly once and snapshotted, so the partition count
        /// sent to every worker always matches the number of tasks that are actually enqueued.
        /// </param>
        public void EnqueueTasks(IEnumerable<Worker> workers)
        {
            AzureHelper.LogPerformance(() =>
            {
                // Materialize the ordered workers once. Calling workers.Count() inside the loop would re-enumerate
                // the source on every iteration (and could yield a different count for a deferred/changing source).
                // Note: The order must be the same every time, otherwise caching will not work -- the workers would
                // get a different workerNumber each time and therefore a different chunk of the points.
                // We use OrderBy on the PartitionKey to guarantee stable ordering.
                List<Worker> orderedWorkers = workers.OrderBy(w => w.PartitionKey).ToList();
                int workerCount = orderedWorkers.Count;
                int workerNumber = 0;

                // Loop through the known workers and give them each a chunk of the points.
                foreach (Worker worker in orderedWorkers)
                {
                    KMeansTaskData taskData = new KMeansTaskData(jobData, Guid.NewGuid(), workerNumber++, workerCount, Centroids.Uri, DateTime.UtcNow, IterationCount, worker.BuddyGroupID);
                    taskData.Points = Points.Uri;

                    tasks.Add(new KMeansTask(taskData));

                    AzureHelper.EnqueueMessage(AzureHelper.GetWorkerRequestQueue(worker.PartitionKey), taskData, true);
                }
            }, jobData.JobID.ToString(), methodName: "EnqueueTasks", iterationCount: IterationCount, points: Points.Uri.ToString(), centroids: Centroids.Uri.ToString(), machineID: MachineID);
        }
예제 #4
0
        /// <summary>
        /// Enqueues one message per worker. Each message is an instruction to a worker to process a partition of
        /// the k-means data; the corresponding task is also added to <c>tasks</c>.
        /// </summary>
        /// <param name="workers">
        /// The known workers. The sequence is enumerated a single time and snapshotted so that the total worker
        /// count passed to each task is consistent with the tasks actually created.
        /// </param>
        public void EnqueueTasks(IEnumerable<Worker> workers)
        {
            AzureHelper.LogPerformance(() =>
            {
                // Snapshot the workers in a stable order before looping. The previous code called workers.Count()
                // for every worker, re-enumerating the source each time.
                // Note: The order must be the same every time, otherwise caching will not work -- the workers would
                // get a different workerNumber each time and therefore a different chunk of the points.
                // We use OrderBy on the PartitionKey to guarantee stable ordering.
                List<Worker> orderedWorkers = workers.OrderBy(w => w.PartitionKey).ToList();
                int workerCount = orderedWorkers.Count;
                int workerNumber = 0;

                // Loop through the known workers and give them each a chunk of the points.
                foreach (Worker worker in orderedWorkers)
                {
                    KMeansTaskData taskData = new KMeansTaskData(jobData, Guid.NewGuid(), workerNumber++, workerCount, Centroids.Uri, DateTime.UtcNow, IterationCount, worker.BuddyGroupID);
                    taskData.Points = Points.Uri;

                    tasks.Add(new KMeansTask(taskData));

                    AzureHelper.EnqueueMessage(AzureHelper.GetWorkerRequestQueue(worker.PartitionKey), taskData, true);
                }
            }, jobData.JobID.ToString(), methodName: "EnqueueTasks", iterationCount: IterationCount, points: Points.Uri.ToString(), centroids: Centroids.Uri.ToString(), machineID: MachineID);
        }
        /// <summary>
        /// Verifies that, after calling SavePointsProcessedDataByCentroid, every entry of
        /// PointsProcessedDataByCentroid is contained in PointsProcessedDataByCentroidList.
        /// </summary>
        public void SavePointsProcessedDataByCentroidTest()
        {
            var taskData = new KMeansTaskData(Guid.NewGuid(), Guid.NewGuid(), 1, null, 2, 3, 10, 0, null, DateTime.Now, DateTime.Now, 0, null);
            var result = new KMeansTaskResult(taskData);

            // Register two entries with identical payloads under distinct, freshly generated centroid IDs.
            const int entryCount = 2;
            for (int i = 0; i < entryCount; i++)
            {
                result.PointsProcessedDataByCentroid[Guid.NewGuid()] = new PointsProcessedData
                {
                    NumPointsProcessed = 100,
                    PartialPointSum = new Point(10, -10)
                };
            }

            result.SavePointsProcessedDataByCentroid();

            foreach (var entry in result.PointsProcessedDataByCentroid)
            {
                Assert.IsTrue(result.PointsProcessedDataByCentroidList.Contains(entry));
            }
        }
        /// <summary>
        /// Verifies that a point at (1, 2) is assigned to the centroid at (0, -1) rather than the one at (10, 10).
        /// </summary>
        public void AssignClusterPointToNearestCentroidTest()
        {
            var taskData = new KMeansTaskData(Guid.NewGuid(), Guid.NewGuid(), 1, null, 2, 3, 10, 0, null, DateTime.Now, DateTime.Now, 0, null);
            var target = new KMeansTaskProcessor_Accessor(taskData);

            // Two candidate centroids; the first one is the expected match.
            target.centroids = new List<Centroid>
            {
                new Centroid { ID = Guid.NewGuid(), X = 0.0F, Y = -1.0F },
                new Centroid { ID = Guid.NewGuid(), X = 10.0F, Y = 10.0F }
            };

            // The point starts out unassigned (empty centroid ID).
            var unassignedPoint = new ClusterPoint { CentroidID = Guid.Empty, X = 1.0F, Y = 2.0F };

            var expected = new ClusterPoint
            {
                CentroidID = target.centroids[0].ID,
                X = 1.0F,
                Y = 2.0F
            };

            ClusterPointProcessingResult_Accessor actual = target.AssignClusterPointToNearestCentroid(unassignedPoint);

            Assert.AreEqual(expected.CentroidID, actual.Point.CentroidID);
        }
예제 #7
0
 /// <summary>
 /// Creates a task entry wrapping the given task data.
 /// </summary>
 /// <param name="taskData">Value stored in <c>TaskData</c>.</param>
 /// <param name="running">Initial value of <c>Running</c>; defaults to <c>true</c>.</param>
 public KMeansTask(KMeansTaskData taskData, bool running = true)
 {
     TaskData = taskData;
     Running = running;
 }
예제 #8
0
 /// <summary>
 /// Creates an empty result for the given task: no changed points and an empty per-centroid dictionary.
 /// </summary>
 /// <param name="task">The task this result belongs to; passed on to the base constructor.</param>
 public KMeansTaskResult(KMeansTaskData task) : base(task)
 {
     this.NumPointsChanged = 0;
     this.PointsProcessedDataByCentroid = new Dictionary<Guid, PointsProcessedData>();
 }
예제 #9
0
 /// <summary>
 /// Replaces <c>buddies</c> with a list of the workers reported for the task's buddy group.
 /// </summary>
 /// <param name="task">The task whose <c>BuddyGroup</c> is looked up.</param>
 private void UpdateBuddyGroup(KMeansTaskData task)
 {
     var groupMembers = AzureHelper.WorkerStatsReporter.WorkersInBuddyGroup(task.BuddyGroup);
     buddies = groupMembers.ToList();
 }
예제 #10
0
        /// <summary>
        /// Runs the given task through a <c>KMeansTaskProcessor</c> and enqueues the processor's result
        /// on the worker response queue.
        /// </summary>
        /// <param name="task">The task to process.</param>
        /// <returns>Always <c>true</c>.</returns>
        private bool ProcessNewTask(KMeansTaskData task)
        {
            System.Diagnostics.Trace.TraceInformation("[WorkerRole] ProcessNewTask(jobID={1}, taskID={0})", task.TaskID, task.JobID);

            // The buddy list is refreshed before the LogPerformance-wrapped work begins.
            UpdateBuddyGroup(task);

            AzureHelper.LogPerformance(
                () =>
                {
                    // Execute the task.
                    var processor = new KMeansTaskProcessor(task);
                    processor.Run();

                    // Save the per-centroid data on the result, then report the result back.
                    processor.TaskResult.SavePointsProcessedDataByCentroid();
                    AzureHelper.EnqueueMessage(AzureHelper.WorkerResponseQueue, processor.TaskResult);
                },
                jobID: task.JobID.ToString(),
                methodName: "ProcessNewTask",
                iterationCount: task.Iteration,
                points: task.Points.ToString(),
                centroids: task.Centroids.ToString(),
                machineID: machineID);

            return true;
        }
        /// <summary>
        /// Creates a processor for the given task and prepares a fresh <c>KMeansTaskResult</c> for it.
        /// </summary>
        /// <param name="task">The task to be processed.</param>
        public KMeansTaskProcessor(KMeansTaskData task)
        {
            // "this." is required here: the parameter has the same name as the field.
            this.task = task;
            TaskResult = new KMeansTaskResult(task);
        }
        /// <summary>
        /// Stores the task and initializes <c>TaskResult</c> with a new result object built from it.
        /// </summary>
        /// <param name="task">The task this processor works on.</param>
        public KMeansTaskProcessor(KMeansTaskData task)
        {
            // The field shares its name with the parameter, hence the explicit qualifier.
            this.task = task;
            TaskResult = new KMeansTaskResult(task);
        }
예제 #13
0
 /// <summary>
 /// Initializes a task entry from its task data and running flag.
 /// </summary>
 /// <param name="taskData">Assigned to <c>TaskData</c>.</param>
 /// <param name="running">Assigned to <c>Running</c>; <c>true</c> when omitted.</param>
 public KMeansTask(KMeansTaskData taskData, bool running = true)
 {
     TaskData = taskData;
     Running = running;
 }
예제 #14
0
 /// <summary>
 /// Initializes a result for <paramref name="task"/> with zero changed points and an empty
 /// <c>PointsProcessedDataByCentroid</c> dictionary.
 /// </summary>
 /// <param name="task">The originating task; forwarded to the base constructor.</param>
 public KMeansTaskResult(KMeansTaskData task) : base(task)
 {
     this.NumPointsChanged = 0;
     this.PointsProcessedDataByCentroid = new Dictionary<Guid, PointsProcessedData>();
 }
예제 #15
0
        /// <summary>
        /// Feeds a hand-built worker result into ProcessWorkerResponse and checks that the first point read back
        /// from the job's Points matches the point that was uploaded as a block beforehand.
        /// </summary>
        public void ProcessWorkerResponseTest()
        {
            var jobData = new KMeansJobData(Guid.NewGuid(), 4, null, 2, 10, DateTime.Now);
            var target = new KMeansJob_Accessor(jobData, "server");
            target.InitializeStorage();

            // Upload a block with an arbitrary ClusterPoint, so we can verify it gets copied
            var arbitraryPoint = new ClusterPoint(1, 2, Guid.NewGuid());
            string cacheFilePath = Environment.GetEnvironmentVariable("TEMP") + @"\" + Guid.NewGuid().ToString();
            List<string> blockList;
            using (var blockWriter = new ObjectCachedBlockWriter<ClusterPoint>(target.Points, p => p.ToByteArray(), ClusterPoint.Size, cacheFilePath))
            {
                blockWriter.Write(arbitraryPoint);
                blockWriter.FlushBlock();
                blockList = blockWriter.BlockList;
            }

            var taskData = new KMeansTaskData(jobData, Guid.NewGuid(), 0, 1, target.Centroids.Uri, DateTime.Now, 0, null);

            // Make the job track exactly this one task.
            target.tasks.Clear();
            target.tasks.Add(new KMeansTask(taskData));

            var taskResult = new KMeansTaskResult(taskData);

            // Serialize the block list into a new blob and point the result at it.
            CloudBlob blockListBlob = AzureHelper.CreateBlob(jobData.JobID.ToString(), Guid.NewGuid().ToString());
            using (Stream blobStream = blockListBlob.OpenWrite())
            {
                var formatter = new BinaryFormatter();
                formatter.Serialize(blobStream, blockList);
            }
            taskResult.PointsBlockListBlob = blockListBlob.Uri;
            taskResult.NumPointsChanged = 2;

            Guid centroidID = Guid.NewGuid();
            var processedByCentroid = new Dictionary<Guid, PointsProcessedData>();
            processedByCentroid.Add(centroidID, new PointsProcessedData
            {
                NumPointsProcessed = 2,
                PartialPointSum = new Point(1, 2)
            });
            taskResult.PointsProcessedDataByCentroid = processedByCentroid;

            target.ProcessWorkerResponse(taskResult, new List<Worker>());

            // Verify that the first ClusterPoint in Points is indeed equal to arbitraryPoint
            using (var pointsReader = new ObjectStreamReader<ClusterPoint>(target.Points, ClusterPoint.FromByteArray, ClusterPoint.Size))
            {
                ClusterPoint firstPoint = pointsReader.First();
                Assert.AreEqual(arbitraryPoint.X, firstPoint.X);
                Assert.AreEqual(arbitraryPoint.Y, firstPoint.Y);
                Assert.AreEqual(arbitraryPoint.CentroidID, firstPoint.CentroidID);
            }
        }