/// <summary>
/// Copy constructor: creates a task description carrying the same values as <paramref name="task"/>.
/// </summary>
/// <param name="task">
/// The task to copy from. It is also forwarded to the base-class constructor.
/// </param>
/// <remarks>
/// Each property is assigned as-is from the source task; no cloning is performed here.
/// </remarks>
public KMeansTaskData(KMeansTaskData task)
    : base(task)
{
    TaskID = task.TaskID;
    PartitionNumber = task.PartitionNumber;
    M = task.M;
    Centroids = task.Centroids;
    TaskStartTime = task.TaskStartTime;
    Iteration = task.Iteration;
    BuddyGroup = task.BuddyGroup;
}
/// <summary>
/// Enqueues one message per worker. Each message is an instruction to a worker to process
/// a partition of the k-means data, and is placed on that worker's own request queue.
/// </summary>
/// <param name="workers">
/// The workers to hand tasks to. The sequence is enumerated exactly once; the resulting
/// snapshot determines both the partition numbers and the total count sent with every task.
/// </param>
public void EnqueueTasks(IEnumerable<Worker> workers)
{
    AzureHelper.LogPerformance(() =>
    {
        // Note: the workers must be visited in the same order every time, otherwise caching will not work --
        // the workers would get a different workerNumber each time and therefore a different chunk of the points.
        // We use OrderBy on the PartitionKey to guarantee stable ordering.
        //
        // The ordered sequence is materialized once so that (a) a lazy or changing source cannot
        // yield a worker count that disagrees with the set of workers actually iterated, and
        // (b) the count is not recomputed on every loop iteration.
        List<Worker> orderedWorkers = workers.OrderBy(w => w.PartitionKey).ToList();
        int workerCount = orderedWorkers.Count;
        int workerNumber = 0;

        // Loop through the known workers and give them each a chunk of the points.
        foreach (Worker worker in orderedWorkers)
        {
            KMeansTaskData taskData = new KMeansTaskData(jobData, Guid.NewGuid(), workerNumber++, workerCount, Centroids.Uri, DateTime.UtcNow, IterationCount, worker.BuddyGroupID);
            taskData.Points = Points.Uri;

            tasks.Add(new KMeansTask(taskData));

            AzureHelper.EnqueueMessage(AzureHelper.GetWorkerRequestQueue(worker.PartitionKey), taskData, true);
        }
    },
    jobData.JobID.ToString(),
    methodName: "EnqueueTasks",
    iterationCount: IterationCount,
    points: Points.Uri.ToString(),
    centroids: Centroids.Uri.ToString(),
    machineID: MachineID);
}
/// <summary>
/// Enqueues one message per worker. Each message is an instruction to a worker to process
/// a partition of the k-means data, and is placed on that worker's own request queue.
/// </summary>
/// <param name="workers">
/// The workers to hand tasks to. The sequence is enumerated exactly once; the resulting
/// snapshot determines both the partition numbers and the total count sent with every task.
/// </param>
public void EnqueueTasks(IEnumerable<Worker> workers)
{
    AzureHelper.LogPerformance(() =>
    {
        // Note: the workers must be visited in the same order every time, otherwise caching will not work --
        // the workers would get a different workerNumber each time and therefore a different chunk of the points.
        // We use OrderBy on the PartitionKey to guarantee stable ordering.
        //
        // The ordered sequence is materialized once so that (a) a lazy or changing source cannot
        // yield a worker count that disagrees with the set of workers actually iterated, and
        // (b) the count is not recomputed on every loop iteration.
        List<Worker> orderedWorkers = workers.OrderBy(w => w.PartitionKey).ToList();
        int workerCount = orderedWorkers.Count;
        int workerNumber = 0;

        // Loop through the known workers and give them each a chunk of the points.
        foreach (Worker worker in orderedWorkers)
        {
            KMeansTaskData taskData = new KMeansTaskData(jobData, Guid.NewGuid(), workerNumber++, workerCount, Centroids.Uri, DateTime.UtcNow, IterationCount, worker.BuddyGroupID);
            taskData.Points = Points.Uri;

            tasks.Add(new KMeansTask(taskData));

            AzureHelper.EnqueueMessage(AzureHelper.GetWorkerRequestQueue(worker.PartitionKey), taskData, true);
        }
    },
    jobData.JobID.ToString(),
    methodName: "EnqueueTasks",
    iterationCount: IterationCount,
    points: Points.Uri.ToString(),
    centroids: Centroids.Uri.ToString(),
    machineID: MachineID);
}
/// <summary>
/// Checks that, after calling SavePointsProcessedDataByCentroid, every entry of
/// PointsProcessedDataByCentroid is contained in PointsProcessedDataByCentroidList.
/// </summary>
public void SavePointsProcessedDataByCentroidTest()
{
    KMeansTaskResult target = new KMeansTaskResult(
        new KMeansTaskData(Guid.NewGuid(), Guid.NewGuid(), 1, null, 2, 3, 10, 0, null, DateTime.Now, DateTime.Now, 0, null));

    // Two entries under distinct random keys, each carrying the same payload values.
    for (int entry = 0; entry < 2; entry++)
    {
        target.PointsProcessedDataByCentroid[Guid.NewGuid()] = new PointsProcessedData
        {
            NumPointsProcessed = 100,
            PartialPointSum = new Point(10, -10)
        };
    }

    target.SavePointsProcessedDataByCentroid();

    foreach (KeyValuePair<Guid, PointsProcessedData> entry in target.PointsProcessedDataByCentroid)
    {
        bool savedToList = target.PointsProcessedDataByCentroidList.Contains(entry);
        Assert.IsTrue(savedToList);
    }
}
/// <summary>
/// Checks that AssignClusterPointToNearestCentroid assigns the point (1, 2) to the first of two
/// centroids, (0, -1), rather than to the second, (10, 10).
/// </summary>
public void AssignClusterPointToNearestCentroidTest()
{
    KMeansTaskData taskData = new KMeansTaskData(Guid.NewGuid(), Guid.NewGuid(), 1, null, 2, 3, 10, 0, null, DateTime.Now, DateTime.Now, 0, null);
    KMeansTaskProcessor_Accessor target = new KMeansTaskProcessor_Accessor(taskData);

    // Index 0 is the centroid closest to the point under test.
    target.centroids = new List<Centroid>
    {
        new Centroid { ID = Guid.NewGuid(), X = 0.0F, Y = -1.0F },
        new Centroid { ID = Guid.NewGuid(), X = 10.0F, Y = 10.0F }
    };

    ClusterPoint unassignedPoint = new ClusterPoint { CentroidID = Guid.Empty, X = 1.0F, Y = 2.0F };
    ClusterPoint expectedPoint = new ClusterPoint { CentroidID = target.centroids[0].ID, X = 1.0F, Y = 2.0F };

    ClusterPointProcessingResult_Accessor actual = target.AssignClusterPointToNearestCentroid(unassignedPoint);

    Assert.AreEqual(expectedPoint.CentroidID, actual.Point.CentroidID);
}
/// <summary>
/// Creates a task record wrapping the given task data.
/// </summary>
/// <param name="taskData">Stored in <c>TaskData</c>.</param>
/// <param name="running">Stored in <c>Running</c>; defaults to <c>true</c>.</param>
public KMeansTask(KMeansTaskData taskData, bool running = true)
{
    TaskData = taskData;
    Running = running;
}
/// <summary>
/// Creates an empty result for the given task: no points changed and no per-centroid data yet.
/// </summary>
/// <param name="task">The task this result belongs to; forwarded to the base-class constructor.</param>
public KMeansTaskResult(KMeansTaskData task)
    : base(task)
{
    NumPointsChanged = 0;
    PointsProcessedDataByCentroid = new Dictionary<Guid, PointsProcessedData>();
}
/// <summary>
/// Replaces <c>buddies</c> with a fresh list of the workers that the stats reporter returns
/// for the task's buddy group.
/// </summary>
/// <param name="task">The task whose <c>BuddyGroup</c> is looked up.</param>
private void UpdateBuddyGroup(KMeansTaskData task)
{
    var groupMembers = AzureHelper.WorkerStatsReporter.WorkersInBuddyGroup(task.BuddyGroup);
    buddies = groupMembers.ToList();
}
/// <summary>
/// Handles a newly received task: refreshes the buddy group, runs a task processor over the task,
/// and enqueues the processor's result on the worker response queue. The processing step is
/// wrapped in <c>AzureHelper.LogPerformance</c>.
/// </summary>
/// <param name="task">The task to process.</param>
/// <returns>Always <c>true</c>.</returns>
private bool ProcessNewTask(KMeansTaskData task)
{
    // Placeholder indices are intentionally {1} then {0}: they match the argument order below.
    System.Diagnostics.Trace.TraceInformation(
        "[WorkerRole] ProcessNewTask(jobID={1}, taskID={0})",
        task.TaskID,
        task.JobID);

    UpdateBuddyGroup(task);

    AzureHelper.LogPerformance(() =>
    {
        // Process the taskData
        KMeansTaskProcessor processor = new KMeansTaskProcessor(task);
        processor.Run();

        // Send the result back
        processor.TaskResult.SavePointsProcessedDataByCentroid();
        AzureHelper.EnqueueMessage(AzureHelper.WorkerResponseQueue, processor.TaskResult);
    },
    jobID: task.JobID.ToString(),
    methodName: "ProcessNewTask",
    iterationCount: task.Iteration,
    points: task.Points.ToString(),
    centroids: task.Centroids.ToString(),
    machineID: machineID);

    return true;
}
/// <summary>
/// Creates a processor for the given task and prepares a new <c>KMeansTaskResult</c> for it.
/// </summary>
/// <param name="task">The task to process; kept in the <c>task</c> field.</param>
public KMeansTaskProcessor(KMeansTaskData task)
{
    // "this." is required here: the field and the parameter share the same name.
    this.task = task;
    TaskResult = new KMeansTaskResult(task);
}
/// <summary>
/// Creates an empty result for the given task: no points changed and no per-centroid data yet.
/// </summary>
/// <param name="task">The task this result belongs to; forwarded to the base-class constructor.</param>
public KMeansTaskResult(KMeansTaskData task)
    : base(task)
{
    NumPointsChanged = 0;
    PointsProcessedDataByCentroid = new Dictionary<Guid, PointsProcessedData>();
}
/// <summary>
/// Writes a single known ClusterPoint as a block, reports that block list through a
/// KMeansTaskResult, calls ProcessWorkerResponse, and then checks that the first point read
/// back from the job's Points equals the point that was written.
/// </summary>
public void ProcessWorkerResponseTest()
{
    KMeansJobData jobData = new KMeansJobData(Guid.NewGuid(), 4, null, 2, 10, DateTime.Now);
    KMeansJob_Accessor target = new KMeansJob_Accessor(jobData, "server");
    target.InitializeStorage();

    // Upload a block with an arbitrary ClusterPoint, so we can verify it gets copied
    ClusterPoint arbitraryPoint = new ClusterPoint(1, 2, Guid.NewGuid());
    string tempPath = Environment.GetEnvironmentVariable("TEMP") + @"\" + Guid.NewGuid().ToString();
    List<string> uploadedBlocks;
    using (ObjectCachedBlockWriter<ClusterPoint> blockWriter = new ObjectCachedBlockWriter<ClusterPoint>(
        target.Points,
        p => p.ToByteArray(),
        ClusterPoint.Size,
        tempPath))
    {
        blockWriter.Write(arbitraryPoint);
        blockWriter.FlushBlock();
        uploadedBlocks = blockWriter.BlockList;
    }

    // Register exactly one task with the job, and build the result that answers it.
    KMeansTaskData taskData = new KMeansTaskData(jobData, Guid.NewGuid(), 0, 1, target.Centroids.Uri, DateTime.Now, 0, null);
    target.tasks.Clear();
    target.tasks.Add(new KMeansTask(taskData));
    KMeansTaskResult taskResult = new KMeansTaskResult(taskData);

    // Serialize the block list into its own blob; the result refers to it by URI.
    CloudBlob blockListBlob = AzureHelper.CreateBlob(jobData.JobID.ToString(), Guid.NewGuid().ToString());
    using (Stream blobStream = blockListBlob.OpenWrite())
    {
        new BinaryFormatter().Serialize(blobStream, uploadedBlocks);
    }

    taskResult.PointsBlockListBlob = blockListBlob.Uri;
    taskResult.NumPointsChanged = 2;
    Guid centroidID = Guid.NewGuid();
    taskResult.PointsProcessedDataByCentroid = new Dictionary<Guid, PointsProcessedData>
    {
        {
            centroidID,
            new PointsProcessedData { NumPointsProcessed = 2, PartialPointSum = new Point(1, 2) }
        }
    };

    target.ProcessWorkerResponse(taskResult, new List<Worker>());

    // Verify that the first ClusterPoint in Points is indeed equal to arbitraryPoint
    using (ObjectStreamReader<ClusterPoint> pointsReader = new ObjectStreamReader<ClusterPoint>(target.Points, ClusterPoint.FromByteArray, ClusterPoint.Size))
    {
        ClusterPoint firstPoint = pointsReader.First();

        Assert.AreEqual(arbitraryPoint.X, firstPoint.X);
        Assert.AreEqual(arbitraryPoint.Y, firstPoint.Y);
        Assert.AreEqual(arbitraryPoint.CentroidID, firstPoint.CentroidID);
    }
}