/// <summary>
/// Folds one task result into the running totals: adds its changed-point count to
/// <c>TotalNumPointsChanged</c> and hands every per-centroid entry to
/// <c>AddPointsProcessedDataForCentroid</c>.
/// </summary>
/// <param name="taskResult">The task result whose counts and per-centroid data are accumulated.</param>
private void AddDataFromTaskResult(KMeansTaskResult taskResult)
{
    TotalNumPointsChanged += taskResult.NumPointsChanged;

    foreach (KeyValuePair<Guid, PointsProcessedData> centroidEntry in taskResult.PointsProcessedDataByCentroid)
    {
        Guid centroidID = centroidEntry.Key;
        PointsProcessedData processedData = centroidEntry.Value;
        AddPointsProcessedDataForCentroid(centroidID, processedData);
    }
}
/// <summary>
/// Populates a task result with two per-centroid entries, calls
/// <c>SavePointsProcessedDataByCentroid</c>, and asserts that every dictionary entry is
/// contained in <c>PointsProcessedDataByCentroidList</c> afterwards.
/// </summary>
public void SavePointsProcessedDataByCentroidTest()
{
    KMeansTaskData taskData = new KMeansTaskData(Guid.NewGuid(), Guid.NewGuid(), 1, null, 2, 3, 10, 0, null, DateTime.Now, DateTime.Now, 0, null);
    KMeansTaskResult target = new KMeansTaskResult(taskData);

    // Two entries with identical payload values, each a distinct instance under a fresh centroid ID.
    for (int entryIndex = 0; entryIndex < 2; entryIndex++)
    {
        PointsProcessedData processedData = new PointsProcessedData();
        processedData.NumPointsProcessed = 100;
        processedData.PartialPointSum = new Point(10, -10);
        target.PointsProcessedDataByCentroid[Guid.NewGuid()] = processedData;
    }

    target.SavePointsProcessedDataByCentroid();

    foreach (KeyValuePair<Guid, PointsProcessedData> expectedEntry in target.PointsProcessedDataByCentroid)
    {
        bool wasSaved = target.PointsProcessedDataByCentroidList.Contains(expectedEntry);
        Assert.IsTrue(wasSaved);
    }
}
/// <summary>
/// Handles a worker's TaskResult from a running k-means job. Adds up the partial sums from the TaskResult.
/// </summary>
/// <param name="taskResult">The result reported by a worker for one of the tasks tracked here.</param>
/// <param name="workers">Passed through to <c>NextIteration</c> when this result leaves no task running.</param>
/// <returns>
/// Always true. A result that does not match a currently running task (for example a duplicate
/// queue message) is ignored but still reported as handled.
/// </returns>
public bool ProcessWorkerResponse(KMeansTaskResult taskResult, IEnumerable<Worker> workers)
{
    // Make sure we're actually still waiting for a result for this task.
    // If not, this might be a duplicate queue message.
    if (!TaskResultMatchesRunningTask(taskResult))
    {
        return true;
    }

    AzureHelper.LogPerformance(() =>
    {
        KMeansTask task = TaskResultWithTaskID(taskResult.TaskID);
        task.Running = false; // The task has returned a response, which means that it has stopped running

        // Add the worker's updated points blocks
        if (taskResult.PointsBlockListBlob != null)
        {
            using (Stream stream = AzureHelper.GetBlob(taskResult.PointsBlockListBlob).OpenRead())
            {
                // NOTE(review): BinaryFormatter is unsafe on data that is not fully trusted;
                // confirm this blob can only be written by our own workers, or migrate the format.
                BinaryFormatter bf = new BinaryFormatter();

                // Direct cast rather than `as`: a payload of the wrong type now fails here with an
                // InvalidCastException instead of surfacing later as a null argument to AddRange.
                List<string> pointsBlockList = (List<string>)bf.Deserialize(stream);
                pointsBlockIDs.AddRange(pointsBlockList);
            }
        }

        // Copy out and integrate the data from the worker response
        AddDataFromTaskResult(taskResult);
    },
    jobData.JobID.ToString(),
    methodName: "ProcessWorkerResponse",
    iterationCount: IterationCount,
    points: Points.Uri.ToString(),
    centroids: Centroids.Uri.ToString(),
    machineID: MachineID);

    // If this is the last worker to return, this iteration is done and we should start the next one
    if (NoMoreRunningTasks())
    {
        NextIteration(workers);
    }

    return true;
}
/// <summary>
/// Routes a worker's k-means task result to the job it belongs to.
/// </summary>
/// <param name="taskResult">The worker response; its per-centroid data is restored before routing.</param>
/// <returns>
/// False when <c>jobs</c> holds no entry for the result's JobID; otherwise whatever the job's own
/// <c>ProcessWorkerResponse</c> returns.
/// </returns>
private bool ProcessWorkerResponse(KMeansTaskResult taskResult)
{
    taskResult.RestorePointsProcessedDataByCentroid();

    var jobID = taskResult.JobID;

    // Make sure the job belongs to this server
    if (!jobs.ContainsKey(jobID))
    {
        return false;
    }

    var job = jobs[jobID];

    System.Diagnostics.Trace.TraceInformation(
        "[ServerRole] ProcessWorkerResponse(jobID={0}, taskID={1}, iterationCount={2})",
        jobID,
        taskResult.TaskID,
        job.IterationCount);

    return job.ProcessWorkerResponse(taskResult, workers.Values);
}
/// <summary>
/// Creates a processor for the given task and initializes <c>TaskResult</c> with a new
/// <c>KMeansTaskResult</c> built from that same task.
/// </summary>
/// <param name="task">The task data this processor stores and builds its result from.</param>
public KMeansTaskProcessor(KMeansTaskData task)
{
    this.task = task;
    TaskResult = new KMeansTaskResult(task);
}
/// <summary>
/// Reports whether the task looked up by the result's TaskID exists and is flagged as running.
/// </summary>
/// <param name="taskResult">The task result whose TaskID is used for the lookup.</param>
/// <returns>True only when a task was found and its <c>Running</c> flag is set.</returns>
private bool TaskResultMatchesRunningTask(KMeansTaskResult taskResult)
{
    KMeansTask matchingTask = TaskResultWithTaskID(taskResult.TaskID);
    if (matchingTask == null)
    {
        // No task found for this ID.
        return false;
    }

    return matchingTask.Running;
}
/// <summary>
/// Accumulates a task result: increases <c>TotalNumPointsChanged</c> by the result's
/// <c>NumPointsChanged</c>, then passes each centroid ID and its processed-points data to
/// <c>AddPointsProcessedDataForCentroid</c>.
/// </summary>
/// <param name="taskResult">The task result to accumulate.</param>
private void AddDataFromTaskResult(KMeansTaskResult taskResult)
{
    TotalNumPointsChanged += taskResult.NumPointsChanged;

    foreach (KeyValuePair<Guid, PointsProcessedData> perCentroid in taskResult.PointsProcessedDataByCentroid)
    {
        Guid centroidKey = perCentroid.Key;
        PointsProcessedData partialData = perCentroid.Value;
        AddPointsProcessedDataForCentroid(centroidKey, partialData);
    }
}
/// <summary>
/// Handles a worker's TaskResult from a running k-means job. Adds up the partial sums from the TaskResult.
/// </summary>
/// <param name="taskResult">The result reported by a worker for one of the tasks tracked here.</param>
/// <param name="workers">Passed through to <c>NextIteration</c> when this result leaves no task running.</param>
/// <returns>
/// Always true. A result that does not match a currently running task (for example a duplicate
/// queue message) is ignored but still reported as handled.
/// </returns>
public bool ProcessWorkerResponse(KMeansTaskResult taskResult, IEnumerable<Worker> workers)
{
    // Make sure we're actually still waiting for a result for this task.
    // If not, this might be a duplicate queue message.
    if (!TaskResultMatchesRunningTask(taskResult))
    {
        return true;
    }

    AzureHelper.LogPerformance(() =>
    {
        KMeansTask task = TaskResultWithTaskID(taskResult.TaskID);
        task.Running = false; // The task has returned a response, which means that it has stopped running

        // Add the worker's updated points blocks
        if (taskResult.PointsBlockListBlob != null)
        {
            using (Stream stream = AzureHelper.GetBlob(taskResult.PointsBlockListBlob).OpenRead())
            {
                // NOTE(review): BinaryFormatter is unsafe on data that is not fully trusted;
                // confirm this blob can only be written by our own workers, or migrate the format.
                BinaryFormatter bf = new BinaryFormatter();

                // Direct cast rather than `as`: a payload of the wrong type now fails here with an
                // InvalidCastException instead of surfacing later as a null argument to AddRange.
                List<string> pointsBlockList = (List<string>)bf.Deserialize(stream);
                pointsBlockIDs.AddRange(pointsBlockList);
            }
        }

        // Copy out and integrate the data from the worker response
        AddDataFromTaskResult(taskResult);
    },
    jobData.JobID.ToString(),
    methodName: "ProcessWorkerResponse",
    iterationCount: IterationCount,
    points: Points.Uri.ToString(),
    centroids: Centroids.Uri.ToString(),
    machineID: MachineID);

    // If this is the last worker to return, this iteration is done and we should start the next one
    if (NoMoreRunningTasks())
    {
        NextIteration(workers);
    }

    return true;
}
/// <summary>
/// Writes one known ClusterPoint as a block, serializes the resulting block list to a blob,
/// feeds a task result referencing that blob to <c>ProcessWorkerResponse</c>, and asserts that
/// the first point read back from <c>target.Points</c> equals the known point.
/// </summary>
public void ProcessWorkerResponseTest()
{
    KMeansJobData jobData = new KMeansJobData(Guid.NewGuid(), 4, null, 2, 10, DateTime.Now);
    KMeansJob_Accessor target = new KMeansJob_Accessor(jobData, "server");
    target.InitializeStorage();

    // Upload a block with an arbitrary ClusterPoint, so we can verify it gets copied
    ClusterPoint expectedPoint = new ClusterPoint(1, 2, Guid.NewGuid());
    string tempCachePath = Environment.GetEnvironmentVariable("TEMP") + @"\" + Guid.NewGuid().ToString();
    List<string> writtenBlockIDs;
    using (var blockWriter = new ObjectCachedBlockWriter<ClusterPoint>(
        target.Points,
        clusterPoint => clusterPoint.ToByteArray(),
        ClusterPoint.Size,
        tempCachePath))
    {
        blockWriter.Write(expectedPoint);
        blockWriter.FlushBlock();
        writtenBlockIDs = blockWriter.BlockList;
    }

    // Replace the job's task list with the single task this result will answer.
    KMeansTaskData taskData = new KMeansTaskData(jobData, Guid.NewGuid(), 0, 1, target.Centroids.Uri, DateTime.Now, 0, null);
    target.tasks.Clear();
    target.tasks.Add(new KMeansTask(taskData));

    KMeansTaskResult taskResult = new KMeansTaskResult(taskData);

    // Serialize the block list into a new blob and reference it from the task result.
    CloudBlob blockListBlob = AzureHelper.CreateBlob(jobData.JobID.ToString(), Guid.NewGuid().ToString());
    using (Stream blobStream = blockListBlob.OpenWrite())
    {
        BinaryFormatter formatter = new BinaryFormatter();
        formatter.Serialize(blobStream, writtenBlockIDs);
    }
    taskResult.PointsBlockListBlob = blockListBlob.Uri;
    taskResult.NumPointsChanged = 2;

    Guid centroidID = Guid.NewGuid();
    var processedByCentroid = new Dictionary<Guid, PointsProcessedData>();
    processedByCentroid.Add(centroidID, new PointsProcessedData()
    {
        NumPointsProcessed = 2,
        PartialPointSum = new Point(1, 2)
    });
    taskResult.PointsProcessedDataByCentroid = processedByCentroid;

    target.ProcessWorkerResponse(taskResult, new List<Worker>());

    // Verify that the first ClusterPoint in Points is indeed equal to the one written above
    using (var pointsReader = new ObjectStreamReader<ClusterPoint>(target.Points, ClusterPoint.FromByteArray, ClusterPoint.Size))
    {
        ClusterPoint actualPoint = pointsReader.First();
        Assert.AreEqual(expectedPoint.X, actualPoint.X);
        Assert.AreEqual(expectedPoint.Y, actualPoint.Y);
        Assert.AreEqual(expectedPoint.CentroidID, actualPoint.CentroidID);
    }
}
/// <summary>
/// Drives a two-worker job through <c>jobData.MaxIterationCount</c> rounds: each round checks the
/// outstanding worker requests, then answers every task with a result reporting one changed point.
/// </summary>
public void MultiIterationJobTest()
{
    KMeansJobData jobData = new KMeansJobData(Guid.NewGuid(), 4, null, 2, 2, DateTime.Now);

    List<Worker> workers = new List<Worker>();
    workers.Add(new Worker("a", "", 1));
    workers.Add(new Worker("b", "", 1));

    KMeansJob_Accessor job = new KMeansJob_Accessor(jobData, "server");

    // First iteration
    job.InitializeStorage();
    job.EnqueueTasks(workers);

    for (int iteration = 0; iteration < jobData.MaxIterationCount; iteration++)
    {
        var runningTaskData = job.tasks
            .Where(runningTask => runningTask.Running)
            .Select(runningTask => runningTask.TaskData);
        CheckWorkerRequests(job, runningTaskData, workers.Count, job.Points);

        // Create the worker results up front, before any of them is sent to the job
        List<KMeansTaskResult> results = job.tasks
            .Select(pendingTask => new KMeansTaskResult(pendingTask.TaskData) { NumPointsChanged = 1 })
            .ToList();

        foreach (KMeansTaskResult workerResult in results)
        {
            job.ProcessWorkerResponse(workerResult, workers);
        }
    }
}
/// <summary>
/// Checks that the result's TaskID resolves to a task and that this task is still marked running.
/// </summary>
/// <param name="taskResult">The task result whose TaskID is looked up.</param>
/// <returns>True when a task was found and its <c>Running</c> flag is set; false otherwise.</returns>
private bool TaskResultMatchesRunningTask(KMeansTaskResult taskResult)
{
    KMeansTask candidate = TaskResultWithTaskID(taskResult.TaskID);
    bool taskFound = candidate != null;

    // Short-circuit keeps Running from being read when no task was found.
    return taskFound && candidate.Running;
}