Example #1
0
 /// <summary>
 /// Sums the given TaskResult's points processed data with the totals.
 /// </summary>
 /// <param name="TaskResult"></param>
 private void AddDataFromTaskResult(KMeansTaskResult taskResult)
 {
     TotalNumPointsChanged += taskResult.NumPointsChanged;
     foreach (KeyValuePair <Guid, PointsProcessedData> pointsProcessedDataForCentroid in taskResult.PointsProcessedDataByCentroid)
     {
         AddPointsProcessedDataForCentroid(pointsProcessedDataForCentroid.Key, pointsProcessedDataForCentroid.Value);
     }
 }
        public void SavePointsProcessedDataByCentroidTest()
        {
            KMeansTaskData task = new KMeansTaskData(Guid.NewGuid(), Guid.NewGuid(), 1, null, 2, 3, 10, 0, null, DateTime.Now, DateTime.Now, 0, null);
            KMeansTaskResult target = new KMeansTaskResult(task);
            target.PointsProcessedDataByCentroid[Guid.NewGuid()] = new PointsProcessedData()
            {
                NumPointsProcessed = 100,
                PartialPointSum = new Point(10, -10)
            };
            target.PointsProcessedDataByCentroid[Guid.NewGuid()] = new PointsProcessedData()
            {
                NumPointsProcessed = 100,
                PartialPointSum = new Point(10, -10)
            };
            target.SavePointsProcessedDataByCentroid();

            foreach (KeyValuePair<Guid, PointsProcessedData> pair in target.PointsProcessedDataByCentroid)
            {
                Assert.IsTrue(target.PointsProcessedDataByCentroidList.Contains(pair));
            }
        }
Example #3
0
        /// <summary>
        /// Handles a worker's TaskResult from a running k-means job. Adds up the partial sums from the TaskResult.
        /// </summary>
        /// <param name="message"></param>
        /// <returns>False if the given taskData result has already been counted, true otherwise.</returns>
        public bool ProcessWorkerResponse(KMeansTaskResult taskResult, IEnumerable <Worker> workers)
        {
            // Make sure we're actually still waiting for a result for this taskData
            // If not, this might be a duplicate queue message
            if (!TaskResultMatchesRunningTask(taskResult))
            {
                return(true);
            }

            AzureHelper.LogPerformance(() =>
            {
                KMeansTask task = TaskResultWithTaskID(taskResult.TaskID);
                task.Running    = false; // The task has returned a response, which means that it has stopped running

                // Add the worker's updated points blocks
                if (taskResult.PointsBlockListBlob != null)
                {
                    using (Stream stream = AzureHelper.GetBlob(taskResult.PointsBlockListBlob).OpenRead())
                    {
                        BinaryFormatter bf            = new BinaryFormatter();
                        List <string> pointsBlockList = bf.Deserialize(stream) as List <string>;
                        pointsBlockIDs.AddRange(pointsBlockList);
                    }
                }

                // Copy out and integrate the data from the worker response
                AddDataFromTaskResult(taskResult);
            }, jobData.JobID.ToString(), methodName: "ProcessWorkerResponse", iterationCount: IterationCount, points: Points.Uri.ToString(), centroids: Centroids.Uri.ToString(), machineID: MachineID);

            // If this is the last worker to return, this iteration is done and we should start the next one
            if (NoMoreRunningTasks())
            {
                NextIteration(workers);
            }

            return(true);
        }
Example #4
0
        /// <summary>
        /// Handles a worker response as part of a running k-means job. Looks up the appropriate job and passes the worker's response to it.
        /// </summary>
        /// <param name="message">The worker response. Must be of type KMeansTaskResult.</param>
        private bool ProcessWorkerResponse(KMeansTaskResult taskResult)
        {
            taskResult.RestorePointsProcessedDataByCentroid();

            // Make sure the job belongs to this server
            if (!jobs.ContainsKey(taskResult.JobID))
                return false;

            System.Diagnostics.Trace.TraceInformation("[ServerRole] ProcessWorkerResponse(jobID={0}, taskID={1}, iterationCount={2})", taskResult.JobID, taskResult.TaskID, jobs[taskResult.JobID].IterationCount);

            return jobs[taskResult.JobID].ProcessWorkerResponse(taskResult, workers.Values);
        }
        public KMeansTaskProcessor(KMeansTaskData task)
        {
            this.task = task;

            this.TaskResult = new KMeansTaskResult(task);
        }
Example #6
0
 private bool TaskResultMatchesRunningTask(KMeansTaskResult taskResult)
 {
     KMeansTask task = TaskResultWithTaskID(taskResult.TaskID);
     return task != null && task.Running;
 }
Example #7
0
 /// <summary>
 /// Sums the given TaskResult's points processed data with the totals.
 /// </summary>
 /// <param name="TaskResult"></param>
 private void AddDataFromTaskResult(KMeansTaskResult taskResult)
 {
     TotalNumPointsChanged += taskResult.NumPointsChanged;
     foreach (KeyValuePair<Guid, PointsProcessedData> pointsProcessedDataForCentroid in taskResult.PointsProcessedDataByCentroid)
     {
         AddPointsProcessedDataForCentroid(pointsProcessedDataForCentroid.Key, pointsProcessedDataForCentroid.Value);
     }
 }
Example #8
0
        /// <summary>
        /// Handles a worker's TaskResult from a running k-means job. Adds up the partial sums from the TaskResult.
        /// </summary>
        /// <param name="message"></param>
        /// <returns>False if the given taskData result has already been counted, true otherwise.</returns>
        public bool ProcessWorkerResponse(KMeansTaskResult taskResult, IEnumerable<Worker> workers)
        {
            // Make sure we're actually still waiting for a result for this taskData
            // If not, this might be a duplicate queue message
            if (!TaskResultMatchesRunningTask(taskResult))
                return true;

            AzureHelper.LogPerformance(() =>
            {
                KMeansTask task = TaskResultWithTaskID(taskResult.TaskID);
                task.Running = false; // The task has returned a response, which means that it has stopped running

                // Add the worker's updated points blocks
                if (taskResult.PointsBlockListBlob != null)
                {
                    using (Stream stream = AzureHelper.GetBlob(taskResult.PointsBlockListBlob).OpenRead())
                    {
                        BinaryFormatter bf = new BinaryFormatter();
                        List<string> pointsBlockList = bf.Deserialize(stream) as List<string>;
                        pointsBlockIDs.AddRange(pointsBlockList);
                    }
                }

                // Copy out and integrate the data from the worker response
                AddDataFromTaskResult(taskResult);
            }, jobData.JobID.ToString(), methodName: "ProcessWorkerResponse", iterationCount: IterationCount, points: Points.Uri.ToString(), centroids: Centroids.Uri.ToString(), machineID: MachineID);

            // If this is the last worker to return, this iteration is done and we should start the next one
            if (NoMoreRunningTasks())
            {
                NextIteration(workers);
            }

            return true;
        }
        public void ProcessWorkerResponseTest()
        {
            KMeansJobData jobData = new KMeansJobData(Guid.NewGuid(), 4, null, 2, 10, DateTime.Now);
            KMeansJob_Accessor target = new KMeansJob_Accessor(jobData, "server");
            target.InitializeStorage();

            // Upload a block with an arbitrary ClusterPoint, so we can verify it gets copied
            ClusterPoint arbitraryPoint = new ClusterPoint(1, 2, Guid.NewGuid());
            List<string> blockList;
            using (ObjectCachedBlockWriter<ClusterPoint> pointPartitionWriteStream = new ObjectCachedBlockWriter<ClusterPoint>(target.Points, point => point.ToByteArray(), ClusterPoint.Size,
                Environment.GetEnvironmentVariable("TEMP") + @"\" + Guid.NewGuid().ToString()))
            {
                pointPartitionWriteStream.Write(arbitraryPoint);
                pointPartitionWriteStream.FlushBlock();
                blockList = pointPartitionWriteStream.BlockList;
            }

            KMeansTaskData taskData = new KMeansTaskData(jobData, Guid.NewGuid(), 0, 1, target.Centroids.Uri, DateTime.Now, 0, null);

            target.tasks.Clear();
            target.tasks.Add(new KMeansTask(taskData));

            KMeansTaskResult taskResult = new KMeansTaskResult(taskData);
            CloudBlob pointsBlockListBlob = AzureHelper.CreateBlob(jobData.JobID.ToString(), Guid.NewGuid().ToString());
            using (Stream stream = pointsBlockListBlob.OpenWrite())
            {
                BinaryFormatter bf = new BinaryFormatter();
                bf.Serialize(stream, blockList);
            }
            taskResult.PointsBlockListBlob = pointsBlockListBlob.Uri;
            taskResult.NumPointsChanged = 2;
            Guid centroidID = Guid.NewGuid();
            taskResult.PointsProcessedDataByCentroid = new Dictionary<Guid, PointsProcessedData> {
                { centroidID, new PointsProcessedData() {
                        NumPointsProcessed = 2,
                        PartialPointSum = new Point(1, 2)
                    }
                }
            };
            target.ProcessWorkerResponse(taskResult, new List<Worker>());

            // Verify that the first ClusterPoint in Points is indeed equal to arbitraryPoint
            using (ObjectStreamReader<ClusterPoint> pointsStream = new ObjectStreamReader<ClusterPoint>(target.Points, ClusterPoint.FromByteArray, ClusterPoint.Size))
            {
                ClusterPoint point = pointsStream.First();
                Assert.AreEqual(arbitraryPoint.X, point.X);
                Assert.AreEqual(arbitraryPoint.Y, point.Y);
                Assert.AreEqual(arbitraryPoint.CentroidID, point.CentroidID);
            }
        }
        public void MultiIterationJobTest()
        {
            KMeansJobData jobData = new KMeansJobData(Guid.NewGuid(), 4, null, 2, 2, DateTime.Now);
            List<Worker> workers = new List<Worker> {
                new Worker("a", "", 1),
                new Worker("b", "", 1)
            };
            KMeansJob_Accessor job = new KMeansJob_Accessor(jobData, "server");

            // First iteration
            job.InitializeStorage();
            job.EnqueueTasks(workers);

            for (int i = 0; i < jobData.MaxIterationCount; i++)
            {
                CheckWorkerRequests(job,
                    (from task in job.tasks
                    where task.Running
                    select task.TaskData),
                    workers.Count, job.Points);

                // Create the worker results and send them to the job
                List<KMeansTaskResult> results = new List<KMeansTaskResult>();
                foreach (var task in job.tasks)
                {
                    var taskResult = new KMeansTaskResult(task.TaskData);
                    taskResult.NumPointsChanged = 1;
                    results.Add(taskResult);
                }

                foreach (var result in results)
                {
                    job.ProcessWorkerResponse(result, workers);
                }
            }
        }
Example #11
0
        private bool TaskResultMatchesRunningTask(KMeansTaskResult taskResult)
        {
            KMeansTask task = TaskResultWithTaskID(taskResult.TaskID);

            return(task != null && task.Running);
        }