/// <summary>
/// Checks <c>RecalculateCentroids</c> when the only data recorded for the first stored
/// centroid is an empty <c>PointsProcessedData</c>: the centroid read back afterwards
/// must keep its ID and have X and Y equal to 0.
/// </summary>
public void RecalculateCentroidsTest2()
{
    KMeansJobData jobData = new KMeansJobData(Guid.NewGuid(), 0, null, 1, 10, DateTime.Now);
    KMeansJob_Accessor target = new KMeansJob_Accessor(jobData, "server");
    target.InitializeStorage();

    // Reads the first centroid record from the Centroids blob.
    // Stream.Read may return fewer bytes than requested, so keep reading until a whole
    // record of Centroid.Size bytes is buffered or the stream ends.
    Func<Centroid> readFirstCentroid = () =>
    {
        byte[] buffer = new byte[Centroid.Size];
        int filled = 0;
        using (BlobStream stream = target.Centroids.OpenRead())
        {
            int read;
            while (filled < buffer.Length
                && (read = stream.Read(buffer, filled, buffer.Length - filled)) > 0)
            {
                filled += read;
            }
        }

        Assert.AreEqual(buffer.Length, filled, "The centroids blob ended before one full centroid was read.");
        return Centroid.FromByteArray(buffer);
    };

    Centroid cOriginal = readFirstCentroid();

    // Register an empty PointsProcessedData for that centroid, then recalculate.
    target.totalPointsProcessedDataByCentroid[cOriginal.ID] = new PointsProcessedData();
    target.RecalculateCentroids();

    Centroid cNew = readFirstCentroid();

    // Expected value goes first so that failure messages report expected/actual correctly.
    Assert.AreEqual(cOriginal.ID, cNew.ID);
    Assert.AreEqual(0, cNew.X);
    Assert.AreEqual(0, cNew.Y);
}
/// <summary>
/// Writes one arbitrary <c>ClusterPoint</c> block to <c>target.Points</c>, hands the job a
/// task result that references the block list, and checks that after
/// <c>ProcessWorkerResponse</c> the first point readable from <c>target.Points</c> matches
/// the arbitrary point.
/// </summary>
public void ProcessWorkerResponseTest()
{
    KMeansJobData jobData = new KMeansJobData(Guid.NewGuid(), 4, null, 2, 10, DateTime.Now);
    KMeansJob_Accessor target = new KMeansJob_Accessor(jobData, "server");
    target.InitializeStorage();

    // Upload a block with an arbitrary ClusterPoint, so we can verify it gets copied
    ClusterPoint arbitraryPoint = new ClusterPoint(1, 2, Guid.NewGuid());

    // Build the writer's local path with Path.Combine/GetTempPath rather than concatenating
    // the TEMP environment variable, which degrades to a root-relative "\<guid>" path when
    // TEMP is not set.
    string cachePath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());

    List<string> blockList;
    using (ObjectCachedBlockWriter<ClusterPoint> pointPartitionWriteStream =
        new ObjectCachedBlockWriter<ClusterPoint>(target.Points, p => p.ToByteArray(), ClusterPoint.Size, cachePath))
    {
        pointPartitionWriteStream.Write(arbitraryPoint);
        pointPartitionWriteStream.FlushBlock();
        blockList = pointPartitionWriteStream.BlockList;
    }

    // Replace the job's tasks with a single task whose result we are about to submit.
    KMeansTaskData taskData = new KMeansTaskData(jobData, Guid.NewGuid(), 0, 1, target.Centroids.Uri, DateTime.Now, 0, null);
    target.tasks.Clear();
    target.tasks.Add(new KMeansTask(taskData));

    // Serialize the block list into a fresh blob and point the task result at it.
    KMeansTaskResult taskResult = new KMeansTaskResult(taskData);
    CloudBlob pointsBlockListBlob = AzureHelper.CreateBlob(jobData.JobID.ToString(), Guid.NewGuid().ToString());
    using (Stream stream = pointsBlockListBlob.OpenWrite())
    {
        BinaryFormatter bf = new BinaryFormatter();
        bf.Serialize(stream, blockList);
    }

    taskResult.PointsBlockListBlob = pointsBlockListBlob.Uri;
    taskResult.NumPointsChanged = 2;
    Guid centroidID = Guid.NewGuid();
    taskResult.PointsProcessedDataByCentroid = new Dictionary<Guid, PointsProcessedData>
    {
        { centroidID, new PointsProcessedData() { NumPointsProcessed = 2, PartialPointSum = new Point(1, 2) } }
    };

    target.ProcessWorkerResponse(taskResult, new List<Worker>());

    // Verify that the first ClusterPoint in Points is indeed equal to arbitraryPoint
    using (ObjectStreamReader<ClusterPoint> pointsStream =
        new ObjectStreamReader<ClusterPoint>(target.Points, ClusterPoint.FromByteArray, ClusterPoint.Size))
    {
        ClusterPoint firstPoint = pointsStream.First();
        Assert.AreEqual(arbitraryPoint.X, firstPoint.X);
        Assert.AreEqual(arbitraryPoint.Y, firstPoint.Y);
        Assert.AreEqual(arbitraryPoint.CentroidID, firstPoint.CentroidID);
    }
}
/// <summary>
/// Runs a job with two workers through <c>jobData.MaxIterationCount</c> rounds. Each round
/// validates the task data of the running tasks via <c>CheckWorkerRequests</c>, then feeds
/// one result per task (with <c>NumPointsChanged</c> set to 1) back to the job.
/// </summary>
// TODO: make this unit test check things in more detail
public void MultiIterationJobTest()
{
    KMeansJobData jobData = new KMeansJobData(Guid.NewGuid(), 4, null, 2, 2, DateTime.Now);

    List<Worker> workers = new List<Worker>();
    workers.Add(new Worker("a", "", 1));
    workers.Add(new Worker("b", "", 1));

    KMeansJob_Accessor job = new KMeansJob_Accessor(jobData, "server");

    // First iteration
    job.InitializeStorage();
    job.EnqueueTasks(workers);

    int iteration = 0;
    while (iteration < jobData.MaxIterationCount)
    {
        IEnumerable<KMeansTaskData> runningTaskData = job.tasks
            .Where(t => t.Running)
            .Select(t => t.TaskData);
        CheckWorkerRequests(job, runningTaskData, workers.Count, job.Points);

        // Create the worker results and send them to the job.
        // All results are built before any is delivered (presumably because
        // ProcessWorkerResponse may change job.tasks - confirm).
        List<KMeansTaskResult> pendingResults = job.tasks
            .Select(t => new KMeansTaskResult(t.TaskData) { NumPointsChanged = 1 })
            .ToList();

        pendingResults.ForEach(pending => job.ProcessWorkerResponse(pending, workers));

        iteration++;
    }
}
/// <summary>
/// Asserts that <paramref name="taskDataList"/> contains exactly
/// <paramref name="expectedNumRequests"/> entries and that none of them is null.
/// </summary>
/// <param name="job">Not used by the current checks.</param>
/// <param name="taskDataList">Task data to validate; enumerated exactly once.</param>
/// <param name="expectedNumRequests">Number of entries the sequence must contain.</param>
/// <param name="pointsBlob">Not used by the current checks.</param>
private void CheckWorkerRequests(KMeansJob_Accessor job, IEnumerable<KMeansTaskData> taskDataList, int expectedNumRequests, CloudBlob pointsBlob)
{
    // Materialize once: MultiIterationJobTest passes a deferred LINQ query, and running
    // each assertion against the raw sequence would re-evaluate that query every time.
    List<KMeansTaskData> taskData = taskDataList.ToList();

    // Make sure there are enough taskDatas in the list
    Assert.AreEqual(expectedNumRequests, taskData.Count);

    // Any stops at the first null instead of counting every match.
    Assert.IsFalse(taskData.Any(element => element == null), "taskDataList contains a null entry.");
}
/// <summary>
/// Asserts that <paramref name="taskDataList"/> contains exactly
/// <paramref name="expectedNumRequests"/> entries and that none of them is null.
/// </summary>
/// <param name="job">Not used by the current checks.</param>
/// <param name="taskDataList">Task data to validate; enumerated exactly once.</param>
/// <param name="expectedNumRequests">Number of entries the sequence must contain.</param>
/// <param name="pointsBlob">Not used by the current checks.</param>
private void CheckWorkerRequests(KMeansJob_Accessor job, IEnumerable<KMeansTaskData> taskDataList, int expectedNumRequests, CloudBlob pointsBlob)
{
    // Materialize once: MultiIterationJobTest passes a deferred LINQ query, and running
    // each assertion against the raw sequence would re-evaluate that query every time.
    List<KMeansTaskData> taskData = taskDataList.ToList();

    // Make sure there are enough taskDatas in the list
    Assert.AreEqual(expectedNumRequests, taskData.Count);

    // Any stops at the first null instead of counting every match.
    Assert.IsFalse(taskData.Any(element => element == null), "taskDataList contains a null entry.");
}
/// <summary>
/// Checks <c>RecalculateCentroids</c> when the only data recorded for the first stored
/// centroid is an empty <c>PointsProcessedData</c>: the centroid read back afterwards
/// must keep its ID and have X and Y equal to 0.
/// </summary>
public void RecalculateCentroidsTest2()
{
    KMeansJobData jobData = new KMeansJobData(Guid.NewGuid(), 0, null, 1, 10, DateTime.Now);
    KMeansJob_Accessor target = new KMeansJob_Accessor(jobData, "server");
    target.InitializeStorage();

    // Reads the first centroid record from the Centroids blob.
    // Stream.Read may return fewer bytes than requested, so keep reading until a whole
    // record of Centroid.Size bytes is buffered or the stream ends.
    Func<Centroid> readFirstCentroid = () =>
    {
        byte[] buffer = new byte[Centroid.Size];
        int filled = 0;
        using (BlobStream stream = target.Centroids.OpenRead())
        {
            int read;
            while (filled < buffer.Length
                && (read = stream.Read(buffer, filled, buffer.Length - filled)) > 0)
            {
                filled += read;
            }
        }

        Assert.AreEqual(buffer.Length, filled, "The centroids blob ended before one full centroid was read.");
        return Centroid.FromByteArray(buffer);
    };

    Centroid cOriginal = readFirstCentroid();

    // Register an empty PointsProcessedData for that centroid, then recalculate.
    target.totalPointsProcessedDataByCentroid[cOriginal.ID] = new PointsProcessedData();
    target.RecalculateCentroids();

    Centroid cNew = readFirstCentroid();

    // Expected value goes first so that failure messages report expected/actual correctly.
    Assert.AreEqual(cOriginal.ID, cNew.ID);
    Assert.AreEqual(0, cNew.X);
    Assert.AreEqual(0, cNew.Y);
}
/// <summary>
/// Writes one arbitrary <c>ClusterPoint</c> block to <c>target.Points</c>, hands the job a
/// task result that references the block list, and checks that after
/// <c>ProcessWorkerResponse</c> the first point readable from <c>target.Points</c> matches
/// the arbitrary point.
/// </summary>
public void ProcessWorkerResponseTest()
{
    KMeansJobData jobData = new KMeansJobData(Guid.NewGuid(), 4, null, 2, 10, DateTime.Now);
    KMeansJob_Accessor target = new KMeansJob_Accessor(jobData, "server");
    target.InitializeStorage();

    // Upload a block with an arbitrary ClusterPoint, so we can verify it gets copied
    ClusterPoint arbitraryPoint = new ClusterPoint(1, 2, Guid.NewGuid());

    // Build the writer's local path with Path.Combine/GetTempPath rather than concatenating
    // the TEMP environment variable, which degrades to a root-relative "\<guid>" path when
    // TEMP is not set.
    string cachePath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());

    List<string> blockList;
    using (ObjectCachedBlockWriter<ClusterPoint> pointPartitionWriteStream =
        new ObjectCachedBlockWriter<ClusterPoint>(target.Points, p => p.ToByteArray(), ClusterPoint.Size, cachePath))
    {
        pointPartitionWriteStream.Write(arbitraryPoint);
        pointPartitionWriteStream.FlushBlock();
        blockList = pointPartitionWriteStream.BlockList;
    }

    // Replace the job's tasks with a single task whose result we are about to submit.
    KMeansTaskData taskData = new KMeansTaskData(jobData, Guid.NewGuid(), 0, 1, target.Centroids.Uri, DateTime.Now, 0, null);
    target.tasks.Clear();
    target.tasks.Add(new KMeansTask(taskData));

    // Serialize the block list into a fresh blob and point the task result at it.
    KMeansTaskResult taskResult = new KMeansTaskResult(taskData);
    CloudBlob pointsBlockListBlob = AzureHelper.CreateBlob(jobData.JobID.ToString(), Guid.NewGuid().ToString());
    using (Stream stream = pointsBlockListBlob.OpenWrite())
    {
        BinaryFormatter bf = new BinaryFormatter();
        bf.Serialize(stream, blockList);
    }

    taskResult.PointsBlockListBlob = pointsBlockListBlob.Uri;
    taskResult.NumPointsChanged = 2;
    Guid centroidID = Guid.NewGuid();
    taskResult.PointsProcessedDataByCentroid = new Dictionary<Guid, PointsProcessedData>
    {
        { centroidID, new PointsProcessedData() { NumPointsProcessed = 2, PartialPointSum = new Point(1, 2) } }
    };

    target.ProcessWorkerResponse(taskResult, new List<Worker>());

    // Verify that the first ClusterPoint in Points is indeed equal to arbitraryPoint
    using (ObjectStreamReader<ClusterPoint> pointsStream =
        new ObjectStreamReader<ClusterPoint>(target.Points, ClusterPoint.FromByteArray, ClusterPoint.Size))
    {
        ClusterPoint firstPoint = pointsStream.First();
        Assert.AreEqual(arbitraryPoint.X, firstPoint.X);
        Assert.AreEqual(arbitraryPoint.Y, firstPoint.Y);
        Assert.AreEqual(arbitraryPoint.CentroidID, firstPoint.CentroidID);
    }
}
/// <summary>
/// Runs a job with two workers through <c>jobData.MaxIterationCount</c> rounds. Each round
/// validates the task data of the running tasks via <c>CheckWorkerRequests</c>, then feeds
/// one result per task (with <c>NumPointsChanged</c> set to 1) back to the job.
/// </summary>
public void MultiIterationJobTest()
{
    KMeansJobData jobData = new KMeansJobData(Guid.NewGuid(), 4, null, 2, 2, DateTime.Now);

    List<Worker> workers = new List<Worker>();
    workers.Add(new Worker("a", "", 1));
    workers.Add(new Worker("b", "", 1));

    KMeansJob_Accessor job = new KMeansJob_Accessor(jobData, "server");

    // First iteration
    job.InitializeStorage();
    job.EnqueueTasks(workers);

    int round = 0;
    while (round < jobData.MaxIterationCount)
    {
        IEnumerable<KMeansTaskData> runningTaskData = job.tasks
            .Where(t => t.Running)
            .Select(t => t.TaskData);
        CheckWorkerRequests(job, runningTaskData, workers.Count, job.Points);

        // Create the worker results and send them to the job.
        // All results are built before any is delivered (presumably because
        // ProcessWorkerResponse may change job.tasks - confirm).
        List<KMeansTaskResult> pendingResults = job.tasks
            .Select(t => new KMeansTaskResult(t.TaskData) { NumPointsChanged = 1 })
            .ToList();

        pendingResults.ForEach(pending => job.ProcessWorkerResponse(pending, workers));

        round++;
    }
}