/// <summary>
/// Reads every Centroid record from the blob referenced by <c>task.Centroids</c>
/// and stores the resulting list in <c>centroids</c>.
/// </summary>
private void InitializeCentroids()
{
    var centroidsBlob = AzureHelper.GetBlob(task.Centroids);

    // The reader is only needed until the records have been materialized into a list.
    using (var centroidReader = new ObjectStreamReader<Centroid>(centroidsBlob, Centroid.FromByteArray, Centroid.Size))
    {
        centroids = centroidReader.ToList();
    }
}
/// <summary>
/// Writes the same ClusterPoint <c>NumElements</c> times into an in-memory stream, then
/// reads it back through an ObjectStreamReader three times: once with First(), once
/// with a serial Select and once with a PLINQ Select. Both Select passes must yield
/// <c>NumElements</c> results; their elapsed times are written to the trace output.
/// </summary>
public void EnumeratorTest()
{
    const int NumElements = 5;
    ClusterPoint p = new ClusterPoint(1, 2, Guid.NewGuid());

    // Serialize the point once; the same bytes are written for every element.
    byte[] pointBytes = p.ToByteArray();
    byte[] serializedPoints;
    using (MemoryStream stream = new MemoryStream())
    {
        for (int i = 0; i < NumElements; i++)
        {
            stream.Write(pointBytes, 0, ClusterPoint.Size);
        }

        serializedPoints = stream.ToArray();
    }

    using (ObjectStreamReader<ClusterPoint> pointStream = new ObjectStreamReader<ClusterPoint>(new MemoryStream(serializedPoints), ClusterPoint.FromByteArray, ClusterPoint.Size))
    {
        Assert.AreEqual(p.CentroidID, pointStream.First().CentroidID);

        // Stopwatch measures elapsed time directly and is unaffected by system clock changes.
        System.Diagnostics.Stopwatch serialTimer = System.Diagnostics.Stopwatch.StartNew();
        int[] serialOutput = pointStream.Select(point =>
        {
            System.Threading.Thread.Sleep(200);
            return 1;
        }).ToArray();
        serialTimer.Stop();

        Assert.AreEqual(NumElements, serialOutput.Length);

        System.Diagnostics.Stopwatch parallelTimer = System.Diagnostics.Stopwatch.StartNew();
        int[] parallelOutput = pointStream.AsParallel().Select(point =>
        {
            System.Threading.Thread.Sleep(200);
            return 1;
        }).ToArray();
        parallelTimer.Stop();

        Assert.AreEqual(NumElements, parallelOutput.Length);

        System.Diagnostics.Trace.WriteLine(string.Format("serial: {0}, parallel: {1}", serialTimer.Elapsed.TotalSeconds, parallelTimer.Elapsed.TotalSeconds));
    }
}
/// <summary>
/// Uploads one block containing a known ClusterPoint, hands its block list to
/// ProcessWorkerResponse through a KMeansTaskResult, and then checks that the first
/// ClusterPoint read back from <c>target.Points</c> matches the uploaded one.
/// </summary>
public void ProcessWorkerResponseTest()
{
    KMeansJobData jobData = new KMeansJobData(Guid.NewGuid(), 4, null, 2, 10, DateTime.Now);
    KMeansJob_Accessor target = new KMeansJob_Accessor(jobData, "server");
    target.InitializeStorage();

    // Upload a block with an arbitrary ClusterPoint, so we can verify it gets copied
    ClusterPoint arbitraryPoint = new ClusterPoint(1, 2, Guid.NewGuid());

    // Path.GetTempPath() always yields a usable directory, whereas concatenating the
    // TEMP environment variable produces "\<guid>" when the variable is not set.
    string cachePath = System.IO.Path.Combine(System.IO.Path.GetTempPath(), Guid.NewGuid().ToString());

    List<string> blockList;
    using (ObjectCachedBlockWriter<ClusterPoint> pointPartitionWriteStream = new ObjectCachedBlockWriter<ClusterPoint>(target.Points, point => point.ToByteArray(), ClusterPoint.Size, cachePath))
    {
        pointPartitionWriteStream.Write(arbitraryPoint);
        pointPartitionWriteStream.FlushBlock();
        blockList = pointPartitionWriteStream.BlockList;
    }

    KMeansTaskData taskData = new KMeansTaskData(jobData, Guid.NewGuid(), 0, 1, target.Centroids.Uri, DateTime.Now, 0, null);

    // Replace whatever tasks the job holds with the single task this response belongs to.
    target.tasks.Clear();
    target.tasks.Add(new KMeansTask(taskData));

    KMeansTaskResult taskResult = new KMeansTaskResult(taskData);
    CloudBlob pointsBlockListBlob = AzureHelper.CreateBlob(jobData.JobID.ToString(), Guid.NewGuid().ToString());
    using (Stream stream = pointsBlockListBlob.OpenWrite())
    {
        // NOTE(review): BinaryFormatter is deprecated on modern .NET. It is kept because the
        // code under test presumably deserializes the block list with it - confirm before changing.
        BinaryFormatter bf = new BinaryFormatter();
        bf.Serialize(stream, blockList);
    }

    taskResult.PointsBlockListBlob = pointsBlockListBlob.Uri;
    taskResult.NumPointsChanged = 2;
    Guid centroidID = Guid.NewGuid();
    taskResult.PointsProcessedDataByCentroid = new Dictionary<Guid, PointsProcessedData>
    {
        { centroidID, new PointsProcessedData() { NumPointsProcessed = 2, PartialPointSum = new Point(1, 2) } }
    };

    target.ProcessWorkerResponse(taskResult, new List<Worker>());

    // Verify that the first ClusterPoint in Points is indeed equal to arbitraryPoint
    using (ObjectStreamReader<ClusterPoint> pointsStream = new ObjectStreamReader<ClusterPoint>(target.Points, ClusterPoint.FromByteArray, ClusterPoint.Size))
    {
        ClusterPoint point = pointsStream.First();
        Assert.AreEqual(arbitraryPoint.X, point.X);
        Assert.AreEqual(arbitraryPoint.Y, point.Y);
        Assert.AreEqual(arbitraryPoint.CentroidID, point.CentroidID);
    }
}
/// <summary>
/// Fills an in-memory stream with <c>NumElements</c> copies of one ClusterPoint and
/// enumerates it through an ObjectStreamReader with First(), a serial Select and a
/// PLINQ Select. Each Select pass must return <c>NumElements</c> items; the elapsed
/// time of both passes is traced.
/// </summary>
public void EnumeratorTest()
{
    const int NumElements = 5;
    ClusterPoint p = new ClusterPoint(1, 2, Guid.NewGuid());

    // The record is identical for every element, so serialize it a single time.
    byte[] record = p.ToByteArray();
    byte[] payload;
    using (MemoryStream stream = new MemoryStream())
    {
        for (int i = 0; i < NumElements; i++)
        {
            stream.Write(record, 0, ClusterPoint.Size);
        }

        payload = stream.ToArray();
    }

    using (ObjectStreamReader<ClusterPoint> pointStream = new ObjectStreamReader<ClusterPoint>(new MemoryStream(payload), ClusterPoint.FromByteArray, ClusterPoint.Size))
    {
        Assert.AreEqual(p.CentroidID, pointStream.First().CentroidID);

        // Stopwatch is used instead of DateTime.Now so the measurement does not depend
        // on wall-clock resolution or clock adjustments.
        System.Diagnostics.Stopwatch serialWatch = System.Diagnostics.Stopwatch.StartNew();
        int[] serialOutput = pointStream.Select(point =>
        {
            System.Threading.Thread.Sleep(200);
            return 1;
        }).ToArray();
        serialWatch.Stop();

        Assert.AreEqual(NumElements, serialOutput.Length);

        System.Diagnostics.Stopwatch parallelWatch = System.Diagnostics.Stopwatch.StartNew();
        int[] parallelOutput = pointStream.AsParallel().Select(point =>
        {
            System.Threading.Thread.Sleep(200);
            return 1;
        }).ToArray();
        parallelWatch.Stop();

        Assert.AreEqual(NumElements, parallelOutput.Length);

        System.Diagnostics.Trace.WriteLine(string.Format("serial: {0}, parallel: {1}", serialWatch.Elapsed.TotalSeconds, parallelWatch.Elapsed.TotalSeconds));
    }
}
/// <summary>
/// Builds the HTML visualization from the given blobs: one div per rendered point and
/// one div per centroid, each with inline top/left/background-color styles. Also shows
/// the URIs of both blobs.
/// </summary>
/// <param name="points">Blob that is read as a sequence of ClusterPoint records.</param>
/// <param name="centroids">Blob that is read as a sequence of Centroid records.</param>
/// <param name="final">When true the point cap is 1000, otherwise 100.</param>
private void UpdatePointsCentroids(CloudBlob points, CloudBlob centroids, bool final)
{
    const string PointMarkup = "<div class=\"point\" style=\"top:{0}px;left:{1}px;background-color:{2}\"></div>";
    const string CentroidMarkup = "<div class=\"centroid\" style=\"top:{0}px;left:{1}px;background-color:{2}\"></div>";

    StringBuilder html = new StringBuilder();
    int pointCap = final ? 1000 : 100;

    using (ObjectStreamReader<ClusterPoint> pointReader = new ObjectStreamReader<ClusterPoint>(points, ClusterPoint.FromByteArray, ClusterPoint.Size))
    {
        int rendered = 0;
        foreach (ClusterPoint clusterPoint in pointReader)
        {
            html.AppendFormat(PointMarkup, PointUnitsToPixels(clusterPoint.Y), PointUnitsToPixels(clusterPoint.X), GuidToColor(clusterPoint.CentroidID));

            // The loop stops once the count exceeds the cap, so up to pointCap + 1 points are rendered.
            // NOTE(review): possibly an off-by-one if exactly pointCap points were intended - confirm.
            if (++rendered > pointCap)
            {
                break;
            }
        }
    }

    // Centroids are rendered without any cap.
    using (ObjectStreamReader<Centroid> centroidReader = new ObjectStreamReader<Centroid>(centroids, Centroid.FromByteArray, Centroid.Size))
    {
        foreach (Centroid centroid in centroidReader)
        {
            html.AppendFormat(CentroidMarkup, PointUnitsToPixels(centroid.Y), PointUnitsToPixels(centroid.X), GuidToColor(centroid.ID));
        }
    }

    Visualization.Text = html.ToString();
    PointsURI.Text = points.Uri.ToString();
    CentroidsURI.Text = centroids.Uri.ToString();
}
/// <summary>
/// Writes <c>NumPoints</c> identical points at (1, 2) and <c>NumCentroids</c> centroids,
/// of which only the first, at (3, 4), is near the points (the rest are at (1000, 1000)).
/// Runs InitializeCentroids and ProcessPoints, commits the resulting block list, and
/// checks that every point was assigned to the near centroid and that the per-centroid
/// counts and partial sums match.
/// </summary>
public void ProcessPointsTest()
{
    CloudBlobContainer container = AzureHelper.StorageAccount.CreateCloudBlobClient().GetContainerReference("test");
    container.CreateIfNotExist();

    CloudBlob points = container.GetBlobReference(Guid.NewGuid().ToString());
    CloudBlob centroids = container.GetBlobReference(Guid.NewGuid().ToString());
    const int NumPoints = 100, NumCentroids = 10;

    using (ObjectStreamWriter<ClusterPoint> pointStream = new ObjectStreamWriter<ClusterPoint>(points, point => point.ToByteArray(), ClusterPoint.Size))
    {
        for (int i = 0; i < NumPoints; i++)
        {
            pointStream.Write(new ClusterPoint(1, 2, Guid.Empty));
        }
    }

    Guid centroidID = Guid.NewGuid();
    using (ObjectStreamWriter<Centroid> stream = new ObjectStreamWriter<Centroid>(centroids, point => point.ToByteArray(), Centroid.Size))
    {
        // The first centroid is the one every point is expected to be assigned to.
        stream.Write(new Centroid(centroidID, 3, 4));

        for (int i = 0; i < NumCentroids - 1; i++)
        {
            stream.Write(new Centroid(Guid.NewGuid(), 1000, 1000));
        }
    }

    KMeansTaskProcessor_Accessor target = new KMeansTaskProcessor_Accessor(new KMeansTaskData(Guid.NewGuid(), Guid.NewGuid(), NumPoints, points.Uri, NumCentroids, 1, 0, 0, centroids.Uri, DateTime.UtcNow, DateTime.UtcNow, 0, null));

    System.Diagnostics.Trace.WriteLine("Entering InitializeCentroids");
    target.InitializeCentroids();

    System.Diagnostics.Trace.WriteLine("Entering ProcessPoints");
    System.Diagnostics.Trace.WriteLine("ProcessPoints took " + AzureHelper.Time(() =>
    {
        target.ProcessPoints();
    }).TotalSeconds + " seconds");

    // Commit the blocks
    CloudBlockBlob newPointsBlob = AzureHelper.GetBlob(target.TaskResult.Points);
    using (Stream stream = AzureHelper.GetBlob(target.TaskResult.PointsBlockListBlob).OpenRead())
    {
        BinaryFormatter bf = new BinaryFormatter();

        // A direct cast fails right here with InvalidCastException if the blob holds an
        // unexpected type, instead of passing a silent null on to PutBlockList.
        List<string> pointsBlockList = (List<string>)bf.Deserialize(stream);
        newPointsBlob.PutBlockList(pointsBlockList);
    }

    using (ObjectStreamReader<ClusterPoint> stream = new ObjectStreamReader<ClusterPoint>(newPointsBlob, ClusterPoint.FromByteArray, ClusterPoint.Size))
    {
        foreach (ClusterPoint p in stream)
        {
            Assert.AreEqual(centroidID, p.CentroidID);
        }
    }

    Assert.AreEqual(NumPoints, target.TaskResult.NumPointsChanged);
    Assert.IsTrue(target.TaskResult.PointsProcessedDataByCentroid.ContainsKey(centroidID));
    Assert.AreEqual(NumPoints, target.TaskResult.PointsProcessedDataByCentroid[centroidID].NumPointsProcessed);

    // Every point is (1, 2), so the partial sums should be NumPoints and 2 * NumPoints.
    const double Epsilon = 0.0001;
    Assert.IsTrue(Math.Abs((1 * NumPoints) - target.TaskResult.PointsProcessedDataByCentroid[centroidID].PartialPointSum.X) < Epsilon);
    Assert.IsTrue(Math.Abs((2 * NumPoints) - target.TaskResult.PointsProcessedDataByCentroid[centroidID].PartialPointSum.Y) < Epsilon);
}
/// <summary>
/// Regenerates the visualization markup from the points and centroids blobs and
/// updates the two URI labels.
/// </summary>
/// <param name="points">Blob containing ClusterPoint records to sample.</param>
/// <param name="centroids">Blob containing Centroid records; all of them are rendered.</param>
/// <param name="final">Chooses the point limit: 1000 when true, 100 when false.</param>
private void UpdatePointsCentroids(CloudBlob points, CloudBlob centroids, bool final)
{
    StringBuilder markup = new StringBuilder();
    int limit = final ? 1000 : 100;

    using (ObjectStreamReader<ClusterPoint> reader = new ObjectStreamReader<ClusterPoint>(points, ClusterPoint.FromByteArray, ClusterPoint.Size))
    {
        int emitted = 0;
        foreach (ClusterPoint pt in reader)
        {
            var top = PointUnitsToPixels(pt.Y);
            var left = PointUnitsToPixels(pt.X);
            var color = GuidToColor(pt.CentroidID);
            markup.AppendFormat("<div class=\"point\" style=\"top:{0}px;left:{1}px;background-color:{2}\"></div>", top, left, color);

            emitted = emitted + 1;

            // The check runs after the append, so at most limit + 1 points are emitted.
            // NOTE(review): may be an unintended off-by-one - verify the desired cap.
            if (emitted > limit)
            {
                break;
            }
        }
    }

    using (ObjectStreamReader<Centroid> reader = new ObjectStreamReader<Centroid>(centroids, Centroid.FromByteArray, Centroid.Size))
    {
        foreach (Centroid c in reader)
        {
            var top = PointUnitsToPixels(c.Y);
            var left = PointUnitsToPixels(c.X);
            var color = GuidToColor(c.ID);
            markup.AppendFormat("<div class=\"centroid\" style=\"top:{0}px;left:{1}px;background-color:{2}\"></div>", top, left, color);
        }
    }

    Visualization.Text = markup.ToString();
    PointsURI.Text = points.Uri.ToString();
    CentroidsURI.Text = centroids.Uri.ToString();
}
/// <summary>
/// Opens a reader over the local file that backs the activity stream selected by
/// <paramref name="filter"/>.
/// </summary>
/// <param name="filter">Filter identifying the stream; its <c>reverse</c> flag selects the reverse index reader.</param>
/// <returns>
/// A completed task holding an index-based reader for ".index" files, an
/// ObjectStreamReader for ".json" files, or an EmptyObjectStreamReader when no
/// backing file exists.
/// </returns>
/// <exception cref="NotSupportedException">The resolved file has an extension other than ".index" or ".json".</exception>
protected virtual Task<BaseObjectStreamReader> OpenReader(ActivityStreamFilter filter)
{
    string path = null;

    // Prefer the first index that contains the filter and whose file actually exists.
    foreach (var index in Indexes)
    {
        if (!index.Contains(filter))
        {
            continue;
        }

        string candidate = Localize(index.ToUri(null));
        if (System.IO.File.Exists(candidate))
        {
            path = candidate;
            break;
        }
    }

    // No usable index: fall back to the filter's own URI, then to its path.
    if (path == null)
    {
        path = Localize(filter.ToUri(null));
        if (!System.IO.File.Exists(path))
        {
            path = Localize(filter.ToPath(null));
        }
    }

    BaseObjectStreamReader reader;
    if (!System.IO.File.Exists(path))
    {
        reader = new EmptyObjectStreamReader();
    }
    else
    {
        switch (System.IO.Path.GetExtension(path))
        {
            case ".index":
            {
                var indexStream = System.IO.File.OpenRead(path);
                try
                {
                    if (filter.reverse)
                    {
                        // The reverse reader's callback returns null for ids whose file is missing.
                        reader = new ReverseObjectIdStreamReader(indexStream, id =>
                        {
                            var p = Localize(filter.ToPath(id));
                            if (System.IO.File.Exists(p))
                            {
                                return System.IO.File.OpenRead(p);
                            }
                            else
                            {
                                return null;
                            }
                        }, _serializer);
                    }
                    else
                    {
                        reader = new ObjectIdStreamReader(indexStream, id => System.IO.File.OpenRead(Localize(filter.ToPath(id))), _serializer);
                    }
                }
                catch
                {
                    // Construction failed, so no reader exists to close the file; release the handle before propagating.
                    indexStream.Dispose();
                    throw;
                }

                break;
            }

            case ".json":
            {
                var jsonStream = System.IO.File.OpenRead(path);
                try
                {
                    reader = new ObjectStreamReader(jsonStream, _serializer);
                }
                catch
                {
                    // Same as above: do not leak the file handle when the reader cannot be created.
                    jsonStream.Dispose();
                    throw;
                }

                break;
            }

            default:
                // A specific exception type; still caught by existing catch (Exception) handlers.
                throw new NotSupportedException($"Invalid activity stream type: {path}");
        }
    }

    return Task.FromResult(reader);
}
/// <summary>
/// Prepares the Points and Centroids blobs in Azure storage ahead of the first k-means iteration.
/// When the job has no points blob, N random points are generated; otherwise the supplied
/// blob is used and N is taken from its reader's length. K random centroids are always written.
/// </summary>
public void InitializeStorage()
{
    AzureHelper.LogPerformance(() =>
    {
        Random rng = new Random();

        if (jobData.Points != null)
        {
            // Use the given points blob
            Points = AzureHelper.GetBlob(jobData.Points);

            // Initialize N based on that
            using (ObjectStreamReader<ClusterPoint> pointReader = new ObjectStreamReader<ClusterPoint>(Points, ClusterPoint.FromByteArray, ClusterPoint.Size))
            {
                jobData.N = (int)pointReader.Length;
            }
        }
        else
        {
            // Initialize the points blob with N random ClusterPoints
            Points = AzureHelper.CreateBlob(jobData.JobID.ToString(), AzureHelper.PointsBlob);
            using (ObjectStreamWriter<ClusterPoint> pointWriter = new ObjectStreamWriter<ClusterPoint>(Points, point => point.ToByteArray(), ClusterPoint.Size))
            {
                int written = 0;
                while (written < jobData.N)
                {
                    // Each coordinate lies in [-50, 50); no centroid is assigned yet (Guid.Empty).
                    double x = rng.NextDouble() * 100 - 50;
                    double y = rng.NextDouble() * 100 - 50;
                    pointWriter.Write(new ClusterPoint(x, y, Guid.Empty));
                    written++;
                }
            }
        }

        // Initialize the centroids blob with K random Centroids
        Centroids = AzureHelper.CreateBlob(jobData.JobID.ToString(), AzureHelper.CentroidsBlob);
        using (ObjectStreamWriter<Centroid> centroidWriter = new ObjectStreamWriter<Centroid>(Centroids, point => point.ToByteArray(), Centroid.Size))
        {
            int written = 0;
            while (written < jobData.K)
            {
                // Integer coordinates in [-PointRange, PointRange), each centroid with a fresh ID.
                Guid id = Guid.NewGuid();
                int x = rng.Next(-PointRange, PointRange);
                int y = rng.Next(-PointRange, PointRange);
                centroidWriter.Write(new Centroid(id, x, y));
                written++;
            }
        }
    },
    jobID: jobData.JobID.ToString(),
    methodName: "InitializeStorage",
    iterationCount: IterationCount,
    // The URIs are wrapped in Lazy because Points and Centroids are assigned inside the action above.
    points: new Lazy<string>(() => Points.Uri.ToString()),
    centroids: new Lazy<string>(() => Centroids.Uri.ToString()),
    machineID: MachineID);
}
/// <summary>
/// Moves every centroid to the mean of the points reported for it: the accumulated
/// partial point sum divided by the number of points processed. Centroids with no
/// entry, or with zero processed points, are set to a default-constructed Point.
/// The result is written to a fresh blob, copied over the Centroids blob, and the
/// point-changed counters are reset.
/// </summary>
private void RecalculateCentroids()
{
    AzureHelper.LogPerformance(() =>
    {
        // Initialize the output blob
        CloudBlob writeBlob = AzureHelper.CreateBlob(jobData.JobID.ToString(), Guid.NewGuid().ToString());

        // Do the mapping and write the new blob
        using (ObjectStreamReader<Centroid> stream = new ObjectStreamReader<Centroid>(Centroids, Centroid.FromByteArray, Centroid.Size))
        {
            // Select is lazy: the centroids are read and mapped while the writer loop below runs.
            var newCentroids = stream.Select(c =>
            {
                Point newCentroidPoint;

                // Single dictionary lookup instead of ContainsKey followed by repeated indexer calls.
                PointsProcessedData processed;
                if (totalPointsProcessedDataByCentroid.TryGetValue(c.ID, out processed) && processed.NumPointsProcessed != 0)
                {
                    newCentroidPoint = processed.PartialPointSum / (double)processed.NumPointsProcessed;
                }
                else
                {
                    newCentroidPoint = new Point();
                }

                c.X = newCentroidPoint.X;
                c.Y = newCentroidPoint.Y;
                return c;
            });

            using (ObjectStreamWriter<Centroid> writeStream = new ObjectStreamWriter<Centroid>(writeBlob, point => point.ToByteArray(), Centroid.Size))
            {
                foreach (Centroid c in newCentroids)
                {
                    writeStream.Write(c);
                }
            }
        }

        // Copy the contents of the new blob back into the old blob
        Centroids.CopyFromBlob(writeBlob);

        System.Diagnostics.Trace.TraceInformation("[ServerRole] Finished RecalculateCentroids(). Total points changed: {0}", TotalNumPointsChanged);

        ResetPointChangedCounts();
    },
    jobData.JobID.ToString(),
    methodName: "RecalculateCentroids",
    iterationCount: IterationCount,
    points: Points.Uri.ToString(),
    centroids: Centroids.Uri.ToString(),
    machineID: MachineID);
}
/// <summary>
/// End-to-end check of ProcessPoints: <c>NumPoints</c> points at (1, 2) are written next to
/// <c>NumCentroids</c> centroids, only one of which, at (3, 4), is close to them. After
/// processing and committing the block list, every point must carry that centroid's ID and
/// the task result must report matching counts and partial sums.
/// </summary>
public void ProcessPointsTest()
{
    CloudBlobContainer container = AzureHelper.StorageAccount.CreateCloudBlobClient().GetContainerReference("test");
    container.CreateIfNotExist();

    CloudBlob points = container.GetBlobReference(Guid.NewGuid().ToString());
    CloudBlob centroids = container.GetBlobReference(Guid.NewGuid().ToString());
    const int NumPoints = 100, NumCentroids = 10;

    using (ObjectStreamWriter<ClusterPoint> pointStream = new ObjectStreamWriter<ClusterPoint>(points, point => point.ToByteArray(), ClusterPoint.Size))
    {
        for (int i = 0; i < NumPoints; i++)
        {
            pointStream.Write(new ClusterPoint(1, 2, Guid.Empty));
        }
    }

    Guid centroidID = Guid.NewGuid();
    using (ObjectStreamWriter<Centroid> stream = new ObjectStreamWriter<Centroid>(centroids, point => point.ToByteArray(), Centroid.Size))
    {
        // Written first: the centroid all points are expected to end up with.
        stream.Write(new Centroid(centroidID, 3, 4));

        // The remaining centroids are placed far away at (1000, 1000).
        for (int i = 0; i < NumCentroids - 1; i++)
        {
            stream.Write(new Centroid(Guid.NewGuid(), 1000, 1000));
        }
    }

    KMeansTaskProcessor_Accessor target = new KMeansTaskProcessor_Accessor(new KMeansTaskData(Guid.NewGuid(), Guid.NewGuid(), NumPoints, points.Uri, NumCentroids, 1, 0, 0, centroids.Uri, DateTime.UtcNow, DateTime.UtcNow, 0, null));

    System.Diagnostics.Trace.WriteLine("Entering InitializeCentroids");
    target.InitializeCentroids();

    System.Diagnostics.Trace.WriteLine("Entering ProcessPoints");
    System.Diagnostics.Trace.WriteLine("ProcessPoints took " + AzureHelper.Time(() =>
    {
        target.ProcessPoints();
    }).TotalSeconds + " seconds");

    // Commit the blocks
    CloudBlockBlob newPointsBlob = AzureHelper.GetBlob(target.TaskResult.Points);
    using (Stream stream = AzureHelper.GetBlob(target.TaskResult.PointsBlockListBlob).OpenRead())
    {
        BinaryFormatter bf = new BinaryFormatter();

        // Direct cast rather than "as": an unexpected payload type surfaces immediately as
        // InvalidCastException instead of reaching PutBlockList as null.
        List<string> pointsBlockList = (List<string>)bf.Deserialize(stream);
        newPointsBlob.PutBlockList(pointsBlockList);
    }

    using (ObjectStreamReader<ClusterPoint> stream = new ObjectStreamReader<ClusterPoint>(newPointsBlob, ClusterPoint.FromByteArray, ClusterPoint.Size))
    {
        foreach (ClusterPoint p in stream)
        {
            Assert.AreEqual(centroidID, p.CentroidID);
        }
    }

    Assert.AreEqual(NumPoints, target.TaskResult.NumPointsChanged);
    Assert.IsTrue(target.TaskResult.PointsProcessedDataByCentroid.ContainsKey(centroidID));
    Assert.AreEqual(NumPoints, target.TaskResult.PointsProcessedDataByCentroid[centroidID].NumPointsProcessed);

    // All points are (1, 2), so the expected sums are 1 * NumPoints and 2 * NumPoints.
    const double Epsilon = 0.0001;
    Assert.IsTrue(Math.Abs((1 * NumPoints) - target.TaskResult.PointsProcessedDataByCentroid[centroidID].PartialPointSum.X) < Epsilon);
    Assert.IsTrue(Math.Abs((2 * NumPoints) - target.TaskResult.PointsProcessedDataByCentroid[centroidID].PartialPointSum.Y) < Epsilon);
}
/// <summary>
/// Verifies ProcessWorkerResponse: a single block holding a known ClusterPoint is uploaded,
/// its block list is passed in through a KMeansTaskResult, and afterwards the first point
/// read from <c>target.Points</c> must equal the uploaded point.
/// </summary>
public void ProcessWorkerResponseTest()
{
    KMeansJobData jobData = new KMeansJobData(Guid.NewGuid(), 4, null, 2, 10, DateTime.Now);
    KMeansJob_Accessor target = new KMeansJob_Accessor(jobData, "server");
    target.InitializeStorage();

    // Upload a block with an arbitrary ClusterPoint, so we can verify it gets copied
    ClusterPoint arbitraryPoint = new ClusterPoint(1, 2, Guid.NewGuid());

    // Build the cache location from Path.GetTempPath(): concatenating the TEMP environment
    // variable yields "\<guid>" whenever that variable is missing.
    string cachePath = System.IO.Path.Combine(System.IO.Path.GetTempPath(), Guid.NewGuid().ToString());

    List<string> blockList;
    using (ObjectCachedBlockWriter<ClusterPoint> pointPartitionWriteStream = new ObjectCachedBlockWriter<ClusterPoint>(target.Points, point => point.ToByteArray(), ClusterPoint.Size, cachePath))
    {
        pointPartitionWriteStream.Write(arbitraryPoint);
        pointPartitionWriteStream.FlushBlock();
        blockList = pointPartitionWriteStream.BlockList;
    }

    KMeansTaskData taskData = new KMeansTaskData(jobData, Guid.NewGuid(), 0, 1, target.Centroids.Uri, DateTime.Now, 0, null);

    // Make the task under test the only one the job knows about.
    target.tasks.Clear();
    target.tasks.Add(new KMeansTask(taskData));

    KMeansTaskResult taskResult = new KMeansTaskResult(taskData);
    CloudBlob pointsBlockListBlob = AzureHelper.CreateBlob(jobData.JobID.ToString(), Guid.NewGuid().ToString());
    using (Stream stream = pointsBlockListBlob.OpenWrite())
    {
        // NOTE(review): BinaryFormatter is deprecated on modern .NET. It is retained since the
        // consumer of this blob presumably reads the same format - verify before replacing.
        BinaryFormatter bf = new BinaryFormatter();
        bf.Serialize(stream, blockList);
    }

    taskResult.PointsBlockListBlob = pointsBlockListBlob.Uri;
    taskResult.NumPointsChanged = 2;
    Guid centroidID = Guid.NewGuid();
    taskResult.PointsProcessedDataByCentroid = new Dictionary<Guid, PointsProcessedData>
    {
        { centroidID, new PointsProcessedData() { NumPointsProcessed = 2, PartialPointSum = new Point(1, 2) } }
    };

    target.ProcessWorkerResponse(taskResult, new List<Worker>());

    // Verify that the first ClusterPoint in Points is indeed equal to arbitraryPoint
    using (ObjectStreamReader<ClusterPoint> pointsStream = new ObjectStreamReader<ClusterPoint>(target.Points, ClusterPoint.FromByteArray, ClusterPoint.Size))
    {
        ClusterPoint point = pointsStream.First();
        Assert.AreEqual(arbitraryPoint.X, point.X);
        Assert.AreEqual(arbitraryPoint.Y, point.Y);
        Assert.AreEqual(arbitraryPoint.CentroidID, point.CentroidID);
    }
}