/// <summary>
/// Stores <paramref name="item"/> in its backing file and then passes it to each
/// entry in <c>Writers</c>. If a file already exists at the item's path, nothing
/// is written and the writers are not invoked.
/// </summary>
/// <param name="item">Object to store; must be non-null and carry a non-blank <c>id</c>.</param>
/// <returns>The same <paramref name="item"/> instance, with <c>published</c> set.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="item"/> is null, or its <c>id</c> is null, empty or whitespace.
/// </exception>
public async Task<BaseObject> Write(BaseObject item)
{
    if (item == null)
    {
        throw new ArgumentNullException(nameof(item));
    }

    if (string.IsNullOrWhiteSpace(item.id))
    {
        throw new ArgumentNullException("item.id");
    }

    // Keep a caller-supplied publication time; otherwise stamp the current UTC time.
    item.published = item.published ?? DateTime.UtcNow;

    var targetPath = Localize(filter.ToPath(item.PublicId));
    if (File.Exists(targetPath))
    {
        // A file for this item is already present: skip the store and the writers.
        return item;
    }

    // CreateNew makes the open fail if the file appeared after the existence check.
    var fileStream = File.Open(targetPath, FileMode.CreateNew, FileAccess.Write, FileShare.Read);
    using (var objectWriter = new ObjectStreamWriter(fileStream, _serializer, ctx))
    {
        await objectWriter.Write(item);
    }

    // The writers run one after another, only once the primary file has been written.
    foreach (var downstream in Writers)
    {
        await downstream.Write(item);
    }

    return item;
}
/// <summary>
/// Runs <c>InitializeCentroids</c> and <c>ProcessPoints</c> on a task processor
/// against blobs in the "test" container, then verifies that every point was
/// assigned to one known centroid and that the per-centroid totals match.
/// </summary>
public void ProcessPointsTest()
{
    CloudBlobContainer container = AzureHelper.StorageAccount.CreateCloudBlobClient().GetContainerReference("test");
    container.CreateIfNotExist();

    // Fresh, uniquely named blobs for the input points and centroids.
    CloudBlob points = container.GetBlobReference(Guid.NewGuid().ToString());
    CloudBlob centroids = container.GetBlobReference(Guid.NewGuid().ToString());
    const int NumPoints = 100, NumCentroids = 10;

    // Every input point is identical: (1, 2) with an empty centroid ID.
    using (ObjectStreamWriter<ClusterPoint> pointWriter = new ObjectStreamWriter<ClusterPoint>(points, point => point.ToByteArray(), ClusterPoint.Size))
    {
        for (int i = 0; i < NumPoints; i++)
        {
            pointWriter.Write(new ClusterPoint(1, 2, Guid.Empty));
        }
    }

    // One centroid with a known ID at (3, 4); the remaining ones sit at (1000, 1000).
    Guid centroidID = Guid.NewGuid();
    using (ObjectStreamWriter<Centroid> centroidWriter = new ObjectStreamWriter<Centroid>(centroids, point => point.ToByteArray(), Centroid.Size))
    {
        centroidWriter.Write(new Centroid(centroidID, 3, 4));
        for (int i = 0; i < NumCentroids - 1; i++)
        {
            centroidWriter.Write(new Centroid(Guid.NewGuid(), 1000, 1000));
        }
    }

    KMeansTaskProcessor_Accessor target = new KMeansTaskProcessor_Accessor(new KMeansTaskData(Guid.NewGuid(), Guid.NewGuid(), NumPoints, points.Uri, NumCentroids, 1, 0, 0, centroids.Uri, DateTime.UtcNow, DateTime.UtcNow, 0, null));

    System.Diagnostics.Trace.WriteLine("Entering InitializeCentroids");
    target.InitializeCentroids();

    System.Diagnostics.Trace.WriteLine("Entering ProcessPoints");
    System.Diagnostics.Trace.WriteLine("ProcessPoints took " + AzureHelper.Time(() =>
    {
        target.ProcessPoints();
    }).TotalSeconds + " seconds");

    // Commit the blocks
    CloudBlockBlob newPointsBlob = AzureHelper.GetBlob(target.TaskResult.Points);
    using (Stream blockListStream = AzureHelper.GetBlob(target.TaskResult.PointsBlockListBlob).OpenRead())
    {
        BinaryFormatter bf = new BinaryFormatter();

        // Direct cast rather than `as`: a payload of the wrong type fails here with an
        // InvalidCastException instead of handing a silent null to PutBlockList.
        List<string> pointsBlockList = (List<string>)bf.Deserialize(blockListStream);
        newPointsBlob.PutBlockList(pointsBlockList);
    }

    // Every point read back must carry the known centroid's ID.
    using (ObjectStreamReader<ClusterPoint> resultReader = new ObjectStreamReader<ClusterPoint>(newPointsBlob, ClusterPoint.FromByteArray, ClusterPoint.Size))
    {
        foreach (ClusterPoint p in resultReader)
        {
            Assert.AreEqual(centroidID, p.CentroidID);
        }
    }

    // All points started with an empty centroid ID, so every one counts as changed.
    Assert.AreEqual(NumPoints, target.TaskResult.NumPointsChanged);
    Assert.IsTrue(target.TaskResult.PointsProcessedDataByCentroid.ContainsKey(centroidID));
    Assert.AreEqual(NumPoints, target.TaskResult.PointsProcessedDataByCentroid[centroidID].NumPointsProcessed);

    // The partial sums should be NumPoints copies of (1, 2), within a small tolerance.
    const double Epsilon = 0.0001;
    Assert.IsTrue(Math.Abs((1 * NumPoints) - target.TaskResult.PointsProcessedDataByCentroid[centroidID].PartialPointSum.X) < Epsilon);
    Assert.IsTrue(Math.Abs((2 * NumPoints) - target.TaskResult.PointsProcessedDataByCentroid[centroidID].PartialPointSum.Y) < Epsilon);
}
/// <summary>
/// Recomputes the position of every centroid from the accumulated per-centroid
/// totals, writes the results to a scratch blob, copies that blob over
/// <c>Centroids</c>, and resets the point-changed counters. The whole operation
/// is wrapped in <c>AzureHelper.LogPerformance</c>.
/// </summary>
private void RecalculateCentroids()
{
    AzureHelper.LogPerformance(() =>
    {
        // Scratch blob that receives the updated centroids before they replace the originals.
        CloudBlob scratchBlob = AzureHelper.CreateBlob(jobData.JobID.ToString(), Guid.NewGuid().ToString());

        using (ObjectStreamReader<Centroid> reader = new ObjectStreamReader<Centroid>(Centroids, Centroid.FromByteArray, Centroid.Size))
        using (ObjectStreamWriter<Centroid> writer = new ObjectStreamWriter<Centroid>(scratchBlob, point => point.ToByteArray(), Centroid.Size))
        {
            foreach (Centroid existing in reader)
            {
                Centroid updated = existing;

                bool hasProcessedPoints =
                    totalPointsProcessedDataByCentroid.ContainsKey(updated.ID)
                    && totalPointsProcessedDataByCentroid[updated.ID].NumPointsProcessed != 0;

                Point position;
                if (!hasProcessedPoints)
                {
                    // No points recorded for this centroid: fall back to a default Point.
                    position = new Point();
                }
                else
                {
                    // New position is the partial point sum divided by the number of points.
                    var tally = totalPointsProcessedDataByCentroid[updated.ID];
                    position = tally.PartialPointSum / (double)tally.NumPointsProcessed;
                }

                updated.X = position.X;
                updated.Y = position.Y;
                writer.Write(updated);
            }
        }

        // Copy the contents of the scratch blob back into the centroids blob.
        Centroids.CopyFromBlob(scratchBlob);

        System.Diagnostics.Trace.TraceInformation("[ServerRole] Finished RecalculateCentroids(). Total points changed: {0}", TotalNumPointsChanged);
        ResetPointChangedCounts();
    },
    jobData.JobID.ToString(),
    methodName: "RecalculateCentroids",
    iterationCount: IterationCount,
    points: Points.Uri.ToString(),
    centroids: Centroids.Uri.ToString(),
    machineID: MachineID);
}
/// <summary>
/// Prepares the Points and Centroids blobs for the first k-means iteration.
/// </summary>
/// <remarks>
/// When the job supplies a points blob it is reused and <c>jobData.N</c> is set from
/// the reader's length; otherwise <c>jobData.N</c> random points are generated.
/// In both cases <c>jobData.K</c> random centroids are written to a new blob.
/// </remarks>
public void InitializeStorage()
{
    AzureHelper.LogPerformance(() =>
    {
        Random rng = new Random();

        if (jobData.Points != null)
        {
            // Use the points blob supplied with the job.
            Points = AzureHelper.GetBlob(jobData.Points);

            // Take N from the reader's reported length.
            using (ObjectStreamReader<ClusterPoint> pointReader = new ObjectStreamReader<ClusterPoint>(Points, ClusterPoint.FromByteArray, ClusterPoint.Size))
            {
                jobData.N = (int)pointReader.Length;
            }
        }
        else
        {
            // No points supplied: fill a new blob with N random, unassigned points.
            Points = AzureHelper.CreateBlob(jobData.JobID.ToString(), AzureHelper.PointsBlob);
            using (ObjectStreamWriter<ClusterPoint> pointWriter = new ObjectStreamWriter<ClusterPoint>(Points, point => point.ToByteArray(), ClusterPoint.Size))
            {
                int written = 0;
                while (written < jobData.N)
                {
                    // Each coordinate is drawn from [-50, 50).
                    double x = rng.NextDouble() * 100 - 50;
                    double y = rng.NextDouble() * 100 - 50;
                    pointWriter.Write(new ClusterPoint(x, y, Guid.Empty));
                    written++;
                }
            }
        }

        // Fill the centroids blob with K random centroids, each with a fresh ID.
        Centroids = AzureHelper.CreateBlob(jobData.JobID.ToString(), AzureHelper.CentroidsBlob);
        using (ObjectStreamWriter<Centroid> centroidWriter = new ObjectStreamWriter<Centroid>(Centroids, point => point.ToByteArray(), Centroid.Size))
        {
            int written = 0;
            while (written < jobData.K)
            {
                // Integer coordinates in [-PointRange, PointRange).
                Guid id = Guid.NewGuid();
                int x = rng.Next(-PointRange, PointRange);
                int y = rng.Next(-PointRange, PointRange);
                centroidWriter.Write(new Centroid(id, x, y));
                written++;
            }
        }
    },
    jobID: jobData.JobID.ToString(),
    methodName: "InitializeStorage",
    iterationCount: IterationCount,
    // Points and Centroids are assigned inside the delegate above, so their URIs are wrapped in Lazy.
    points: new Lazy<string>(() => Points.Uri.ToString()),
    centroids: new Lazy<string>(() => Centroids.Uri.ToString()),
    machineID: MachineID);
}
/// <summary>
/// Runs <c>InitializeCentroids</c> and <c>ProcessPoints</c> on a task processor
/// against blobs in the "test" container, then verifies that every point was
/// assigned to one known centroid and that the per-centroid totals match.
/// </summary>
public void ProcessPointsTest()
{
    CloudBlobContainer container = AzureHelper.StorageAccount.CreateCloudBlobClient().GetContainerReference("test");
    container.CreateIfNotExist();

    // Fresh, uniquely named blobs for the input points and centroids.
    CloudBlob points = container.GetBlobReference(Guid.NewGuid().ToString());
    CloudBlob centroids = container.GetBlobReference(Guid.NewGuid().ToString());
    const int NumPoints = 100, NumCentroids = 10;

    // Every input point is identical: (1, 2) with an empty centroid ID.
    using (ObjectStreamWriter<ClusterPoint> pointWriter = new ObjectStreamWriter<ClusterPoint>(points, point => point.ToByteArray(), ClusterPoint.Size))
    {
        for (int i = 0; i < NumPoints; i++)
        {
            pointWriter.Write(new ClusterPoint(1, 2, Guid.Empty));
        }
    }

    // One centroid with a known ID at (3, 4); the remaining ones sit at (1000, 1000).
    Guid centroidID = Guid.NewGuid();
    using (ObjectStreamWriter<Centroid> centroidWriter = new ObjectStreamWriter<Centroid>(centroids, point => point.ToByteArray(), Centroid.Size))
    {
        centroidWriter.Write(new Centroid(centroidID, 3, 4));
        for (int i = 0; i < NumCentroids - 1; i++)
        {
            centroidWriter.Write(new Centroid(Guid.NewGuid(), 1000, 1000));
        }
    }

    KMeansTaskProcessor_Accessor target = new KMeansTaskProcessor_Accessor(new KMeansTaskData(Guid.NewGuid(), Guid.NewGuid(), NumPoints, points.Uri, NumCentroids, 1, 0, 0, centroids.Uri, DateTime.UtcNow, DateTime.UtcNow, 0, null));

    System.Diagnostics.Trace.WriteLine("Entering InitializeCentroids");
    target.InitializeCentroids();

    System.Diagnostics.Trace.WriteLine("Entering ProcessPoints");
    System.Diagnostics.Trace.WriteLine("ProcessPoints took " + AzureHelper.Time(() =>
    {
        target.ProcessPoints();
    }).TotalSeconds + " seconds");

    // Commit the blocks
    CloudBlockBlob newPointsBlob = AzureHelper.GetBlob(target.TaskResult.Points);
    using (Stream blockListStream = AzureHelper.GetBlob(target.TaskResult.PointsBlockListBlob).OpenRead())
    {
        BinaryFormatter bf = new BinaryFormatter();

        // Direct cast rather than `as`: a payload of the wrong type fails here with an
        // InvalidCastException instead of handing a silent null to PutBlockList.
        List<string> pointsBlockList = (List<string>)bf.Deserialize(blockListStream);
        newPointsBlob.PutBlockList(pointsBlockList);
    }

    // Every point read back must carry the known centroid's ID.
    using (ObjectStreamReader<ClusterPoint> resultReader = new ObjectStreamReader<ClusterPoint>(newPointsBlob, ClusterPoint.FromByteArray, ClusterPoint.Size))
    {
        foreach (ClusterPoint p in resultReader)
        {
            Assert.AreEqual(centroidID, p.CentroidID);
        }
    }

    // All points started with an empty centroid ID, so every one counts as changed.
    Assert.AreEqual(NumPoints, target.TaskResult.NumPointsChanged);
    Assert.IsTrue(target.TaskResult.PointsProcessedDataByCentroid.ContainsKey(centroidID));
    Assert.AreEqual(NumPoints, target.TaskResult.PointsProcessedDataByCentroid[centroidID].NumPointsProcessed);

    // The partial sums should be NumPoints copies of (1, 2), within a small tolerance.
    const double Epsilon = 0.0001;
    Assert.IsTrue(Math.Abs((1 * NumPoints) - target.TaskResult.PointsProcessedDataByCentroid[centroidID].PartialPointSum.X) < Epsilon);
    Assert.IsTrue(Math.Abs((2 * NumPoints) - target.TaskResult.PointsProcessedDataByCentroid[centroidID].PartialPointSum.Y) < Epsilon);
}