Exemple #1
0
        /// <summary>
        /// Stores <paramref name="item"/> in the local file store and then passes it to each
        /// entry in <c>Writers</c>. If a file already exists at the item's store path, nothing
        /// is written and the writers are not invoked.
        /// </summary>
        /// <param name="item">The object to store. Must not be null and must have a non-blank <c>id</c>.</param>
        /// <returns>The same <paramref name="item"/> instance that was passed in.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="item"/> is null, or its <c>id</c> is null, empty or whitespace.
        /// </exception>
        public async Task <BaseObject> Write(BaseObject item)
        {
            if (item == null)
            {
                throw new ArgumentNullException(nameof(item));
            }

            if (string.IsNullOrWhiteSpace(item.id))
            {
                throw new ArgumentNullException("item.id");
            }

            // Fall back to the current UTC time when no publication time was supplied.
            item.published = item.published ?? DateTime.UtcNow;

            var storePath = Localize(filter.ToPath(item.PublicId));

            if (File.Exists(storePath))
            {
                // Already stored: skip both the store write and the additional writers.
                return item;
            }

            // CreateNew fails if the file appears between the existence check and this call.
            var storeStream = File.Open(storePath, FileMode.CreateNew, FileAccess.Write, FileShare.Read);
            using (var storeWriter = new ObjectStreamWriter(storeStream, _serializer, ctx))
            {
                await storeWriter.Write(item);
            }

            // Fan out sequentially, only after the store write has completed.
            foreach (var extraWriter in Writers)
            {
                await extraWriter.Write(item);
            }

            return item;
        }
        /// <summary>
        /// Integration test for the task processor's <c>ProcessPoints</c> step against blob storage.
        /// Writes 100 identical points (1, 2) and 10 centroids (one at (3, 4), the rest at
        /// (1000, 1000)), runs the processor, and expects every point to be reported as assigned
        /// to the (3, 4) centroid, with matching change counts and partial sums.
        /// </summary>
        public void ProcessPointsTest()
        {
            CloudBlobContainer container = AzureHelper.StorageAccount.CreateCloudBlobClient().GetContainerReference("test");
            container.CreateIfNotExist();

            // Random blob names so that repeated runs do not collide with each other.
            CloudBlob points = container.GetBlobReference(Guid.NewGuid().ToString());
            CloudBlob centroids = container.GetBlobReference(Guid.NewGuid().ToString());
            const int NumPoints = 100, NumCentroids = 10;

            // Input points: all at (1, 2) and not yet assigned to a centroid.
            using (ObjectStreamWriter<ClusterPoint> pointStream = new ObjectStreamWriter<ClusterPoint>(points, point => point.ToByteArray(), ClusterPoint.Size))
            {
                for (int i = 0; i < NumPoints; i++)
                {
                    pointStream.Write(new ClusterPoint(1, 2, Guid.Empty));
                }
            }

            // Input centroids: the one under test first, followed by NumCentroids - 1 far-away ones.
            Guid centroidID = Guid.NewGuid();
            using (ObjectStreamWriter<Centroid> stream = new ObjectStreamWriter<Centroid>(centroids, point => point.ToByteArray(), Centroid.Size))
            {
                stream.Write(new Centroid(centroidID, 3, 4));

                for (int i = 0; i < NumCentroids - 1; i++)
                {
                    stream.Write(new Centroid(Guid.NewGuid(), 1000, 1000));
                }
            }

            KMeansTaskProcessor_Accessor target = new KMeansTaskProcessor_Accessor(new KMeansTaskData(Guid.NewGuid(), Guid.NewGuid(), NumPoints, points.Uri, NumCentroids, 1, 0, 0, centroids.Uri, DateTime.UtcNow, DateTime.UtcNow, 0, null));

            System.Diagnostics.Trace.WriteLine("Entering InitializeCentroids");
            target.InitializeCentroids();

            System.Diagnostics.Trace.WriteLine("Entering ProcessPoints");
            System.Diagnostics.Trace.WriteLine("ProcessPoints took " + AzureHelper.Time(() =>
            {
                target.ProcessPoints();
            }).TotalSeconds + " seconds");

            // Commit the blocks
            CloudBlockBlob newPointsBlob = AzureHelper.GetBlob(target.TaskResult.Points);
            using (Stream stream = AzureHelper.GetBlob(target.TaskResult.PointsBlockListBlob).OpenRead())
            {
                // NOTE(review): BinaryFormatter is unsafe on untrusted data and is removed in recent
                // .NET versions; it is kept here because the block-list blob is read in this format.
                BinaryFormatter bf = new BinaryFormatter();
                List<string> pointsBlockList = bf.Deserialize(stream) as List<string>;

                // Fail with a clear message instead of handing a null list to PutBlockList.
                Assert.IsNotNull(pointsBlockList, "PointsBlockListBlob did not deserialize to a List<string>.");
                newPointsBlob.PutBlockList(pointsBlockList);
            }

            // Every point in the output blob must now reference the centroid under test.
            using (ObjectStreamReader<ClusterPoint> stream = new ObjectStreamReader<ClusterPoint>(newPointsBlob, ClusterPoint.FromByteArray, ClusterPoint.Size))
            {
                foreach (ClusterPoint p in stream)
                {
                    Assert.AreEqual(centroidID, p.CentroidID);
                }
            }

            Assert.AreEqual(NumPoints, target.TaskResult.NumPointsChanged);
            Assert.IsTrue(target.TaskResult.PointsProcessedDataByCentroid.ContainsKey(centroidID));
            Assert.AreEqual(NumPoints, target.TaskResult.PointsProcessedDataByCentroid[centroidID].NumPointsProcessed);

            // Partial sums: NumPoints points at (1, 2) give (1 * NumPoints, 2 * NumPoints).
            // The delta overload reports expected and actual values when the check fails.
            const double Epsilon = 0.0001;
            Assert.AreEqual(1.0 * NumPoints, target.TaskResult.PointsProcessedDataByCentroid[centroidID].PartialPointSum.X, Epsilon);
            Assert.AreEqual(2.0 * NumPoints, target.TaskResult.PointsProcessedDataByCentroid[centroidID].PartialPointSum.Y, Epsilon);
        }
        /// <summary>
        /// Rewrites the <c>Centroids</c> blob with updated coordinates. Each centroid's X/Y is set
        /// to <c>PartialPointSum / NumPointsProcessed</c> from
        /// <c>totalPointsProcessedDataByCentroid</c>; a centroid with no entry, or with zero
        /// processed points, gets the coordinates of a default-constructed <c>Point</c>.
        /// The whole operation runs inside <c>AzureHelper.LogPerformance</c>.
        /// </summary>
        private void RecalculateCentroids()
        {
            AzureHelper.LogPerformance(() =>
            {
                // Scratch blob (named with a fresh GUID) that receives the updated centroids.
                CloudBlob scratchBlob = AzureHelper.CreateBlob(jobData.JobID.ToString(), Guid.NewGuid().ToString());

                // Stream centroids from the current blob, update each one, and write it to the scratch blob.
                using (ObjectStreamReader<Centroid> reader = new ObjectStreamReader<Centroid>(Centroids, Centroid.FromByteArray, Centroid.Size))
                using (ObjectStreamWriter<Centroid> writer = new ObjectStreamWriter<Centroid>(scratchBlob, centroid => centroid.ToByteArray(), Centroid.Size))
                {
                    foreach (Centroid current in reader)
                    {
                        Point mean;
                        if (!totalPointsProcessedDataByCentroid.ContainsKey(current.ID) || totalPointsProcessedDataByCentroid[current.ID].NumPointsProcessed == 0)
                        {
                            // No points recorded for this centroid: avoid dividing by zero.
                            mean = new Point();
                        }
                        else
                        {
                            mean = totalPointsProcessedDataByCentroid[current.ID].PartialPointSum
                                 / (double)totalPointsProcessedDataByCentroid[current.ID].NumPointsProcessed;
                        }

                        // Work on a separate local so the loop variable itself is never mutated.
                        Centroid moved = current;
                        moved.X = mean.X;
                        moved.Y = mean.Y;

                        writer.Write(moved);
                    }
                }

                // Replace the contents of the original centroids blob with the scratch blob.
                Centroids.CopyFromBlob(scratchBlob);

                System.Diagnostics.Trace.TraceInformation("[ServerRole] Finished RecalculateCentroids(). Total points changed: {0}", TotalNumPointsChanged);

                ResetPointChangedCounts();

            }, jobData.JobID.ToString(), methodName: "RecalculateCentroids", iterationCount: IterationCount, points: Points.Uri.ToString(), centroids: Centroids.Uri.ToString(), machineID: MachineID);
        }
        /// <summary>
        /// Prepares the Azure storage blobs (Points and Centroids) used by the first k-means iteration.
        /// When the job supplies no points blob, one is created and filled with <c>jobData.N</c> random
        /// points; otherwise the supplied blob is used and <c>jobData.N</c> is set from its length.
        /// A centroids blob with <c>jobData.K</c> random centroids is always created.
        /// </summary>
        public void InitializeStorage()
        {
            AzureHelper.LogPerformance(() =>
            {
                Random rng = new Random();

                if (jobData.Points != null)
                {
                    // The job came with its own points blob: reuse it ...
                    Points = AzureHelper.GetBlob(jobData.Points);

                    // ... and take N from the reader's reported length.
                    using (ObjectStreamReader<ClusterPoint> reader = new ObjectStreamReader<ClusterPoint>(Points, ClusterPoint.FromByteArray, ClusterPoint.Size))
                    {
                        jobData.N = (int)reader.Length;
                    }
                }
                else
                {
                    // No points supplied: generate N random, unassigned ClusterPoints.
                    Points = AzureHelper.CreateBlob(jobData.JobID.ToString(), AzureHelper.PointsBlob);
                    using (ObjectStreamWriter<ClusterPoint> pointWriter = new ObjectStreamWriter<ClusterPoint>(Points, p => p.ToByteArray(), ClusterPoint.Size))
                    {
                        int written = 0;
                        while (written < jobData.N)
                        {
                            // Each coordinate falls in [-50, 50); x is drawn before y.
                            double x = rng.NextDouble() * 100 - 50;
                            double y = rng.NextDouble() * 100 - 50;
                            pointWriter.Write(new ClusterPoint(x, y, Guid.Empty));
                            written++;
                        }
                    }
                }

                // Centroids are always generated fresh: K of them, each with a new ID.
                Centroids = AzureHelper.CreateBlob(jobData.JobID.ToString(), AzureHelper.CentroidsBlob);
                using (ObjectStreamWriter<Centroid> centroidWriter = new ObjectStreamWriter<Centroid>(Centroids, c => c.ToByteArray(), Centroid.Size))
                {
                    int created = 0;
                    while (created < jobData.K)
                    {
                        // Coordinates come from Random.Next(-PointRange, PointRange), whose upper bound is exclusive.
                        Guid id = Guid.NewGuid();
                        int x = rng.Next(-PointRange, PointRange);
                        int y = rng.Next(-PointRange, PointRange);
                        centroidWriter.Write(new Centroid(id, x, y));
                        created++;
                    }
                }
            }, jobID: jobData.JobID.ToString(), methodName: "InitializeStorage", iterationCount: IterationCount, points: new Lazy<string>(() => Points.Uri.ToString()), centroids: new Lazy<string>(() => Centroids.Uri.ToString()), machineID: MachineID);
        }
        /// <summary>
        /// Integration test for the task processor's <c>ProcessPoints</c> step against blob storage.
        /// Writes 100 identical points (1, 2) and 10 centroids (one at (3, 4), the rest at
        /// (1000, 1000)), runs the processor, and expects every point to be reported as assigned
        /// to the (3, 4) centroid, with matching change counts and partial sums.
        /// </summary>
        public void ProcessPointsTest()
        {
            CloudBlobContainer container = AzureHelper.StorageAccount.CreateCloudBlobClient().GetContainerReference("test");

            container.CreateIfNotExist();

            // Random blob names so that repeated runs do not collide with each other.
            CloudBlob points = container.GetBlobReference(Guid.NewGuid().ToString());
            CloudBlob centroids = container.GetBlobReference(Guid.NewGuid().ToString());
            const int NumPoints = 100, NumCentroids = 10;

            // Input points: all at (1, 2) and not yet assigned to a centroid.
            using (ObjectStreamWriter<ClusterPoint> pointStream = new ObjectStreamWriter<ClusterPoint>(points, point => point.ToByteArray(), ClusterPoint.Size))
            {
                for (int i = 0; i < NumPoints; i++)
                {
                    pointStream.Write(new ClusterPoint(1, 2, Guid.Empty));
                }
            }

            // Input centroids: the one under test first, followed by NumCentroids - 1 far-away ones.
            Guid centroidID = Guid.NewGuid();

            using (ObjectStreamWriter<Centroid> stream = new ObjectStreamWriter<Centroid>(centroids, point => point.ToByteArray(), Centroid.Size))
            {
                stream.Write(new Centroid(centroidID, 3, 4));

                for (int i = 0; i < NumCentroids - 1; i++)
                {
                    stream.Write(new Centroid(Guid.NewGuid(), 1000, 1000));
                }
            }

            KMeansTaskProcessor_Accessor target = new KMeansTaskProcessor_Accessor(new KMeansTaskData(Guid.NewGuid(), Guid.NewGuid(), NumPoints, points.Uri, NumCentroids, 1, 0, 0, centroids.Uri, DateTime.UtcNow, DateTime.UtcNow, 0, null));

            System.Diagnostics.Trace.WriteLine("Entering InitializeCentroids");
            target.InitializeCentroids();

            System.Diagnostics.Trace.WriteLine("Entering ProcessPoints");
            System.Diagnostics.Trace.WriteLine("ProcessPoints took " + AzureHelper.Time(() =>
            {
                target.ProcessPoints();
            }).TotalSeconds + " seconds");

            // Commit the blocks
            CloudBlockBlob newPointsBlob = AzureHelper.GetBlob(target.TaskResult.Points);

            using (Stream stream = AzureHelper.GetBlob(target.TaskResult.PointsBlockListBlob).OpenRead())
            {
                // NOTE(review): BinaryFormatter is unsafe on untrusted data and is removed in recent
                // .NET versions; it is kept here because the block-list blob is read in this format.
                BinaryFormatter bf = new BinaryFormatter();
                List<string> pointsBlockList = bf.Deserialize(stream) as List<string>;

                // Fail with a clear message instead of handing a null list to PutBlockList.
                Assert.IsNotNull(pointsBlockList, "PointsBlockListBlob did not deserialize to a List<string>.");
                newPointsBlob.PutBlockList(pointsBlockList);
            }

            // Every point in the output blob must now reference the centroid under test.
            using (ObjectStreamReader<ClusterPoint> stream = new ObjectStreamReader<ClusterPoint>(newPointsBlob, ClusterPoint.FromByteArray, ClusterPoint.Size))
            {
                foreach (ClusterPoint p in stream)
                {
                    Assert.AreEqual(centroidID, p.CentroidID);
                }
            }

            Assert.AreEqual(NumPoints, target.TaskResult.NumPointsChanged);
            Assert.IsTrue(target.TaskResult.PointsProcessedDataByCentroid.ContainsKey(centroidID));
            Assert.AreEqual(NumPoints, target.TaskResult.PointsProcessedDataByCentroid[centroidID].NumPointsProcessed);

            // Partial sums: NumPoints points at (1, 2) give (1 * NumPoints, 2 * NumPoints).
            // The delta overload reports expected and actual values when the check fails.
            const double Epsilon = 0.0001;

            Assert.AreEqual(1.0 * NumPoints, target.TaskResult.PointsProcessedDataByCentroid[centroidID].PartialPointSum.X, Epsilon);
            Assert.AreEqual(2.0 * NumPoints, target.TaskResult.PointsProcessedDataByCentroid[centroidID].PartialPointSum.Y, Epsilon);
        }