コード例 #1
0
 /// <summary>
 /// Reads every centroid from the blob referenced by <c>task.Centroids</c> and stores
 /// the resulting list in <c>centroids</c>.
 /// </summary>
 private void InitializeCentroids()
 {
     // The reader is only needed while the list is being materialized.
     using (var centroidReader = new ObjectStreamReader<Centroid>(
         AzureHelper.GetBlob(task.Centroids),
         Centroid.FromByteArray,
         Centroid.Size))
     {
         centroids = centroidReader.ToList();
     }
 }
コード例 #2
0
        /// <summary>
        /// Checks that an <c>ObjectStreamReader</c> over serialized <c>ClusterPoint</c> records yields
        /// every record when enumerated serially and through PLINQ, and traces how long each pass took.
        /// </summary>
        public void EnumeratorTest()
        {
            const int NumElements = 5;
            ClusterPoint p = new ClusterPoint(1, 2, Guid.NewGuid());

            // Serialize the same point NumElements times into an in-memory buffer.
            byte[] serializedPoints;
            using (MemoryStream stream = new MemoryStream())
            {
                byte[] pointBytes = p.ToByteArray();
                for (int i = 0; i < NumElements; i++)
                {
                    stream.Write(pointBytes, 0, ClusterPoint.Size);
                }

                serializedPoints = stream.ToArray();
            }

            // The reader is disposable (other call sites wrap it in using), so release it when the test ends.
            using (ObjectStreamReader<ClusterPoint> pointStream = new ObjectStreamReader<ClusterPoint>(new MemoryStream(serializedPoints), ClusterPoint.FromByteArray, ClusterPoint.Size))
            {
                Assert.AreEqual(p.CentroidID, pointStream.First().CentroidID);

                // Stopwatch is monotonic; DateTime.Now can jump with clock adjustments.
                System.Diagnostics.Stopwatch serialTimer = System.Diagnostics.Stopwatch.StartNew();
                int[] serialOutput = pointStream.Select(point =>
                {
                    System.Threading.Thread.Sleep(200);
                    return 1;
                }).ToArray();
                serialTimer.Stop();
                Assert.AreEqual(NumElements, serialOutput.Length);

                System.Diagnostics.Stopwatch parallelTimer = System.Diagnostics.Stopwatch.StartNew();
                int[] parallelOutput = pointStream.AsParallel().Select(point =>
                {
                    System.Threading.Thread.Sleep(200);
                    return 1;
                }).ToArray();
                parallelTimer.Stop();
                Assert.AreEqual(NumElements, parallelOutput.Length);

                System.Diagnostics.Trace.WriteLine(string.Format("serial: {0}, parallel: {1}",
                    serialTimer.Elapsed.TotalSeconds,
                    parallelTimer.Elapsed.TotalSeconds));
            }
        }
コード例 #3
0
        /// <summary>
        /// Uploads a block containing an arbitrary <c>ClusterPoint</c>, hands a matching task result to
        /// <c>ProcessWorkerResponse</c>, and checks that the first point in the job's Points blob
        /// equals the uploaded point.
        /// </summary>
        public void ProcessWorkerResponseTest()
        {
            KMeansJobData jobData = new KMeansJobData(Guid.NewGuid(), 4, null, 2, 10, DateTime.Now);
            KMeansJob_Accessor target = new KMeansJob_Accessor(jobData, "server");

            target.InitializeStorage();

            // Upload a block with an arbitrary ClusterPoint, so we can verify it gets copied
            ClusterPoint arbitraryPoint = new ClusterPoint(1, 2, Guid.NewGuid());
            List<string> blockList;

            // Path.GetTempPath() always yields a usable directory; concatenating the raw TEMP
            // variable produced a root-relative "\<guid>" path whenever TEMP was not set.
            string cachePath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());

            using (ObjectCachedBlockWriter<ClusterPoint> pointPartitionWriteStream = new ObjectCachedBlockWriter<ClusterPoint>(target.Points, point => point.ToByteArray(), ClusterPoint.Size, cachePath))
            {
                pointPartitionWriteStream.Write(arbitraryPoint);
                pointPartitionWriteStream.FlushBlock();
                blockList = pointPartitionWriteStream.BlockList;
            }

            KMeansTaskData taskData = new KMeansTaskData(jobData, Guid.NewGuid(), 0, 1, target.Centroids.Uri, DateTime.Now, 0, null);

            // Replace whatever tasks exist with the single task this result belongs to
            target.tasks.Clear();
            target.tasks.Add(new KMeansTask(taskData));

            KMeansTaskResult taskResult = new KMeansTaskResult(taskData);
            CloudBlob pointsBlockListBlob = AzureHelper.CreateBlob(jobData.JobID.ToString(), Guid.NewGuid().ToString());

            // The block list is stored as a BinaryFormatter payload
            // (presumably the format ProcessWorkerResponse reads back; confirm before changing it).
            using (Stream stream = pointsBlockListBlob.OpenWrite())
            {
                BinaryFormatter bf = new BinaryFormatter();
                bf.Serialize(stream, blockList);
            }

            taskResult.PointsBlockListBlob = pointsBlockListBlob.Uri;
            taskResult.NumPointsChanged = 2;
            Guid centroidID = Guid.NewGuid();

            taskResult.PointsProcessedDataByCentroid = new Dictionary<Guid, PointsProcessedData>
            {
                {
                    centroidID,
                    new PointsProcessedData()
                    {
                        NumPointsProcessed = 2,
                        PartialPointSum = new Point(1, 2)
                    }
                }
            };
            target.ProcessWorkerResponse(taskResult, new List<Worker>());

            // Verify that the first ClusterPoint in Points is indeed equal to arbitraryPoint
            using (ObjectStreamReader<ClusterPoint> pointsStream = new ObjectStreamReader<ClusterPoint>(target.Points, ClusterPoint.FromByteArray, ClusterPoint.Size))
            {
                ClusterPoint point = pointsStream.First();
                Assert.AreEqual(arbitraryPoint.X, point.X);
                Assert.AreEqual(arbitraryPoint.Y, point.Y);
                Assert.AreEqual(arbitraryPoint.CentroidID, point.CentroidID);
            }
        }
コード例 #4
0
        /// <summary>
        /// Checks that an <c>ObjectStreamReader</c> over serialized <c>ClusterPoint</c> records yields
        /// every record when enumerated serially and through PLINQ, and traces how long each pass took.
        /// </summary>
        public void EnumeratorTest()
        {
            const int NumElements = 5;
            ClusterPoint p = new ClusterPoint(1, 2, Guid.NewGuid());

            // Serialize the same point NumElements times into an in-memory buffer.
            byte[] serializedPoints;

            using (MemoryStream stream = new MemoryStream())
            {
                byte[] pointBytes = p.ToByteArray();

                for (int i = 0; i < NumElements; i++)
                {
                    stream.Write(pointBytes, 0, ClusterPoint.Size);
                }

                serializedPoints = stream.ToArray();
            }

            // The reader is disposable (other call sites wrap it in using), so release it when the test ends.
            using (ObjectStreamReader<ClusterPoint> pointStream = new ObjectStreamReader<ClusterPoint>(new MemoryStream(serializedPoints), ClusterPoint.FromByteArray, ClusterPoint.Size))
            {
                Assert.AreEqual(p.CentroidID, pointStream.First().CentroidID);

                // Stopwatch is monotonic; DateTime.Now can jump with clock adjustments.
                System.Diagnostics.Stopwatch serialTimer = System.Diagnostics.Stopwatch.StartNew();

                int[] serialOutput = pointStream.Select(point =>
                {
                    System.Threading.Thread.Sleep(200);
                    return 1;
                }).ToArray();
                serialTimer.Stop();

                Assert.AreEqual(NumElements, serialOutput.Length);

                System.Diagnostics.Stopwatch parallelTimer = System.Diagnostics.Stopwatch.StartNew();

                int[] parallelOutput = pointStream.AsParallel().Select(point =>
                {
                    System.Threading.Thread.Sleep(200);
                    return 1;
                }).ToArray();
                parallelTimer.Stop();

                Assert.AreEqual(NumElements, parallelOutput.Length);

                System.Diagnostics.Trace.WriteLine(string.Format("serial: {0}, parallel: {1}",
                                                                 serialTimer.Elapsed.TotalSeconds,
                                                                 parallelTimer.Elapsed.TotalSeconds));
            }
        }
コード例 #5
0
        /// <summary>
        /// Rebuilds the HTML visualization from the given points and centroids blobs and
        /// displays the URIs of both blobs.
        /// </summary>
        /// <param name="points">Blob holding serialized <c>ClusterPoint</c> records.</param>
        /// <param name="centroids">Blob holding serialized <c>Centroid</c> records.</param>
        /// <param name="final">When true, up to 1000 points are rendered; otherwise up to 100.</param>
        private void UpdatePointsCentroids(CloudBlob points, CloudBlob centroids, bool final)
        {
            StringBuilder visualization = new StringBuilder();

            // Cap on rendered points, evaluated once instead of on every iteration.
            int maxPoints = final ? 1000 : 100;

            using (ObjectStreamReader<ClusterPoint> pointsStream = new ObjectStreamReader<ClusterPoint>(points, ClusterPoint.FromByteArray, ClusterPoint.Size))
            {
                int pointsRendered = 0;
                foreach (ClusterPoint p in pointsStream)
                {
                    // Invariant culture keeps the CSS lengths valid (no decimal commas) should
                    // the pixel helper return non-integer values.
                    visualization.AppendFormat(System.Globalization.CultureInfo.InvariantCulture,
                                               "<div class=\"point\" style=\"top:{0}px;left:{1}px;background-color:{2}\"></div>",
                                               PointUnitsToPixels(p.Y),
                                               PointUnitsToPixels(p.X),
                                               GuidToColor(p.CentroidID));

                    // Stop at exactly maxPoints; the previous post-increment ">" check let
                    // one extra point through.
                    pointsRendered++;
                    if (pointsRendered >= maxPoints)
                    {
                        break;
                    }
                }
            }

            // Centroids are always rendered in full.
            using (ObjectStreamReader<Centroid> centroidsStream = new ObjectStreamReader<Centroid>(centroids, Centroid.FromByteArray, Centroid.Size))
            {
                foreach (Centroid c in centroidsStream)
                {
                    visualization.AppendFormat(System.Globalization.CultureInfo.InvariantCulture,
                                               "<div class=\"centroid\" style=\"top:{0}px;left:{1}px;background-color:{2}\"></div>",
                                               PointUnitsToPixels(c.Y),
                                               PointUnitsToPixels(c.X),
                                               GuidToColor(c.ID));
                }
            }

            Visualization.Text = visualization.ToString();

            PointsURI.Text    = points.Uri.ToString();
            CentroidsURI.Text = centroids.Uri.ToString();
        }
コード例 #6
0
        /// <summary>
        /// Writes 100 identical points and 10 centroids (one near the points, nine far away) to test blobs,
        /// runs <c>ProcessPoints</c>, commits the resulting block list, and checks that every point was
        /// assigned to the nearby centroid and that the per-centroid totals match.
        /// </summary>
        public void ProcessPointsTest()
        {
            CloudBlobContainer container = AzureHelper.StorageAccount.CreateCloudBlobClient().GetContainerReference("test");
            container.CreateIfNotExist();
            CloudBlob points = container.GetBlobReference(Guid.NewGuid().ToString());
            CloudBlob centroids = container.GetBlobReference(Guid.NewGuid().ToString());
            const int NumPoints = 100, NumCentroids = 10;

            // All points sit at (1, 2) and start out unassigned (Guid.Empty)
            using (ObjectStreamWriter<ClusterPoint> pointStream = new ObjectStreamWriter<ClusterPoint>(points, point => point.ToByteArray(), ClusterPoint.Size))
            {
                for (int i = 0; i < NumPoints; i++)
                {
                    pointStream.Write(new ClusterPoint(1, 2, Guid.Empty));
                }
            }

            // One centroid at (3, 4); the remaining ones are placed far away at (1000, 1000)
            Guid centroidID = Guid.NewGuid();
            using (ObjectStreamWriter<Centroid> stream = new ObjectStreamWriter<Centroid>(centroids, point => point.ToByteArray(), Centroid.Size))
            {
                stream.Write(new Centroid(centroidID, 3, 4));

                for (int i = 0; i < NumCentroids - 1; i++)
                {
                    stream.Write(new Centroid(Guid.NewGuid(), 1000, 1000));
                }
            }

            KMeansTaskProcessor_Accessor target = new KMeansTaskProcessor_Accessor(new KMeansTaskData(Guid.NewGuid(), Guid.NewGuid(), NumPoints, points.Uri, NumCentroids, 1, 0, 0, centroids.Uri, DateTime.UtcNow, DateTime.UtcNow, 0, null));

            System.Diagnostics.Trace.WriteLine("Entering InitializeCentroids");
            target.InitializeCentroids();

            System.Diagnostics.Trace.WriteLine("Entering ProcessPoints");
            System.Diagnostics.Trace.WriteLine("ProcessPoints took " + AzureHelper.Time(() =>
            {
                target.ProcessPoints();
            }).TotalSeconds + " seconds");

            // Commit the blocks
            CloudBlockBlob newPointsBlob = AzureHelper.GetBlob(target.TaskResult.Points);
            using (Stream stream = AzureHelper.GetBlob(target.TaskResult.PointsBlockListBlob).OpenRead())
            {
                BinaryFormatter bf = new BinaryFormatter();

                // Direct cast: an unexpected payload type fails here with InvalidCastException
                // instead of silently handing null to PutBlockList.
                List<string> pointsBlockList = (List<string>)bf.Deserialize(stream);
                newPointsBlob.PutBlockList(pointsBlockList);
            }

            // Every point read back must have been assigned to the nearby centroid
            using (ObjectStreamReader<ClusterPoint> stream = new ObjectStreamReader<ClusterPoint>(newPointsBlob, ClusterPoint.FromByteArray, ClusterPoint.Size))
            {
                foreach (ClusterPoint p in stream)
                {
                    Assert.AreEqual(centroidID, p.CentroidID);
                }
            }

            Assert.AreEqual(NumPoints, target.TaskResult.NumPointsChanged);
            Assert.IsTrue(target.TaskResult.PointsProcessedDataByCentroid.ContainsKey(centroidID));
            Assert.AreEqual(NumPoints, target.TaskResult.PointsProcessedDataByCentroid[centroidID].NumPointsProcessed);

            // The partial sum should be NumPoints * (1, 2), compared with a small tolerance
            const double Epsilon = 0.0001;
            Assert.IsTrue(Math.Abs((1 * NumPoints) - target.TaskResult.PointsProcessedDataByCentroid[centroidID].PartialPointSum.X) < Epsilon);
            Assert.IsTrue(Math.Abs((2 * NumPoints) - target.TaskResult.PointsProcessedDataByCentroid[centroidID].PartialPointSum.Y) < Epsilon);
        }
コード例 #7
0
        /// <summary>
        /// Rebuilds the HTML visualization from the given points and centroids blobs and
        /// displays the URIs of both blobs.
        /// </summary>
        /// <param name="points">Blob holding serialized <c>ClusterPoint</c> records.</param>
        /// <param name="centroids">Blob holding serialized <c>Centroid</c> records.</param>
        /// <param name="final">When true, up to 1000 points are rendered; otherwise up to 100.</param>
        private void UpdatePointsCentroids(CloudBlob points, CloudBlob centroids, bool final)
        {
            StringBuilder visualization = new StringBuilder();

            // Cap on rendered points, evaluated once instead of on every iteration.
            int maxPoints = final ? 1000 : 100;

            using (ObjectStreamReader<ClusterPoint> pointsStream = new ObjectStreamReader<ClusterPoint>(points, ClusterPoint.FromByteArray, ClusterPoint.Size))
            {
                int pointsRendered = 0;
                foreach (ClusterPoint p in pointsStream)
                {
                    // Invariant culture keeps the CSS lengths valid (no decimal commas) should
                    // the pixel helper return non-integer values.
                    visualization.AppendFormat(System.Globalization.CultureInfo.InvariantCulture,
                        "<div class=\"point\" style=\"top:{0}px;left:{1}px;background-color:{2}\"></div>",
                        PointUnitsToPixels(p.Y),
                        PointUnitsToPixels(p.X),
                        GuidToColor(p.CentroidID));

                    // Stop at exactly maxPoints; the previous post-increment ">" check let
                    // one extra point through.
                    pointsRendered++;
                    if (pointsRendered >= maxPoints)
                    {
                        break;
                    }
                }
            }

            // Centroids are always rendered in full.
            using (ObjectStreamReader<Centroid> centroidsStream = new ObjectStreamReader<Centroid>(centroids, Centroid.FromByteArray, Centroid.Size))
            {
                foreach (Centroid c in centroidsStream)
                {
                    visualization.AppendFormat(System.Globalization.CultureInfo.InvariantCulture,
                        "<div class=\"centroid\" style=\"top:{0}px;left:{1}px;background-color:{2}\"></div>",
                        PointUnitsToPixels(c.Y),
                        PointUnitsToPixels(c.X),
                        GuidToColor(c.ID));
                }
            }

            Visualization.Text = visualization.ToString();

            PointsURI.Text = points.Uri.ToString();
            CentroidsURI.Text = centroids.Uri.ToString();
        }
コード例 #8
0
        /// <summary>
        /// Opens a reader over the locally stored activity stream that matches <paramref name="filter"/>.
        /// </summary>
        /// <remarks>
        /// The first index in <c>Indexes</c> that contains the filter and whose localized file exists is
        /// preferred; otherwise the filter's own URI and then its path are tried. An <c>.index</c> file
        /// yields an id-based reader (reversed when <c>filter.reverse</c> is set), a <c>.json</c> file
        /// yields a plain object reader, and a missing file yields an empty reader.
        /// </remarks>
        /// <param name="filter">The filter describing which activity stream to open.</param>
        /// <returns>A completed task holding the reader.</returns>
        /// <exception cref="NotSupportedException">The resolved file has an unrecognized extension.</exception>
        protected virtual Task <BaseObjectStreamReader> OpenReader(ActivityStreamFilter filter)
        {
            string path = null;

            // Prefer the first matching index whose file is actually present on disk.
            foreach (var index in Indexes)
            {
                if (!index.Contains(filter))
                {
                    continue;
                }

                var candidate = Localize(index.ToUri(null));
                if (System.IO.File.Exists(candidate))
                {
                    path = candidate;
                    break;
                }
            }

            // No usable index: fall back to the filter's own URI, then to its path.
            if (path == null)
            {
                path = Localize(filter.ToUri(null));
                if (!System.IO.File.Exists(path))
                {
                    path = Localize(filter.ToPath(null));
                }
            }

            // Nothing stored for this filter.
            if (!System.IO.File.Exists(path))
            {
                return(Task.FromResult <BaseObjectStreamReader>(new EmptyObjectStreamReader()));
            }

            BaseObjectStreamReader ret;

            switch (System.IO.Path.GetExtension(path))
            {
            case ".index":
            {
                var s = System.IO.File.OpenRead(path);
                if (filter.reverse)
                {
                    // The reverse reader is handed null for ids whose file is missing.
                    ret = new ReverseObjectIdStreamReader(s, id =>
                        {
                            var p = Localize(filter.ToPath(id));
                            return(System.IO.File.Exists(p) ? System.IO.File.OpenRead(p) : null);
                        }, _serializer);
                }
                else
                {
                    ret = new ObjectIdStreamReader(s, id => System.IO.File.OpenRead(Localize(filter.ToPath(id))), _serializer);
                }
            }
            break;

            case ".json":
                ret = new ObjectStreamReader(System.IO.File.OpenRead(path), _serializer);
                break;

            default:
                // A specific exception type instead of bare Exception; existing handlers that
                // catch Exception still see it.
                throw new NotSupportedException($"Invalid activity stream type: {path}");
            }

            return(Task.FromResult(ret));
        }
コード例 #9
0
        /// <summary>
        /// Prepares the Points and Centroids blobs in Azure storage ahead of the first k-means iteration.
        /// Points are generated randomly unless the job already references a points blob;
        /// centroids are always generated randomly.
        /// </summary>
        public void InitializeStorage()
        {
            AzureHelper.LogPerformance(() =>
            {
                Random rng = new Random();

                if (jobData.Points != null)
                {
                    // A points blob was supplied with the job: reuse it...
                    Points = AzureHelper.GetBlob(jobData.Points);

                    // ...and take N from the length reported by a reader over it
                    using (var existingPoints = new ObjectStreamReader<ClusterPoint>(Points, ClusterPoint.FromByteArray, ClusterPoint.Size))
                    {
                        jobData.N = (int)existingPoints.Length;
                    }
                }
                else
                {
                    // No points blob given: create one and fill it with N random, unassigned ClusterPoints
                    Points = AzureHelper.CreateBlob(jobData.JobID.ToString(), AzureHelper.PointsBlob);
                    using (var pointWriter = new ObjectStreamWriter<ClusterPoint>(Points, point => point.ToByteArray(), ClusterPoint.Size))
                    {
                        for (int pointIndex = 0; pointIndex < jobData.N; pointIndex++)
                        {
                            double x = rng.NextDouble() * 100 - 50;
                            double y = rng.NextDouble() * 100 - 50;
                            pointWriter.Write(new ClusterPoint(x, y, Guid.Empty));
                        }
                    }
                }

                // Create the centroids blob and fill it with K random Centroids
                Centroids = AzureHelper.CreateBlob(jobData.JobID.ToString(), AzureHelper.CentroidsBlob);
                using (var centroidWriter = new ObjectStreamWriter<Centroid>(Centroids, point => point.ToByteArray(), Centroid.Size))
                {
                    for (int centroidIndex = 0; centroidIndex < jobData.K; centroidIndex++)
                    {
                        Guid id = Guid.NewGuid();
                        int x = rng.Next(-PointRange, PointRange);
                        int y = rng.Next(-PointRange, PointRange);
                        centroidWriter.Write(new Centroid(id, x, y));
                    }
                }
            },
            jobID: jobData.JobID.ToString(),
            methodName: "InitializeStorage",
            iterationCount: IterationCount,
            points: new Lazy<string>(() => Points.Uri.ToString()),
            centroids: new Lazy<string>(() => Centroids.Uri.ToString()),
            machineID: MachineID);
        }
コード例 #10
0
        /// <summary>
        /// Moves every centroid to the mean of the points assigned to it, writes the updated
        /// centroids to a temporary blob, copies that blob over <c>Centroids</c>, and resets the
        /// point-changed counters.
        /// </summary>
        private void RecalculateCentroids()
        {
            AzureHelper.LogPerformance(() =>
            {
                // Initialize the output blob
                CloudBlob writeBlob = AzureHelper.CreateBlob(jobData.JobID.ToString(), Guid.NewGuid().ToString());

                // Do the mapping and write the new blob
                using (ObjectStreamReader<Centroid> stream = new ObjectStreamReader<Centroid>(Centroids, Centroid.FromByteArray, Centroid.Size))
                {
                    // Deferred query: it is evaluated while the writer below enumerates it,
                    // so the reader must stay open until the writing is done.
                    var newCentroids = stream.Select(c =>
                        {
                            Point newCentroidPoint;
                            PointsProcessedData processed;

                            // Single lookup instead of ContainsKey followed by repeated indexer reads
                            if (totalPointsProcessedDataByCentroid.TryGetValue(c.ID, out processed) && processed.NumPointsProcessed != 0)
                            {
                                newCentroidPoint = processed.PartialPointSum / (double)processed.NumPointsProcessed;
                            }
                            else
                            {
                                // No points were processed for this centroid: fall back to a default Point
                                newCentroidPoint = new Point();
                            }

                            c.X = newCentroidPoint.X;
                            c.Y = newCentroidPoint.Y;

                            return c;
                        });

                    using (ObjectStreamWriter<Centroid> writeStream = new ObjectStreamWriter<Centroid>(writeBlob, point => point.ToByteArray(), Centroid.Size))
                    {
                        foreach (Centroid c in newCentroids)
                        {
                            writeStream.Write(c);
                        }
                    }
                }

                // Copy the contents of the new blob back into the old blob
                Centroids.CopyFromBlob(writeBlob);

                System.Diagnostics.Trace.TraceInformation("[ServerRole] Finished RecalculateCentroids(). Total points changed: {0}", TotalNumPointsChanged);

                ResetPointChangedCounts();

            }, jobData.JobID.ToString(), methodName: "RecalculateCentroids", iterationCount: IterationCount, points: Points.Uri.ToString(), centroids: Centroids.Uri.ToString(), machineID: MachineID);
        }
コード例 #11
0
        /// <summary>
        /// Writes 100 identical points and 10 centroids (one near the points, nine far away) to test blobs,
        /// runs <c>ProcessPoints</c>, commits the resulting block list, and checks that every point was
        /// assigned to the nearby centroid and that the per-centroid totals match.
        /// </summary>
        public void ProcessPointsTest()
        {
            CloudBlobContainer container = AzureHelper.StorageAccount.CreateCloudBlobClient().GetContainerReference("test");

            container.CreateIfNotExist();
            CloudBlob points = container.GetBlobReference(Guid.NewGuid().ToString());
            CloudBlob centroids = container.GetBlobReference(Guid.NewGuid().ToString());
            const int NumPoints = 100, NumCentroids = 10;

            // All points sit at (1, 2) and start out unassigned (Guid.Empty)
            using (ObjectStreamWriter <ClusterPoint> pointStream = new ObjectStreamWriter <ClusterPoint>(points, point => point.ToByteArray(), ClusterPoint.Size))
            {
                for (int i = 0; i < NumPoints; i++)
                {
                    pointStream.Write(new ClusterPoint(1, 2, Guid.Empty));
                }
            }

            // One centroid at (3, 4); the remaining ones are placed far away at (1000, 1000)
            Guid centroidID = Guid.NewGuid();

            using (ObjectStreamWriter <Centroid> stream = new ObjectStreamWriter <Centroid>(centroids, point => point.ToByteArray(), Centroid.Size))
            {
                stream.Write(new Centroid(centroidID, 3, 4));

                for (int i = 0; i < NumCentroids - 1; i++)
                {
                    stream.Write(new Centroid(Guid.NewGuid(), 1000, 1000));
                }
            }

            KMeansTaskProcessor_Accessor target = new KMeansTaskProcessor_Accessor(new KMeansTaskData(Guid.NewGuid(), Guid.NewGuid(), NumPoints, points.Uri, NumCentroids, 1, 0, 0, centroids.Uri, DateTime.UtcNow, DateTime.UtcNow, 0, null));

            System.Diagnostics.Trace.WriteLine("Entering InitializeCentroids");
            target.InitializeCentroids();

            System.Diagnostics.Trace.WriteLine("Entering ProcessPoints");
            System.Diagnostics.Trace.WriteLine("ProcessPoints took " + AzureHelper.Time(() =>
            {
                target.ProcessPoints();
            }).TotalSeconds + " seconds");

            // Commit the blocks
            CloudBlockBlob newPointsBlob = AzureHelper.GetBlob(target.TaskResult.Points);

            using (Stream stream = AzureHelper.GetBlob(target.TaskResult.PointsBlockListBlob).OpenRead())
            {
                BinaryFormatter bf = new BinaryFormatter();

                // Direct cast: an unexpected payload type fails here with InvalidCastException
                // instead of silently handing null to PutBlockList.
                List <string> pointsBlockList = (List <string>)bf.Deserialize(stream);
                newPointsBlob.PutBlockList(pointsBlockList);
            }

            // Every point read back must have been assigned to the nearby centroid
            using (ObjectStreamReader <ClusterPoint> stream = new ObjectStreamReader <ClusterPoint>(newPointsBlob, ClusterPoint.FromByteArray, ClusterPoint.Size))
            {
                foreach (ClusterPoint p in stream)
                {
                    Assert.AreEqual(centroidID, p.CentroidID);
                }
            }

            Assert.AreEqual(NumPoints, target.TaskResult.NumPointsChanged);
            Assert.IsTrue(target.TaskResult.PointsProcessedDataByCentroid.ContainsKey(centroidID));
            Assert.AreEqual(NumPoints, target.TaskResult.PointsProcessedDataByCentroid[centroidID].NumPointsProcessed);

            // The partial sum should be NumPoints * (1, 2), compared with a small tolerance
            const double Epsilon = 0.0001;

            Assert.IsTrue(Math.Abs((1 * NumPoints) - target.TaskResult.PointsProcessedDataByCentroid[centroidID].PartialPointSum.X) < Epsilon);
            Assert.IsTrue(Math.Abs((2 * NumPoints) - target.TaskResult.PointsProcessedDataByCentroid[centroidID].PartialPointSum.Y) < Epsilon);
        }
コード例 #12
0
        /// <summary>
        /// Uploads a block containing an arbitrary <c>ClusterPoint</c>, hands a matching task result to
        /// <c>ProcessWorkerResponse</c>, and checks that the first point in the job's Points blob
        /// equals the uploaded point.
        /// </summary>
        public void ProcessWorkerResponseTest()
        {
            KMeansJobData jobData = new KMeansJobData(Guid.NewGuid(), 4, null, 2, 10, DateTime.Now);
            KMeansJob_Accessor target = new KMeansJob_Accessor(jobData, "server");
            target.InitializeStorage();

            // Upload a block with an arbitrary ClusterPoint, so we can verify it gets copied
            ClusterPoint arbitraryPoint = new ClusterPoint(1, 2, Guid.NewGuid());
            List<string> blockList;

            // Path.GetTempPath() always yields a usable directory; concatenating the raw TEMP
            // variable produced a root-relative "\<guid>" path whenever TEMP was not set.
            string cachePath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());
            using (ObjectCachedBlockWriter<ClusterPoint> pointPartitionWriteStream = new ObjectCachedBlockWriter<ClusterPoint>(target.Points, point => point.ToByteArray(), ClusterPoint.Size, cachePath))
            {
                pointPartitionWriteStream.Write(arbitraryPoint);
                pointPartitionWriteStream.FlushBlock();
                blockList = pointPartitionWriteStream.BlockList;
            }

            KMeansTaskData taskData = new KMeansTaskData(jobData, Guid.NewGuid(), 0, 1, target.Centroids.Uri, DateTime.Now, 0, null);

            // Replace whatever tasks exist with the single task this result belongs to
            target.tasks.Clear();
            target.tasks.Add(new KMeansTask(taskData));

            KMeansTaskResult taskResult = new KMeansTaskResult(taskData);
            CloudBlob pointsBlockListBlob = AzureHelper.CreateBlob(jobData.JobID.ToString(), Guid.NewGuid().ToString());

            // The block list is stored as a BinaryFormatter payload
            // (presumably the format ProcessWorkerResponse reads back; confirm before changing it).
            using (Stream stream = pointsBlockListBlob.OpenWrite())
            {
                BinaryFormatter bf = new BinaryFormatter();
                bf.Serialize(stream, blockList);
            }
            taskResult.PointsBlockListBlob = pointsBlockListBlob.Uri;
            taskResult.NumPointsChanged = 2;
            Guid centroidID = Guid.NewGuid();
            taskResult.PointsProcessedDataByCentroid = new Dictionary<Guid, PointsProcessedData>
            {
                {
                    centroidID,
                    new PointsProcessedData()
                    {
                        NumPointsProcessed = 2,
                        PartialPointSum = new Point(1, 2)
                    }
                }
            };
            target.ProcessWorkerResponse(taskResult, new List<Worker>());

            // Verify that the first ClusterPoint in Points is indeed equal to arbitraryPoint
            using (ObjectStreamReader<ClusterPoint> pointsStream = new ObjectStreamReader<ClusterPoint>(target.Points, ClusterPoint.FromByteArray, ClusterPoint.Size))
            {
                ClusterPoint point = pointsStream.First();
                Assert.AreEqual(arbitraryPoint.X, point.X);
                Assert.AreEqual(arbitraryPoint.Y, point.Y);
                Assert.AreEqual(arbitraryPoint.CentroidID, point.CentroidID);
            }
        }