/// <summary>
        /// Clusters <c>_testInput2</c> with five linkage/distance argument combinations and checks that
        /// every run yields the same clusters, that the result holds <c>2 * input count - 1</c> clusters,
        /// and that the expected merges (and none of the unexpected ones) are present.
        /// </summary>
        public void HACClusterDataTest4D()
        {
            HAC clusterer = new HAC();

            // The first run is the reference; the remaining runs are evaluated in listed order.
            List<Cluster> reference = clusterer.ClusterData(_testInput2, "average", "euclidean", 0);
            List<Cluster>[] alternatives =
            {
                clusterer.ClusterData(_testInput2, "minimum", "euclidean2", 0),
                clusterer.ClusterData(_testInput2, "minimum", "manhattan", 0),
                clusterer.ClusterData(_testInput2, "maximum", "euclidean", 0),
                clusterer.ClusterData(_testInput2, "maximum", "manhattan", 0)
            };

            // Outputs count as equal when the sizes match and every reference cluster
            // is found in each alternative output.
            bool allEqual = true;
            foreach (List<Cluster> alternative in alternatives)
            {
                if (alternative.Count != reference.Count)
                {
                    allEqual = false;
                    break;
                }
            }

            if (allEqual)
            {
                foreach (List<Cluster> alternative in alternatives)
                {
                    foreach (Cluster cluster in reference)
                    {
                        if (!alternative.Contains(cluster))
                        {
                            allEqual = false;
                        }
                    }
                }
            }

            Assert.IsTrue(allEqual, "Outputs are not equal");

            var expectedSize = _testInput2.Count * 2 - 1;
            Assert.IsTrue(reference.Count.Equals(expectedSize), "HAC output incomplete");

            // Every input point must appear as a single-point cluster.
            foreach (var point in new[] { _p7, _p8, _p9, _p10 })
            {
                Assert.IsTrue(reference.Contains(new Cluster(point)), "HAC output 1 incomplete");
            }

            // Merged clusters that must be present in the output.
            List<Point>[] expectedMerges =
            {
                new List<Point> { _p7, _p8 },
                new List<Point> { _p7, _p8, _p9 },
                new List<Point> { _p7, _p8, _p9, _p10 }
            };
            foreach (List<Point> members in expectedMerges)
            {
                Assert.IsTrue(reference.Contains(new Cluster(members)), "HAC output 1 incomplete");
            }

            // Merged clusters that must NOT be present in the output.
            List<Point>[] unexpectedMerges =
            {
                new List<Point> { _p7, _p9 },
                new List<Point> { _p9, _p10 }
            };
            foreach (List<Point> members in unexpectedMerges)
            {
                Assert.IsFalse(reference.Contains(new Cluster(members)), "HAC output 1 has incorrect data");
            }
        }
Beispiel #2
0
        /// <summary>
        /// Benchmarks <c>HAC.ClusterData</c> on generated data for every combination of point count and
        /// dimension, and writes the mean elapsed time per combination to <paramref name="resultFilename"/>.
        /// Progress and the start/finish messages are also written to the console.
        /// </summary>
        /// <param name="pointCounts">Point counts to benchmark; each is combined with every entry of <paramref name="pointDims"/>.</param>
        /// <param name="pointDims">Dimension values passed to the data generator.</param>
        /// <param name="averageOf">Number of timed runs per combination; the reported time is their integer mean.</param>
        /// <param name="linkageMethod">Linkage method name forwarded to <c>HAC.ClusterData</c>.</param>
        /// <param name="distanceMethod">Distance method name forwarded to <c>HAC.ClusterData</c>.</param>
        /// <param name="createFiles">When true, file names are passed to the data generator and each clustering result is saved.</param>
        /// <param name="resultFilename">Path of the text file that receives the benchmark results.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="averageOf"/> is zero or negative.</exception>
        static void RunBenchmark(int[] pointCounts, int[] pointDims, int averageOf, string linkageMethod = "average", string distanceMethod = "euclideansquared", bool createFiles = false, string resultFilename = "benchmark_results.txt")
        {
            // Reject a non-positive run count up front: zero would divide by zero when averaging,
            // and a negative value would skip every run and report a meaningless 0 ms.
            if (averageOf <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(averageOf), averageOf, "averageOf must be a positive number of runs.");
            }

            string startupMessage = $"Clustering benchmark with parameters: averageOf={averageOf}," +
                                    $" linkageMethod=\"{linkageMethod}\", distanceMethod=\"{distanceMethod}\"";

            Console.WriteLine(startupMessage);
            DataIO dio = new DataIO();
            HAC    hac = new HAC();

            using (StreamWriter file = new StreamWriter(resultFilename))
            {
                file.WriteLine(startupMessage);
                foreach (int count in pointCounts)
                {
                    foreach (int dim in pointDims)
                    {
                        long totalMilliseconds = 0;
                        for (int i = 0; i < averageOf; ++i)
                        {
                            // Empty file names are passed when no files are requested.
                            string inputFilename  = createFiles ? $"testGenerated-{count}-{dim}-{i}.txt" : "";
                            string outputFilename = createFiles ? $"testOutput-{count}-{dim}-{i}.txt" : "";

                            Console.WriteLine($"Now clustering: {count}-{dim}-{i}...");

                            // Data generation is deliberately kept outside the timed section.
                            List <Point> input = dio.GenerateData(-1000000, 1000000, count, dim, inputFilename);

                            // Use the requested linkage/distance methods so the measurement matches the
                            // parameters reported in the startup message.
                            // NOTE(review): this call previously hard-coded "average"/"euclidean2"; the
                            // default distanceMethod is "euclideansquared" - confirm HAC.ClusterData accepts it.
                            Stopwatch      watch  = Stopwatch.StartNew();
                            List <Cluster> output = hac.ClusterData(input, linkageMethod, distanceMethod, 0);
                            watch.Stop();

                            totalMilliseconds += watch.ElapsedMilliseconds;
                            if (createFiles)
                            {
                                dio.SaveData(output, outputFilename);
                            }
                        }

                        // Integer mean of the timed runs (fractional milliseconds are truncated).
                        long result = totalMilliseconds / averageOf;
                        file.WriteLine($"{count} points, {dim} dimensions = {result} milliseconds");
                    }
                }
            }
            Console.WriteLine("Benchmark finished successfully");
        }
        /// <summary>
        /// Clusters <c>_testInput</c> with four linkage/distance argument combinations. Runs 1, 2 and 4
        /// must produce the same clusters; runs 1 and 3 are each checked for the expected size
        /// (<c>2 * input count - 1</c>), the expected merges, and the absence of unexpected merges.
        /// </summary>
        public void HACClusterDataTest2D()
        {
            HAC clusterer = new HAC();
            List<Cluster> averageEuclidean2 = clusterer.ClusterData(_testInput, "average", "euclidean2", 0);
            List<Cluster> averageManhattan  = clusterer.ClusterData(_testInput, "average", "manhattan", 0);
            List<Cluster> minimumEuclidean  = clusterer.ClusterData(_testInput, "minimum", "euclidean", 0);
            List<Cluster> maximumEuclidean2 = clusterer.ClusterData(_testInput, "maximum", "euclidean2", 0);

            // Runs 1, 2 and 4 are equal when sizes match and every cluster of run 1 is in runs 2 and 4.
            bool equal124 = averageEuclidean2.Count == averageManhattan.Count &&
                            averageEuclidean2.Count == maximumEuclidean2.Count;
            if (equal124)
            {
                foreach (Cluster cluster in averageEuclidean2)
                {
                    bool inBoth = averageManhattan.Contains(cluster) && maximumEuclidean2.Contains(cluster);
                    if (!inBoth)
                    {
                        equal124 = false;
                    }
                }
            }

            Assert.IsTrue(equal124, "Outputs 1, 2 and 4 are not equal");

            // ---- Run 1 (average / euclidean2) ----
            var expectedSize1 = _testInput.Count * 2 - 1;
            Assert.IsTrue(averageEuclidean2.Count.Equals(expectedSize1), "HAC output incomplete");

            // Every input point must appear as a single-point cluster.
            foreach (var point in new[] { _p1, _p2, _p3, _p4, _p5, _p6 })
            {
                Assert.IsTrue(averageEuclidean2.Contains(new Cluster(point)), "HAC output 1 incomplete");
            }

            List<Point>[] expectedMerges1 =
            {
                new List<Point> { _p1, _p2 },
                new List<Point> { _p3, _p4 },
                new List<Point> { _p5, _p6 },
                new List<Point> { _p3, _p4, _p5, _p6 },
                new List<Point> { _p1, _p2, _p3, _p4, _p5, _p6 }
            };
            foreach (List<Point> members in expectedMerges1)
            {
                Assert.IsTrue(averageEuclidean2.Contains(new Cluster(members)), "HAC output 1 incomplete");
            }

            List<Point>[] unexpectedMerges1 =
            {
                new List<Point> { _p1, _p2, _p3 },
                new List<Point> { _p1, _p6 }
            };
            foreach (List<Point> members in unexpectedMerges1)
            {
                Assert.IsFalse(averageEuclidean2.Contains(new Cluster(members)), "HAC output 1 has incorrect data");
            }

            // ---- Run 3 (minimum / euclidean) ----
            var expectedSize3 = _testInput.Count * 2 - 1;
            Assert.IsTrue(minimumEuclidean.Count.Equals(expectedSize3), "HAC output 3 incomplete");

            foreach (var point in new[] { _p1, _p2, _p3, _p4, _p5, _p6 })
            {
                Assert.IsTrue(minimumEuclidean.Contains(new Cluster(point)), "HAC output 3 incomplete");
            }

            // Differs from run 1 in the four-point merge: {p1..p4} instead of {p3..p6}.
            List<Point>[] expectedMerges3 =
            {
                new List<Point> { _p1, _p2 },
                new List<Point> { _p3, _p4 },
                new List<Point> { _p5, _p6 },
                new List<Point> { _p1, _p2, _p3, _p4 },
                new List<Point> { _p1, _p2, _p3, _p4, _p5, _p6 }
            };
            foreach (List<Point> members in expectedMerges3)
            {
                Assert.IsTrue(minimumEuclidean.Contains(new Cluster(members)), "HAC output 3 incomplete");
            }

            List<Point>[] unexpectedMerges3 =
            {
                new List<Point> { _p1, _p2, _p3 },
                new List<Point> { _p1, _p6 }
            };
            foreach (List<Point> members in unexpectedMerges3)
            {
                Assert.IsFalse(minimumEuclidean.Contains(new Cluster(members)), "HAC output 3 has incorrect data");
            }
        }